aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorPaul Mundt <lethal@linux-sh.org>2010-08-16 00:32:24 -0400
committerPaul Mundt <lethal@linux-sh.org>2010-08-16 00:32:24 -0400
commitbbcf6e8b66ab2fb5ddab4d0fe40c2e6a5ebe5301 (patch)
tree071fa9f86dc04a16570be367d04cff3b00c694ad /net/ipv4
parent57682827b9a5edb52e33af0be9082b51bffcd5c7 (diff)
parentda5cabf80e2433131bf0ed8993abc0f7ea618c73 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts: arch/sh/include/asm/Kbuild drivers/Makefile Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c84
-rw-r--r--net/ipv4/arp.c51
-rw-r--r--net/ipv4/datagram.c4
-rw-r--r--net/ipv4/devinet.c1
-rw-r--r--net/ipv4/fib_frontend.c13
-rw-r--r--net/ipv4/icmp.c37
-rw-r--r--net/ipv4/igmp.c32
-rw-r--r--net/ipv4/inet_connection_sock.c21
-rw-r--r--net/ipv4/inet_fragment.c1
-rw-r--r--net/ipv4/inet_hashtables.c4
-rw-r--r--net/ipv4/inetpeer.c244
-rw-r--r--net/ipv4/ip_forward.c10
-rw-r--r--net/ipv4/ip_fragment.c27
-rw-r--r--net/ipv4/ip_gre.c16
-rw-r--r--net/ipv4/ip_input.c26
-rw-r--r--net/ipv4/ip_output.c78
-rw-r--r--net/ipv4/ip_sockglue.c45
-rw-r--r--net/ipv4/ipconfig.c7
-rw-r--r--net/ipv4/ipip.c8
-rw-r--r--net/ipv4/ipmr.c8
-rw-r--r--net/ipv4/netfilter.c12
-rw-r--r--net/ipv4/netfilter/arp_tables.c22
-rw-r--r--net/ipv4/netfilter/ip_queue.c57
-rw-r--r--net/ipv4/netfilter/ip_tables.c16
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c50
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c54
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c6
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c12
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c29
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_common.c12
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_dccp.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c12
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c10
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_sctp.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_tcp.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udp.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udplite.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_unknown.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c10
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c10
-rw-r--r--net/ipv4/proc.c16
-rw-r--r--net/ipv4/protocol.c3
-rw-r--r--net/ipv4/raw.c22
-rw-r--r--net/ipv4/route.c518
-rw-r--r--net/ipv4/syncookies.c105
-rw-r--r--net/ipv4/tcp.c78
-rw-r--r--net/ipv4/tcp_input.c22
-rw-r--r--net/ipv4/tcp_ipv4.c175
-rw-r--r--net/ipv4/tcp_minisocks.c9
-rw-r--r--net/ipv4/tcp_output.c77
-rw-r--r--net/ipv4/tcp_timer.c1
-rw-r--r--net/ipv4/tunnel4.c2
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv4/udplite.c3
-rw-r--r--net/ipv4/xfrm4_input.c1
-rw-r--r--net/ipv4/xfrm4_policy.c2
57 files changed, 1149 insertions, 955 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 551ce564b035..6a1100c25a9f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -355,6 +355,8 @@ lookup_protocol:
355 inet = inet_sk(sk); 355 inet = inet_sk(sk);
356 inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; 356 inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
357 357
358 inet->nodefrag = 0;
359
358 if (SOCK_RAW == sock->type) { 360 if (SOCK_RAW == sock->type) {
359 inet->inet_num = protocol; 361 inet->inet_num = protocol;
360 if (IPPROTO_RAW == protocol) 362 if (IPPROTO_RAW == protocol)
@@ -725,28 +727,31 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
725 sock_rps_record_flow(sk); 727 sock_rps_record_flow(sk);
726 728
727 /* We may need to bind the socket. */ 729 /* We may need to bind the socket. */
728 if (!inet_sk(sk)->inet_num && inet_autobind(sk)) 730 if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
731 inet_autobind(sk))
729 return -EAGAIN; 732 return -EAGAIN;
730 733
731 return sk->sk_prot->sendmsg(iocb, sk, msg, size); 734 return sk->sk_prot->sendmsg(iocb, sk, msg, size);
732} 735}
733EXPORT_SYMBOL(inet_sendmsg); 736EXPORT_SYMBOL(inet_sendmsg);
734 737
735static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, 738ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
736 size_t size, int flags) 739 size_t size, int flags)
737{ 740{
738 struct sock *sk = sock->sk; 741 struct sock *sk = sock->sk;
739 742
740 sock_rps_record_flow(sk); 743 sock_rps_record_flow(sk);
741 744
742 /* We may need to bind the socket. */ 745 /* We may need to bind the socket. */
743 if (!inet_sk(sk)->inet_num && inet_autobind(sk)) 746 if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
747 inet_autobind(sk))
744 return -EAGAIN; 748 return -EAGAIN;
745 749
746 if (sk->sk_prot->sendpage) 750 if (sk->sk_prot->sendpage)
747 return sk->sk_prot->sendpage(sk, page, offset, size, flags); 751 return sk->sk_prot->sendpage(sk, page, offset, size, flags);
748 return sock_no_sendpage(sock, page, offset, size, flags); 752 return sock_no_sendpage(sock, page, offset, size, flags);
749} 753}
754EXPORT_SYMBOL(inet_sendpage);
750 755
751int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, 756int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
752 size_t size, int flags) 757 size_t size, int flags)
@@ -892,10 +897,10 @@ const struct proto_ops inet_stream_ops = {
892 .shutdown = inet_shutdown, 897 .shutdown = inet_shutdown,
893 .setsockopt = sock_common_setsockopt, 898 .setsockopt = sock_common_setsockopt,
894 .getsockopt = sock_common_getsockopt, 899 .getsockopt = sock_common_getsockopt,
895 .sendmsg = tcp_sendmsg, 900 .sendmsg = inet_sendmsg,
896 .recvmsg = inet_recvmsg, 901 .recvmsg = inet_recvmsg,
897 .mmap = sock_no_mmap, 902 .mmap = sock_no_mmap,
898 .sendpage = tcp_sendpage, 903 .sendpage = inet_sendpage,
899 .splice_read = tcp_splice_read, 904 .splice_read = tcp_splice_read,
900#ifdef CONFIG_COMPAT 905#ifdef CONFIG_COMPAT
901 .compat_setsockopt = compat_sock_common_setsockopt, 906 .compat_setsockopt = compat_sock_common_setsockopt,
@@ -1100,7 +1105,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1100 if (err) 1105 if (err)
1101 return err; 1106 return err;
1102 1107
1103 sk_setup_caps(sk, &rt->u.dst); 1108 sk_setup_caps(sk, &rt->dst);
1104 1109
1105 new_saddr = rt->rt_src; 1110 new_saddr = rt->rt_src;
1106 1111
@@ -1166,7 +1171,7 @@ int inet_sk_rebuild_header(struct sock *sk)
1166 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0); 1171 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
1167} 1172}
1168 if (!err) 1173 if (!err)
1169 sk_setup_caps(sk, &rt->u.dst); 1174 sk_setup_caps(sk, &rt->dst);
1170 else { 1175 else {
1171 /* Routing failed... */ 1176 /* Routing failed... */
1172 sk->sk_route_caps = 0; 1177 sk->sk_route_caps = 0;
@@ -1425,13 +1430,49 @@ unsigned long snmp_fold_field(void __percpu *mib[], int offt)
1425} 1430}
1426EXPORT_SYMBOL_GPL(snmp_fold_field); 1431EXPORT_SYMBOL_GPL(snmp_fold_field);
1427 1432
1428int snmp_mib_init(void __percpu *ptr[2], size_t mibsize) 1433#if BITS_PER_LONG==32
1434
1435u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
1436{
1437 u64 res = 0;
1438 int cpu;
1439
1440 for_each_possible_cpu(cpu) {
1441 void *bhptr, *userptr;
1442 struct u64_stats_sync *syncp;
1443 u64 v_bh, v_user;
1444 unsigned int start;
1445
1446 /* first mib used by softirq context, we must use _bh() accessors */
1447 bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu);
1448 syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
1449 do {
1450 start = u64_stats_fetch_begin_bh(syncp);
1451 v_bh = *(((u64 *) bhptr) + offt);
1452 } while (u64_stats_fetch_retry_bh(syncp, start));
1453
1454 /* second mib used in USER context */
1455 userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu);
1456 syncp = (struct u64_stats_sync *)(userptr + syncp_offset);
1457 do {
1458 start = u64_stats_fetch_begin(syncp);
1459 v_user = *(((u64 *) userptr) + offt);
1460 } while (u64_stats_fetch_retry(syncp, start));
1461
1462 res += v_bh + v_user;
1463 }
1464 return res;
1465}
1466EXPORT_SYMBOL_GPL(snmp_fold_field64);
1467#endif
1468
1469int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
1429{ 1470{
1430 BUG_ON(ptr == NULL); 1471 BUG_ON(ptr == NULL);
1431 ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long)); 1472 ptr[0] = __alloc_percpu(mibsize, align);
1432 if (!ptr[0]) 1473 if (!ptr[0])
1433 goto err0; 1474 goto err0;
1434 ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long)); 1475 ptr[1] = __alloc_percpu(mibsize, align);
1435 if (!ptr[1]) 1476 if (!ptr[1])
1436 goto err1; 1477 goto err1;
1437 return 0; 1478 return 0;
@@ -1488,25 +1529,32 @@ static const struct net_protocol icmp_protocol = {
1488static __net_init int ipv4_mib_init_net(struct net *net) 1529static __net_init int ipv4_mib_init_net(struct net *net)
1489{ 1530{
1490 if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, 1531 if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
1491 sizeof(struct tcp_mib)) < 0) 1532 sizeof(struct tcp_mib),
1533 __alignof__(struct tcp_mib)) < 0)
1492 goto err_tcp_mib; 1534 goto err_tcp_mib;
1493 if (snmp_mib_init((void __percpu **)net->mib.ip_statistics, 1535 if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
1494 sizeof(struct ipstats_mib)) < 0) 1536 sizeof(struct ipstats_mib),
1537 __alignof__(struct ipstats_mib)) < 0)
1495 goto err_ip_mib; 1538 goto err_ip_mib;
1496 if (snmp_mib_init((void __percpu **)net->mib.net_statistics, 1539 if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
1497 sizeof(struct linux_mib)) < 0) 1540 sizeof(struct linux_mib),
1541 __alignof__(struct linux_mib)) < 0)
1498 goto err_net_mib; 1542 goto err_net_mib;
1499 if (snmp_mib_init((void __percpu **)net->mib.udp_statistics, 1543 if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
1500 sizeof(struct udp_mib)) < 0) 1544 sizeof(struct udp_mib),
1545 __alignof__(struct udp_mib)) < 0)
1501 goto err_udp_mib; 1546 goto err_udp_mib;
1502 if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics, 1547 if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
1503 sizeof(struct udp_mib)) < 0) 1548 sizeof(struct udp_mib),
1549 __alignof__(struct udp_mib)) < 0)
1504 goto err_udplite_mib; 1550 goto err_udplite_mib;
1505 if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics, 1551 if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
1506 sizeof(struct icmp_mib)) < 0) 1552 sizeof(struct icmp_mib),
1553 __alignof__(struct icmp_mib)) < 0)
1507 goto err_icmp_mib; 1554 goto err_icmp_mib;
1508 if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics, 1555 if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics,
1509 sizeof(struct icmpmsg_mib)) < 0) 1556 sizeof(struct icmpmsg_mib),
1557 __alignof__(struct icmpmsg_mib)) < 0)
1510 goto err_icmpmsg_mib; 1558 goto err_icmpmsg_mib;
1511 1559
1512 tcp_mib_init(net); 1560 tcp_mib_init(net);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f094b75810db..96c1955b3e2f 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -116,6 +116,7 @@
116#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) 116#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
117#include <net/atmclip.h> 117#include <net/atmclip.h>
118struct neigh_table *clip_tbl_hook; 118struct neigh_table *clip_tbl_hook;
119EXPORT_SYMBOL(clip_tbl_hook);
119#endif 120#endif
120 121
121#include <asm/system.h> 122#include <asm/system.h>
@@ -169,6 +170,7 @@ const struct neigh_ops arp_broken_ops = {
169 .hh_output = dev_queue_xmit, 170 .hh_output = dev_queue_xmit,
170 .queue_xmit = dev_queue_xmit, 171 .queue_xmit = dev_queue_xmit,
171}; 172};
173EXPORT_SYMBOL(arp_broken_ops);
172 174
173struct neigh_table arp_tbl = { 175struct neigh_table arp_tbl = {
174 .family = AF_INET, 176 .family = AF_INET,
@@ -198,6 +200,7 @@ struct neigh_table arp_tbl = {
198 .gc_thresh2 = 512, 200 .gc_thresh2 = 512,
199 .gc_thresh3 = 1024, 201 .gc_thresh3 = 1024,
200}; 202};
203EXPORT_SYMBOL(arp_tbl);
201 204
202int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir) 205int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
203{ 206{
@@ -333,11 +336,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
333 struct net_device *dev = neigh->dev; 336 struct net_device *dev = neigh->dev;
334 __be32 target = *(__be32*)neigh->primary_key; 337 __be32 target = *(__be32*)neigh->primary_key;
335 int probes = atomic_read(&neigh->probes); 338 int probes = atomic_read(&neigh->probes);
336 struct in_device *in_dev = in_dev_get(dev); 339 struct in_device *in_dev;
337 340
338 if (!in_dev) 341 rcu_read_lock();
342 in_dev = __in_dev_get_rcu(dev);
343 if (!in_dev) {
344 rcu_read_unlock();
339 return; 345 return;
340 346 }
341 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { 347 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
342 default: 348 default:
343 case 0: /* By default announce any local IP */ 349 case 0: /* By default announce any local IP */
@@ -358,9 +364,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
358 case 2: /* Avoid secondary IPs, get a primary/preferred one */ 364 case 2: /* Avoid secondary IPs, get a primary/preferred one */
359 break; 365 break;
360 } 366 }
367 rcu_read_unlock();
361 368
362 if (in_dev)
363 in_dev_put(in_dev);
364 if (!saddr) 369 if (!saddr)
365 saddr = inet_select_addr(dev, target, RT_SCOPE_LINK); 370 saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
366 371
@@ -427,7 +432,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
427 432
428 if (ip_route_output_key(net, &rt, &fl) < 0) 433 if (ip_route_output_key(net, &rt, &fl) < 0)
429 return 1; 434 return 1;
430 if (rt->u.dst.dev != dev) { 435 if (rt->dst.dev != dev) {
431 NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); 436 NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
432 flag = 1; 437 flag = 1;
433 } 438 }
@@ -497,6 +502,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
497 kfree_skb(skb); 502 kfree_skb(skb);
498 return 1; 503 return 1;
499} 504}
505EXPORT_SYMBOL(arp_find);
500 506
501/* END OF OBSOLETE FUNCTIONS */ 507/* END OF OBSOLETE FUNCTIONS */
502 508
@@ -532,7 +538,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
532 struct in_device *out_dev; 538 struct in_device *out_dev;
533 int imi, omi = -1; 539 int imi, omi = -1;
534 540
535 if (rt->u.dst.dev == dev) 541 if (rt->dst.dev == dev)
536 return 0; 542 return 0;
537 543
538 if (!IN_DEV_PROXY_ARP(in_dev)) 544 if (!IN_DEV_PROXY_ARP(in_dev))
@@ -545,10 +551,10 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
545 551
546 /* place to check for proxy_arp for routes */ 552 /* place to check for proxy_arp for routes */
547 553
548 if ((out_dev = in_dev_get(rt->u.dst.dev)) != NULL) { 554 out_dev = __in_dev_get_rcu(rt->dst.dev);
555 if (out_dev)
549 omi = IN_DEV_MEDIUM_ID(out_dev); 556 omi = IN_DEV_MEDIUM_ID(out_dev);
550 in_dev_put(out_dev); 557
551 }
552 return (omi != imi && omi != -1); 558 return (omi != imi && omi != -1);
553} 559}
554 560
@@ -576,7 +582,7 @@ static inline int arp_fwd_pvlan(struct in_device *in_dev,
576 __be32 sip, __be32 tip) 582 __be32 sip, __be32 tip)
577{ 583{
578 /* Private VLAN is only concerned about the same ethernet segment */ 584 /* Private VLAN is only concerned about the same ethernet segment */
579 if (rt->u.dst.dev != dev) 585 if (rt->dst.dev != dev)
580 return 0; 586 return 0;
581 587
582 /* Don't reply on self probes (often done by windowz boxes)*/ 588 /* Don't reply on self probes (often done by windowz boxes)*/
@@ -698,6 +704,7 @@ out:
698 kfree_skb(skb); 704 kfree_skb(skb);
699 return NULL; 705 return NULL;
700} 706}
707EXPORT_SYMBOL(arp_create);
701 708
702/* 709/*
703 * Send an arp packet. 710 * Send an arp packet.
@@ -707,6 +714,7 @@ void arp_xmit(struct sk_buff *skb)
707 /* Send it off, maybe filter it using firewalling first. */ 714 /* Send it off, maybe filter it using firewalling first. */
708 NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit); 715 NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
709} 716}
717EXPORT_SYMBOL(arp_xmit);
710 718
711/* 719/*
712 * Create and send an arp packet. 720 * Create and send an arp packet.
@@ -733,6 +741,7 @@ void arp_send(int type, int ptype, __be32 dest_ip,
733 741
734 arp_xmit(skb); 742 arp_xmit(skb);
735} 743}
744EXPORT_SYMBOL(arp_send);
736 745
737/* 746/*
738 * Process an arp request. 747 * Process an arp request.
@@ -741,7 +750,7 @@ void arp_send(int type, int ptype, __be32 dest_ip,
741static int arp_process(struct sk_buff *skb) 750static int arp_process(struct sk_buff *skb)
742{ 751{
743 struct net_device *dev = skb->dev; 752 struct net_device *dev = skb->dev;
744 struct in_device *in_dev = in_dev_get(dev); 753 struct in_device *in_dev = __in_dev_get_rcu(dev);
745 struct arphdr *arp; 754 struct arphdr *arp;
746 unsigned char *arp_ptr; 755 unsigned char *arp_ptr;
747 struct rtable *rt; 756 struct rtable *rt;
@@ -890,7 +899,6 @@ static int arp_process(struct sk_buff *skb)
890 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); 899 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
891 } else { 900 } else {
892 pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); 901 pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb);
893 in_dev_put(in_dev);
894 return 0; 902 return 0;
895 } 903 }
896 goto out; 904 goto out;
@@ -936,8 +944,6 @@ static int arp_process(struct sk_buff *skb)
936 } 944 }
937 945
938out: 946out:
939 if (in_dev)
940 in_dev_put(in_dev);
941 consume_skb(skb); 947 consume_skb(skb);
942 return 0; 948 return 0;
943} 949}
@@ -1045,7 +1051,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1045 struct rtable * rt; 1051 struct rtable * rt;
1046 if ((err = ip_route_output_key(net, &rt, &fl)) != 0) 1052 if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
1047 return err; 1053 return err;
1048 dev = rt->u.dst.dev; 1054 dev = rt->dst.dev;
1049 ip_rt_put(rt); 1055 ip_rt_put(rt);
1050 if (!dev) 1056 if (!dev)
1051 return -EINVAL; 1057 return -EINVAL;
@@ -1152,7 +1158,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
1152 struct rtable * rt; 1158 struct rtable * rt;
1153 if ((err = ip_route_output_key(net, &rt, &fl)) != 0) 1159 if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
1154 return err; 1160 return err;
1155 dev = rt->u.dst.dev; 1161 dev = rt->dst.dev;
1156 ip_rt_put(rt); 1162 ip_rt_put(rt);
1157 if (!dev) 1163 if (!dev)
1158 return -EINVAL; 1164 return -EINVAL;
@@ -1453,14 +1459,3 @@ static int __init arp_proc_init(void)
1453} 1459}
1454 1460
1455#endif /* CONFIG_PROC_FS */ 1461#endif /* CONFIG_PROC_FS */
1456
1457EXPORT_SYMBOL(arp_broken_ops);
1458EXPORT_SYMBOL(arp_find);
1459EXPORT_SYMBOL(arp_create);
1460EXPORT_SYMBOL(arp_xmit);
1461EXPORT_SYMBOL(arp_send);
1462EXPORT_SYMBOL(arp_tbl);
1463
1464#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
1465EXPORT_SYMBOL(clip_tbl_hook);
1466#endif
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index fb2465811b48..f0550941df7b 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -69,9 +69,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
69 sk->sk_state = TCP_ESTABLISHED; 69 sk->sk_state = TCP_ESTABLISHED;
70 inet->inet_id = jiffies; 70 inet->inet_id = jiffies;
71 71
72 sk_dst_set(sk, &rt->u.dst); 72 sk_dst_set(sk, &rt->dst);
73 return(0); 73 return(0);
74} 74}
75
76EXPORT_SYMBOL(ip4_datagram_connect); 75EXPORT_SYMBOL(ip4_datagram_connect);
77
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 382bc768ed56..da14c49284f4 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1081,6 +1081,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1081 } 1081 }
1082 ip_mc_up(in_dev); 1082 ip_mc_up(in_dev);
1083 /* fall through */ 1083 /* fall through */
1084 case NETDEV_NOTIFY_PEERS:
1084 case NETDEV_CHANGEADDR: 1085 case NETDEV_CHANGEADDR:
1085 /* Send gratuitous ARP to notify of link change */ 1086 /* Send gratuitous ARP to notify of link change */
1086 if (IN_DEV_ARP_NOTIFY(in_dev)) { 1087 if (IN_DEV_ARP_NOTIFY(in_dev)) {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4f0ed458c883..a43968918350 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -175,6 +175,7 @@ out:
175 fib_res_put(&res); 175 fib_res_put(&res);
176 return dev; 176 return dev;
177} 177}
178EXPORT_SYMBOL(ip_dev_find);
178 179
179/* 180/*
180 * Find address type as if only "dev" was present in the system. If 181 * Find address type as if only "dev" was present in the system. If
@@ -214,12 +215,14 @@ unsigned int inet_addr_type(struct net *net, __be32 addr)
214{ 215{
215 return __inet_dev_addr_type(net, NULL, addr); 216 return __inet_dev_addr_type(net, NULL, addr);
216} 217}
218EXPORT_SYMBOL(inet_addr_type);
217 219
218unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, 220unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
219 __be32 addr) 221 __be32 addr)
220{ 222{
221 return __inet_dev_addr_type(net, dev, addr); 223 return __inet_dev_addr_type(net, dev, addr);
222} 224}
225EXPORT_SYMBOL(inet_dev_addr_type);
223 226
224/* Given (packet source, input interface) and optional (dst, oif, tos): 227/* Given (packet source, input interface) and optional (dst, oif, tos):
225 - (main) check, that source is valid i.e. not broadcast or our local 228 - (main) check, that source is valid i.e. not broadcast or our local
@@ -284,7 +287,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
284 if (no_addr) 287 if (no_addr)
285 goto last_resort; 288 goto last_resort;
286 if (rpf == 1) 289 if (rpf == 1)
287 goto e_inval; 290 goto e_rpf;
288 fl.oif = dev->ifindex; 291 fl.oif = dev->ifindex;
289 292
290 ret = 0; 293 ret = 0;
@@ -299,7 +302,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
299 302
300last_resort: 303last_resort:
301 if (rpf) 304 if (rpf)
302 goto e_inval; 305 goto e_rpf;
303 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 306 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
304 *itag = 0; 307 *itag = 0;
305 return 0; 308 return 0;
@@ -308,6 +311,8 @@ e_inval_res:
308 fib_res_put(&res); 311 fib_res_put(&res);
309e_inval: 312e_inval:
310 return -EINVAL; 313 return -EINVAL;
314e_rpf:
315 return -EXDEV;
311} 316}
312 317
313static inline __be32 sk_extract_addr(struct sockaddr *addr) 318static inline __be32 sk_extract_addr(struct sockaddr *addr)
@@ -1075,7 +1080,3 @@ void __init ip_fib_init(void)
1075 1080
1076 fib_hash_init(); 1081 fib_hash_init();
1077} 1082}
1078
1079EXPORT_SYMBOL(inet_addr_type);
1080EXPORT_SYMBOL(inet_dev_addr_type);
1081EXPORT_SYMBOL(ip_dev_find);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index d65e9215bcd7..a0d847c7cba5 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -181,6 +181,7 @@ const struct icmp_err icmp_err_convert[] = {
181 .fatal = 1, 181 .fatal = 1,
182 }, 182 },
183}; 183};
184EXPORT_SYMBOL(icmp_err_convert);
184 185
185/* 186/*
186 * ICMP control array. This specifies what to do with each ICMP. 187 * ICMP control array. This specifies what to do with each ICMP.
@@ -267,11 +268,12 @@ int xrlim_allow(struct dst_entry *dst, int timeout)
267 dst->rate_tokens = token; 268 dst->rate_tokens = token;
268 return rc; 269 return rc;
269} 270}
271EXPORT_SYMBOL(xrlim_allow);
270 272
271static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, 273static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
272 int type, int code) 274 int type, int code)
273{ 275{
274 struct dst_entry *dst = &rt->u.dst; 276 struct dst_entry *dst = &rt->dst;
275 int rc = 1; 277 int rc = 1;
276 278
277 if (type > NR_ICMP_TYPES) 279 if (type > NR_ICMP_TYPES)
@@ -327,7 +329,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
327 struct sock *sk; 329 struct sock *sk;
328 struct sk_buff *skb; 330 struct sk_buff *skb;
329 331
330 sk = icmp_sk(dev_net((*rt)->u.dst.dev)); 332 sk = icmp_sk(dev_net((*rt)->dst.dev));
331 if (ip_append_data(sk, icmp_glue_bits, icmp_param, 333 if (ip_append_data(sk, icmp_glue_bits, icmp_param,
332 icmp_param->data_len+icmp_param->head_len, 334 icmp_param->data_len+icmp_param->head_len,
333 icmp_param->head_len, 335 icmp_param->head_len,
@@ -359,7 +361,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
359{ 361{
360 struct ipcm_cookie ipc; 362 struct ipcm_cookie ipc;
361 struct rtable *rt = skb_rtable(skb); 363 struct rtable *rt = skb_rtable(skb);
362 struct net *net = dev_net(rt->u.dst.dev); 364 struct net *net = dev_net(rt->dst.dev);
363 struct sock *sk; 365 struct sock *sk;
364 struct inet_sock *inet; 366 struct inet_sock *inet;
365 __be32 daddr; 367 __be32 daddr;
@@ -427,7 +429,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
427 429
428 if (!rt) 430 if (!rt)
429 goto out; 431 goto out;
430 net = dev_net(rt->u.dst.dev); 432 net = dev_net(rt->dst.dev);
431 433
432 /* 434 /*
433 * Find the original header. It is expected to be valid, of course. 435 * Find the original header. It is expected to be valid, of course.
@@ -596,9 +598,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
596 /* Ugh! */ 598 /* Ugh! */
597 orefdst = skb_in->_skb_refdst; /* save old refdst */ 599 orefdst = skb_in->_skb_refdst; /* save old refdst */
598 err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, 600 err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
599 RT_TOS(tos), rt2->u.dst.dev); 601 RT_TOS(tos), rt2->dst.dev);
600 602
601 dst_release(&rt2->u.dst); 603 dst_release(&rt2->dst);
602 rt2 = skb_rtable(skb_in); 604 rt2 = skb_rtable(skb_in);
603 skb_in->_skb_refdst = orefdst; /* restore old refdst */ 605 skb_in->_skb_refdst = orefdst; /* restore old refdst */
604 } 606 }
@@ -610,7 +612,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
610 XFRM_LOOKUP_ICMP); 612 XFRM_LOOKUP_ICMP);
611 switch (err) { 613 switch (err) {
612 case 0: 614 case 0:
613 dst_release(&rt->u.dst); 615 dst_release(&rt->dst);
614 rt = rt2; 616 rt = rt2;
615 break; 617 break;
616 case -EPERM: 618 case -EPERM:
@@ -629,7 +631,7 @@ route_done:
629 631
630 /* RFC says return as much as we can without exceeding 576 bytes. */ 632 /* RFC says return as much as we can without exceeding 576 bytes. */
631 633
632 room = dst_mtu(&rt->u.dst); 634 room = dst_mtu(&rt->dst);
633 if (room > 576) 635 if (room > 576)
634 room = 576; 636 room = 576;
635 room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; 637 room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
@@ -647,6 +649,7 @@ out_unlock:
647 icmp_xmit_unlock(sk); 649 icmp_xmit_unlock(sk);
648out:; 650out:;
649} 651}
652EXPORT_SYMBOL(icmp_send);
650 653
651 654
652/* 655/*
@@ -925,6 +928,7 @@ static void icmp_address(struct sk_buff *skb)
925/* 928/*
926 * RFC1812 (4.3.3.9). A router SHOULD listen all replies, and complain 929 * RFC1812 (4.3.3.9). A router SHOULD listen all replies, and complain
927 * loudly if an inconsistency is found. 930 * loudly if an inconsistency is found.
931 * called with rcu_read_lock()
928 */ 932 */
929 933
930static void icmp_address_reply(struct sk_buff *skb) 934static void icmp_address_reply(struct sk_buff *skb)
@@ -935,12 +939,12 @@ static void icmp_address_reply(struct sk_buff *skb)
935 struct in_ifaddr *ifa; 939 struct in_ifaddr *ifa;
936 940
937 if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC)) 941 if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC))
938 goto out; 942 return;
939 943
940 in_dev = in_dev_get(dev); 944 in_dev = __in_dev_get_rcu(dev);
941 if (!in_dev) 945 if (!in_dev)
942 goto out; 946 return;
943 rcu_read_lock(); 947
944 if (in_dev->ifa_list && 948 if (in_dev->ifa_list &&
945 IN_DEV_LOG_MARTIANS(in_dev) && 949 IN_DEV_LOG_MARTIANS(in_dev) &&
946 IN_DEV_FORWARD(in_dev)) { 950 IN_DEV_FORWARD(in_dev)) {
@@ -958,9 +962,6 @@ static void icmp_address_reply(struct sk_buff *skb)
958 mp, dev->name, &rt->rt_src); 962 mp, dev->name, &rt->rt_src);
959 } 963 }
960 } 964 }
961 rcu_read_unlock();
962 in_dev_put(in_dev);
963out:;
964} 965}
965 966
966static void icmp_discard(struct sk_buff *skb) 967static void icmp_discard(struct sk_buff *skb)
@@ -974,7 +975,7 @@ int icmp_rcv(struct sk_buff *skb)
974{ 975{
975 struct icmphdr *icmph; 976 struct icmphdr *icmph;
976 struct rtable *rt = skb_rtable(skb); 977 struct rtable *rt = skb_rtable(skb);
977 struct net *net = dev_net(rt->u.dst.dev); 978 struct net *net = dev_net(rt->dst.dev);
978 979
979 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 980 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
980 struct sec_path *sp = skb_sec_path(skb); 981 struct sec_path *sp = skb_sec_path(skb);
@@ -1216,7 +1217,3 @@ int __init icmp_init(void)
1216{ 1217{
1217 return register_pernet_subsys(&icmp_sk_ops); 1218 return register_pernet_subsys(&icmp_sk_ops);
1218} 1219}
1219
1220EXPORT_SYMBOL(icmp_err_convert);
1221EXPORT_SYMBOL(icmp_send);
1222EXPORT_SYMBOL(xrlim_allow);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 5fff865a4fa7..a1ad0e7180d2 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -312,7 +312,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
312 return NULL; 312 return NULL;
313 } 313 }
314 314
315 skb_dst_set(skb, &rt->u.dst); 315 skb_dst_set(skb, &rt->dst);
316 skb->dev = dev; 316 skb->dev = dev;
317 317
318 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 318 skb_reserve(skb, LL_RESERVED_SPACE(dev));
@@ -330,7 +330,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
330 pip->saddr = rt->rt_src; 330 pip->saddr = rt->rt_src;
331 pip->protocol = IPPROTO_IGMP; 331 pip->protocol = IPPROTO_IGMP;
332 pip->tot_len = 0; /* filled in later */ 332 pip->tot_len = 0; /* filled in later */
333 ip_select_ident(pip, &rt->u.dst, NULL); 333 ip_select_ident(pip, &rt->dst, NULL);
334 ((u8*)&pip[1])[0] = IPOPT_RA; 334 ((u8*)&pip[1])[0] = IPOPT_RA;
335 ((u8*)&pip[1])[1] = 4; 335 ((u8*)&pip[1])[1] = 4;
336 ((u8*)&pip[1])[2] = 0; 336 ((u8*)&pip[1])[2] = 0;
@@ -660,7 +660,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
660 return -1; 660 return -1;
661 } 661 }
662 662
663 skb_dst_set(skb, &rt->u.dst); 663 skb_dst_set(skb, &rt->dst);
664 664
665 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 665 skb_reserve(skb, LL_RESERVED_SPACE(dev));
666 666
@@ -676,7 +676,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
676 iph->daddr = dst; 676 iph->daddr = dst;
677 iph->saddr = rt->rt_src; 677 iph->saddr = rt->rt_src;
678 iph->protocol = IPPROTO_IGMP; 678 iph->protocol = IPPROTO_IGMP;
679 ip_select_ident(iph, &rt->u.dst, NULL); 679 ip_select_ident(iph, &rt->dst, NULL);
680 ((u8*)&iph[1])[0] = IPOPT_RA; 680 ((u8*)&iph[1])[0] = IPOPT_RA;
681 ((u8*)&iph[1])[1] = 4; 681 ((u8*)&iph[1])[1] = 4;
682 ((u8*)&iph[1])[2] = 0; 682 ((u8*)&iph[1])[2] = 0;
@@ -916,18 +916,19 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
916 read_unlock(&in_dev->mc_list_lock); 916 read_unlock(&in_dev->mc_list_lock);
917} 917}
918 918
919/* called in rcu_read_lock() section */
919int igmp_rcv(struct sk_buff *skb) 920int igmp_rcv(struct sk_buff *skb)
920{ 921{
921 /* This basically follows the spec line by line -- see RFC1112 */ 922 /* This basically follows the spec line by line -- see RFC1112 */
922 struct igmphdr *ih; 923 struct igmphdr *ih;
923 struct in_device *in_dev = in_dev_get(skb->dev); 924 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
924 int len = skb->len; 925 int len = skb->len;
925 926
926 if (in_dev == NULL) 927 if (in_dev == NULL)
927 goto drop; 928 goto drop;
928 929
929 if (!pskb_may_pull(skb, sizeof(struct igmphdr))) 930 if (!pskb_may_pull(skb, sizeof(struct igmphdr)))
930 goto drop_ref; 931 goto drop;
931 932
932 switch (skb->ip_summed) { 933 switch (skb->ip_summed) {
933 case CHECKSUM_COMPLETE: 934 case CHECKSUM_COMPLETE:
@@ -937,7 +938,7 @@ int igmp_rcv(struct sk_buff *skb)
937 case CHECKSUM_NONE: 938 case CHECKSUM_NONE:
938 skb->csum = 0; 939 skb->csum = 0;
939 if (__skb_checksum_complete(skb)) 940 if (__skb_checksum_complete(skb))
940 goto drop_ref; 941 goto drop;
941 } 942 }
942 943
943 ih = igmp_hdr(skb); 944 ih = igmp_hdr(skb);
@@ -957,7 +958,6 @@ int igmp_rcv(struct sk_buff *skb)
957 break; 958 break;
958 case IGMP_PIM: 959 case IGMP_PIM:
959#ifdef CONFIG_IP_PIMSM_V1 960#ifdef CONFIG_IP_PIMSM_V1
960 in_dev_put(in_dev);
961 return pim_rcv_v1(skb); 961 return pim_rcv_v1(skb);
962#endif 962#endif
963 case IGMPV3_HOST_MEMBERSHIP_REPORT: 963 case IGMPV3_HOST_MEMBERSHIP_REPORT:
@@ -971,8 +971,6 @@ int igmp_rcv(struct sk_buff *skb)
971 break; 971 break;
972 } 972 }
973 973
974drop_ref:
975 in_dev_put(in_dev);
976drop: 974drop:
977 kfree_skb(skb); 975 kfree_skb(skb);
978 return 0; 976 return 0;
@@ -1246,6 +1244,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1246out: 1244out:
1247 return; 1245 return;
1248} 1246}
1247EXPORT_SYMBOL(ip_mc_inc_group);
1249 1248
1250/* 1249/*
1251 * Resend IGMP JOIN report; used for bonding. 1250 * Resend IGMP JOIN report; used for bonding.
@@ -1268,6 +1267,7 @@ void ip_mc_rejoin_group(struct ip_mc_list *im)
1268 igmp_ifc_event(in_dev); 1267 igmp_ifc_event(in_dev);
1269#endif 1268#endif
1270} 1269}
1270EXPORT_SYMBOL(ip_mc_rejoin_group);
1271 1271
1272/* 1272/*
1273 * A socket has left a multicast group on device dev 1273 * A socket has left a multicast group on device dev
@@ -1298,6 +1298,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
1298 } 1298 }
1299 } 1299 }
1300} 1300}
1301EXPORT_SYMBOL(ip_mc_dec_group);
1301 1302
1302/* Device changing type */ 1303/* Device changing type */
1303 1304
@@ -1427,7 +1428,7 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
1427 } 1428 }
1428 1429
1429 if (!dev && !ip_route_output_key(net, &rt, &fl)) { 1430 if (!dev && !ip_route_output_key(net, &rt, &fl)) {
1430 dev = rt->u.dst.dev; 1431 dev = rt->dst.dev;
1431 ip_rt_put(rt); 1432 ip_rt_put(rt);
1432 } 1433 }
1433 if (dev) { 1434 if (dev) {
@@ -1646,8 +1647,7 @@ static int sf_setstate(struct ip_mc_list *pmc)
1646 if (dpsf->sf_inaddr == psf->sf_inaddr) 1647 if (dpsf->sf_inaddr == psf->sf_inaddr)
1647 break; 1648 break;
1648 if (!dpsf) { 1649 if (!dpsf) {
1649 dpsf = (struct ip_sf_list *) 1650 dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC);
1650 kmalloc(sizeof(*dpsf), GFP_ATOMIC);
1651 if (!dpsf) 1651 if (!dpsf)
1652 continue; 1652 continue;
1653 *dpsf = *psf; 1653 *dpsf = *psf;
@@ -1807,6 +1807,7 @@ done:
1807 rtnl_unlock(); 1807 rtnl_unlock();
1808 return err; 1808 return err;
1809} 1809}
1810EXPORT_SYMBOL(ip_mc_join_group);
1810 1811
1811static void ip_sf_socklist_reclaim(struct rcu_head *rp) 1812static void ip_sf_socklist_reclaim(struct rcu_head *rp)
1812{ 1813{
@@ -2679,8 +2680,3 @@ int __init igmp_mc_proc_init(void)
2679 return register_pernet_subsys(&igmp_net_ops); 2680 return register_pernet_subsys(&igmp_net_ops);
2680} 2681}
2681#endif 2682#endif
2682
2683EXPORT_SYMBOL(ip_mc_dec_group);
2684EXPORT_SYMBOL(ip_mc_inc_group);
2685EXPORT_SYMBOL(ip_mc_join_group);
2686EXPORT_SYMBOL(ip_mc_rejoin_group);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 70eb3507c406..7174370b1195 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -84,7 +84,6 @@ int inet_csk_bind_conflict(const struct sock *sk,
84 } 84 }
85 return node != NULL; 85 return node != NULL;
86} 86}
87
88EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); 87EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
89 88
90/* Obtain a reference to a local port for the given sock, 89/* Obtain a reference to a local port for the given sock,
@@ -212,7 +211,6 @@ fail:
212 local_bh_enable(); 211 local_bh_enable();
213 return ret; 212 return ret;
214} 213}
215
216EXPORT_SYMBOL_GPL(inet_csk_get_port); 214EXPORT_SYMBOL_GPL(inet_csk_get_port);
217 215
218/* 216/*
@@ -305,7 +303,6 @@ out_err:
305 *err = error; 303 *err = error;
306 goto out; 304 goto out;
307} 305}
308
309EXPORT_SYMBOL(inet_csk_accept); 306EXPORT_SYMBOL(inet_csk_accept);
310 307
311/* 308/*
@@ -327,7 +324,6 @@ void inet_csk_init_xmit_timers(struct sock *sk,
327 setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk); 324 setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
328 icsk->icsk_pending = icsk->icsk_ack.pending = 0; 325 icsk->icsk_pending = icsk->icsk_ack.pending = 0;
329} 326}
330
331EXPORT_SYMBOL(inet_csk_init_xmit_timers); 327EXPORT_SYMBOL(inet_csk_init_xmit_timers);
332 328
333void inet_csk_clear_xmit_timers(struct sock *sk) 329void inet_csk_clear_xmit_timers(struct sock *sk)
@@ -340,21 +336,18 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
340 sk_stop_timer(sk, &icsk->icsk_delack_timer); 336 sk_stop_timer(sk, &icsk->icsk_delack_timer);
341 sk_stop_timer(sk, &sk->sk_timer); 337 sk_stop_timer(sk, &sk->sk_timer);
342} 338}
343
344EXPORT_SYMBOL(inet_csk_clear_xmit_timers); 339EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
345 340
346void inet_csk_delete_keepalive_timer(struct sock *sk) 341void inet_csk_delete_keepalive_timer(struct sock *sk)
347{ 342{
348 sk_stop_timer(sk, &sk->sk_timer); 343 sk_stop_timer(sk, &sk->sk_timer);
349} 344}
350
351EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); 345EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);
352 346
353void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) 347void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
354{ 348{
355 sk_reset_timer(sk, &sk->sk_timer, jiffies + len); 349 sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
356} 350}
357
358EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); 351EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
359 352
360struct dst_entry *inet_csk_route_req(struct sock *sk, 353struct dst_entry *inet_csk_route_req(struct sock *sk,
@@ -383,7 +376,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
383 goto no_route; 376 goto no_route;
384 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 377 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
385 goto route_err; 378 goto route_err;
386 return &rt->u.dst; 379 return &rt->dst;
387 380
388route_err: 381route_err:
389 ip_rt_put(rt); 382 ip_rt_put(rt);
@@ -391,7 +384,6 @@ no_route:
391 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 384 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
392 return NULL; 385 return NULL;
393} 386}
394
395EXPORT_SYMBOL_GPL(inet_csk_route_req); 387EXPORT_SYMBOL_GPL(inet_csk_route_req);
396 388
397static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, 389static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
@@ -433,7 +425,6 @@ struct request_sock *inet_csk_search_req(const struct sock *sk,
433 425
434 return req; 426 return req;
435} 427}
436
437EXPORT_SYMBOL_GPL(inet_csk_search_req); 428EXPORT_SYMBOL_GPL(inet_csk_search_req);
438 429
439void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 430void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
@@ -447,11 +438,11 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
447 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); 438 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
448 inet_csk_reqsk_queue_added(sk, timeout); 439 inet_csk_reqsk_queue_added(sk, timeout);
449} 440}
441EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
450 442
451/* Only thing we need from tcp.h */ 443/* Only thing we need from tcp.h */
452extern int sysctl_tcp_synack_retries; 444extern int sysctl_tcp_synack_retries;
453 445
454EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
455 446
456/* Decide when to expire the request and when to resend SYN-ACK */ 447/* Decide when to expire the request and when to resend SYN-ACK */
457static inline void syn_ack_recalc(struct request_sock *req, const int thresh, 448static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
@@ -569,7 +560,6 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
569 if (lopt->qlen) 560 if (lopt->qlen)
570 inet_csk_reset_keepalive_timer(parent, interval); 561 inet_csk_reset_keepalive_timer(parent, interval);
571} 562}
572
573EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); 563EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
574 564
575struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, 565struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
@@ -599,7 +589,6 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
599 } 589 }
600 return newsk; 590 return newsk;
601} 591}
602
603EXPORT_SYMBOL_GPL(inet_csk_clone); 592EXPORT_SYMBOL_GPL(inet_csk_clone);
604 593
605/* 594/*
@@ -630,7 +619,6 @@ void inet_csk_destroy_sock(struct sock *sk)
630 percpu_counter_dec(sk->sk_prot->orphan_count); 619 percpu_counter_dec(sk->sk_prot->orphan_count);
631 sock_put(sk); 620 sock_put(sk);
632} 621}
633
634EXPORT_SYMBOL(inet_csk_destroy_sock); 622EXPORT_SYMBOL(inet_csk_destroy_sock);
635 623
636int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) 624int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
@@ -665,7 +653,6 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
665 __reqsk_queue_destroy(&icsk->icsk_accept_queue); 653 __reqsk_queue_destroy(&icsk->icsk_accept_queue);
666 return -EADDRINUSE; 654 return -EADDRINUSE;
667} 655}
668
669EXPORT_SYMBOL_GPL(inet_csk_listen_start); 656EXPORT_SYMBOL_GPL(inet_csk_listen_start);
670 657
671/* 658/*
@@ -720,7 +707,6 @@ void inet_csk_listen_stop(struct sock *sk)
720 } 707 }
721 WARN_ON(sk->sk_ack_backlog); 708 WARN_ON(sk->sk_ack_backlog);
722} 709}
723
724EXPORT_SYMBOL_GPL(inet_csk_listen_stop); 710EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
725 711
726void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) 712void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
@@ -732,7 +718,6 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
732 sin->sin_addr.s_addr = inet->inet_daddr; 718 sin->sin_addr.s_addr = inet->inet_daddr;
733 sin->sin_port = inet->inet_dport; 719 sin->sin_port = inet->inet_dport;
734} 720}
735
736EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); 721EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
737 722
738#ifdef CONFIG_COMPAT 723#ifdef CONFIG_COMPAT
@@ -747,7 +732,6 @@ int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
747 return icsk->icsk_af_ops->getsockopt(sk, level, optname, 732 return icsk->icsk_af_ops->getsockopt(sk, level, optname,
748 optval, optlen); 733 optval, optlen);
749} 734}
750
751EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt); 735EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt);
752 736
753int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, 737int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
@@ -761,6 +745,5 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
761 return icsk->icsk_af_ops->setsockopt(sk, level, optname, 745 return icsk->icsk_af_ops->setsockopt(sk, level, optname,
762 optval, optlen); 746 optval, optlen);
763} 747}
764
765EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); 748EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt);
766#endif 749#endif
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index a2ca6aed763b..5ff2a51b6d0c 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -114,7 +114,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
114 fq->last_in |= INET_FRAG_COMPLETE; 114 fq->last_in |= INET_FRAG_COMPLETE;
115 } 115 }
116} 116}
117
118EXPORT_SYMBOL(inet_frag_kill); 117EXPORT_SYMBOL(inet_frag_kill);
119 118
120static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, 119static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index d3e160a88219..fb7ad5a21ff3 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -99,7 +99,6 @@ void inet_put_port(struct sock *sk)
99 __inet_put_port(sk); 99 __inet_put_port(sk);
100 local_bh_enable(); 100 local_bh_enable();
101} 101}
102
103EXPORT_SYMBOL(inet_put_port); 102EXPORT_SYMBOL(inet_put_port);
104 103
105void __inet_inherit_port(struct sock *sk, struct sock *child) 104void __inet_inherit_port(struct sock *sk, struct sock *child)
@@ -116,7 +115,6 @@ void __inet_inherit_port(struct sock *sk, struct sock *child)
116 inet_csk(child)->icsk_bind_hash = tb; 115 inet_csk(child)->icsk_bind_hash = tb;
117 spin_unlock(&head->lock); 116 spin_unlock(&head->lock);
118} 117}
119
120EXPORT_SYMBOL_GPL(__inet_inherit_port); 118EXPORT_SYMBOL_GPL(__inet_inherit_port);
121 119
122static inline int compute_score(struct sock *sk, struct net *net, 120static inline int compute_score(struct sock *sk, struct net *net,
@@ -546,7 +544,6 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
546 return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk), 544 return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
547 __inet_check_established, __inet_hash_nolisten); 545 __inet_check_established, __inet_hash_nolisten);
548} 546}
549
550EXPORT_SYMBOL_GPL(inet_hash_connect); 547EXPORT_SYMBOL_GPL(inet_hash_connect);
551 548
552void inet_hashinfo_init(struct inet_hashinfo *h) 549void inet_hashinfo_init(struct inet_hashinfo *h)
@@ -560,5 +557,4 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
560 i + LISTENING_NULLS_BASE); 557 i + LISTENING_NULLS_BASE);
561 } 558 }
562} 559}
563
564EXPORT_SYMBOL_GPL(inet_hashinfo_init); 560EXPORT_SYMBOL_GPL(inet_hashinfo_init);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 6bcfe52a9c87..9ffa24b9a804 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -51,8 +51,8 @@
51 * lookups performed with disabled BHs. 51 * lookups performed with disabled BHs.
52 * 52 *
53 * Serialisation issues. 53 * Serialisation issues.
54 * 1. Nodes may appear in the tree only with the pool write lock held. 54 * 1. Nodes may appear in the tree only with the pool lock held.
55 * 2. Nodes may disappear from the tree only with the pool write lock held 55 * 2. Nodes may disappear from the tree only with the pool lock held
56 * AND reference count being 0. 56 * AND reference count being 0.
57 * 3. Nodes appears and disappears from unused node list only under 57 * 3. Nodes appears and disappears from unused node list only under
58 * "inet_peer_unused_lock". 58 * "inet_peer_unused_lock".
@@ -64,23 +64,31 @@
64 * usually under some other lock to prevent node disappearing 64 * usually under some other lock to prevent node disappearing
65 * dtime: unused node list lock 65 * dtime: unused node list lock
66 * v4daddr: unchangeable 66 * v4daddr: unchangeable
67 * ip_id_count: idlock 67 * ip_id_count: atomic value (no lock needed)
68 */ 68 */
69 69
70static struct kmem_cache *peer_cachep __read_mostly; 70static struct kmem_cache *peer_cachep __read_mostly;
71 71
72#define node_height(x) x->avl_height 72#define node_height(x) x->avl_height
73static struct inet_peer peer_fake_node = { 73
74 .avl_left = &peer_fake_node, 74#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
75 .avl_right = &peer_fake_node, 75static const struct inet_peer peer_fake_node = {
76 .avl_left = peer_avl_empty,
77 .avl_right = peer_avl_empty,
76 .avl_height = 0 78 .avl_height = 0
77}; 79};
78#define peer_avl_empty (&peer_fake_node) 80
79static struct inet_peer *peer_root = peer_avl_empty; 81static struct {
80static DEFINE_RWLOCK(peer_pool_lock); 82 struct inet_peer *root;
83 spinlock_t lock;
84 int total;
85} peers = {
86 .root = peer_avl_empty,
87 .lock = __SPIN_LOCK_UNLOCKED(peers.lock),
88 .total = 0,
89};
81#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ 90#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
82 91
83static int peer_total;
84/* Exported for sysctl_net_ipv4. */ 92/* Exported for sysctl_net_ipv4. */
85int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more 93int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more
86 * aggressively at this stage */ 94 * aggressively at this stage */
@@ -89,8 +97,13 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min
89int inet_peer_gc_mintime __read_mostly = 10 * HZ; 97int inet_peer_gc_mintime __read_mostly = 10 * HZ;
90int inet_peer_gc_maxtime __read_mostly = 120 * HZ; 98int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
91 99
92static LIST_HEAD(unused_peers); 100static struct {
93static DEFINE_SPINLOCK(inet_peer_unused_lock); 101 struct list_head list;
102 spinlock_t lock;
103} unused_peers = {
104 .list = LIST_HEAD_INIT(unused_peers.list),
105 .lock = __SPIN_LOCK_UNLOCKED(unused_peers.lock),
106};
94 107
95static void peer_check_expire(unsigned long dummy); 108static void peer_check_expire(unsigned long dummy);
96static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); 109static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
@@ -116,7 +129,7 @@ void __init inet_initpeers(void)
116 129
117 peer_cachep = kmem_cache_create("inet_peer_cache", 130 peer_cachep = kmem_cache_create("inet_peer_cache",
118 sizeof(struct inet_peer), 131 sizeof(struct inet_peer),
119 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 132 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
120 NULL); 133 NULL);
121 134
122 /* All the timers, started at system startup tend 135 /* All the timers, started at system startup tend
@@ -131,38 +144,69 @@ void __init inet_initpeers(void)
131/* Called with or without local BH being disabled. */ 144/* Called with or without local BH being disabled. */
132static void unlink_from_unused(struct inet_peer *p) 145static void unlink_from_unused(struct inet_peer *p)
133{ 146{
134 spin_lock_bh(&inet_peer_unused_lock); 147 if (!list_empty(&p->unused)) {
135 list_del_init(&p->unused); 148 spin_lock_bh(&unused_peers.lock);
136 spin_unlock_bh(&inet_peer_unused_lock); 149 list_del_init(&p->unused);
150 spin_unlock_bh(&unused_peers.lock);
151 }
137} 152}
138 153
139/* 154/*
140 * Called with local BH disabled and the pool lock held. 155 * Called with local BH disabled and the pool lock held.
141 * _stack is known to be NULL or not at compile time,
142 * so compiler will optimize the if (_stack) tests.
143 */ 156 */
144#define lookup(_daddr, _stack) \ 157#define lookup(_daddr, _stack) \
145({ \ 158({ \
146 struct inet_peer *u, **v; \ 159 struct inet_peer *u, **v; \
147 if (_stack != NULL) { \ 160 \
148 stackptr = _stack; \ 161 stackptr = _stack; \
149 *stackptr++ = &peer_root; \ 162 *stackptr++ = &peers.root; \
150 } \ 163 for (u = peers.root; u != peer_avl_empty; ) { \
151 for (u = peer_root; u != peer_avl_empty; ) { \
152 if (_daddr == u->v4daddr) \ 164 if (_daddr == u->v4daddr) \
153 break; \ 165 break; \
154 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ 166 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
155 v = &u->avl_left; \ 167 v = &u->avl_left; \
156 else \ 168 else \
157 v = &u->avl_right; \ 169 v = &u->avl_right; \
158 if (_stack != NULL) \ 170 *stackptr++ = v; \
159 *stackptr++ = v; \
160 u = *v; \ 171 u = *v; \
161 } \ 172 } \
162 u; \ 173 u; \
163}) 174})
164 175
165/* Called with local BH disabled and the pool write lock held. */ 176/*
177 * Called with rcu_read_lock_bh()
178 * Because we hold no lock against a writer, its quite possible we fall
179 * in an endless loop.
180 * But every pointer we follow is guaranteed to be valid thanks to RCU.
181 * We exit from this function if number of links exceeds PEER_MAXDEPTH
182 */
183static struct inet_peer *lookup_rcu_bh(__be32 daddr)
184{
185 struct inet_peer *u = rcu_dereference_bh(peers.root);
186 int count = 0;
187
188 while (u != peer_avl_empty) {
189 if (daddr == u->v4daddr) {
190 /* Before taking a reference, check if this entry was
191 * deleted, unlink_from_pool() sets refcnt=-1 to make
192 * distinction between an unused entry (refcnt=0) and
193 * a freed one.
194 */
195 if (unlikely(!atomic_add_unless(&u->refcnt, 1, -1)))
196 u = NULL;
197 return u;
198 }
199 if ((__force __u32)daddr < (__force __u32)u->v4daddr)
200 u = rcu_dereference_bh(u->avl_left);
201 else
202 u = rcu_dereference_bh(u->avl_right);
203 if (unlikely(++count == PEER_MAXDEPTH))
204 break;
205 }
206 return NULL;
207}
208
209/* Called with local BH disabled and the pool lock held. */
166#define lookup_rightempty(start) \ 210#define lookup_rightempty(start) \
167({ \ 211({ \
168 struct inet_peer *u, **v; \ 212 struct inet_peer *u, **v; \
@@ -176,9 +220,10 @@ static void unlink_from_unused(struct inet_peer *p)
176 u; \ 220 u; \
177}) 221})
178 222
179/* Called with local BH disabled and the pool write lock held. 223/* Called with local BH disabled and the pool lock held.
180 * Variable names are the proof of operation correctness. 224 * Variable names are the proof of operation correctness.
181 * Look into mm/map_avl.c for more detail description of the ideas. */ 225 * Look into mm/map_avl.c for more detail description of the ideas.
226 */
182static void peer_avl_rebalance(struct inet_peer **stack[], 227static void peer_avl_rebalance(struct inet_peer **stack[],
183 struct inet_peer ***stackend) 228 struct inet_peer ***stackend)
184{ 229{
@@ -254,15 +299,21 @@ static void peer_avl_rebalance(struct inet_peer **stack[],
254 } 299 }
255} 300}
256 301
257/* Called with local BH disabled and the pool write lock held. */ 302/* Called with local BH disabled and the pool lock held. */
258#define link_to_pool(n) \ 303#define link_to_pool(n) \
259do { \ 304do { \
260 n->avl_height = 1; \ 305 n->avl_height = 1; \
261 n->avl_left = peer_avl_empty; \ 306 n->avl_left = peer_avl_empty; \
262 n->avl_right = peer_avl_empty; \ 307 n->avl_right = peer_avl_empty; \
308 smp_wmb(); /* lockless readers can catch us now */ \
263 **--stackptr = n; \ 309 **--stackptr = n; \
264 peer_avl_rebalance(stack, stackptr); \ 310 peer_avl_rebalance(stack, stackptr); \
265} while(0) 311} while (0)
312
313static void inetpeer_free_rcu(struct rcu_head *head)
314{
315 kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
316}
266 317
267/* May be called with local BH enabled. */ 318/* May be called with local BH enabled. */
268static void unlink_from_pool(struct inet_peer *p) 319static void unlink_from_pool(struct inet_peer *p)
@@ -271,13 +322,14 @@ static void unlink_from_pool(struct inet_peer *p)
271 322
272 do_free = 0; 323 do_free = 0;
273 324
274 write_lock_bh(&peer_pool_lock); 325 spin_lock_bh(&peers.lock);
275 /* Check the reference counter. It was artificially incremented by 1 326 /* Check the reference counter. It was artificially incremented by 1
276 * in cleanup() function to prevent sudden disappearing. If the 327 * in cleanup() function to prevent sudden disappearing. If we can
277 * reference count is still 1 then the node is referenced only as `p' 328 * atomically (because of lockless readers) take this last reference,
278 * here and from the pool. So under the exclusive pool lock it's safe 329 * it's safe to remove the node and free it later.
279 * to remove the node and free it later. */ 330 * We use refcnt=-1 to alert lockless readers this entry is deleted.
280 if (atomic_read(&p->refcnt) == 1) { 331 */
332 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
281 struct inet_peer **stack[PEER_MAXDEPTH]; 333 struct inet_peer **stack[PEER_MAXDEPTH];
282 struct inet_peer ***stackptr, ***delp; 334 struct inet_peer ***stackptr, ***delp;
283 if (lookup(p->v4daddr, stack) != p) 335 if (lookup(p->v4daddr, stack) != p)
@@ -303,20 +355,21 @@ static void unlink_from_pool(struct inet_peer *p)
303 delp[1] = &t->avl_left; /* was &p->avl_left */ 355 delp[1] = &t->avl_left; /* was &p->avl_left */
304 } 356 }
305 peer_avl_rebalance(stack, stackptr); 357 peer_avl_rebalance(stack, stackptr);
306 peer_total--; 358 peers.total--;
307 do_free = 1; 359 do_free = 1;
308 } 360 }
309 write_unlock_bh(&peer_pool_lock); 361 spin_unlock_bh(&peers.lock);
310 362
311 if (do_free) 363 if (do_free)
312 kmem_cache_free(peer_cachep, p); 364 call_rcu_bh(&p->rcu, inetpeer_free_rcu);
313 else 365 else
314 /* The node is used again. Decrease the reference counter 366 /* The node is used again. Decrease the reference counter
315 * back. The loop "cleanup -> unlink_from_unused 367 * back. The loop "cleanup -> unlink_from_unused
316 * -> unlink_from_pool -> putpeer -> link_to_unused 368 * -> unlink_from_pool -> putpeer -> link_to_unused
317 * -> cleanup (for the same node)" 369 * -> cleanup (for the same node)"
318 * doesn't really exist because the entry will have a 370 * doesn't really exist because the entry will have a
319 * recent deletion time and will not be cleaned again soon. */ 371 * recent deletion time and will not be cleaned again soon.
372 */
320 inet_putpeer(p); 373 inet_putpeer(p);
321} 374}
322 375
@@ -326,16 +379,16 @@ static int cleanup_once(unsigned long ttl)
326 struct inet_peer *p = NULL; 379 struct inet_peer *p = NULL;
327 380
328 /* Remove the first entry from the list of unused nodes. */ 381 /* Remove the first entry from the list of unused nodes. */
329 spin_lock_bh(&inet_peer_unused_lock); 382 spin_lock_bh(&unused_peers.lock);
330 if (!list_empty(&unused_peers)) { 383 if (!list_empty(&unused_peers.list)) {
331 __u32 delta; 384 __u32 delta;
332 385
333 p = list_first_entry(&unused_peers, struct inet_peer, unused); 386 p = list_first_entry(&unused_peers.list, struct inet_peer, unused);
334 delta = (__u32)jiffies - p->dtime; 387 delta = (__u32)jiffies - p->dtime;
335 388
336 if (delta < ttl) { 389 if (delta < ttl) {
337 /* Do not prune fresh entries. */ 390 /* Do not prune fresh entries. */
338 spin_unlock_bh(&inet_peer_unused_lock); 391 spin_unlock_bh(&unused_peers.lock);
339 return -1; 392 return -1;
340 } 393 }
341 394
@@ -345,7 +398,7 @@ static int cleanup_once(unsigned long ttl)
345 * before unlink_from_pool() call. */ 398 * before unlink_from_pool() call. */
346 atomic_inc(&p->refcnt); 399 atomic_inc(&p->refcnt);
347 } 400 }
348 spin_unlock_bh(&inet_peer_unused_lock); 401 spin_unlock_bh(&unused_peers.lock);
349 402
350 if (p == NULL) 403 if (p == NULL)
351 /* It means that the total number of USED entries has 404 /* It means that the total number of USED entries has
@@ -360,62 +413,56 @@ static int cleanup_once(unsigned long ttl)
360/* Called with or without local BH being disabled. */ 413/* Called with or without local BH being disabled. */
361struct inet_peer *inet_getpeer(__be32 daddr, int create) 414struct inet_peer *inet_getpeer(__be32 daddr, int create)
362{ 415{
363 struct inet_peer *p, *n; 416 struct inet_peer *p;
364 struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; 417 struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
365 418
366 /* Look up for the address quickly. */ 419 /* Look up for the address quickly, lockless.
367 read_lock_bh(&peer_pool_lock); 420 * Because of a concurrent writer, we might not find an existing entry.
368 p = lookup(daddr, NULL); 421 */
369 if (p != peer_avl_empty) 422 rcu_read_lock_bh();
370 atomic_inc(&p->refcnt); 423 p = lookup_rcu_bh(daddr);
371 read_unlock_bh(&peer_pool_lock); 424 rcu_read_unlock_bh();
425
426 if (p) {
427 /* The existing node has been found.
428 * Remove the entry from unused list if it was there.
429 */
430 unlink_from_unused(p);
431 return p;
432 }
372 433
434 /* retry an exact lookup, taking the lock before.
435 * At least, nodes should be hot in our cache.
436 */
437 spin_lock_bh(&peers.lock);
438 p = lookup(daddr, stack);
373 if (p != peer_avl_empty) { 439 if (p != peer_avl_empty) {
374 /* The existing node has been found. */ 440 atomic_inc(&p->refcnt);
441 spin_unlock_bh(&peers.lock);
375 /* Remove the entry from unused list if it was there. */ 442 /* Remove the entry from unused list if it was there. */
376 unlink_from_unused(p); 443 unlink_from_unused(p);
377 return p; 444 return p;
378 } 445 }
446 p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
447 if (p) {
448 p->v4daddr = daddr;
449 atomic_set(&p->refcnt, 1);
450 atomic_set(&p->rid, 0);
451 atomic_set(&p->ip_id_count, secure_ip_id(daddr));
452 p->tcp_ts_stamp = 0;
453 INIT_LIST_HEAD(&p->unused);
454
455
456 /* Link the node. */
457 link_to_pool(p);
458 peers.total++;
459 }
460 spin_unlock_bh(&peers.lock);
379 461
380 if (!create) 462 if (peers.total >= inet_peer_threshold)
381 return NULL;
382
383 /* Allocate the space outside the locked region. */
384 n = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
385 if (n == NULL)
386 return NULL;
387 n->v4daddr = daddr;
388 atomic_set(&n->refcnt, 1);
389 atomic_set(&n->rid, 0);
390 atomic_set(&n->ip_id_count, secure_ip_id(daddr));
391 n->tcp_ts_stamp = 0;
392
393 write_lock_bh(&peer_pool_lock);
394 /* Check if an entry has suddenly appeared. */
395 p = lookup(daddr, stack);
396 if (p != peer_avl_empty)
397 goto out_free;
398
399 /* Link the node. */
400 link_to_pool(n);
401 INIT_LIST_HEAD(&n->unused);
402 peer_total++;
403 write_unlock_bh(&peer_pool_lock);
404
405 if (peer_total >= inet_peer_threshold)
406 /* Remove one less-recently-used entry. */ 463 /* Remove one less-recently-used entry. */
407 cleanup_once(0); 464 cleanup_once(0);
408 465
409 return n;
410
411out_free:
412 /* The appropriate node is already in the pool. */
413 atomic_inc(&p->refcnt);
414 write_unlock_bh(&peer_pool_lock);
415 /* Remove the entry from unused list if it was there. */
416 unlink_from_unused(p);
417 /* Free preallocated the preallocated node. */
418 kmem_cache_free(peer_cachep, n);
419 return p; 466 return p;
420} 467}
421 468
@@ -425,12 +472,12 @@ static void peer_check_expire(unsigned long dummy)
425 unsigned long now = jiffies; 472 unsigned long now = jiffies;
426 int ttl; 473 int ttl;
427 474
428 if (peer_total >= inet_peer_threshold) 475 if (peers.total >= inet_peer_threshold)
429 ttl = inet_peer_minttl; 476 ttl = inet_peer_minttl;
430 else 477 else
431 ttl = inet_peer_maxttl 478 ttl = inet_peer_maxttl
432 - (inet_peer_maxttl - inet_peer_minttl) / HZ * 479 - (inet_peer_maxttl - inet_peer_minttl) / HZ *
433 peer_total / inet_peer_threshold * HZ; 480 peers.total / inet_peer_threshold * HZ;
434 while (!cleanup_once(ttl)) { 481 while (!cleanup_once(ttl)) {
435 if (jiffies != now) 482 if (jiffies != now)
436 break; 483 break;
@@ -439,22 +486,25 @@ static void peer_check_expire(unsigned long dummy)
439 /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime 486 /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
440 * interval depending on the total number of entries (more entries, 487 * interval depending on the total number of entries (more entries,
441 * less interval). */ 488 * less interval). */
442 if (peer_total >= inet_peer_threshold) 489 if (peers.total >= inet_peer_threshold)
443 peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; 490 peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
444 else 491 else
445 peer_periodic_timer.expires = jiffies 492 peer_periodic_timer.expires = jiffies
446 + inet_peer_gc_maxtime 493 + inet_peer_gc_maxtime
447 - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * 494 - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
448 peer_total / inet_peer_threshold * HZ; 495 peers.total / inet_peer_threshold * HZ;
449 add_timer(&peer_periodic_timer); 496 add_timer(&peer_periodic_timer);
450} 497}
451 498
452void inet_putpeer(struct inet_peer *p) 499void inet_putpeer(struct inet_peer *p)
453{ 500{
454 spin_lock_bh(&inet_peer_unused_lock); 501 local_bh_disable();
455 if (atomic_dec_and_test(&p->refcnt)) { 502
456 list_add_tail(&p->unused, &unused_peers); 503 if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) {
504 list_add_tail(&p->unused, &unused_peers.list);
457 p->dtime = (__u32)jiffies; 505 p->dtime = (__u32)jiffies;
506 spin_unlock(&unused_peers.lock);
458 } 507 }
459 spin_unlock_bh(&inet_peer_unused_lock); 508
509 local_bh_enable();
460} 510}
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 56cdf68a074c..99461f09320f 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -87,16 +87,16 @@ int ip_forward(struct sk_buff *skb)
87 if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 87 if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
88 goto sr_failed; 88 goto sr_failed;
89 89
90 if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) && 90 if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
91 (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { 91 (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
92 IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS); 92 IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
93 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 93 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
94 htonl(dst_mtu(&rt->u.dst))); 94 htonl(dst_mtu(&rt->dst)));
95 goto drop; 95 goto drop;
96 } 96 }
97 97
98 /* We are about to mangle packet. Copy it! */ 98 /* We are about to mangle packet. Copy it! */
99 if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) 99 if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+rt->dst.header_len))
100 goto drop; 100 goto drop;
101 iph = ip_hdr(skb); 101 iph = ip_hdr(skb);
102 102
@@ -113,7 +113,7 @@ int ip_forward(struct sk_buff *skb)
113 skb->priority = rt_tos2priority(iph->tos); 113 skb->priority = rt_tos2priority(iph->tos);
114 114
115 return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, 115 return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev,
116 rt->u.dst.dev, ip_forward_finish); 116 rt->dst.dev, ip_forward_finish);
117 117
118sr_failed: 118sr_failed:
119 /* 119 /*
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 75347ea70ea0..b7c41654dde5 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -124,11 +124,8 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a)
124} 124}
125 125
126/* Memory Tracking Functions. */ 126/* Memory Tracking Functions. */
127static __inline__ void frag_kfree_skb(struct netns_frags *nf, 127static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
128 struct sk_buff *skb, int *work)
129{ 128{
130 if (work)
131 *work -= skb->truesize;
132 atomic_sub(skb->truesize, &nf->mem); 129 atomic_sub(skb->truesize, &nf->mem);
133 kfree_skb(skb); 130 kfree_skb(skb);
134} 131}
@@ -309,7 +306,7 @@ static int ip_frag_reinit(struct ipq *qp)
309 fp = qp->q.fragments; 306 fp = qp->q.fragments;
310 do { 307 do {
311 struct sk_buff *xp = fp->next; 308 struct sk_buff *xp = fp->next;
312 frag_kfree_skb(qp->q.net, fp, NULL); 309 frag_kfree_skb(qp->q.net, fp);
313 fp = xp; 310 fp = xp;
314 } while (fp); 311 } while (fp);
315 312
@@ -317,6 +314,7 @@ static int ip_frag_reinit(struct ipq *qp)
317 qp->q.len = 0; 314 qp->q.len = 0;
318 qp->q.meat = 0; 315 qp->q.meat = 0;
319 qp->q.fragments = NULL; 316 qp->q.fragments = NULL;
317 qp->q.fragments_tail = NULL;
320 qp->iif = 0; 318 qp->iif = 0;
321 319
322 return 0; 320 return 0;
@@ -389,6 +387,11 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
389 * in the chain of fragments so far. We must know where to put 387 * in the chain of fragments so far. We must know where to put
390 * this fragment, right? 388 * this fragment, right?
391 */ 389 */
390 prev = qp->q.fragments_tail;
391 if (!prev || FRAG_CB(prev)->offset < offset) {
392 next = NULL;
393 goto found;
394 }
392 prev = NULL; 395 prev = NULL;
393 for (next = qp->q.fragments; next != NULL; next = next->next) { 396 for (next = qp->q.fragments; next != NULL; next = next->next) {
394 if (FRAG_CB(next)->offset >= offset) 397 if (FRAG_CB(next)->offset >= offset)
@@ -396,6 +399,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
396 prev = next; 399 prev = next;
397 } 400 }
398 401
402found:
399 /* We found where to put this one. Check for overlap with 403 /* We found where to put this one. Check for overlap with
400 * preceding fragment, and, if needed, align things so that 404 * preceding fragment, and, if needed, align things so that
401 * any overlaps are eliminated. 405 * any overlaps are eliminated.
@@ -446,7 +450,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
446 qp->q.fragments = next; 450 qp->q.fragments = next;
447 451
448 qp->q.meat -= free_it->len; 452 qp->q.meat -= free_it->len;
449 frag_kfree_skb(qp->q.net, free_it, NULL); 453 frag_kfree_skb(qp->q.net, free_it);
450 } 454 }
451 } 455 }
452 456
@@ -454,6 +458,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
454 458
455 /* Insert this fragment in the chain of fragments. */ 459 /* Insert this fragment in the chain of fragments. */
456 skb->next = next; 460 skb->next = next;
461 if (!next)
462 qp->q.fragments_tail = skb;
457 if (prev) 463 if (prev)
458 prev->next = skb; 464 prev->next = skb;
459 else 465 else
@@ -507,6 +513,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
507 goto out_nomem; 513 goto out_nomem;
508 514
509 fp->next = head->next; 515 fp->next = head->next;
516 if (!fp->next)
517 qp->q.fragments_tail = fp;
510 prev->next = fp; 518 prev->next = fp;
511 519
512 skb_morph(head, qp->q.fragments); 520 skb_morph(head, qp->q.fragments);
@@ -556,7 +564,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
556 564
557 skb_shinfo(head)->frag_list = head->next; 565 skb_shinfo(head)->frag_list = head->next;
558 skb_push(head, head->data - skb_network_header(head)); 566 skb_push(head, head->data - skb_network_header(head));
559 atomic_sub(head->truesize, &qp->q.net->mem);
560 567
561 for (fp=head->next; fp; fp = fp->next) { 568 for (fp=head->next; fp; fp = fp->next) {
562 head->data_len += fp->len; 569 head->data_len += fp->len;
@@ -566,8 +573,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
566 else if (head->ip_summed == CHECKSUM_COMPLETE) 573 else if (head->ip_summed == CHECKSUM_COMPLETE)
567 head->csum = csum_add(head->csum, fp->csum); 574 head->csum = csum_add(head->csum, fp->csum);
568 head->truesize += fp->truesize; 575 head->truesize += fp->truesize;
569 atomic_sub(fp->truesize, &qp->q.net->mem);
570 } 576 }
577 atomic_sub(head->truesize, &qp->q.net->mem);
571 578
572 head->next = NULL; 579 head->next = NULL;
573 head->dev = dev; 580 head->dev = dev;
@@ -578,6 +585,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
578 iph->tot_len = htons(len); 585 iph->tot_len = htons(len);
579 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); 586 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
580 qp->q.fragments = NULL; 587 qp->q.fragments = NULL;
588 qp->q.fragments_tail = NULL;
581 return 0; 589 return 0;
582 590
583out_nomem: 591out_nomem:
@@ -624,6 +632,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
624 kfree_skb(skb); 632 kfree_skb(skb);
625 return -ENOMEM; 633 return -ENOMEM;
626} 634}
635EXPORT_SYMBOL(ip_defrag);
627 636
628#ifdef CONFIG_SYSCTL 637#ifdef CONFIG_SYSCTL
629static int zero; 638static int zero;
@@ -777,5 +786,3 @@ void __init ipfrag_init(void)
777 ip4_frags.secret_interval = 10 * 60 * HZ; 786 ip4_frags.secret_interval = 10 * 60 * HZ;
778 inet_frags_init(&ip4_frags); 787 inet_frags_init(&ip4_frags);
779} 788}
780
781EXPORT_SYMBOL(ip_defrag);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 32618e11076d..945b20a5ad50 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -731,6 +731,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
731 tos = 0; 731 tos = 0;
732 if (skb->protocol == htons(ETH_P_IP)) 732 if (skb->protocol == htons(ETH_P_IP))
733 tos = old_iph->tos; 733 tos = old_iph->tos;
734 else if (skb->protocol == htons(ETH_P_IPV6))
735 tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
734 } 736 }
735 737
736 { 738 {
@@ -745,7 +747,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
745 goto tx_error; 747 goto tx_error;
746 } 748 }
747 } 749 }
748 tdev = rt->u.dst.dev; 750 tdev = rt->dst.dev;
749 751
750 if (tdev == dev) { 752 if (tdev == dev) {
751 ip_rt_put(rt); 753 ip_rt_put(rt);
@@ -755,7 +757,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
755 757
756 df = tiph->frag_off; 758 df = tiph->frag_off;
757 if (df) 759 if (df)
758 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; 760 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
759 else 761 else
760 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; 762 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
761 763
@@ -803,7 +805,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
803 tunnel->err_count = 0; 805 tunnel->err_count = 0;
804 } 806 }
805 807
806 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len; 808 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
807 809
808 if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| 810 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
809 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { 811 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
@@ -830,7 +832,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
830 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | 832 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
831 IPSKB_REROUTED); 833 IPSKB_REROUTED);
832 skb_dst_drop(skb); 834 skb_dst_drop(skb);
833 skb_dst_set(skb, &rt->u.dst); 835 skb_dst_set(skb, &rt->dst);
834 836
835 /* 837 /*
836 * Push down and install the IPIP header. 838 * Push down and install the IPIP header.
@@ -853,7 +855,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
853 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; 855 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
854#endif 856#endif
855 else 857 else
856 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); 858 iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT);
857 } 859 }
858 860
859 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; 861 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
@@ -915,7 +917,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
915 .proto = IPPROTO_GRE }; 917 .proto = IPPROTO_GRE };
916 struct rtable *rt; 918 struct rtable *rt;
917 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 919 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
918 tdev = rt->u.dst.dev; 920 tdev = rt->dst.dev;
919 ip_rt_put(rt); 921 ip_rt_put(rt);
920 } 922 }
921 923
@@ -1174,7 +1176,7 @@ static int ipgre_open(struct net_device *dev)
1174 struct rtable *rt; 1176 struct rtable *rt;
1175 if (ip_route_output_key(dev_net(dev), &rt, &fl)) 1177 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1176 return -EADDRNOTAVAIL; 1178 return -EADDRNOTAVAIL;
1177 dev = rt->u.dst.dev; 1179 dev = rt->dst.dev;
1178 ip_rt_put(rt); 1180 ip_rt_put(rt);
1179 if (__in_dev_get_rtnl(dev) == NULL) 1181 if (__in_dev_get_rtnl(dev) == NULL)
1180 return -EADDRNOTAVAIL; 1182 return -EADDRNOTAVAIL;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d930dc5e4d85..d859bcc26cb7 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -146,7 +146,7 @@
146#include <linux/netlink.h> 146#include <linux/netlink.h>
147 147
148/* 148/*
149 * Process Router Attention IP option 149 * Process Router Attention IP option (RFC 2113)
150 */ 150 */
151int ip_call_ra_chain(struct sk_buff *skb) 151int ip_call_ra_chain(struct sk_buff *skb)
152{ 152{
@@ -155,8 +155,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
155 struct sock *last = NULL; 155 struct sock *last = NULL;
156 struct net_device *dev = skb->dev; 156 struct net_device *dev = skb->dev;
157 157
158 read_lock(&ip_ra_lock); 158 for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
159 for (ra = ip_ra_chain; ra; ra = ra->next) {
160 struct sock *sk = ra->sk; 159 struct sock *sk = ra->sk;
161 160
162 /* If socket is bound to an interface, only report 161 /* If socket is bound to an interface, only report
@@ -167,10 +166,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
167 sk->sk_bound_dev_if == dev->ifindex) && 166 sk->sk_bound_dev_if == dev->ifindex) &&
168 net_eq(sock_net(sk), dev_net(dev))) { 167 net_eq(sock_net(sk), dev_net(dev))) {
169 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 168 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
170 if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { 169 if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN))
171 read_unlock(&ip_ra_lock);
172 return 1; 170 return 1;
173 }
174 } 171 }
175 if (last) { 172 if (last) {
176 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 173 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
@@ -183,10 +180,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
183 180
184 if (last) { 181 if (last) {
185 raw_rcv(last, skb); 182 raw_rcv(last, skb);
186 read_unlock(&ip_ra_lock);
187 return 1; 183 return 1;
188 } 184 }
189 read_unlock(&ip_ra_lock);
190 return 0; 185 return 0;
191} 186}
192 187
@@ -298,18 +293,16 @@ static inline int ip_rcv_options(struct sk_buff *skb)
298 } 293 }
299 294
300 if (unlikely(opt->srr)) { 295 if (unlikely(opt->srr)) {
301 struct in_device *in_dev = in_dev_get(dev); 296 struct in_device *in_dev = __in_dev_get_rcu(dev);
297
302 if (in_dev) { 298 if (in_dev) {
303 if (!IN_DEV_SOURCE_ROUTE(in_dev)) { 299 if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
304 if (IN_DEV_LOG_MARTIANS(in_dev) && 300 if (IN_DEV_LOG_MARTIANS(in_dev) &&
305 net_ratelimit()) 301 net_ratelimit())
306 printk(KERN_INFO "source route option %pI4 -> %pI4\n", 302 printk(KERN_INFO "source route option %pI4 -> %pI4\n",
307 &iph->saddr, &iph->daddr); 303 &iph->saddr, &iph->daddr);
308 in_dev_put(in_dev);
309 goto drop; 304 goto drop;
310 } 305 }
311
312 in_dev_put(in_dev);
313 } 306 }
314 307
315 if (ip_options_rcv_srr(skb)) 308 if (ip_options_rcv_srr(skb))
@@ -340,13 +333,16 @@ static int ip_rcv_finish(struct sk_buff *skb)
340 else if (err == -ENETUNREACH) 333 else if (err == -ENETUNREACH)
341 IP_INC_STATS_BH(dev_net(skb->dev), 334 IP_INC_STATS_BH(dev_net(skb->dev),
342 IPSTATS_MIB_INNOROUTES); 335 IPSTATS_MIB_INNOROUTES);
336 else if (err == -EXDEV)
337 NET_INC_STATS_BH(dev_net(skb->dev),
338 LINUX_MIB_IPRPFILTER);
343 goto drop; 339 goto drop;
344 } 340 }
345 } 341 }
346 342
347#ifdef CONFIG_NET_CLS_ROUTE 343#ifdef CONFIG_NET_CLS_ROUTE
348 if (unlikely(skb_dst(skb)->tclassid)) { 344 if (unlikely(skb_dst(skb)->tclassid)) {
349 struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id()); 345 struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
350 u32 idx = skb_dst(skb)->tclassid; 346 u32 idx = skb_dst(skb)->tclassid;
351 st[idx&0xFF].o_packets++; 347 st[idx&0xFF].o_packets++;
352 st[idx&0xFF].o_bytes += skb->len; 348 st[idx&0xFF].o_bytes += skb->len;
@@ -360,10 +356,10 @@ static int ip_rcv_finish(struct sk_buff *skb)
360 356
361 rt = skb_rtable(skb); 357 rt = skb_rtable(skb);
362 if (rt->rt_type == RTN_MULTICAST) { 358 if (rt->rt_type == RTN_MULTICAST) {
363 IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST, 359 IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST,
364 skb->len); 360 skb->len);
365 } else if (rt->rt_type == RTN_BROADCAST) 361 } else if (rt->rt_type == RTN_BROADCAST)
366 IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST, 362 IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST,
367 skb->len); 363 skb->len);
368 364
369 return dst_input(skb); 365 return dst_input(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 041d41df1224..04b69896df5f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -89,6 +89,7 @@ __inline__ void ip_send_check(struct iphdr *iph)
89 iph->check = 0; 89 iph->check = 0;
90 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 90 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
91} 91}
92EXPORT_SYMBOL(ip_send_check);
92 93
93int __ip_local_out(struct sk_buff *skb) 94int __ip_local_out(struct sk_buff *skb)
94{ 95{
@@ -151,15 +152,15 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
151 iph->version = 4; 152 iph->version = 4;
152 iph->ihl = 5; 153 iph->ihl = 5;
153 iph->tos = inet->tos; 154 iph->tos = inet->tos;
154 if (ip_dont_fragment(sk, &rt->u.dst)) 155 if (ip_dont_fragment(sk, &rt->dst))
155 iph->frag_off = htons(IP_DF); 156 iph->frag_off = htons(IP_DF);
156 else 157 else
157 iph->frag_off = 0; 158 iph->frag_off = 0;
158 iph->ttl = ip_select_ttl(inet, &rt->u.dst); 159 iph->ttl = ip_select_ttl(inet, &rt->dst);
159 iph->daddr = rt->rt_dst; 160 iph->daddr = rt->rt_dst;
160 iph->saddr = rt->rt_src; 161 iph->saddr = rt->rt_src;
161 iph->protocol = sk->sk_protocol; 162 iph->protocol = sk->sk_protocol;
162 ip_select_ident(iph, &rt->u.dst, sk); 163 ip_select_ident(iph, &rt->dst, sk);
163 164
164 if (opt && opt->optlen) { 165 if (opt && opt->optlen) {
165 iph->ihl += opt->optlen>>2; 166 iph->ihl += opt->optlen>>2;
@@ -172,7 +173,6 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
172 /* Send it out. */ 173 /* Send it out. */
173 return ip_local_out(skb); 174 return ip_local_out(skb);
174} 175}
175
176EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); 176EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
177 177
178static inline int ip_finish_output2(struct sk_buff *skb) 178static inline int ip_finish_output2(struct sk_buff *skb)
@@ -240,7 +240,7 @@ int ip_mc_output(struct sk_buff *skb)
240{ 240{
241 struct sock *sk = skb->sk; 241 struct sock *sk = skb->sk;
242 struct rtable *rt = skb_rtable(skb); 242 struct rtable *rt = skb_rtable(skb);
243 struct net_device *dev = rt->u.dst.dev; 243 struct net_device *dev = rt->dst.dev;
244 244
245 /* 245 /*
246 * If the indicated interface is up and running, send the packet. 246 * If the indicated interface is up and running, send the packet.
@@ -359,9 +359,9 @@ int ip_queue_xmit(struct sk_buff *skb)
359 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) 359 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
360 goto no_route; 360 goto no_route;
361 } 361 }
362 sk_setup_caps(sk, &rt->u.dst); 362 sk_setup_caps(sk, &rt->dst);
363 } 363 }
364 skb_dst_set_noref(skb, &rt->u.dst); 364 skb_dst_set_noref(skb, &rt->dst);
365 365
366packet_routed: 366packet_routed:
367 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 367 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
@@ -372,11 +372,11 @@ packet_routed:
372 skb_reset_network_header(skb); 372 skb_reset_network_header(skb);
373 iph = ip_hdr(skb); 373 iph = ip_hdr(skb);
374 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); 374 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
375 if (ip_dont_fragment(sk, &rt->u.dst) && !skb->local_df) 375 if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df)
376 iph->frag_off = htons(IP_DF); 376 iph->frag_off = htons(IP_DF);
377 else 377 else
378 iph->frag_off = 0; 378 iph->frag_off = 0;
379 iph->ttl = ip_select_ttl(inet, &rt->u.dst); 379 iph->ttl = ip_select_ttl(inet, &rt->dst);
380 iph->protocol = sk->sk_protocol; 380 iph->protocol = sk->sk_protocol;
381 iph->saddr = rt->rt_src; 381 iph->saddr = rt->rt_src;
382 iph->daddr = rt->rt_dst; 382 iph->daddr = rt->rt_dst;
@@ -387,7 +387,7 @@ packet_routed:
387 ip_options_build(skb, opt, inet->inet_daddr, rt, 0); 387 ip_options_build(skb, opt, inet->inet_daddr, rt, 0);
388 } 388 }
389 389
390 ip_select_ident_more(iph, &rt->u.dst, sk, 390 ip_select_ident_more(iph, &rt->dst, sk,
391 (skb_shinfo(skb)->gso_segs ?: 1) - 1); 391 (skb_shinfo(skb)->gso_segs ?: 1) - 1);
392 392
393 skb->priority = sk->sk_priority; 393 skb->priority = sk->sk_priority;
@@ -403,6 +403,7 @@ no_route:
403 kfree_skb(skb); 403 kfree_skb(skb);
404 return -EHOSTUNREACH; 404 return -EHOSTUNREACH;
405} 405}
406EXPORT_SYMBOL(ip_queue_xmit);
406 407
407 408
408static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) 409static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
@@ -411,7 +412,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
411 to->priority = from->priority; 412 to->priority = from->priority;
412 to->protocol = from->protocol; 413 to->protocol = from->protocol;
413 skb_dst_drop(to); 414 skb_dst_drop(to);
414 skb_dst_set(to, dst_clone(skb_dst(from))); 415 skb_dst_copy(to, from);
415 to->dev = from->dev; 416 to->dev = from->dev;
416 to->mark = from->mark; 417 to->mark = from->mark;
417 418
@@ -442,17 +443,16 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
442int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 443int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
443{ 444{
444 struct iphdr *iph; 445 struct iphdr *iph;
445 int raw = 0;
446 int ptr; 446 int ptr;
447 struct net_device *dev; 447 struct net_device *dev;
448 struct sk_buff *skb2; 448 struct sk_buff *skb2;
449 unsigned int mtu, hlen, left, len, ll_rs, pad; 449 unsigned int mtu, hlen, left, len, ll_rs;
450 int offset; 450 int offset;
451 __be16 not_last_frag; 451 __be16 not_last_frag;
452 struct rtable *rt = skb_rtable(skb); 452 struct rtable *rt = skb_rtable(skb);
453 int err = 0; 453 int err = 0;
454 454
455 dev = rt->u.dst.dev; 455 dev = rt->dst.dev;
456 456
457 /* 457 /*
458 * Point into the IP datagram header. 458 * Point into the IP datagram header.
@@ -473,7 +473,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
473 */ 473 */
474 474
475 hlen = iph->ihl * 4; 475 hlen = iph->ihl * 4;
476 mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ 476 mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */
477#ifdef CONFIG_BRIDGE_NETFILTER 477#ifdef CONFIG_BRIDGE_NETFILTER
478 if (skb->nf_bridge) 478 if (skb->nf_bridge)
479 mtu -= nf_bridge_mtu_reduction(skb); 479 mtu -= nf_bridge_mtu_reduction(skb);
@@ -580,14 +580,12 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
580 580
581slow_path: 581slow_path:
582 left = skb->len - hlen; /* Space per frame */ 582 left = skb->len - hlen; /* Space per frame */
583 ptr = raw + hlen; /* Where to start from */ 583 ptr = hlen; /* Where to start from */
584 584
585 /* for bridged IP traffic encapsulated inside f.e. a vlan header, 585 /* for bridged IP traffic encapsulated inside f.e. a vlan header,
586 * we need to make room for the encapsulating header 586 * we need to make room for the encapsulating header
587 */ 587 */
588 pad = nf_bridge_pad(skb); 588 ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb));
589 ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
590 mtu -= pad;
591 589
592 /* 590 /*
593 * Fragment the datagram. 591 * Fragment the datagram.
@@ -697,7 +695,6 @@ fail:
697 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 695 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
698 return err; 696 return err;
699} 697}
700
701EXPORT_SYMBOL(ip_fragment); 698EXPORT_SYMBOL(ip_fragment);
702 699
703int 700int
@@ -716,6 +713,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
716 } 713 }
717 return 0; 714 return 0;
718} 715}
716EXPORT_SYMBOL(ip_generic_getfrag);
719 717
720static inline __wsum 718static inline __wsum
721csum_page(struct page *page, int offset, int copy) 719csum_page(struct page *page, int offset, int copy)
@@ -833,13 +831,13 @@ int ip_append_data(struct sock *sk,
833 */ 831 */
834 *rtp = NULL; 832 *rtp = NULL;
835 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? 833 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
836 rt->u.dst.dev->mtu : 834 rt->dst.dev->mtu :
837 dst_mtu(rt->u.dst.path); 835 dst_mtu(rt->dst.path);
838 inet->cork.dst = &rt->u.dst; 836 inet->cork.dst = &rt->dst;
839 inet->cork.length = 0; 837 inet->cork.length = 0;
840 sk->sk_sndmsg_page = NULL; 838 sk->sk_sndmsg_page = NULL;
841 sk->sk_sndmsg_off = 0; 839 sk->sk_sndmsg_off = 0;
842 if ((exthdrlen = rt->u.dst.header_len) != 0) { 840 if ((exthdrlen = rt->dst.header_len) != 0) {
843 length += exthdrlen; 841 length += exthdrlen;
844 transhdrlen += exthdrlen; 842 transhdrlen += exthdrlen;
845 } 843 }
@@ -852,7 +850,7 @@ int ip_append_data(struct sock *sk,
852 exthdrlen = 0; 850 exthdrlen = 0;
853 mtu = inet->cork.fragsize; 851 mtu = inet->cork.fragsize;
854 } 852 }
855 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); 853 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
856 854
857 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 855 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
858 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 856 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
@@ -869,7 +867,7 @@ int ip_append_data(struct sock *sk,
869 */ 867 */
870 if (transhdrlen && 868 if (transhdrlen &&
871 length + fragheaderlen <= mtu && 869 length + fragheaderlen <= mtu &&
872 rt->u.dst.dev->features & NETIF_F_V4_CSUM && 870 rt->dst.dev->features & NETIF_F_V4_CSUM &&
873 !exthdrlen) 871 !exthdrlen)
874 csummode = CHECKSUM_PARTIAL; 872 csummode = CHECKSUM_PARTIAL;
875 873
@@ -878,7 +876,7 @@ int ip_append_data(struct sock *sk,
878 inet->cork.length += length; 876 inet->cork.length += length;
879 if (((length > mtu) || (skb && skb_is_gso(skb))) && 877 if (((length > mtu) || (skb && skb_is_gso(skb))) &&
880 (sk->sk_protocol == IPPROTO_UDP) && 878 (sk->sk_protocol == IPPROTO_UDP) &&
881 (rt->u.dst.dev->features & NETIF_F_UFO)) { 879 (rt->dst.dev->features & NETIF_F_UFO)) {
882 err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, 880 err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
883 fragheaderlen, transhdrlen, mtu, 881 fragheaderlen, transhdrlen, mtu,
884 flags); 882 flags);
@@ -926,7 +924,7 @@ alloc_new_skb:
926 fraglen = datalen + fragheaderlen; 924 fraglen = datalen + fragheaderlen;
927 925
928 if ((flags & MSG_MORE) && 926 if ((flags & MSG_MORE) &&
929 !(rt->u.dst.dev->features&NETIF_F_SG)) 927 !(rt->dst.dev->features&NETIF_F_SG))
930 alloclen = mtu; 928 alloclen = mtu;
931 else 929 else
932 alloclen = datalen + fragheaderlen; 930 alloclen = datalen + fragheaderlen;
@@ -937,7 +935,7 @@ alloc_new_skb:
937 * the last. 935 * the last.
938 */ 936 */
939 if (datalen == length + fraggap) 937 if (datalen == length + fraggap)
940 alloclen += rt->u.dst.trailer_len; 938 alloclen += rt->dst.trailer_len;
941 939
942 if (transhdrlen) { 940 if (transhdrlen) {
943 skb = sock_alloc_send_skb(sk, 941 skb = sock_alloc_send_skb(sk,
@@ -1010,7 +1008,7 @@ alloc_new_skb:
1010 if (copy > length) 1008 if (copy > length)
1011 copy = length; 1009 copy = length;
1012 1010
1013 if (!(rt->u.dst.dev->features&NETIF_F_SG)) { 1011 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1014 unsigned int off; 1012 unsigned int off;
1015 1013
1016 off = skb->len; 1014 off = skb->len;
@@ -1105,10 +1103,10 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1105 if (inet->cork.flags & IPCORK_OPT) 1103 if (inet->cork.flags & IPCORK_OPT)
1106 opt = inet->cork.opt; 1104 opt = inet->cork.opt;
1107 1105
1108 if (!(rt->u.dst.dev->features&NETIF_F_SG)) 1106 if (!(rt->dst.dev->features&NETIF_F_SG))
1109 return -EOPNOTSUPP; 1107 return -EOPNOTSUPP;
1110 1108
1111 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); 1109 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1112 mtu = inet->cork.fragsize; 1110 mtu = inet->cork.fragsize;
1113 1111
1114 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 1112 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
@@ -1125,7 +1123,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1125 inet->cork.length += size; 1123 inet->cork.length += size;
1126 if ((size + skb->len > mtu) && 1124 if ((size + skb->len > mtu) &&
1127 (sk->sk_protocol == IPPROTO_UDP) && 1125 (sk->sk_protocol == IPPROTO_UDP) &&
1128 (rt->u.dst.dev->features & NETIF_F_UFO)) { 1126 (rt->dst.dev->features & NETIF_F_UFO)) {
1129 skb_shinfo(skb)->gso_size = mtu - fragheaderlen; 1127 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
1130 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 1128 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1131 } 1129 }
@@ -1277,8 +1275,8 @@ int ip_push_pending_frames(struct sock *sk)
1277 * If local_df is set too, we still allow to fragment this frame 1275 * If local_df is set too, we still allow to fragment this frame
1278 * locally. */ 1276 * locally. */
1279 if (inet->pmtudisc >= IP_PMTUDISC_DO || 1277 if (inet->pmtudisc >= IP_PMTUDISC_DO ||
1280 (skb->len <= dst_mtu(&rt->u.dst) && 1278 (skb->len <= dst_mtu(&rt->dst) &&
1281 ip_dont_fragment(sk, &rt->u.dst))) 1279 ip_dont_fragment(sk, &rt->dst)))
1282 df = htons(IP_DF); 1280 df = htons(IP_DF);
1283 1281
1284 if (inet->cork.flags & IPCORK_OPT) 1282 if (inet->cork.flags & IPCORK_OPT)
@@ -1287,7 +1285,7 @@ int ip_push_pending_frames(struct sock *sk)
1287 if (rt->rt_type == RTN_MULTICAST) 1285 if (rt->rt_type == RTN_MULTICAST)
1288 ttl = inet->mc_ttl; 1286 ttl = inet->mc_ttl;
1289 else 1287 else
1290 ttl = ip_select_ttl(inet, &rt->u.dst); 1288 ttl = ip_select_ttl(inet, &rt->dst);
1291 1289
1292 iph = (struct iphdr *)skb->data; 1290 iph = (struct iphdr *)skb->data;
1293 iph->version = 4; 1291 iph->version = 4;
@@ -1298,7 +1296,7 @@ int ip_push_pending_frames(struct sock *sk)
1298 } 1296 }
1299 iph->tos = inet->tos; 1297 iph->tos = inet->tos;
1300 iph->frag_off = df; 1298 iph->frag_off = df;
1301 ip_select_ident(iph, &rt->u.dst, sk); 1299 ip_select_ident(iph, &rt->dst, sk);
1302 iph->ttl = ttl; 1300 iph->ttl = ttl;
1303 iph->protocol = sk->sk_protocol; 1301 iph->protocol = sk->sk_protocol;
1304 iph->saddr = rt->rt_src; 1302 iph->saddr = rt->rt_src;
@@ -1311,7 +1309,7 @@ int ip_push_pending_frames(struct sock *sk)
1311 * on dst refcount 1309 * on dst refcount
1312 */ 1310 */
1313 inet->cork.dst = NULL; 1311 inet->cork.dst = NULL;
1314 skb_dst_set(skb, &rt->u.dst); 1312 skb_dst_set(skb, &rt->dst);
1315 1313
1316 if (iph->protocol == IPPROTO_ICMP) 1314 if (iph->protocol == IPPROTO_ICMP)
1317 icmp_out_count(net, ((struct icmphdr *) 1315 icmp_out_count(net, ((struct icmphdr *)
@@ -1448,7 +1446,3 @@ void __init ip_init(void)
1448 igmp_mc_proc_init(); 1446 igmp_mc_proc_init();
1449#endif 1447#endif
1450} 1448}
1451
1452EXPORT_SYMBOL(ip_generic_getfrag);
1453EXPORT_SYMBOL(ip_queue_xmit);
1454EXPORT_SYMBOL(ip_send_check);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ce231780a2b1..6c40a8c46e79 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -239,7 +239,16 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
239 sent to multicast group to reach destination designated router. 239 sent to multicast group to reach destination designated router.
240 */ 240 */
241struct ip_ra_chain *ip_ra_chain; 241struct ip_ra_chain *ip_ra_chain;
242DEFINE_RWLOCK(ip_ra_lock); 242static DEFINE_SPINLOCK(ip_ra_lock);
243
244
245static void ip_ra_destroy_rcu(struct rcu_head *head)
246{
247 struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
248
249 sock_put(ra->saved_sk);
250 kfree(ra);
251}
243 252
244int ip_ra_control(struct sock *sk, unsigned char on, 253int ip_ra_control(struct sock *sk, unsigned char on,
245 void (*destructor)(struct sock *)) 254 void (*destructor)(struct sock *))
@@ -251,35 +260,42 @@ int ip_ra_control(struct sock *sk, unsigned char on,
251 260
252 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; 261 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
253 262
254 write_lock_bh(&ip_ra_lock); 263 spin_lock_bh(&ip_ra_lock);
255 for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { 264 for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
256 if (ra->sk == sk) { 265 if (ra->sk == sk) {
257 if (on) { 266 if (on) {
258 write_unlock_bh(&ip_ra_lock); 267 spin_unlock_bh(&ip_ra_lock);
259 kfree(new_ra); 268 kfree(new_ra);
260 return -EADDRINUSE; 269 return -EADDRINUSE;
261 } 270 }
262 *rap = ra->next; 271 /* dont let ip_call_ra_chain() use sk again */
263 write_unlock_bh(&ip_ra_lock); 272 ra->sk = NULL;
273 rcu_assign_pointer(*rap, ra->next);
274 spin_unlock_bh(&ip_ra_lock);
264 275
265 if (ra->destructor) 276 if (ra->destructor)
266 ra->destructor(sk); 277 ra->destructor(sk);
267 sock_put(sk); 278 /*
268 kfree(ra); 279 * Delay sock_put(sk) and kfree(ra) after one rcu grace
280 * period. This guarantee ip_call_ra_chain() dont need
281 * to mess with socket refcounts.
282 */
283 ra->saved_sk = sk;
284 call_rcu(&ra->rcu, ip_ra_destroy_rcu);
269 return 0; 285 return 0;
270 } 286 }
271 } 287 }
272 if (new_ra == NULL) { 288 if (new_ra == NULL) {
273 write_unlock_bh(&ip_ra_lock); 289 spin_unlock_bh(&ip_ra_lock);
274 return -ENOBUFS; 290 return -ENOBUFS;
275 } 291 }
276 new_ra->sk = sk; 292 new_ra->sk = sk;
277 new_ra->destructor = destructor; 293 new_ra->destructor = destructor;
278 294
279 new_ra->next = ra; 295 new_ra->next = ra;
280 *rap = new_ra; 296 rcu_assign_pointer(*rap, new_ra);
281 sock_hold(sk); 297 sock_hold(sk);
282 write_unlock_bh(&ip_ra_lock); 298 spin_unlock_bh(&ip_ra_lock);
283 299
284 return 0; 300 return 0;
285} 301}
@@ -449,7 +465,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
449 (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | 465 (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
450 (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | 466 (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
451 (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) | 467 (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
452 (1<<IP_MINTTL))) || 468 (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
453 optname == IP_MULTICAST_TTL || 469 optname == IP_MULTICAST_TTL ||
454 optname == IP_MULTICAST_ALL || 470 optname == IP_MULTICAST_ALL ||
455 optname == IP_MULTICAST_LOOP || 471 optname == IP_MULTICAST_LOOP ||
@@ -572,6 +588,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
572 } 588 }
573 inet->hdrincl = val ? 1 : 0; 589 inet->hdrincl = val ? 1 : 0;
574 break; 590 break;
591 case IP_NODEFRAG:
592 if (sk->sk_type != SOCK_RAW) {
593 err = -ENOPROTOOPT;
594 break;
595 }
596 inet->nodefrag = val ? 1 : 0;
597 break;
575 case IP_MTU_DISCOVER: 598 case IP_MTU_DISCOVER:
576 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) 599 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
577 goto e_inval; 600 goto e_inval;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index b9d84e800cf4..3a6e1ec5e9ae 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -665,6 +665,13 @@ ic_dhcp_init_options(u8 *options)
665 memcpy(e, ic_req_params, sizeof(ic_req_params)); 665 memcpy(e, ic_req_params, sizeof(ic_req_params));
666 e += sizeof(ic_req_params); 666 e += sizeof(ic_req_params);
667 667
668 if (ic_host_name_set) {
669 *e++ = 12; /* host-name */
670 len = strlen(utsname()->nodename);
671 *e++ = len;
672 memcpy(e, utsname()->nodename, len);
673 e += len;
674 }
668 if (*vendor_class_identifier) { 675 if (*vendor_class_identifier) {
669 printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n", 676 printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n",
670 vendor_class_identifier); 677 vendor_class_identifier);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 7fd636711037..ec036731a70b 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -435,7 +435,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
435 goto tx_error_icmp; 435 goto tx_error_icmp;
436 } 436 }
437 } 437 }
438 tdev = rt->u.dst.dev; 438 tdev = rt->dst.dev;
439 439
440 if (tdev == dev) { 440 if (tdev == dev) {
441 ip_rt_put(rt); 441 ip_rt_put(rt);
@@ -446,7 +446,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
446 df |= old_iph->frag_off & htons(IP_DF); 446 df |= old_iph->frag_off & htons(IP_DF);
447 447
448 if (df) { 448 if (df) {
449 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); 449 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
450 450
451 if (mtu < 68) { 451 if (mtu < 68) {
452 stats->collisions++; 452 stats->collisions++;
@@ -503,7 +503,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
503 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | 503 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
504 IPSKB_REROUTED); 504 IPSKB_REROUTED);
505 skb_dst_drop(skb); 505 skb_dst_drop(skb);
506 skb_dst_set(skb, &rt->u.dst); 506 skb_dst_set(skb, &rt->dst);
507 507
508 /* 508 /*
509 * Push down and install the IPIP header. 509 * Push down and install the IPIP header.
@@ -552,7 +552,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
552 .proto = IPPROTO_IPIP }; 552 .proto = IPPROTO_IPIP };
553 struct rtable *rt; 553 struct rtable *rt;
554 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 554 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
555 tdev = rt->u.dst.dev; 555 tdev = rt->dst.dev;
556 ip_rt_put(rt); 556 ip_rt_put(rt);
557 } 557 }
558 dev->flags |= IFF_POINTOPOINT; 558 dev->flags |= IFF_POINTOPOINT;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 7f6273506eea..179fcab866fc 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1555,9 +1555,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1555 goto out_free; 1555 goto out_free;
1556 } 1556 }
1557 1557
1558 dev = rt->u.dst.dev; 1558 dev = rt->dst.dev;
1559 1559
1560 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { 1560 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
1561 /* Do not fragment multicasts. Alas, IPv4 does not 1561 /* Do not fragment multicasts. Alas, IPv4 does not
1562 allow to send ICMP, so that packets will disappear 1562 allow to send ICMP, so that packets will disappear
1563 to blackhole. 1563 to blackhole.
@@ -1568,7 +1568,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1568 goto out_free; 1568 goto out_free;
1569 } 1569 }
1570 1570
1571 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; 1571 encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
1572 1572
1573 if (skb_cow(skb, encap)) { 1573 if (skb_cow(skb, encap)) {
1574 ip_rt_put(rt); 1574 ip_rt_put(rt);
@@ -1579,7 +1579,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1579 vif->bytes_out += skb->len; 1579 vif->bytes_out += skb->len;
1580 1580
1581 skb_dst_drop(skb); 1581 skb_dst_drop(skb);
1582 skb_dst_set(skb, &rt->u.dst); 1582 skb_dst_set(skb, &rt->dst);
1583 ip_decrease_ttl(ip_hdr(skb)); 1583 ip_decrease_ttl(ip_hdr(skb));
1584 1584
1585 /* FIXME: forward and output firewalls used to be called here. 1585 /* FIXME: forward and output firewalls used to be called here.
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 07de855e2175..d88a46c54fd1 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -43,7 +43,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
43 43
44 /* Drop old route. */ 44 /* Drop old route. */
45 skb_dst_drop(skb); 45 skb_dst_drop(skb);
46 skb_dst_set(skb, &rt->u.dst); 46 skb_dst_set(skb, &rt->dst);
47 } else { 47 } else {
48 /* non-local src, find valid iif to satisfy 48 /* non-local src, find valid iif to satisfy
49 * rp-filter when calling ip_route_input. */ 49 * rp-filter when calling ip_route_input. */
@@ -53,11 +53,11 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
53 53
54 orefdst = skb->_skb_refdst; 54 orefdst = skb->_skb_refdst;
55 if (ip_route_input(skb, iph->daddr, iph->saddr, 55 if (ip_route_input(skb, iph->daddr, iph->saddr,
56 RT_TOS(iph->tos), rt->u.dst.dev) != 0) { 56 RT_TOS(iph->tos), rt->dst.dev) != 0) {
57 dst_release(&rt->u.dst); 57 dst_release(&rt->dst);
58 return -1; 58 return -1;
59 } 59 }
60 dst_release(&rt->u.dst); 60 dst_release(&rt->dst);
61 refdst_drop(orefdst); 61 refdst_drop(orefdst);
62 } 62 }
63 63
@@ -212,9 +212,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
212 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, 212 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
213 skb->len - dataoff, 0); 213 skb->len - dataoff, 0);
214 skb->ip_summed = CHECKSUM_NONE; 214 skb->ip_summed = CHECKSUM_NONE;
215 csum = __skb_checksum_complete_head(skb, dataoff + len); 215 return __skb_checksum_complete_head(skb, dataoff + len);
216 if (!csum)
217 skb->ip_summed = CHECKSUM_UNNECESSARY;
218 } 216 }
219 return csum; 217 return csum;
220} 218}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 1ac01b128621..6bccba31d132 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -283,16 +283,13 @@ unsigned int arpt_do_table(struct sk_buff *skb,
283 arp = arp_hdr(skb); 283 arp = arp_hdr(skb);
284 do { 284 do {
285 const struct arpt_entry_target *t; 285 const struct arpt_entry_target *t;
286 int hdr_len;
287 286
288 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { 287 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
289 e = arpt_next_entry(e); 288 e = arpt_next_entry(e);
290 continue; 289 continue;
291 } 290 }
292 291
293 hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + 292 ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1);
294 (2 * skb->dev->addr_len);
295 ADD_COUNTER(e->counters, hdr_len, 1);
296 293
297 t = arpt_get_target_c(e); 294 t = arpt_get_target_c(e);
298 295
@@ -713,7 +710,7 @@ static void get_counters(const struct xt_table_info *t,
713 struct arpt_entry *iter; 710 struct arpt_entry *iter;
714 unsigned int cpu; 711 unsigned int cpu;
715 unsigned int i; 712 unsigned int i;
716 unsigned int curcpu; 713 unsigned int curcpu = get_cpu();
717 714
718 /* Instead of clearing (by a previous call to memset()) 715 /* Instead of clearing (by a previous call to memset())
719 * the counters and using adds, we set the counters 716 * the counters and using adds, we set the counters
@@ -723,14 +720,16 @@ static void get_counters(const struct xt_table_info *t,
723 * if new softirq were to run and call ipt_do_table 720 * if new softirq were to run and call ipt_do_table
724 */ 721 */
725 local_bh_disable(); 722 local_bh_disable();
726 curcpu = smp_processor_id();
727
728 i = 0; 723 i = 0;
729 xt_entry_foreach(iter, t->entries[curcpu], t->size) { 724 xt_entry_foreach(iter, t->entries[curcpu], t->size) {
730 SET_COUNTER(counters[i], iter->counters.bcnt, 725 SET_COUNTER(counters[i], iter->counters.bcnt,
731 iter->counters.pcnt); 726 iter->counters.pcnt);
732 ++i; 727 ++i;
733 } 728 }
729 local_bh_enable();
730 /* Processing counters from other cpus, we can let bottom half enabled,
731 * (preemption is disabled)
732 */
734 733
735 for_each_possible_cpu(cpu) { 734 for_each_possible_cpu(cpu) {
736 if (cpu == curcpu) 735 if (cpu == curcpu)
@@ -744,7 +743,7 @@ static void get_counters(const struct xt_table_info *t,
744 } 743 }
745 xt_info_wrunlock(cpu); 744 xt_info_wrunlock(cpu);
746 } 745 }
747 local_bh_enable(); 746 put_cpu();
748} 747}
749 748
750static struct xt_counters *alloc_counters(const struct xt_table *table) 749static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -758,7 +757,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
758 * about). 757 * about).
759 */ 758 */
760 countersize = sizeof(struct xt_counters) * private->number; 759 countersize = sizeof(struct xt_counters) * private->number;
761 counters = vmalloc_node(countersize, numa_node_id()); 760 counters = vmalloc(countersize);
762 761
763 if (counters == NULL) 762 if (counters == NULL)
764 return ERR_PTR(-ENOMEM); 763 return ERR_PTR(-ENOMEM);
@@ -1005,8 +1004,7 @@ static int __do_replace(struct net *net, const char *name,
1005 struct arpt_entry *iter; 1004 struct arpt_entry *iter;
1006 1005
1007 ret = 0; 1006 ret = 0;
1008 counters = vmalloc_node(num_counters * sizeof(struct xt_counters), 1007 counters = vmalloc(num_counters * sizeof(struct xt_counters));
1009 numa_node_id());
1010 if (!counters) { 1008 if (!counters) {
1011 ret = -ENOMEM; 1009 ret = -ENOMEM;
1012 goto out; 1010 goto out;
@@ -1159,7 +1157,7 @@ static int do_add_counters(struct net *net, const void __user *user,
1159 if (len != size + num_counters * sizeof(struct xt_counters)) 1157 if (len != size + num_counters * sizeof(struct xt_counters))
1160 return -EINVAL; 1158 return -EINVAL;
1161 1159
1162 paddc = vmalloc_node(len - size, numa_node_id()); 1160 paddc = vmalloc(len - size);
1163 if (!paddc) 1161 if (!paddc)
1164 return -ENOMEM; 1162 return -ENOMEM;
1165 1163
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index a4e5fc5df4bf..d2c1311cb28d 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -42,7 +42,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
42 42
43static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; 43static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
44static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; 44static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
45static DEFINE_RWLOCK(queue_lock); 45static DEFINE_SPINLOCK(queue_lock);
46static int peer_pid __read_mostly; 46static int peer_pid __read_mostly;
47static unsigned int copy_range __read_mostly; 47static unsigned int copy_range __read_mostly;
48static unsigned int queue_total; 48static unsigned int queue_total;
@@ -72,10 +72,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range)
72 break; 72 break;
73 73
74 case IPQ_COPY_PACKET: 74 case IPQ_COPY_PACKET:
75 copy_mode = mode; 75 if (range > 0xFFFF)
76 range = 0xFFFF;
76 copy_range = range; 77 copy_range = range;
77 if (copy_range > 0xFFFF) 78 copy_mode = mode;
78 copy_range = 0xFFFF;
79 break; 79 break;
80 80
81 default: 81 default:
@@ -101,7 +101,7 @@ ipq_find_dequeue_entry(unsigned long id)
101{ 101{
102 struct nf_queue_entry *entry = NULL, *i; 102 struct nf_queue_entry *entry = NULL, *i;
103 103
104 write_lock_bh(&queue_lock); 104 spin_lock_bh(&queue_lock);
105 105
106 list_for_each_entry(i, &queue_list, list) { 106 list_for_each_entry(i, &queue_list, list) {
107 if ((unsigned long)i == id) { 107 if ((unsigned long)i == id) {
@@ -115,7 +115,7 @@ ipq_find_dequeue_entry(unsigned long id)
115 queue_total--; 115 queue_total--;
116 } 116 }
117 117
118 write_unlock_bh(&queue_lock); 118 spin_unlock_bh(&queue_lock);
119 return entry; 119 return entry;
120} 120}
121 121
@@ -136,9 +136,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
136static void 136static void
137ipq_flush(ipq_cmpfn cmpfn, unsigned long data) 137ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
138{ 138{
139 write_lock_bh(&queue_lock); 139 spin_lock_bh(&queue_lock);
140 __ipq_flush(cmpfn, data); 140 __ipq_flush(cmpfn, data);
141 write_unlock_bh(&queue_lock); 141 spin_unlock_bh(&queue_lock);
142} 142}
143 143
144static struct sk_buff * 144static struct sk_buff *
@@ -152,9 +152,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
152 struct nlmsghdr *nlh; 152 struct nlmsghdr *nlh;
153 struct timeval tv; 153 struct timeval tv;
154 154
155 read_lock_bh(&queue_lock); 155 switch (ACCESS_ONCE(copy_mode)) {
156
157 switch (copy_mode) {
158 case IPQ_COPY_META: 156 case IPQ_COPY_META:
159 case IPQ_COPY_NONE: 157 case IPQ_COPY_NONE:
160 size = NLMSG_SPACE(sizeof(*pmsg)); 158 size = NLMSG_SPACE(sizeof(*pmsg));
@@ -162,26 +160,21 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
162 160
163 case IPQ_COPY_PACKET: 161 case IPQ_COPY_PACKET:
164 if (entry->skb->ip_summed == CHECKSUM_PARTIAL && 162 if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
165 (*errp = skb_checksum_help(entry->skb))) { 163 (*errp = skb_checksum_help(entry->skb)))
166 read_unlock_bh(&queue_lock);
167 return NULL; 164 return NULL;
168 } 165
169 if (copy_range == 0 || copy_range > entry->skb->len) 166 data_len = ACCESS_ONCE(copy_range);
167 if (data_len == 0 || data_len > entry->skb->len)
170 data_len = entry->skb->len; 168 data_len = entry->skb->len;
171 else
172 data_len = copy_range;
173 169
174 size = NLMSG_SPACE(sizeof(*pmsg) + data_len); 170 size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
175 break; 171 break;
176 172
177 default: 173 default:
178 *errp = -EINVAL; 174 *errp = -EINVAL;
179 read_unlock_bh(&queue_lock);
180 return NULL; 175 return NULL;
181 } 176 }
182 177
183 read_unlock_bh(&queue_lock);
184
185 skb = alloc_skb(size, GFP_ATOMIC); 178 skb = alloc_skb(size, GFP_ATOMIC);
186 if (!skb) 179 if (!skb)
187 goto nlmsg_failure; 180 goto nlmsg_failure;
@@ -242,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
242 if (nskb == NULL) 235 if (nskb == NULL)
243 return status; 236 return status;
244 237
245 write_lock_bh(&queue_lock); 238 spin_lock_bh(&queue_lock);
246 239
247 if (!peer_pid) 240 if (!peer_pid)
248 goto err_out_free_nskb; 241 goto err_out_free_nskb;
@@ -266,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
266 259
267 __ipq_enqueue_entry(entry); 260 __ipq_enqueue_entry(entry);
268 261
269 write_unlock_bh(&queue_lock); 262 spin_unlock_bh(&queue_lock);
270 return status; 263 return status;
271 264
272err_out_free_nskb: 265err_out_free_nskb:
273 kfree_skb(nskb); 266 kfree_skb(nskb);
274 267
275err_out_unlock: 268err_out_unlock:
276 write_unlock_bh(&queue_lock); 269 spin_unlock_bh(&queue_lock);
277 return status; 270 return status;
278} 271}
279 272
@@ -342,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range)
342{ 335{
343 int status; 336 int status;
344 337
345 write_lock_bh(&queue_lock); 338 spin_lock_bh(&queue_lock);
346 status = __ipq_set_mode(mode, range); 339 status = __ipq_set_mode(mode, range);
347 write_unlock_bh(&queue_lock); 340 spin_unlock_bh(&queue_lock);
348 return status; 341 return status;
349} 342}
350 343
@@ -440,11 +433,11 @@ __ipq_rcv_skb(struct sk_buff *skb)
440 if (security_netlink_recv(skb, CAP_NET_ADMIN)) 433 if (security_netlink_recv(skb, CAP_NET_ADMIN))
441 RCV_SKB_FAIL(-EPERM); 434 RCV_SKB_FAIL(-EPERM);
442 435
443 write_lock_bh(&queue_lock); 436 spin_lock_bh(&queue_lock);
444 437
445 if (peer_pid) { 438 if (peer_pid) {
446 if (peer_pid != pid) { 439 if (peer_pid != pid) {
447 write_unlock_bh(&queue_lock); 440 spin_unlock_bh(&queue_lock);
448 RCV_SKB_FAIL(-EBUSY); 441 RCV_SKB_FAIL(-EBUSY);
449 } 442 }
450 } else { 443 } else {
@@ -452,7 +445,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
452 peer_pid = pid; 445 peer_pid = pid;
453 } 446 }
454 447
455 write_unlock_bh(&queue_lock); 448 spin_unlock_bh(&queue_lock);
456 449
457 status = ipq_receive_peer(NLMSG_DATA(nlh), type, 450 status = ipq_receive_peer(NLMSG_DATA(nlh), type,
458 nlmsglen - NLMSG_LENGTH(0)); 451 nlmsglen - NLMSG_LENGTH(0));
@@ -497,10 +490,10 @@ ipq_rcv_nl_event(struct notifier_block *this,
497 struct netlink_notify *n = ptr; 490 struct netlink_notify *n = ptr;
498 491
499 if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { 492 if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) {
500 write_lock_bh(&queue_lock); 493 spin_lock_bh(&queue_lock);
501 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) 494 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
502 __ipq_reset(); 495 __ipq_reset();
503 write_unlock_bh(&queue_lock); 496 spin_unlock_bh(&queue_lock);
504 } 497 }
505 return NOTIFY_DONE; 498 return NOTIFY_DONE;
506} 499}
@@ -527,7 +520,7 @@ static ctl_table ipq_table[] = {
527#ifdef CONFIG_PROC_FS 520#ifdef CONFIG_PROC_FS
528static int ip_queue_show(struct seq_file *m, void *v) 521static int ip_queue_show(struct seq_file *m, void *v)
529{ 522{
530 read_lock_bh(&queue_lock); 523 spin_lock_bh(&queue_lock);
531 524
532 seq_printf(m, 525 seq_printf(m,
533 "Peer PID : %d\n" 526 "Peer PID : %d\n"
@@ -545,7 +538,7 @@ static int ip_queue_show(struct seq_file *m, void *v)
545 queue_dropped, 538 queue_dropped,
546 queue_user_dropped); 539 queue_user_dropped);
547 540
548 read_unlock_bh(&queue_lock); 541 spin_unlock_bh(&queue_lock);
549 return 0; 542 return 0;
550} 543}
551 544
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4b6c5ca610fc..c439721b165a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -364,7 +364,7 @@ ipt_do_table(struct sk_buff *skb,
364 goto no_match; 364 goto no_match;
365 } 365 }
366 366
367 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); 367 ADD_COUNTER(e->counters, skb->len, 1);
368 368
369 t = ipt_get_target(e); 369 t = ipt_get_target(e);
370 IP_NF_ASSERT(t->u.kernel.target); 370 IP_NF_ASSERT(t->u.kernel.target);
@@ -884,7 +884,7 @@ get_counters(const struct xt_table_info *t,
884 struct ipt_entry *iter; 884 struct ipt_entry *iter;
885 unsigned int cpu; 885 unsigned int cpu;
886 unsigned int i; 886 unsigned int i;
887 unsigned int curcpu; 887 unsigned int curcpu = get_cpu();
888 888
889 /* Instead of clearing (by a previous call to memset()) 889 /* Instead of clearing (by a previous call to memset())
890 * the counters and using adds, we set the counters 890 * the counters and using adds, we set the counters
@@ -894,14 +894,16 @@ get_counters(const struct xt_table_info *t,
894 * if new softirq were to run and call ipt_do_table 894 * if new softirq were to run and call ipt_do_table
895 */ 895 */
896 local_bh_disable(); 896 local_bh_disable();
897 curcpu = smp_processor_id();
898
899 i = 0; 897 i = 0;
900 xt_entry_foreach(iter, t->entries[curcpu], t->size) { 898 xt_entry_foreach(iter, t->entries[curcpu], t->size) {
901 SET_COUNTER(counters[i], iter->counters.bcnt, 899 SET_COUNTER(counters[i], iter->counters.bcnt,
902 iter->counters.pcnt); 900 iter->counters.pcnt);
903 ++i; 901 ++i;
904 } 902 }
903 local_bh_enable();
904 /* Processing counters from other cpus, we can let bottom half enabled,
905 * (preemption is disabled)
906 */
905 907
906 for_each_possible_cpu(cpu) { 908 for_each_possible_cpu(cpu) {
907 if (cpu == curcpu) 909 if (cpu == curcpu)
@@ -915,7 +917,7 @@ get_counters(const struct xt_table_info *t,
915 } 917 }
916 xt_info_wrunlock(cpu); 918 xt_info_wrunlock(cpu);
917 } 919 }
918 local_bh_enable(); 920 put_cpu();
919} 921}
920 922
921static struct xt_counters *alloc_counters(const struct xt_table *table) 923static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -928,7 +930,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
928 (other than comefrom, which userspace doesn't care 930 (other than comefrom, which userspace doesn't care
929 about). */ 931 about). */
930 countersize = sizeof(struct xt_counters) * private->number; 932 countersize = sizeof(struct xt_counters) * private->number;
931 counters = vmalloc_node(countersize, numa_node_id()); 933 counters = vmalloc(countersize);
932 934
933 if (counters == NULL) 935 if (counters == NULL)
934 return ERR_PTR(-ENOMEM); 936 return ERR_PTR(-ENOMEM);
@@ -1352,7 +1354,7 @@ do_add_counters(struct net *net, const void __user *user,
1352 if (len != size + num_counters * sizeof(struct xt_counters)) 1354 if (len != size + num_counters * sizeof(struct xt_counters))
1353 return -EINVAL; 1355 return -EINVAL;
1354 1356
1355 paddc = vmalloc_node(len - size, numa_node_id()); 1357 paddc = vmalloc(len - size);
1356 if (!paddc) 1358 if (!paddc)
1357 return -ENOMEM; 1359 return -ENOMEM;
1358 1360
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index f91c94b9a790..3a43cf36db87 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -53,12 +53,13 @@ struct clusterip_config {
53#endif 53#endif
54 enum clusterip_hashmode hash_mode; /* which hashing mode */ 54 enum clusterip_hashmode hash_mode; /* which hashing mode */
55 u_int32_t hash_initval; /* hash initialization */ 55 u_int32_t hash_initval; /* hash initialization */
56 struct rcu_head rcu;
56}; 57};
57 58
58static LIST_HEAD(clusterip_configs); 59static LIST_HEAD(clusterip_configs);
59 60
60/* clusterip_lock protects the clusterip_configs list */ 61/* clusterip_lock protects the clusterip_configs list */
61static DEFINE_RWLOCK(clusterip_lock); 62static DEFINE_SPINLOCK(clusterip_lock);
62 63
63#ifdef CONFIG_PROC_FS 64#ifdef CONFIG_PROC_FS
64static const struct file_operations clusterip_proc_fops; 65static const struct file_operations clusterip_proc_fops;
@@ -71,11 +72,17 @@ clusterip_config_get(struct clusterip_config *c)
71 atomic_inc(&c->refcount); 72 atomic_inc(&c->refcount);
72} 73}
73 74
75
76static void clusterip_config_rcu_free(struct rcu_head *head)
77{
78 kfree(container_of(head, struct clusterip_config, rcu));
79}
80
74static inline void 81static inline void
75clusterip_config_put(struct clusterip_config *c) 82clusterip_config_put(struct clusterip_config *c)
76{ 83{
77 if (atomic_dec_and_test(&c->refcount)) 84 if (atomic_dec_and_test(&c->refcount))
78 kfree(c); 85 call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
79} 86}
80 87
81/* decrease the count of entries using/referencing this config. If last 88/* decrease the count of entries using/referencing this config. If last
@@ -84,10 +91,11 @@ clusterip_config_put(struct clusterip_config *c)
84static inline void 91static inline void
85clusterip_config_entry_put(struct clusterip_config *c) 92clusterip_config_entry_put(struct clusterip_config *c)
86{ 93{
87 write_lock_bh(&clusterip_lock); 94 local_bh_disable();
88 if (atomic_dec_and_test(&c->entries)) { 95 if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) {
89 list_del(&c->list); 96 list_del_rcu(&c->list);
90 write_unlock_bh(&clusterip_lock); 97 spin_unlock(&clusterip_lock);
98 local_bh_enable();
91 99
92 dev_mc_del(c->dev, c->clustermac); 100 dev_mc_del(c->dev, c->clustermac);
93 dev_put(c->dev); 101 dev_put(c->dev);
@@ -100,7 +108,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
100#endif 108#endif
101 return; 109 return;
102 } 110 }
103 write_unlock_bh(&clusterip_lock); 111 local_bh_enable();
104} 112}
105 113
106static struct clusterip_config * 114static struct clusterip_config *
@@ -108,7 +116,7 @@ __clusterip_config_find(__be32 clusterip)
108{ 116{
109 struct clusterip_config *c; 117 struct clusterip_config *c;
110 118
111 list_for_each_entry(c, &clusterip_configs, list) { 119 list_for_each_entry_rcu(c, &clusterip_configs, list) {
112 if (c->clusterip == clusterip) 120 if (c->clusterip == clusterip)
113 return c; 121 return c;
114 } 122 }
@@ -121,16 +129,15 @@ clusterip_config_find_get(__be32 clusterip, int entry)
121{ 129{
122 struct clusterip_config *c; 130 struct clusterip_config *c;
123 131
124 read_lock_bh(&clusterip_lock); 132 rcu_read_lock_bh();
125 c = __clusterip_config_find(clusterip); 133 c = __clusterip_config_find(clusterip);
126 if (!c) { 134 if (c) {
127 read_unlock_bh(&clusterip_lock); 135 if (unlikely(!atomic_inc_not_zero(&c->refcount)))
128 return NULL; 136 c = NULL;
137 else if (entry)
138 atomic_inc(&c->entries);
129 } 139 }
130 atomic_inc(&c->refcount); 140 rcu_read_unlock_bh();
131 if (entry)
132 atomic_inc(&c->entries);
133 read_unlock_bh(&clusterip_lock);
134 141
135 return c; 142 return c;
136} 143}
@@ -181,9 +188,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
181 } 188 }
182#endif 189#endif
183 190
184 write_lock_bh(&clusterip_lock); 191 spin_lock_bh(&clusterip_lock);
185 list_add(&c->list, &clusterip_configs); 192 list_add_rcu(&c->list, &clusterip_configs);
186 write_unlock_bh(&clusterip_lock); 193 spin_unlock_bh(&clusterip_lock);
187 194
188 return c; 195 return c;
189} 196}
@@ -462,7 +469,7 @@ struct arp_payload {
462 __be32 src_ip; 469 __be32 src_ip;
463 u_int8_t dst_hw[ETH_ALEN]; 470 u_int8_t dst_hw[ETH_ALEN];
464 __be32 dst_ip; 471 __be32 dst_ip;
465} __attribute__ ((packed)); 472} __packed;
466 473
467#ifdef DEBUG 474#ifdef DEBUG
468static void arp_print(struct arp_payload *payload) 475static void arp_print(struct arp_payload *payload)
@@ -733,6 +740,9 @@ static void __exit clusterip_tg_exit(void)
733#endif 740#endif
734 nf_unregister_hook(&cip_arp_ops); 741 nf_unregister_hook(&cip_arp_ops);
735 xt_unregister_target(&clusterip_tg_reg); 742 xt_unregister_target(&clusterip_tg_reg);
743
744 /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
745 rcu_barrier_bh();
736} 746}
737 747
738module_init(clusterip_tg_init); 748module_init(clusterip_tg_init);
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 5234f4f3499a..915fc17d7ce2 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -13,6 +13,7 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/skbuff.h> 15#include <linux/skbuff.h>
16#include <linux/if_arp.h>
16#include <linux/ip.h> 17#include <linux/ip.h>
17#include <net/icmp.h> 18#include <net/icmp.h>
18#include <net/udp.h> 19#include <net/udp.h>
@@ -363,6 +364,42 @@ static void dump_packet(const struct nf_loginfo *info,
363 /* maxlen = 230+ 91 + 230 + 252 = 803 */ 364 /* maxlen = 230+ 91 + 230 + 252 = 803 */
364} 365}
365 366
367static void dump_mac_header(const struct nf_loginfo *info,
368 const struct sk_buff *skb)
369{
370 struct net_device *dev = skb->dev;
371 unsigned int logflags = 0;
372
373 if (info->type == NF_LOG_TYPE_LOG)
374 logflags = info->u.log.logflags;
375
376 if (!(logflags & IPT_LOG_MACDECODE))
377 goto fallback;
378
379 switch (dev->type) {
380 case ARPHRD_ETHER:
381 printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
382 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
383 ntohs(eth_hdr(skb)->h_proto));
384 return;
385 default:
386 break;
387 }
388
389fallback:
390 printk("MAC=");
391 if (dev->hard_header_len &&
392 skb->mac_header != skb->network_header) {
393 const unsigned char *p = skb_mac_header(skb);
394 unsigned int i;
395
396 printk("%02x", *p++);
397 for (i = 1; i < dev->hard_header_len; i++, p++)
398 printk(":%02x", *p);
399 }
400 printk(" ");
401}
402
366static struct nf_loginfo default_loginfo = { 403static struct nf_loginfo default_loginfo = {
367 .type = NF_LOG_TYPE_LOG, 404 .type = NF_LOG_TYPE_LOG,
368 .u = { 405 .u = {
@@ -404,20 +441,9 @@ ipt_log_packet(u_int8_t pf,
404 } 441 }
405#endif 442#endif
406 443
407 if (in && !out) { 444 /* MAC logging for input path only. */
408 /* MAC logging for input chain only. */ 445 if (in && !out)
409 printk("MAC="); 446 dump_mac_header(loginfo, skb);
410 if (skb->dev && skb->dev->hard_header_len &&
411 skb->mac_header != skb->network_header) {
412 int i;
413 const unsigned char *p = skb_mac_header(skb);
414 for (i = 0; i < skb->dev->hard_header_len; i++,p++)
415 printk("%02x%c", *p,
416 i==skb->dev->hard_header_len - 1
417 ? ' ':':');
418 } else
419 printk(" ");
420 }
421 447
422 dump_packet(loginfo, skb, 0); 448 dump_packet(loginfo, skb, 0);
423 printk("\n"); 449 printk("\n");
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index f43867d1697f..6cdb298f1035 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -48,7 +48,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
48 48
49 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || 49 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
50 par->hooknum == NF_INET_POST_ROUTING || 50 par->hooknum == NF_INET_POST_ROUTING ||
51 par->hooknum == NF_INET_LOCAL_OUT); 51 par->hooknum == NF_INET_LOCAL_OUT ||
52 par->hooknum == NF_INET_LOCAL_IN);
52 ct = nf_ct_get(skb, &ctinfo); 53 ct = nf_ct_get(skb, &ctinfo);
53 54
54 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); 55 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
@@ -77,7 +78,8 @@ static struct xt_target netmap_tg_reg __read_mostly = {
77 .table = "nat", 78 .table = "nat",
78 .hooks = (1 << NF_INET_PRE_ROUTING) | 79 .hooks = (1 << NF_INET_PRE_ROUTING) |
79 (1 << NF_INET_POST_ROUTING) | 80 (1 << NF_INET_POST_ROUTING) |
80 (1 << NF_INET_LOCAL_OUT), 81 (1 << NF_INET_LOCAL_OUT) |
82 (1 << NF_INET_LOCAL_IN),
81 .checkentry = netmap_tg_check, 83 .checkentry = netmap_tg_check,
82 .me = THIS_MODULE 84 .me = THIS_MODULE
83}; 85};
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index f5f4a888e4ec..b254dafaf429 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -95,10 +95,11 @@ static void send_reset(struct sk_buff *oldskb, int hook)
95 } 95 }
96 96
97 tcph->rst = 1; 97 tcph->rst = 1;
98 tcph->check = tcp_v4_check(sizeof(struct tcphdr), 98 tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
99 niph->saddr, niph->daddr, 99 niph->daddr, 0);
100 csum_partial(tcph, 100 nskb->ip_summed = CHECKSUM_PARTIAL;
101 sizeof(struct tcphdr), 0)); 101 nskb->csum_start = (unsigned char *)tcph - nskb->head;
102 nskb->csum_offset = offsetof(struct tcphdr, check);
102 103
103 addr_type = RTN_UNSPEC; 104 addr_type = RTN_UNSPEC;
104 if (hook != NF_INET_FORWARD 105 if (hook != NF_INET_FORWARD
@@ -109,13 +110,12 @@ static void send_reset(struct sk_buff *oldskb, int hook)
109 addr_type = RTN_LOCAL; 110 addr_type = RTN_LOCAL;
110 111
111 /* ip_route_me_harder expects skb->dst to be set */ 112 /* ip_route_me_harder expects skb->dst to be set */
112 skb_dst_set(nskb, dst_clone(skb_dst(oldskb))); 113 skb_dst_set_noref(nskb, skb_dst(oldskb));
113 114
114 if (ip_route_me_harder(nskb, addr_type)) 115 if (ip_route_me_harder(nskb, addr_type))
115 goto free_nskb; 116 goto free_nskb;
116 117
117 niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT); 118 niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT);
118 nskb->ip_summed = CHECKSUM_NONE;
119 119
120 /* "Never happens" */ 120 /* "Never happens" */
121 if (nskb->len > dst_mtu(skb_dst(nskb))) 121 if (nskb->len > dst_mtu(skb_dst(nskb)))
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index cb763ae9ed90..eab8de32f200 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -66,6 +66,11 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
66 const struct net_device *out, 66 const struct net_device *out,
67 int (*okfn)(struct sk_buff *)) 67 int (*okfn)(struct sk_buff *))
68{ 68{
69 struct inet_sock *inet = inet_sk(skb->sk);
70
71 if (inet && inet->nodefrag)
72 return NF_ACCEPT;
73
69#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 74#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
70#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) 75#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
71 /* Previously seen (loopback)? Ignore. Do this before 76 /* Previously seen (loopback)? Ignore. Do this before
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 4f8bddb760c9..8c8632d9b93c 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -261,14 +261,9 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
261 rcu_read_lock(); 261 rcu_read_lock();
262 proto = __nf_nat_proto_find(orig_tuple->dst.protonum); 262 proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
263 263
264 /* Change protocol info to have some randomization */
265 if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
266 proto->unique_tuple(tuple, range, maniptype, ct);
267 goto out;
268 }
269
270 /* Only bother mapping if it's not already in range and unique */ 264 /* Only bother mapping if it's not already in range and unique */
271 if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || 265 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM) &&
266 (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
272 proto->in_range(tuple, maniptype, &range->min, &range->max)) && 267 proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
273 !nf_nat_used_tuple(tuple, ct)) 268 !nf_nat_used_tuple(tuple, ct))
274 goto out; 269 goto out;
@@ -440,7 +435,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
440 if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) 435 if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
441 return 0; 436 return 0;
442 437
443 inside = (void *)skb->data + ip_hdrlen(skb); 438 inside = (void *)skb->data + hdrlen;
444 439
445 /* We're actually going to mangle it beyond trivial checksum 440 /* We're actually going to mangle it beyond trivial checksum
446 adjustment, so make sure the current checksum is correct. */ 441 adjustment, so make sure the current checksum is correct. */
@@ -470,12 +465,10 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
470 /* rcu_read_lock()ed by nf_hook_slow */ 465 /* rcu_read_lock()ed by nf_hook_slow */
471 l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); 466 l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
472 467
473 if (!nf_ct_get_tuple(skb, 468 if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr),
474 ip_hdrlen(skb) + sizeof(struct icmphdr), 469 (hdrlen +
475 (ip_hdrlen(skb) +
476 sizeof(struct icmphdr) + inside->ip.ihl * 4), 470 sizeof(struct icmphdr) + inside->ip.ihl * 4),
477 (u_int16_t)AF_INET, 471 (u_int16_t)AF_INET, inside->ip.protocol,
478 inside->ip.protocol,
479 &inner, l3proto, l4proto)) 472 &inner, l3proto, l4proto))
480 return 0; 473 return 0;
481 474
@@ -484,15 +477,13 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
484 pass all hooks (locally-generated ICMP). Consider incoming 477 pass all hooks (locally-generated ICMP). Consider incoming
485 packet: PREROUTING (DST manip), routing produces ICMP, goes 478 packet: PREROUTING (DST manip), routing produces ICMP, goes
486 through POSTROUTING (which must correct the DST manip). */ 479 through POSTROUTING (which must correct the DST manip). */
487 if (!manip_pkt(inside->ip.protocol, skb, 480 if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp),
488 ip_hdrlen(skb) + sizeof(inside->icmp), 481 &ct->tuplehash[!dir].tuple, !manip))
489 &ct->tuplehash[!dir].tuple,
490 !manip))
491 return 0; 482 return 0;
492 483
493 if (skb->ip_summed != CHECKSUM_PARTIAL) { 484 if (skb->ip_summed != CHECKSUM_PARTIAL) {
494 /* Reloading "inside" here since manip_pkt inner. */ 485 /* Reloading "inside" here since manip_pkt inner. */
495 inside = (void *)skb->data + ip_hdrlen(skb); 486 inside = (void *)skb->data + hdrlen;
496 inside->icmp.checksum = 0; 487 inside->icmp.checksum = 0;
497 inside->icmp.checksum = 488 inside->icmp.checksum =
498 csum_fold(skb_checksum(skb, hdrlen, 489 csum_fold(skb_checksum(skb, hdrlen,
@@ -742,7 +733,7 @@ static int __init nf_nat_init(void)
742 spin_unlock_bh(&nf_nat_lock); 733 spin_unlock_bh(&nf_nat_lock);
743 734
744 /* Initialize fake conntrack so that NAT will skip it */ 735 /* Initialize fake conntrack so that NAT will skip it */
745 nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; 736 nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
746 737
747 l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); 738 l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
748 739
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
index 6c4f11f51446..3e61faf23a9a 100644
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -34,7 +34,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
34} 34}
35EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); 35EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
36 36
37bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, 37void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
38 const struct nf_nat_range *range, 38 const struct nf_nat_range *range,
39 enum nf_nat_manip_type maniptype, 39 enum nf_nat_manip_type maniptype,
40 const struct nf_conn *ct, 40 const struct nf_conn *ct,
@@ -53,7 +53,7 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
53 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { 53 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
54 /* If it's dst rewrite, can't change port */ 54 /* If it's dst rewrite, can't change port */
55 if (maniptype == IP_NAT_MANIP_DST) 55 if (maniptype == IP_NAT_MANIP_DST)
56 return false; 56 return;
57 57
58 if (ntohs(*portptr) < 1024) { 58 if (ntohs(*portptr) < 1024) {
59 /* Loose convention: >> 512 is credential passing */ 59 /* Loose convention: >> 512 is credential passing */
@@ -81,15 +81,15 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
81 else 81 else
82 off = *rover; 82 off = *rover;
83 83
84 for (i = 0; i < range_size; i++, off++) { 84 for (i = 0; ; ++off) {
85 *portptr = htons(min + off % range_size); 85 *portptr = htons(min + off % range_size);
86 if (nf_nat_used_tuple(tuple, ct)) 86 if (++i != range_size && nf_nat_used_tuple(tuple, ct))
87 continue; 87 continue;
88 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) 88 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
89 *rover = off; 89 *rover = off;
90 return true; 90 return;
91 } 91 }
92 return false; 92 return;
93} 93}
94EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); 94EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
95 95
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
index 22485ce306d4..570faf2667b2 100644
--- a/net/ipv4/netfilter/nf_nat_proto_dccp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c
@@ -22,14 +22,14 @@
22 22
23static u_int16_t dccp_port_rover; 23static u_int16_t dccp_port_rover;
24 24
25static bool 25static void
26dccp_unique_tuple(struct nf_conntrack_tuple *tuple, 26dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
27 const struct nf_nat_range *range, 27 const struct nf_nat_range *range,
28 enum nf_nat_manip_type maniptype, 28 enum nf_nat_manip_type maniptype,
29 const struct nf_conn *ct) 29 const struct nf_conn *ct)
30{ 30{
31 return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 31 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
32 &dccp_port_rover); 32 &dccp_port_rover);
33} 33}
34 34
35static bool 35static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index d7e89201351e..bc8d83a31c73 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -37,7 +37,7 @@ MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
37MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); 37MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
38 38
39/* generate unique tuple ... */ 39/* generate unique tuple ... */
40static bool 40static void
41gre_unique_tuple(struct nf_conntrack_tuple *tuple, 41gre_unique_tuple(struct nf_conntrack_tuple *tuple,
42 const struct nf_nat_range *range, 42 const struct nf_nat_range *range,
43 enum nf_nat_manip_type maniptype, 43 enum nf_nat_manip_type maniptype,
@@ -50,7 +50,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
50 /* If there is no master conntrack we are not PPTP, 50 /* If there is no master conntrack we are not PPTP,
51 do not change tuples */ 51 do not change tuples */
52 if (!ct->master) 52 if (!ct->master)
53 return false; 53 return;
54 54
55 if (maniptype == IP_NAT_MANIP_SRC) 55 if (maniptype == IP_NAT_MANIP_SRC)
56 keyptr = &tuple->src.u.gre.key; 56 keyptr = &tuple->src.u.gre.key;
@@ -68,14 +68,14 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
68 68
69 pr_debug("min = %u, range_size = %u\n", min, range_size); 69 pr_debug("min = %u, range_size = %u\n", min, range_size);
70 70
71 for (i = 0; i < range_size; i++, key++) { 71 for (i = 0; ; ++key) {
72 *keyptr = htons(min + key % range_size); 72 *keyptr = htons(min + key % range_size);
73 if (!nf_nat_used_tuple(tuple, ct)) 73 if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
74 return true; 74 return;
75 } 75 }
76 76
77 pr_debug("%p: no NAT mapping\n", ct); 77 pr_debug("%p: no NAT mapping\n", ct);
78 return false; 78 return;
79} 79}
80 80
81/* manipulate a GRE packet according to maniptype */ 81/* manipulate a GRE packet according to maniptype */
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 19a8b0b07d8e..5744c3ec847c 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -27,7 +27,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
27 ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); 27 ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
28} 28}
29 29
30static bool 30static void
31icmp_unique_tuple(struct nf_conntrack_tuple *tuple, 31icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
32 const struct nf_nat_range *range, 32 const struct nf_nat_range *range,
33 enum nf_nat_manip_type maniptype, 33 enum nf_nat_manip_type maniptype,
@@ -42,13 +42,13 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
42 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) 42 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
43 range_size = 0xFFFF; 43 range_size = 0xFFFF;
44 44
45 for (i = 0; i < range_size; i++, id++) { 45 for (i = 0; ; ++id) {
46 tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + 46 tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
47 (id % range_size)); 47 (id % range_size));
48 if (!nf_nat_used_tuple(tuple, ct)) 48 if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
49 return true; 49 return;
50 } 50 }
51 return false; 51 return;
52} 52}
53 53
54static bool 54static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
index 3fc598eeeb1a..756331d42661 100644
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -16,14 +16,14 @@
16 16
17static u_int16_t nf_sctp_port_rover; 17static u_int16_t nf_sctp_port_rover;
18 18
19static bool 19static void
20sctp_unique_tuple(struct nf_conntrack_tuple *tuple, 20sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
21 const struct nf_nat_range *range, 21 const struct nf_nat_range *range,
22 enum nf_nat_manip_type maniptype, 22 enum nf_nat_manip_type maniptype,
23 const struct nf_conn *ct) 23 const struct nf_conn *ct)
24{ 24{
25 return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 25 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
26 &nf_sctp_port_rover); 26 &nf_sctp_port_rover);
27} 27}
28 28
29static bool 29static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 399e2cfa263b..aa460a595d5d 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -20,14 +20,13 @@
20 20
21static u_int16_t tcp_port_rover; 21static u_int16_t tcp_port_rover;
22 22
23static bool 23static void
24tcp_unique_tuple(struct nf_conntrack_tuple *tuple, 24tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
25 const struct nf_nat_range *range, 25 const struct nf_nat_range *range,
26 enum nf_nat_manip_type maniptype, 26 enum nf_nat_manip_type maniptype,
27 const struct nf_conn *ct) 27 const struct nf_conn *ct)
28{ 28{
29 return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 29 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover);
30 &tcp_port_rover);
31} 30}
32 31
33static bool 32static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index 9e61c79492e4..dfe65c7e2925 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -19,14 +19,13 @@
19 19
20static u_int16_t udp_port_rover; 20static u_int16_t udp_port_rover;
21 21
22static bool 22static void
23udp_unique_tuple(struct nf_conntrack_tuple *tuple, 23udp_unique_tuple(struct nf_conntrack_tuple *tuple,
24 const struct nf_nat_range *range, 24 const struct nf_nat_range *range,
25 enum nf_nat_manip_type maniptype, 25 enum nf_nat_manip_type maniptype,
26 const struct nf_conn *ct) 26 const struct nf_conn *ct)
27{ 27{
28 return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 28 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover);
29 &udp_port_rover);
30} 29}
31 30
32static bool 31static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
index 440a229bbd87..3cc8c8af39ef 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udplite.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c
@@ -18,14 +18,14 @@
18 18
19static u_int16_t udplite_port_rover; 19static u_int16_t udplite_port_rover;
20 20
21static bool 21static void
22udplite_unique_tuple(struct nf_conntrack_tuple *tuple, 22udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
23 const struct nf_nat_range *range, 23 const struct nf_nat_range *range,
24 enum nf_nat_manip_type maniptype, 24 enum nf_nat_manip_type maniptype,
25 const struct nf_conn *ct) 25 const struct nf_conn *ct)
26{ 26{
27 return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 27 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
28 &udplite_port_rover); 28 &udplite_port_rover);
29} 29}
30 30
31static bool 31static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index 14381c62acea..a50f2bc1c732 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -26,14 +26,14 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
26 return true; 26 return true;
27} 27}
28 28
29static bool unknown_unique_tuple(struct nf_conntrack_tuple *tuple, 29static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
30 const struct nf_nat_range *range, 30 const struct nf_nat_range *range,
31 enum nf_nat_manip_type maniptype, 31 enum nf_nat_manip_type maniptype,
32 const struct nf_conn *ct) 32 const struct nf_conn *ct)
33{ 33{
34 /* Sorry: we can't help you; if it's not unique, we can't frob 34 /* Sorry: we can't help you; if it's not unique, we can't frob
35 anything. */ 35 anything. */
36 return false; 36 return;
37} 37}
38 38
39static bool 39static bool
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 98ed78281aee..ebbd319f62f5 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -28,7 +28,8 @@
28 28
29#define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ 29#define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
30 (1 << NF_INET_POST_ROUTING) | \ 30 (1 << NF_INET_POST_ROUTING) | \
31 (1 << NF_INET_LOCAL_OUT)) 31 (1 << NF_INET_LOCAL_OUT) | \
32 (1 << NF_INET_LOCAL_IN))
32 33
33static const struct xt_table nat_table = { 34static const struct xt_table nat_table = {
34 .name = "nat", 35 .name = "nat",
@@ -45,7 +46,8 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
45 enum ip_conntrack_info ctinfo; 46 enum ip_conntrack_info ctinfo;
46 const struct nf_nat_multi_range_compat *mr = par->targinfo; 47 const struct nf_nat_multi_range_compat *mr = par->targinfo;
47 48
48 NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); 49 NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING ||
50 par->hooknum == NF_INET_LOCAL_IN);
49 51
50 ct = nf_ct_get(skb, &ctinfo); 52 ct = nf_ct_get(skb, &ctinfo);
51 53
@@ -99,7 +101,7 @@ static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
99 return 0; 101 return 0;
100} 102}
101 103
102unsigned int 104static unsigned int
103alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) 105alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
104{ 106{
105 /* Force range to this IP; let proto decide mapping for 107 /* Force range to this IP; let proto decide mapping for
@@ -141,7 +143,7 @@ static struct xt_target ipt_snat_reg __read_mostly = {
141 .target = ipt_snat_target, 143 .target = ipt_snat_target,
142 .targetsize = sizeof(struct nf_nat_multi_range_compat), 144 .targetsize = sizeof(struct nf_nat_multi_range_compat),
143 .table = "nat", 145 .table = "nat",
144 .hooks = 1 << NF_INET_POST_ROUTING, 146 .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN),
145 .checkentry = ipt_snat_checkentry, 147 .checkentry = ipt_snat_checkentry,
146 .family = AF_INET, 148 .family = AF_INET,
147}; 149};
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index beb25819c9c9..95481fee8bdb 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -98,7 +98,7 @@ nf_nat_fn(unsigned int hooknum,
98 return NF_ACCEPT; 98 return NF_ACCEPT;
99 99
100 /* Don't try to NAT if this packet is not conntracked */ 100 /* Don't try to NAT if this packet is not conntracked */
101 if (ct == &nf_conntrack_untracked) 101 if (nf_ct_is_untracked(ct))
102 return NF_ACCEPT; 102 return NF_ACCEPT;
103 103
104 nat = nfct_nat(ct); 104 nat = nfct_nat(ct);
@@ -131,13 +131,7 @@ nf_nat_fn(unsigned int hooknum,
131 if (!nf_nat_initialized(ct, maniptype)) { 131 if (!nf_nat_initialized(ct, maniptype)) {
132 unsigned int ret; 132 unsigned int ret;
133 133
134 if (hooknum == NF_INET_LOCAL_IN) 134 ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
135 /* LOCAL_IN hook doesn't have a chain! */
136 ret = alloc_null_binding(ct, hooknum);
137 else
138 ret = nf_nat_rule_find(skb, hooknum, in, out,
139 ct);
140
141 if (ret != NF_ACCEPT) 135 if (ret != NF_ACCEPT)
142 return ret; 136 return ret;
143 } else 137 } else
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 3dc9914c1dce..4ae1f203f7cb 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -252,6 +252,7 @@ static const struct snmp_mib snmp4_net_list[] = {
252 SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), 252 SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP),
253 SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), 253 SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
254 SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), 254 SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
255 SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
255 SNMP_MIB_SENTINEL 256 SNMP_MIB_SENTINEL
256}; 257};
257 258
@@ -342,10 +343,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
342 IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, 343 IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
343 sysctl_ip_default_ttl); 344 sysctl_ip_default_ttl);
344 345
346 BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
345 for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) 347 for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
346 seq_printf(seq, " %lu", 348 seq_printf(seq, " %llu",
347 snmp_fold_field((void __percpu **)net->mib.ip_statistics, 349 snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
348 snmp4_ipstats_list[i].entry)); 350 snmp4_ipstats_list[i].entry,
351 offsetof(struct ipstats_mib, syncp)));
349 352
350 icmp_put(seq); /* RFC 2011 compatibility */ 353 icmp_put(seq); /* RFC 2011 compatibility */
351 icmpmsg_put(seq); 354 icmpmsg_put(seq);
@@ -431,9 +434,10 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
431 434
432 seq_puts(seq, "\nIpExt:"); 435 seq_puts(seq, "\nIpExt:");
433 for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) 436 for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
434 seq_printf(seq, " %lu", 437 seq_printf(seq, " %llu",
435 snmp_fold_field((void __percpu **)net->mib.ip_statistics, 438 snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
436 snmp4_ipextstats_list[i].entry)); 439 snmp4_ipextstats_list[i].entry,
440 offsetof(struct ipstats_mib, syncp)));
437 441
438 seq_putc(seq, '\n'); 442 seq_putc(seq, '\n');
439 return 0; 443 return 0;
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 542f22fc98b3..f2d297351405 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -52,6 +52,7 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
52 52
53 return ret; 53 return ret;
54} 54}
55EXPORT_SYMBOL(inet_add_protocol);
55 56
56/* 57/*
57 * Remove a protocol from the hash tables. 58 * Remove a protocol from the hash tables.
@@ -76,6 +77,4 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
76 77
77 return ret; 78 return ret;
78} 79}
79
80EXPORT_SYMBOL(inet_add_protocol);
81EXPORT_SYMBOL(inet_del_protocol); 80EXPORT_SYMBOL(inet_del_protocol);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 2c7a1639388a..009a7b2aa1ef 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -314,7 +314,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
314} 314}
315 315
316static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, 316static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
317 struct rtable *rt, 317 struct rtable **rtp,
318 unsigned int flags) 318 unsigned int flags)
319{ 319{
320 struct inet_sock *inet = inet_sk(sk); 320 struct inet_sock *inet = inet_sk(sk);
@@ -323,25 +323,27 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
323 struct sk_buff *skb; 323 struct sk_buff *skb;
324 unsigned int iphlen; 324 unsigned int iphlen;
325 int err; 325 int err;
326 struct rtable *rt = *rtp;
326 327
327 if (length > rt->u.dst.dev->mtu) { 328 if (length > rt->dst.dev->mtu) {
328 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, 329 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
329 rt->u.dst.dev->mtu); 330 rt->dst.dev->mtu);
330 return -EMSGSIZE; 331 return -EMSGSIZE;
331 } 332 }
332 if (flags&MSG_PROBE) 333 if (flags&MSG_PROBE)
333 goto out; 334 goto out;
334 335
335 skb = sock_alloc_send_skb(sk, 336 skb = sock_alloc_send_skb(sk,
336 length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, 337 length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
337 flags & MSG_DONTWAIT, &err); 338 flags & MSG_DONTWAIT, &err);
338 if (skb == NULL) 339 if (skb == NULL)
339 goto error; 340 goto error;
340 skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); 341 skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
341 342
342 skb->priority = sk->sk_priority; 343 skb->priority = sk->sk_priority;
343 skb->mark = sk->sk_mark; 344 skb->mark = sk->sk_mark;
344 skb_dst_set(skb, dst_clone(&rt->u.dst)); 345 skb_dst_set(skb, &rt->dst);
346 *rtp = NULL;
345 347
346 skb_reset_network_header(skb); 348 skb_reset_network_header(skb);
347 iph = ip_hdr(skb); 349 iph = ip_hdr(skb);
@@ -373,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
373 iph->check = 0; 375 iph->check = 0;
374 iph->tot_len = htons(length); 376 iph->tot_len = htons(length);
375 if (!iph->id) 377 if (!iph->id)
376 ip_select_ident(iph, &rt->u.dst, NULL); 378 ip_select_ident(iph, &rt->dst, NULL);
377 379
378 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 380 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
379 } 381 }
@@ -382,7 +384,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
382 skb_transport_header(skb))->type); 384 skb_transport_header(skb))->type);
383 385
384 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, 386 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
385 rt->u.dst.dev, dst_output); 387 rt->dst.dev, dst_output);
386 if (err > 0) 388 if (err > 0)
387 err = net_xmit_errno(err); 389 err = net_xmit_errno(err);
388 if (err) 390 if (err)
@@ -576,7 +578,7 @@ back_from_confirm:
576 578
577 if (inet->hdrincl) 579 if (inet->hdrincl)
578 err = raw_send_hdrinc(sk, msg->msg_iov, len, 580 err = raw_send_hdrinc(sk, msg->msg_iov, len,
579 rt, msg->msg_flags); 581 &rt, msg->msg_flags);
580 582
581 else { 583 else {
582 if (!ipc.addr) 584 if (!ipc.addr)
@@ -604,7 +606,7 @@ out:
604 return len; 606 return len;
605 607
606do_confirm: 608do_confirm:
607 dst_confirm(&rt->u.dst); 609 dst_confirm(&rt->dst);
608 if (!(msg->msg_flags & MSG_PROBE) || len) 610 if (!(msg->msg_flags & MSG_PROBE) || len)
609 goto back_from_confirm; 611 goto back_from_confirm;
610 err = 0; 612 err = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 560acc677ce4..3f56b6e6c6aa 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -253,8 +253,7 @@ static unsigned rt_hash_mask __read_mostly;
253static unsigned int rt_hash_log __read_mostly; 253static unsigned int rt_hash_log __read_mostly;
254 254
255static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); 255static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
256#define RT_CACHE_STAT_INC(field) \ 256#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
257 (__raw_get_cpu_var(rt_cache_stat).field++)
258 257
259static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, 258static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
260 int genid) 259 int genid)
@@ -287,10 +286,10 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
287 rcu_read_lock_bh(); 286 rcu_read_lock_bh();
288 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); 287 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
289 while (r) { 288 while (r) {
290 if (dev_net(r->u.dst.dev) == seq_file_net(seq) && 289 if (dev_net(r->dst.dev) == seq_file_net(seq) &&
291 r->rt_genid == st->genid) 290 r->rt_genid == st->genid)
292 return r; 291 return r;
293 r = rcu_dereference_bh(r->u.dst.rt_next); 292 r = rcu_dereference_bh(r->dst.rt_next);
294 } 293 }
295 rcu_read_unlock_bh(); 294 rcu_read_unlock_bh();
296 } 295 }
@@ -302,7 +301,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
302{ 301{
303 struct rt_cache_iter_state *st = seq->private; 302 struct rt_cache_iter_state *st = seq->private;
304 303
305 r = r->u.dst.rt_next; 304 r = r->dst.rt_next;
306 while (!r) { 305 while (!r) {
307 rcu_read_unlock_bh(); 306 rcu_read_unlock_bh();
308 do { 307 do {
@@ -320,7 +319,7 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq,
320{ 319{
321 struct rt_cache_iter_state *st = seq->private; 320 struct rt_cache_iter_state *st = seq->private;
322 while ((r = __rt_cache_get_next(seq, r)) != NULL) { 321 while ((r = __rt_cache_get_next(seq, r)) != NULL) {
323 if (dev_net(r->u.dst.dev) != seq_file_net(seq)) 322 if (dev_net(r->dst.dev) != seq_file_net(seq))
324 continue; 323 continue;
325 if (r->rt_genid == st->genid) 324 if (r->rt_genid == st->genid)
326 break; 325 break;
@@ -378,19 +377,19 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
378 377
379 seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" 378 seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
380 "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", 379 "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
381 r->u.dst.dev ? r->u.dst.dev->name : "*", 380 r->dst.dev ? r->dst.dev->name : "*",
382 (__force u32)r->rt_dst, 381 (__force u32)r->rt_dst,
383 (__force u32)r->rt_gateway, 382 (__force u32)r->rt_gateway,
384 r->rt_flags, atomic_read(&r->u.dst.__refcnt), 383 r->rt_flags, atomic_read(&r->dst.__refcnt),
385 r->u.dst.__use, 0, (__force u32)r->rt_src, 384 r->dst.__use, 0, (__force u32)r->rt_src,
386 (dst_metric(&r->u.dst, RTAX_ADVMSS) ? 385 (dst_metric(&r->dst, RTAX_ADVMSS) ?
387 (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0), 386 (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0),
388 dst_metric(&r->u.dst, RTAX_WINDOW), 387 dst_metric(&r->dst, RTAX_WINDOW),
389 (int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) + 388 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
390 dst_metric(&r->u.dst, RTAX_RTTVAR)), 389 dst_metric(&r->dst, RTAX_RTTVAR)),
391 r->fl.fl4_tos, 390 r->fl.fl4_tos,
392 r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1, 391 r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
393 r->u.dst.hh ? (r->u.dst.hh->hh_output == 392 r->dst.hh ? (r->dst.hh->hh_output ==
394 dev_queue_xmit) : 0, 393 dev_queue_xmit) : 0,
395 r->rt_spec_dst, &len); 394 r->rt_spec_dst, &len);
396 395
@@ -609,13 +608,13 @@ static inline int ip_rt_proc_init(void)
609 608
610static inline void rt_free(struct rtable *rt) 609static inline void rt_free(struct rtable *rt)
611{ 610{
612 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 611 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
613} 612}
614 613
615static inline void rt_drop(struct rtable *rt) 614static inline void rt_drop(struct rtable *rt)
616{ 615{
617 ip_rt_put(rt); 616 ip_rt_put(rt);
618 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 617 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
619} 618}
620 619
621static inline int rt_fast_clean(struct rtable *rth) 620static inline int rt_fast_clean(struct rtable *rth)
@@ -623,13 +622,13 @@ static inline int rt_fast_clean(struct rtable *rth)
623 /* Kill broadcast/multicast entries very aggresively, if they 622 /* Kill broadcast/multicast entries very aggresively, if they
624 collide in hash table with more useful entries */ 623 collide in hash table with more useful entries */
625 return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && 624 return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
626 rth->fl.iif && rth->u.dst.rt_next; 625 rth->fl.iif && rth->dst.rt_next;
627} 626}
628 627
629static inline int rt_valuable(struct rtable *rth) 628static inline int rt_valuable(struct rtable *rth)
630{ 629{
631 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || 630 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
632 rth->u.dst.expires; 631 rth->dst.expires;
633} 632}
634 633
635static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) 634static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -637,15 +636,15 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t
637 unsigned long age; 636 unsigned long age;
638 int ret = 0; 637 int ret = 0;
639 638
640 if (atomic_read(&rth->u.dst.__refcnt)) 639 if (atomic_read(&rth->dst.__refcnt))
641 goto out; 640 goto out;
642 641
643 ret = 1; 642 ret = 1;
644 if (rth->u.dst.expires && 643 if (rth->dst.expires &&
645 time_after_eq(jiffies, rth->u.dst.expires)) 644 time_after_eq(jiffies, rth->dst.expires))
646 goto out; 645 goto out;
647 646
648 age = jiffies - rth->u.dst.lastuse; 647 age = jiffies - rth->dst.lastuse;
649 ret = 0; 648 ret = 0;
650 if ((age <= tmo1 && !rt_fast_clean(rth)) || 649 if ((age <= tmo1 && !rt_fast_clean(rth)) ||
651 (age <= tmo2 && rt_valuable(rth))) 650 (age <= tmo2 && rt_valuable(rth)))
@@ -661,7 +660,7 @@ out: return ret;
661 */ 660 */
662static inline u32 rt_score(struct rtable *rt) 661static inline u32 rt_score(struct rtable *rt)
663{ 662{
664 u32 score = jiffies - rt->u.dst.lastuse; 663 u32 score = jiffies - rt->dst.lastuse;
665 664
666 score = ~score & ~(3<<30); 665 score = ~score & ~(3<<30);
667 666
@@ -701,12 +700,12 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
701 700
702static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) 701static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
703{ 702{
704 return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev)); 703 return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev));
705} 704}
706 705
707static inline int rt_is_expired(struct rtable *rth) 706static inline int rt_is_expired(struct rtable *rth)
708{ 707{
709 return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev)); 708 return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
710} 709}
711 710
712/* 711/*
@@ -735,7 +734,7 @@ static void rt_do_flush(int process_context)
735 rth = rt_hash_table[i].chain; 734 rth = rt_hash_table[i].chain;
736 735
737 /* defer releasing the head of the list after spin_unlock */ 736 /* defer releasing the head of the list after spin_unlock */
738 for (tail = rth; tail; tail = tail->u.dst.rt_next) 737 for (tail = rth; tail; tail = tail->dst.rt_next)
739 if (!rt_is_expired(tail)) 738 if (!rt_is_expired(tail))
740 break; 739 break;
741 if (rth != tail) 740 if (rth != tail)
@@ -744,9 +743,9 @@ static void rt_do_flush(int process_context)
744 /* call rt_free on entries after the tail requiring flush */ 743 /* call rt_free on entries after the tail requiring flush */
745 prev = &rt_hash_table[i].chain; 744 prev = &rt_hash_table[i].chain;
746 for (p = *prev; p; p = next) { 745 for (p = *prev; p; p = next) {
747 next = p->u.dst.rt_next; 746 next = p->dst.rt_next;
748 if (!rt_is_expired(p)) { 747 if (!rt_is_expired(p)) {
749 prev = &p->u.dst.rt_next; 748 prev = &p->dst.rt_next;
750 } else { 749 } else {
751 *prev = next; 750 *prev = next;
752 rt_free(p); 751 rt_free(p);
@@ -761,7 +760,7 @@ static void rt_do_flush(int process_context)
761 spin_unlock_bh(rt_hash_lock_addr(i)); 760 spin_unlock_bh(rt_hash_lock_addr(i));
762 761
763 for (; rth != tail; rth = next) { 762 for (; rth != tail; rth = next) {
764 next = rth->u.dst.rt_next; 763 next = rth->dst.rt_next;
765 rt_free(rth); 764 rt_free(rth);
766 } 765 }
767 } 766 }
@@ -792,7 +791,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
792 while (aux != rth) { 791 while (aux != rth) {
793 if (compare_hash_inputs(&aux->fl, &rth->fl)) 792 if (compare_hash_inputs(&aux->fl, &rth->fl))
794 return 0; 793 return 0;
795 aux = aux->u.dst.rt_next; 794 aux = aux->dst.rt_next;
796 } 795 }
797 return ONE; 796 return ONE;
798} 797}
@@ -832,18 +831,18 @@ static void rt_check_expire(void)
832 length = 0; 831 length = 0;
833 spin_lock_bh(rt_hash_lock_addr(i)); 832 spin_lock_bh(rt_hash_lock_addr(i));
834 while ((rth = *rthp) != NULL) { 833 while ((rth = *rthp) != NULL) {
835 prefetch(rth->u.dst.rt_next); 834 prefetch(rth->dst.rt_next);
836 if (rt_is_expired(rth)) { 835 if (rt_is_expired(rth)) {
837 *rthp = rth->u.dst.rt_next; 836 *rthp = rth->dst.rt_next;
838 rt_free(rth); 837 rt_free(rth);
839 continue; 838 continue;
840 } 839 }
841 if (rth->u.dst.expires) { 840 if (rth->dst.expires) {
842 /* Entry is expired even if it is in use */ 841 /* Entry is expired even if it is in use */
843 if (time_before_eq(jiffies, rth->u.dst.expires)) { 842 if (time_before_eq(jiffies, rth->dst.expires)) {
844nofree: 843nofree:
845 tmo >>= 1; 844 tmo >>= 1;
846 rthp = &rth->u.dst.rt_next; 845 rthp = &rth->dst.rt_next;
847 /* 846 /*
848 * We only count entries on 847 * We only count entries on
849 * a chain with equal hash inputs once 848 * a chain with equal hash inputs once
@@ -859,7 +858,7 @@ nofree:
859 goto nofree; 858 goto nofree;
860 859
861 /* Cleanup aged off entries. */ 860 /* Cleanup aged off entries. */
862 *rthp = rth->u.dst.rt_next; 861 *rthp = rth->dst.rt_next;
863 rt_free(rth); 862 rt_free(rth);
864 } 863 }
865 spin_unlock_bh(rt_hash_lock_addr(i)); 864 spin_unlock_bh(rt_hash_lock_addr(i));
@@ -1000,10 +999,10 @@ static int rt_garbage_collect(struct dst_ops *ops)
1000 if (!rt_is_expired(rth) && 999 if (!rt_is_expired(rth) &&
1001 !rt_may_expire(rth, tmo, expire)) { 1000 !rt_may_expire(rth, tmo, expire)) {
1002 tmo >>= 1; 1001 tmo >>= 1;
1003 rthp = &rth->u.dst.rt_next; 1002 rthp = &rth->dst.rt_next;
1004 continue; 1003 continue;
1005 } 1004 }
1006 *rthp = rth->u.dst.rt_next; 1005 *rthp = rth->dst.rt_next;
1007 rt_free(rth); 1006 rt_free(rth);
1008 goal--; 1007 goal--;
1009 } 1008 }
@@ -1069,7 +1068,7 @@ static int slow_chain_length(const struct rtable *head)
1069 1068
1070 while (rth) { 1069 while (rth) {
1071 length += has_noalias(head, rth); 1070 length += has_noalias(head, rth);
1072 rth = rth->u.dst.rt_next; 1071 rth = rth->dst.rt_next;
1073 } 1072 }
1074 return length >> FRACT_BITS; 1073 return length >> FRACT_BITS;
1075} 1074}
@@ -1091,7 +1090,7 @@ restart:
1091 candp = NULL; 1090 candp = NULL;
1092 now = jiffies; 1091 now = jiffies;
1093 1092
1094 if (!rt_caching(dev_net(rt->u.dst.dev))) { 1093 if (!rt_caching(dev_net(rt->dst.dev))) {
1095 /* 1094 /*
1096 * If we're not caching, just tell the caller we 1095 * If we're not caching, just tell the caller we
1097 * were successful and don't touch the route. The 1096 * were successful and don't touch the route. The
@@ -1109,7 +1108,7 @@ restart:
1109 */ 1108 */
1110 1109
1111 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 1110 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
1112 int err = arp_bind_neighbour(&rt->u.dst); 1111 int err = arp_bind_neighbour(&rt->dst);
1113 if (err) { 1112 if (err) {
1114 if (net_ratelimit()) 1113 if (net_ratelimit())
1115 printk(KERN_WARNING 1114 printk(KERN_WARNING
@@ -1128,19 +1127,19 @@ restart:
1128 spin_lock_bh(rt_hash_lock_addr(hash)); 1127 spin_lock_bh(rt_hash_lock_addr(hash));
1129 while ((rth = *rthp) != NULL) { 1128 while ((rth = *rthp) != NULL) {
1130 if (rt_is_expired(rth)) { 1129 if (rt_is_expired(rth)) {
1131 *rthp = rth->u.dst.rt_next; 1130 *rthp = rth->dst.rt_next;
1132 rt_free(rth); 1131 rt_free(rth);
1133 continue; 1132 continue;
1134 } 1133 }
1135 if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { 1134 if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
1136 /* Put it first */ 1135 /* Put it first */
1137 *rthp = rth->u.dst.rt_next; 1136 *rthp = rth->dst.rt_next;
1138 /* 1137 /*
1139 * Since lookup is lockfree, the deletion 1138 * Since lookup is lockfree, the deletion
1140 * must be visible to another weakly ordered CPU before 1139 * must be visible to another weakly ordered CPU before
1141 * the insertion at the start of the hash chain. 1140 * the insertion at the start of the hash chain.
1142 */ 1141 */
1143 rcu_assign_pointer(rth->u.dst.rt_next, 1142 rcu_assign_pointer(rth->dst.rt_next,
1144 rt_hash_table[hash].chain); 1143 rt_hash_table[hash].chain);
1145 /* 1144 /*
1146 * Since lookup is lockfree, the update writes 1145 * Since lookup is lockfree, the update writes
@@ -1148,18 +1147,18 @@ restart:
1148 */ 1147 */
1149 rcu_assign_pointer(rt_hash_table[hash].chain, rth); 1148 rcu_assign_pointer(rt_hash_table[hash].chain, rth);
1150 1149
1151 dst_use(&rth->u.dst, now); 1150 dst_use(&rth->dst, now);
1152 spin_unlock_bh(rt_hash_lock_addr(hash)); 1151 spin_unlock_bh(rt_hash_lock_addr(hash));
1153 1152
1154 rt_drop(rt); 1153 rt_drop(rt);
1155 if (rp) 1154 if (rp)
1156 *rp = rth; 1155 *rp = rth;
1157 else 1156 else
1158 skb_dst_set(skb, &rth->u.dst); 1157 skb_dst_set(skb, &rth->dst);
1159 return 0; 1158 return 0;
1160 } 1159 }
1161 1160
1162 if (!atomic_read(&rth->u.dst.__refcnt)) { 1161 if (!atomic_read(&rth->dst.__refcnt)) {
1163 u32 score = rt_score(rth); 1162 u32 score = rt_score(rth);
1164 1163
1165 if (score <= min_score) { 1164 if (score <= min_score) {
@@ -1171,7 +1170,7 @@ restart:
1171 1170
1172 chain_length++; 1171 chain_length++;
1173 1172
1174 rthp = &rth->u.dst.rt_next; 1173 rthp = &rth->dst.rt_next;
1175 } 1174 }
1176 1175
1177 if (cand) { 1176 if (cand) {
@@ -1182,17 +1181,17 @@ restart:
1182 * only 2 entries per bucket. We will see. 1181 * only 2 entries per bucket. We will see.
1183 */ 1182 */
1184 if (chain_length > ip_rt_gc_elasticity) { 1183 if (chain_length > ip_rt_gc_elasticity) {
1185 *candp = cand->u.dst.rt_next; 1184 *candp = cand->dst.rt_next;
1186 rt_free(cand); 1185 rt_free(cand);
1187 } 1186 }
1188 } else { 1187 } else {
1189 if (chain_length > rt_chain_length_max && 1188 if (chain_length > rt_chain_length_max &&
1190 slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { 1189 slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
1191 struct net *net = dev_net(rt->u.dst.dev); 1190 struct net *net = dev_net(rt->dst.dev);
1192 int num = ++net->ipv4.current_rt_cache_rebuild_count; 1191 int num = ++net->ipv4.current_rt_cache_rebuild_count;
1193 if (!rt_caching(net)) { 1192 if (!rt_caching(net)) {
1194 printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", 1193 printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n",
1195 rt->u.dst.dev->name, num); 1194 rt->dst.dev->name, num);
1196 } 1195 }
1197 rt_emergency_hash_rebuild(net); 1196 rt_emergency_hash_rebuild(net);
1198 spin_unlock_bh(rt_hash_lock_addr(hash)); 1197 spin_unlock_bh(rt_hash_lock_addr(hash));
@@ -1207,7 +1206,7 @@ restart:
1207 route or unicast forwarding path. 1206 route or unicast forwarding path.
1208 */ 1207 */
1209 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 1208 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
1210 int err = arp_bind_neighbour(&rt->u.dst); 1209 int err = arp_bind_neighbour(&rt->dst);
1211 if (err) { 1210 if (err) {
1212 spin_unlock_bh(rt_hash_lock_addr(hash)); 1211 spin_unlock_bh(rt_hash_lock_addr(hash));
1213 1212
@@ -1238,14 +1237,14 @@ restart:
1238 } 1237 }
1239 } 1238 }
1240 1239
1241 rt->u.dst.rt_next = rt_hash_table[hash].chain; 1240 rt->dst.rt_next = rt_hash_table[hash].chain;
1242 1241
1243#if RT_CACHE_DEBUG >= 2 1242#if RT_CACHE_DEBUG >= 2
1244 if (rt->u.dst.rt_next) { 1243 if (rt->dst.rt_next) {
1245 struct rtable *trt; 1244 struct rtable *trt;
1246 printk(KERN_DEBUG "rt_cache @%02x: %pI4", 1245 printk(KERN_DEBUG "rt_cache @%02x: %pI4",
1247 hash, &rt->rt_dst); 1246 hash, &rt->rt_dst);
1248 for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) 1247 for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next)
1249 printk(" . %pI4", &trt->rt_dst); 1248 printk(" . %pI4", &trt->rt_dst);
1250 printk("\n"); 1249 printk("\n");
1251 } 1250 }
@@ -1263,7 +1262,7 @@ skip_hashing:
1263 if (rp) 1262 if (rp)
1264 *rp = rt; 1263 *rp = rt;
1265 else 1264 else
1266 skb_dst_set(skb, &rt->u.dst); 1265 skb_dst_set(skb, &rt->dst);
1267 return 0; 1266 return 0;
1268} 1267}
1269 1268
@@ -1325,6 +1324,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1325 1324
1326 ip_select_fb_ident(iph); 1325 ip_select_fb_ident(iph);
1327} 1326}
1327EXPORT_SYMBOL(__ip_select_ident);
1328 1328
1329static void rt_del(unsigned hash, struct rtable *rt) 1329static void rt_del(unsigned hash, struct rtable *rt)
1330{ 1330{
@@ -1335,20 +1335,21 @@ static void rt_del(unsigned hash, struct rtable *rt)
1335 ip_rt_put(rt); 1335 ip_rt_put(rt);
1336 while ((aux = *rthp) != NULL) { 1336 while ((aux = *rthp) != NULL) {
1337 if (aux == rt || rt_is_expired(aux)) { 1337 if (aux == rt || rt_is_expired(aux)) {
1338 *rthp = aux->u.dst.rt_next; 1338 *rthp = aux->dst.rt_next;
1339 rt_free(aux); 1339 rt_free(aux);
1340 continue; 1340 continue;
1341 } 1341 }
1342 rthp = &aux->u.dst.rt_next; 1342 rthp = &aux->dst.rt_next;
1343 } 1343 }
1344 spin_unlock_bh(rt_hash_lock_addr(hash)); 1344 spin_unlock_bh(rt_hash_lock_addr(hash));
1345} 1345}
1346 1346
1347/* called in rcu_read_lock() section */
1347void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, 1348void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1348 __be32 saddr, struct net_device *dev) 1349 __be32 saddr, struct net_device *dev)
1349{ 1350{
1350 int i, k; 1351 int i, k;
1351 struct in_device *in_dev = in_dev_get(dev); 1352 struct in_device *in_dev = __in_dev_get_rcu(dev);
1352 struct rtable *rth, **rthp; 1353 struct rtable *rth, **rthp;
1353 __be32 skeys[2] = { saddr, 0 }; 1354 __be32 skeys[2] = { saddr, 0 };
1354 int ikeys[2] = { dev->ifindex, 0 }; 1355 int ikeys[2] = { dev->ifindex, 0 };
@@ -1384,7 +1385,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1384 1385
1385 rthp=&rt_hash_table[hash].chain; 1386 rthp=&rt_hash_table[hash].chain;
1386 1387
1387 rcu_read_lock();
1388 while ((rth = rcu_dereference(*rthp)) != NULL) { 1388 while ((rth = rcu_dereference(*rthp)) != NULL) {
1389 struct rtable *rt; 1389 struct rtable *rt;
1390 1390
@@ -1393,44 +1393,42 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1393 rth->fl.oif != ikeys[k] || 1393 rth->fl.oif != ikeys[k] ||
1394 rth->fl.iif != 0 || 1394 rth->fl.iif != 0 ||
1395 rt_is_expired(rth) || 1395 rt_is_expired(rth) ||
1396 !net_eq(dev_net(rth->u.dst.dev), net)) { 1396 !net_eq(dev_net(rth->dst.dev), net)) {
1397 rthp = &rth->u.dst.rt_next; 1397 rthp = &rth->dst.rt_next;
1398 continue; 1398 continue;
1399 } 1399 }
1400 1400
1401 if (rth->rt_dst != daddr || 1401 if (rth->rt_dst != daddr ||
1402 rth->rt_src != saddr || 1402 rth->rt_src != saddr ||
1403 rth->u.dst.error || 1403 rth->dst.error ||
1404 rth->rt_gateway != old_gw || 1404 rth->rt_gateway != old_gw ||
1405 rth->u.dst.dev != dev) 1405 rth->dst.dev != dev)
1406 break; 1406 break;
1407 1407
1408 dst_hold(&rth->u.dst); 1408 dst_hold(&rth->dst);
1409 rcu_read_unlock();
1410 1409
1411 rt = dst_alloc(&ipv4_dst_ops); 1410 rt = dst_alloc(&ipv4_dst_ops);
1412 if (rt == NULL) { 1411 if (rt == NULL) {
1413 ip_rt_put(rth); 1412 ip_rt_put(rth);
1414 in_dev_put(in_dev);
1415 return; 1413 return;
1416 } 1414 }
1417 1415
1418 /* Copy all the information. */ 1416 /* Copy all the information. */
1419 *rt = *rth; 1417 *rt = *rth;
1420 rt->u.dst.__use = 1; 1418 rt->dst.__use = 1;
1421 atomic_set(&rt->u.dst.__refcnt, 1); 1419 atomic_set(&rt->dst.__refcnt, 1);
1422 rt->u.dst.child = NULL; 1420 rt->dst.child = NULL;
1423 if (rt->u.dst.dev) 1421 if (rt->dst.dev)
1424 dev_hold(rt->u.dst.dev); 1422 dev_hold(rt->dst.dev);
1425 if (rt->idev) 1423 if (rt->idev)
1426 in_dev_hold(rt->idev); 1424 in_dev_hold(rt->idev);
1427 rt->u.dst.obsolete = -1; 1425 rt->dst.obsolete = -1;
1428 rt->u.dst.lastuse = jiffies; 1426 rt->dst.lastuse = jiffies;
1429 rt->u.dst.path = &rt->u.dst; 1427 rt->dst.path = &rt->dst;
1430 rt->u.dst.neighbour = NULL; 1428 rt->dst.neighbour = NULL;
1431 rt->u.dst.hh = NULL; 1429 rt->dst.hh = NULL;
1432#ifdef CONFIG_XFRM 1430#ifdef CONFIG_XFRM
1433 rt->u.dst.xfrm = NULL; 1431 rt->dst.xfrm = NULL;
1434#endif 1432#endif
1435 rt->rt_genid = rt_genid(net); 1433 rt->rt_genid = rt_genid(net);
1436 rt->rt_flags |= RTCF_REDIRECTED; 1434 rt->rt_flags |= RTCF_REDIRECTED;
@@ -1439,23 +1437,23 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1439 rt->rt_gateway = new_gw; 1437 rt->rt_gateway = new_gw;
1440 1438
1441 /* Redirect received -> path was valid */ 1439 /* Redirect received -> path was valid */
1442 dst_confirm(&rth->u.dst); 1440 dst_confirm(&rth->dst);
1443 1441
1444 if (rt->peer) 1442 if (rt->peer)
1445 atomic_inc(&rt->peer->refcnt); 1443 atomic_inc(&rt->peer->refcnt);
1446 1444
1447 if (arp_bind_neighbour(&rt->u.dst) || 1445 if (arp_bind_neighbour(&rt->dst) ||
1448 !(rt->u.dst.neighbour->nud_state & 1446 !(rt->dst.neighbour->nud_state &
1449 NUD_VALID)) { 1447 NUD_VALID)) {
1450 if (rt->u.dst.neighbour) 1448 if (rt->dst.neighbour)
1451 neigh_event_send(rt->u.dst.neighbour, NULL); 1449 neigh_event_send(rt->dst.neighbour, NULL);
1452 ip_rt_put(rth); 1450 ip_rt_put(rth);
1453 rt_drop(rt); 1451 rt_drop(rt);
1454 goto do_next; 1452 goto do_next;
1455 } 1453 }
1456 1454
1457 netevent.old = &rth->u.dst; 1455 netevent.old = &rth->dst;
1458 netevent.new = &rt->u.dst; 1456 netevent.new = &rt->dst;
1459 call_netevent_notifiers(NETEVENT_REDIRECT, 1457 call_netevent_notifiers(NETEVENT_REDIRECT,
1460 &netevent); 1458 &netevent);
1461 1459
@@ -1464,12 +1462,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1464 ip_rt_put(rt); 1462 ip_rt_put(rt);
1465 goto do_next; 1463 goto do_next;
1466 } 1464 }
1467 rcu_read_unlock();
1468 do_next: 1465 do_next:
1469 ; 1466 ;
1470 } 1467 }
1471 } 1468 }
1472 in_dev_put(in_dev);
1473 return; 1469 return;
1474 1470
1475reject_redirect: 1471reject_redirect:
@@ -1480,7 +1476,7 @@ reject_redirect:
1480 &old_gw, dev->name, &new_gw, 1476 &old_gw, dev->name, &new_gw,
1481 &saddr, &daddr); 1477 &saddr, &daddr);
1482#endif 1478#endif
1483 in_dev_put(in_dev); 1479 ;
1484} 1480}
1485 1481
1486static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) 1482static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
@@ -1493,8 +1489,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1493 ip_rt_put(rt); 1489 ip_rt_put(rt);
1494 ret = NULL; 1490 ret = NULL;
1495 } else if ((rt->rt_flags & RTCF_REDIRECTED) || 1491 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
1496 (rt->u.dst.expires && 1492 (rt->dst.expires &&
1497 time_after_eq(jiffies, rt->u.dst.expires))) { 1493 time_after_eq(jiffies, rt->dst.expires))) {
1498 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, 1494 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
1499 rt->fl.oif, 1495 rt->fl.oif,
1500 rt_genid(dev_net(dst->dev))); 1496 rt_genid(dev_net(dst->dev)));
@@ -1532,7 +1528,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1532 int log_martians; 1528 int log_martians;
1533 1529
1534 rcu_read_lock(); 1530 rcu_read_lock();
1535 in_dev = __in_dev_get_rcu(rt->u.dst.dev); 1531 in_dev = __in_dev_get_rcu(rt->dst.dev);
1536 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { 1532 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
1537 rcu_read_unlock(); 1533 rcu_read_unlock();
1538 return; 1534 return;
@@ -1543,30 +1539,30 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1543 /* No redirected packets during ip_rt_redirect_silence; 1539 /* No redirected packets during ip_rt_redirect_silence;
1544 * reset the algorithm. 1540 * reset the algorithm.
1545 */ 1541 */
1546 if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence)) 1542 if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence))
1547 rt->u.dst.rate_tokens = 0; 1543 rt->dst.rate_tokens = 0;
1548 1544
1549 /* Too many ignored redirects; do not send anything 1545 /* Too many ignored redirects; do not send anything
1550 * set u.dst.rate_last to the last seen redirected packet. 1546 * set dst.rate_last to the last seen redirected packet.
1551 */ 1547 */
1552 if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { 1548 if (rt->dst.rate_tokens >= ip_rt_redirect_number) {
1553 rt->u.dst.rate_last = jiffies; 1549 rt->dst.rate_last = jiffies;
1554 return; 1550 return;
1555 } 1551 }
1556 1552
1557 /* Check for load limit; set rate_last to the latest sent 1553 /* Check for load limit; set rate_last to the latest sent
1558 * redirect. 1554 * redirect.
1559 */ 1555 */
1560 if (rt->u.dst.rate_tokens == 0 || 1556 if (rt->dst.rate_tokens == 0 ||
1561 time_after(jiffies, 1557 time_after(jiffies,
1562 (rt->u.dst.rate_last + 1558 (rt->dst.rate_last +
1563 (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) { 1559 (ip_rt_redirect_load << rt->dst.rate_tokens)))) {
1564 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); 1560 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
1565 rt->u.dst.rate_last = jiffies; 1561 rt->dst.rate_last = jiffies;
1566 ++rt->u.dst.rate_tokens; 1562 ++rt->dst.rate_tokens;
1567#ifdef CONFIG_IP_ROUTE_VERBOSE 1563#ifdef CONFIG_IP_ROUTE_VERBOSE
1568 if (log_martians && 1564 if (log_martians &&
1569 rt->u.dst.rate_tokens == ip_rt_redirect_number && 1565 rt->dst.rate_tokens == ip_rt_redirect_number &&
1570 net_ratelimit()) 1566 net_ratelimit())
1571 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", 1567 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
1572 &rt->rt_src, rt->rt_iif, 1568 &rt->rt_src, rt->rt_iif,
@@ -1581,7 +1577,7 @@ static int ip_error(struct sk_buff *skb)
1581 unsigned long now; 1577 unsigned long now;
1582 int code; 1578 int code;
1583 1579
1584 switch (rt->u.dst.error) { 1580 switch (rt->dst.error) {
1585 case EINVAL: 1581 case EINVAL:
1586 default: 1582 default:
1587 goto out; 1583 goto out;
@@ -1590,7 +1586,7 @@ static int ip_error(struct sk_buff *skb)
1590 break; 1586 break;
1591 case ENETUNREACH: 1587 case ENETUNREACH:
1592 code = ICMP_NET_UNREACH; 1588 code = ICMP_NET_UNREACH;
1593 IP_INC_STATS_BH(dev_net(rt->u.dst.dev), 1589 IP_INC_STATS_BH(dev_net(rt->dst.dev),
1594 IPSTATS_MIB_INNOROUTES); 1590 IPSTATS_MIB_INNOROUTES);
1595 break; 1591 break;
1596 case EACCES: 1592 case EACCES:
@@ -1599,12 +1595,12 @@ static int ip_error(struct sk_buff *skb)
1599 } 1595 }
1600 1596
1601 now = jiffies; 1597 now = jiffies;
1602 rt->u.dst.rate_tokens += now - rt->u.dst.rate_last; 1598 rt->dst.rate_tokens += now - rt->dst.rate_last;
1603 if (rt->u.dst.rate_tokens > ip_rt_error_burst) 1599 if (rt->dst.rate_tokens > ip_rt_error_burst)
1604 rt->u.dst.rate_tokens = ip_rt_error_burst; 1600 rt->dst.rate_tokens = ip_rt_error_burst;
1605 rt->u.dst.rate_last = now; 1601 rt->dst.rate_last = now;
1606 if (rt->u.dst.rate_tokens >= ip_rt_error_cost) { 1602 if (rt->dst.rate_tokens >= ip_rt_error_cost) {
1607 rt->u.dst.rate_tokens -= ip_rt_error_cost; 1603 rt->dst.rate_tokens -= ip_rt_error_cost;
1608 icmp_send(skb, ICMP_DEST_UNREACH, code, 0); 1604 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
1609 } 1605 }
1610 1606
@@ -1649,7 +1645,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1649 1645
1650 rcu_read_lock(); 1646 rcu_read_lock();
1651 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 1647 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
1652 rth = rcu_dereference(rth->u.dst.rt_next)) { 1648 rth = rcu_dereference(rth->dst.rt_next)) {
1653 unsigned short mtu = new_mtu; 1649 unsigned short mtu = new_mtu;
1654 1650
1655 if (rth->fl.fl4_dst != daddr || 1651 if (rth->fl.fl4_dst != daddr ||
@@ -1658,8 +1654,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1658 rth->rt_src != iph->saddr || 1654 rth->rt_src != iph->saddr ||
1659 rth->fl.oif != ikeys[k] || 1655 rth->fl.oif != ikeys[k] ||
1660 rth->fl.iif != 0 || 1656 rth->fl.iif != 0 ||
1661 dst_metric_locked(&rth->u.dst, RTAX_MTU) || 1657 dst_metric_locked(&rth->dst, RTAX_MTU) ||
1662 !net_eq(dev_net(rth->u.dst.dev), net) || 1658 !net_eq(dev_net(rth->dst.dev), net) ||
1663 rt_is_expired(rth)) 1659 rt_is_expired(rth))
1664 continue; 1660 continue;
1665 1661
@@ -1667,22 +1663,22 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1667 1663
1668 /* BSD 4.2 compatibility hack :-( */ 1664 /* BSD 4.2 compatibility hack :-( */
1669 if (mtu == 0 && 1665 if (mtu == 0 &&
1670 old_mtu >= dst_mtu(&rth->u.dst) && 1666 old_mtu >= dst_mtu(&rth->dst) &&
1671 old_mtu >= 68 + (iph->ihl << 2)) 1667 old_mtu >= 68 + (iph->ihl << 2))
1672 old_mtu -= iph->ihl << 2; 1668 old_mtu -= iph->ihl << 2;
1673 1669
1674 mtu = guess_mtu(old_mtu); 1670 mtu = guess_mtu(old_mtu);
1675 } 1671 }
1676 if (mtu <= dst_mtu(&rth->u.dst)) { 1672 if (mtu <= dst_mtu(&rth->dst)) {
1677 if (mtu < dst_mtu(&rth->u.dst)) { 1673 if (mtu < dst_mtu(&rth->dst)) {
1678 dst_confirm(&rth->u.dst); 1674 dst_confirm(&rth->dst);
1679 if (mtu < ip_rt_min_pmtu) { 1675 if (mtu < ip_rt_min_pmtu) {
1680 mtu = ip_rt_min_pmtu; 1676 mtu = ip_rt_min_pmtu;
1681 rth->u.dst.metrics[RTAX_LOCK-1] |= 1677 rth->dst.metrics[RTAX_LOCK-1] |=
1682 (1 << RTAX_MTU); 1678 (1 << RTAX_MTU);
1683 } 1679 }
1684 rth->u.dst.metrics[RTAX_MTU-1] = mtu; 1680 rth->dst.metrics[RTAX_MTU-1] = mtu;
1685 dst_set_expires(&rth->u.dst, 1681 dst_set_expires(&rth->dst,
1686 ip_rt_mtu_expires); 1682 ip_rt_mtu_expires);
1687 } 1683 }
1688 est_mtu = mtu; 1684 est_mtu = mtu;
@@ -1755,7 +1751,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
1755 1751
1756 rt = skb_rtable(skb); 1752 rt = skb_rtable(skb);
1757 if (rt) 1753 if (rt)
1758 dst_set_expires(&rt->u.dst, 0); 1754 dst_set_expires(&rt->dst, 0);
1759} 1755}
1760 1756
1761static int ip_rt_bug(struct sk_buff *skb) 1757static int ip_rt_bug(struct sk_buff *skb)
@@ -1783,11 +1779,11 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1783 1779
1784 if (rt->fl.iif == 0) 1780 if (rt->fl.iif == 0)
1785 src = rt->rt_src; 1781 src = rt->rt_src;
1786 else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) { 1782 else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) {
1787 src = FIB_RES_PREFSRC(res); 1783 src = FIB_RES_PREFSRC(res);
1788 fib_res_put(&res); 1784 fib_res_put(&res);
1789 } else 1785 } else
1790 src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, 1786 src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
1791 RT_SCOPE_UNIVERSE); 1787 RT_SCOPE_UNIVERSE);
1792 memcpy(addr, &src, 4); 1788 memcpy(addr, &src, 4);
1793} 1789}
@@ -1795,10 +1791,10 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1795#ifdef CONFIG_NET_CLS_ROUTE 1791#ifdef CONFIG_NET_CLS_ROUTE
1796static void set_class_tag(struct rtable *rt, u32 tag) 1792static void set_class_tag(struct rtable *rt, u32 tag)
1797{ 1793{
1798 if (!(rt->u.dst.tclassid & 0xFFFF)) 1794 if (!(rt->dst.tclassid & 0xFFFF))
1799 rt->u.dst.tclassid |= tag & 0xFFFF; 1795 rt->dst.tclassid |= tag & 0xFFFF;
1800 if (!(rt->u.dst.tclassid & 0xFFFF0000)) 1796 if (!(rt->dst.tclassid & 0xFFFF0000))
1801 rt->u.dst.tclassid |= tag & 0xFFFF0000; 1797 rt->dst.tclassid |= tag & 0xFFFF0000;
1802} 1798}
1803#endif 1799#endif
1804 1800
@@ -1810,30 +1806,30 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1810 if (FIB_RES_GW(*res) && 1806 if (FIB_RES_GW(*res) &&
1811 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 1807 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
1812 rt->rt_gateway = FIB_RES_GW(*res); 1808 rt->rt_gateway = FIB_RES_GW(*res);
1813 memcpy(rt->u.dst.metrics, fi->fib_metrics, 1809 memcpy(rt->dst.metrics, fi->fib_metrics,
1814 sizeof(rt->u.dst.metrics)); 1810 sizeof(rt->dst.metrics));
1815 if (fi->fib_mtu == 0) { 1811 if (fi->fib_mtu == 0) {
1816 rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; 1812 rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
1817 if (dst_metric_locked(&rt->u.dst, RTAX_MTU) && 1813 if (dst_metric_locked(&rt->dst, RTAX_MTU) &&
1818 rt->rt_gateway != rt->rt_dst && 1814 rt->rt_gateway != rt->rt_dst &&
1819 rt->u.dst.dev->mtu > 576) 1815 rt->dst.dev->mtu > 576)
1820 rt->u.dst.metrics[RTAX_MTU-1] = 576; 1816 rt->dst.metrics[RTAX_MTU-1] = 576;
1821 } 1817 }
1822#ifdef CONFIG_NET_CLS_ROUTE 1818#ifdef CONFIG_NET_CLS_ROUTE
1823 rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid; 1819 rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
1824#endif 1820#endif
1825 } else 1821 } else
1826 rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; 1822 rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu;
1827 1823
1828 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) 1824 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
1829 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; 1825 rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
1830 if (dst_mtu(&rt->u.dst) > IP_MAX_MTU) 1826 if (dst_mtu(&rt->dst) > IP_MAX_MTU)
1831 rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; 1827 rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
1832 if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) 1828 if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0)
1833 rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, 1829 rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40,
1834 ip_rt_min_advmss); 1830 ip_rt_min_advmss);
1835 if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40) 1831 if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40)
1836 rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; 1832 rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
1837 1833
1838#ifdef CONFIG_NET_CLS_ROUTE 1834#ifdef CONFIG_NET_CLS_ROUTE
1839#ifdef CONFIG_IP_MULTIPLE_TABLES 1835#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1844,14 +1840,16 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1844 rt->rt_type = res->type; 1840 rt->rt_type = res->type;
1845} 1841}
1846 1842
1843/* called in rcu_read_lock() section */
1847static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, 1844static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1848 u8 tos, struct net_device *dev, int our) 1845 u8 tos, struct net_device *dev, int our)
1849{ 1846{
1850 unsigned hash; 1847 unsigned int hash;
1851 struct rtable *rth; 1848 struct rtable *rth;
1852 __be32 spec_dst; 1849 __be32 spec_dst;
1853 struct in_device *in_dev = in_dev_get(dev); 1850 struct in_device *in_dev = __in_dev_get_rcu(dev);
1854 u32 itag = 0; 1851 u32 itag = 0;
1852 int err;
1855 1853
1856 /* Primary sanity checks. */ 1854 /* Primary sanity checks. */
1857 1855
@@ -1866,21 +1864,23 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1866 if (!ipv4_is_local_multicast(daddr)) 1864 if (!ipv4_is_local_multicast(daddr))
1867 goto e_inval; 1865 goto e_inval;
1868 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 1866 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1869 } else if (fib_validate_source(saddr, 0, tos, 0, 1867 } else {
1870 dev, &spec_dst, &itag, 0) < 0) 1868 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
1871 goto e_inval; 1869 &itag, 0);
1872 1870 if (err < 0)
1871 goto e_err;
1872 }
1873 rth = dst_alloc(&ipv4_dst_ops); 1873 rth = dst_alloc(&ipv4_dst_ops);
1874 if (!rth) 1874 if (!rth)
1875 goto e_nobufs; 1875 goto e_nobufs;
1876 1876
1877 rth->u.dst.output = ip_rt_bug; 1877 rth->dst.output = ip_rt_bug;
1878 rth->u.dst.obsolete = -1; 1878 rth->dst.obsolete = -1;
1879 1879
1880 atomic_set(&rth->u.dst.__refcnt, 1); 1880 atomic_set(&rth->dst.__refcnt, 1);
1881 rth->u.dst.flags= DST_HOST; 1881 rth->dst.flags= DST_HOST;
1882 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 1882 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
1883 rth->u.dst.flags |= DST_NOPOLICY; 1883 rth->dst.flags |= DST_NOPOLICY;
1884 rth->fl.fl4_dst = daddr; 1884 rth->fl.fl4_dst = daddr;
1885 rth->rt_dst = daddr; 1885 rth->rt_dst = daddr;
1886 rth->fl.fl4_tos = tos; 1886 rth->fl.fl4_tos = tos;
@@ -1888,13 +1888,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1888 rth->fl.fl4_src = saddr; 1888 rth->fl.fl4_src = saddr;
1889 rth->rt_src = saddr; 1889 rth->rt_src = saddr;
1890#ifdef CONFIG_NET_CLS_ROUTE 1890#ifdef CONFIG_NET_CLS_ROUTE
1891 rth->u.dst.tclassid = itag; 1891 rth->dst.tclassid = itag;
1892#endif 1892#endif
1893 rth->rt_iif = 1893 rth->rt_iif =
1894 rth->fl.iif = dev->ifindex; 1894 rth->fl.iif = dev->ifindex;
1895 rth->u.dst.dev = init_net.loopback_dev; 1895 rth->dst.dev = init_net.loopback_dev;
1896 dev_hold(rth->u.dst.dev); 1896 dev_hold(rth->dst.dev);
1897 rth->idev = in_dev_get(rth->u.dst.dev); 1897 rth->idev = in_dev_get(rth->dst.dev);
1898 rth->fl.oif = 0; 1898 rth->fl.oif = 0;
1899 rth->rt_gateway = daddr; 1899 rth->rt_gateway = daddr;
1900 rth->rt_spec_dst= spec_dst; 1900 rth->rt_spec_dst= spec_dst;
@@ -1902,27 +1902,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1902 rth->rt_flags = RTCF_MULTICAST; 1902 rth->rt_flags = RTCF_MULTICAST;
1903 rth->rt_type = RTN_MULTICAST; 1903 rth->rt_type = RTN_MULTICAST;
1904 if (our) { 1904 if (our) {
1905 rth->u.dst.input= ip_local_deliver; 1905 rth->dst.input= ip_local_deliver;
1906 rth->rt_flags |= RTCF_LOCAL; 1906 rth->rt_flags |= RTCF_LOCAL;
1907 } 1907 }
1908 1908
1909#ifdef CONFIG_IP_MROUTE 1909#ifdef CONFIG_IP_MROUTE
1910 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) 1910 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
1911 rth->u.dst.input = ip_mr_input; 1911 rth->dst.input = ip_mr_input;
1912#endif 1912#endif
1913 RT_CACHE_STAT_INC(in_slow_mc); 1913 RT_CACHE_STAT_INC(in_slow_mc);
1914 1914
1915 in_dev_put(in_dev);
1916 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); 1915 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
1917 return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); 1916 return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex);
1918 1917
1919e_nobufs: 1918e_nobufs:
1920 in_dev_put(in_dev);
1921 return -ENOBUFS; 1919 return -ENOBUFS;
1922
1923e_inval: 1920e_inval:
1924 in_dev_put(in_dev);
1925 return -EINVAL; 1921 return -EINVAL;
1922e_err:
1923 return err;
1926} 1924}
1927 1925
1928 1926
@@ -1956,22 +1954,22 @@ static void ip_handle_martian_source(struct net_device *dev,
1956#endif 1954#endif
1957} 1955}
1958 1956
1957/* called in rcu_read_lock() section */
1959static int __mkroute_input(struct sk_buff *skb, 1958static int __mkroute_input(struct sk_buff *skb,
1960 struct fib_result *res, 1959 struct fib_result *res,
1961 struct in_device *in_dev, 1960 struct in_device *in_dev,
1962 __be32 daddr, __be32 saddr, u32 tos, 1961 __be32 daddr, __be32 saddr, u32 tos,
1963 struct rtable **result) 1962 struct rtable **result)
1964{ 1963{
1965
1966 struct rtable *rth; 1964 struct rtable *rth;
1967 int err; 1965 int err;
1968 struct in_device *out_dev; 1966 struct in_device *out_dev;
1969 unsigned flags = 0; 1967 unsigned int flags = 0;
1970 __be32 spec_dst; 1968 __be32 spec_dst;
1971 u32 itag; 1969 u32 itag;
1972 1970
1973 /* get a working reference to the output device */ 1971 /* get a working reference to the output device */
1974 out_dev = in_dev_get(FIB_RES_DEV(*res)); 1972 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
1975 if (out_dev == NULL) { 1973 if (out_dev == NULL) {
1976 if (net_ratelimit()) 1974 if (net_ratelimit())
1977 printk(KERN_CRIT "Bug in ip_route_input" \ 1975 printk(KERN_CRIT "Bug in ip_route_input" \
@@ -1986,7 +1984,6 @@ static int __mkroute_input(struct sk_buff *skb,
1986 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, 1984 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
1987 saddr); 1985 saddr);
1988 1986
1989 err = -EINVAL;
1990 goto cleanup; 1987 goto cleanup;
1991 } 1988 }
1992 1989
@@ -2020,12 +2017,12 @@ static int __mkroute_input(struct sk_buff *skb,
2020 goto cleanup; 2017 goto cleanup;
2021 } 2018 }
2022 2019
2023 atomic_set(&rth->u.dst.__refcnt, 1); 2020 atomic_set(&rth->dst.__refcnt, 1);
2024 rth->u.dst.flags= DST_HOST; 2021 rth->dst.flags= DST_HOST;
2025 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2022 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2026 rth->u.dst.flags |= DST_NOPOLICY; 2023 rth->dst.flags |= DST_NOPOLICY;
2027 if (IN_DEV_CONF_GET(out_dev, NOXFRM)) 2024 if (IN_DEV_CONF_GET(out_dev, NOXFRM))
2028 rth->u.dst.flags |= DST_NOXFRM; 2025 rth->dst.flags |= DST_NOXFRM;
2029 rth->fl.fl4_dst = daddr; 2026 rth->fl.fl4_dst = daddr;
2030 rth->rt_dst = daddr; 2027 rth->rt_dst = daddr;
2031 rth->fl.fl4_tos = tos; 2028 rth->fl.fl4_tos = tos;
@@ -2035,16 +2032,16 @@ static int __mkroute_input(struct sk_buff *skb,
2035 rth->rt_gateway = daddr; 2032 rth->rt_gateway = daddr;
2036 rth->rt_iif = 2033 rth->rt_iif =
2037 rth->fl.iif = in_dev->dev->ifindex; 2034 rth->fl.iif = in_dev->dev->ifindex;
2038 rth->u.dst.dev = (out_dev)->dev; 2035 rth->dst.dev = (out_dev)->dev;
2039 dev_hold(rth->u.dst.dev); 2036 dev_hold(rth->dst.dev);
2040 rth->idev = in_dev_get(rth->u.dst.dev); 2037 rth->idev = in_dev_get(rth->dst.dev);
2041 rth->fl.oif = 0; 2038 rth->fl.oif = 0;
2042 rth->rt_spec_dst= spec_dst; 2039 rth->rt_spec_dst= spec_dst;
2043 2040
2044 rth->u.dst.obsolete = -1; 2041 rth->dst.obsolete = -1;
2045 rth->u.dst.input = ip_forward; 2042 rth->dst.input = ip_forward;
2046 rth->u.dst.output = ip_output; 2043 rth->dst.output = ip_output;
2047 rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); 2044 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
2048 2045
2049 rt_set_nexthop(rth, res, itag); 2046 rt_set_nexthop(rth, res, itag);
2050 2047
@@ -2053,8 +2050,6 @@ static int __mkroute_input(struct sk_buff *skb,
2053 *result = rth; 2050 *result = rth;
2054 err = 0; 2051 err = 0;
2055 cleanup: 2052 cleanup:
2056 /* release the working reference to the output device */
2057 in_dev_put(out_dev);
2058 return err; 2053 return err;
2059} 2054}
2060 2055
@@ -2080,7 +2075,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
2080 2075
2081 /* put it into the cache */ 2076 /* put it into the cache */
2082 hash = rt_hash(daddr, saddr, fl->iif, 2077 hash = rt_hash(daddr, saddr, fl->iif,
2083 rt_genid(dev_net(rth->u.dst.dev))); 2078 rt_genid(dev_net(rth->dst.dev)));
2084 return rt_intern_hash(hash, rth, NULL, skb, fl->iif); 2079 return rt_intern_hash(hash, rth, NULL, skb, fl->iif);
2085} 2080}
2086 2081
@@ -2098,7 +2093,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2098 u8 tos, struct net_device *dev) 2093 u8 tos, struct net_device *dev)
2099{ 2094{
2100 struct fib_result res; 2095 struct fib_result res;
2101 struct in_device *in_dev = in_dev_get(dev); 2096 struct in_device *in_dev = __in_dev_get_rcu(dev);
2102 struct flowi fl = { .nl_u = { .ip4_u = 2097 struct flowi fl = { .nl_u = { .ip4_u =
2103 { .daddr = daddr, 2098 { .daddr = daddr,
2104 .saddr = saddr, 2099 .saddr = saddr,
@@ -2158,13 +2153,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2158 goto brd_input; 2153 goto brd_input;
2159 2154
2160 if (res.type == RTN_LOCAL) { 2155 if (res.type == RTN_LOCAL) {
2161 int result; 2156 err = fib_validate_source(saddr, daddr, tos,
2162 result = fib_validate_source(saddr, daddr, tos,
2163 net->loopback_dev->ifindex, 2157 net->loopback_dev->ifindex,
2164 dev, &spec_dst, &itag, skb->mark); 2158 dev, &spec_dst, &itag, skb->mark);
2165 if (result < 0) 2159 if (err < 0)
2166 goto martian_source; 2160 goto martian_source_keep_err;
2167 if (result) 2161 if (err)
2168 flags |= RTCF_DIRECTSRC; 2162 flags |= RTCF_DIRECTSRC;
2169 spec_dst = daddr; 2163 spec_dst = daddr;
2170 goto local_input; 2164 goto local_input;
@@ -2177,7 +2171,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2177 2171
2178 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); 2172 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
2179done: 2173done:
2180 in_dev_put(in_dev);
2181 if (free_res) 2174 if (free_res)
2182 fib_res_put(&res); 2175 fib_res_put(&res);
2183out: return err; 2176out: return err;
@@ -2192,7 +2185,7 @@ brd_input:
2192 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, 2185 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
2193 &itag, skb->mark); 2186 &itag, skb->mark);
2194 if (err < 0) 2187 if (err < 0)
2195 goto martian_source; 2188 goto martian_source_keep_err;
2196 if (err) 2189 if (err)
2197 flags |= RTCF_DIRECTSRC; 2190 flags |= RTCF_DIRECTSRC;
2198 } 2191 }
@@ -2205,14 +2198,14 @@ local_input:
2205 if (!rth) 2198 if (!rth)
2206 goto e_nobufs; 2199 goto e_nobufs;
2207 2200
2208 rth->u.dst.output= ip_rt_bug; 2201 rth->dst.output= ip_rt_bug;
2209 rth->u.dst.obsolete = -1; 2202 rth->dst.obsolete = -1;
2210 rth->rt_genid = rt_genid(net); 2203 rth->rt_genid = rt_genid(net);
2211 2204
2212 atomic_set(&rth->u.dst.__refcnt, 1); 2205 atomic_set(&rth->dst.__refcnt, 1);
2213 rth->u.dst.flags= DST_HOST; 2206 rth->dst.flags= DST_HOST;
2214 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2207 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2215 rth->u.dst.flags |= DST_NOPOLICY; 2208 rth->dst.flags |= DST_NOPOLICY;
2216 rth->fl.fl4_dst = daddr; 2209 rth->fl.fl4_dst = daddr;
2217 rth->rt_dst = daddr; 2210 rth->rt_dst = daddr;
2218 rth->fl.fl4_tos = tos; 2211 rth->fl.fl4_tos = tos;
@@ -2220,20 +2213,20 @@ local_input:
2220 rth->fl.fl4_src = saddr; 2213 rth->fl.fl4_src = saddr;
2221 rth->rt_src = saddr; 2214 rth->rt_src = saddr;
2222#ifdef CONFIG_NET_CLS_ROUTE 2215#ifdef CONFIG_NET_CLS_ROUTE
2223 rth->u.dst.tclassid = itag; 2216 rth->dst.tclassid = itag;
2224#endif 2217#endif
2225 rth->rt_iif = 2218 rth->rt_iif =
2226 rth->fl.iif = dev->ifindex; 2219 rth->fl.iif = dev->ifindex;
2227 rth->u.dst.dev = net->loopback_dev; 2220 rth->dst.dev = net->loopback_dev;
2228 dev_hold(rth->u.dst.dev); 2221 dev_hold(rth->dst.dev);
2229 rth->idev = in_dev_get(rth->u.dst.dev); 2222 rth->idev = in_dev_get(rth->dst.dev);
2230 rth->rt_gateway = daddr; 2223 rth->rt_gateway = daddr;
2231 rth->rt_spec_dst= spec_dst; 2224 rth->rt_spec_dst= spec_dst;
2232 rth->u.dst.input= ip_local_deliver; 2225 rth->dst.input= ip_local_deliver;
2233 rth->rt_flags = flags|RTCF_LOCAL; 2226 rth->rt_flags = flags|RTCF_LOCAL;
2234 if (res.type == RTN_UNREACHABLE) { 2227 if (res.type == RTN_UNREACHABLE) {
2235 rth->u.dst.input= ip_error; 2228 rth->dst.input= ip_error;
2236 rth->u.dst.error= -err; 2229 rth->dst.error= -err;
2237 rth->rt_flags &= ~RTCF_LOCAL; 2230 rth->rt_flags &= ~RTCF_LOCAL;
2238 } 2231 }
2239 rth->rt_type = res.type; 2232 rth->rt_type = res.type;
@@ -2273,8 +2266,10 @@ e_nobufs:
2273 goto done; 2266 goto done;
2274 2267
2275martian_source: 2268martian_source:
2269 err = -EINVAL;
2270martian_source_keep_err:
2276 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); 2271 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
2277 goto e_inval; 2272 goto done;
2278} 2273}
2279 2274
2280int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, 2275int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2284,32 +2279,34 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2284 unsigned hash; 2279 unsigned hash;
2285 int iif = dev->ifindex; 2280 int iif = dev->ifindex;
2286 struct net *net; 2281 struct net *net;
2282 int res;
2287 2283
2288 net = dev_net(dev); 2284 net = dev_net(dev);
2289 2285
2286 rcu_read_lock();
2287
2290 if (!rt_caching(net)) 2288 if (!rt_caching(net))
2291 goto skip_cache; 2289 goto skip_cache;
2292 2290
2293 tos &= IPTOS_RT_MASK; 2291 tos &= IPTOS_RT_MASK;
2294 hash = rt_hash(daddr, saddr, iif, rt_genid(net)); 2292 hash = rt_hash(daddr, saddr, iif, rt_genid(net));
2295 2293
2296 rcu_read_lock();
2297 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2294 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2298 rth = rcu_dereference(rth->u.dst.rt_next)) { 2295 rth = rcu_dereference(rth->dst.rt_next)) {
2299 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | 2296 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
2300 ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | 2297 ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
2301 (rth->fl.iif ^ iif) | 2298 (rth->fl.iif ^ iif) |
2302 rth->fl.oif | 2299 rth->fl.oif |
2303 (rth->fl.fl4_tos ^ tos)) == 0 && 2300 (rth->fl.fl4_tos ^ tos)) == 0 &&
2304 rth->fl.mark == skb->mark && 2301 rth->fl.mark == skb->mark &&
2305 net_eq(dev_net(rth->u.dst.dev), net) && 2302 net_eq(dev_net(rth->dst.dev), net) &&
2306 !rt_is_expired(rth)) { 2303 !rt_is_expired(rth)) {
2307 if (noref) { 2304 if (noref) {
2308 dst_use_noref(&rth->u.dst, jiffies); 2305 dst_use_noref(&rth->dst, jiffies);
2309 skb_dst_set_noref(skb, &rth->u.dst); 2306 skb_dst_set_noref(skb, &rth->dst);
2310 } else { 2307 } else {
2311 dst_use(&rth->u.dst, jiffies); 2308 dst_use(&rth->dst, jiffies);
2312 skb_dst_set(skb, &rth->u.dst); 2309 skb_dst_set(skb, &rth->dst);
2313 } 2310 }
2314 RT_CACHE_STAT_INC(in_hit); 2311 RT_CACHE_STAT_INC(in_hit);
2315 rcu_read_unlock(); 2312 rcu_read_unlock();
@@ -2317,7 +2314,6 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2317 } 2314 }
2318 RT_CACHE_STAT_INC(in_hlist_search); 2315 RT_CACHE_STAT_INC(in_hlist_search);
2319 } 2316 }
2320 rcu_read_unlock();
2321 2317
2322skip_cache: 2318skip_cache:
2323 /* Multicast recognition logic is moved from route cache to here. 2319 /* Multicast recognition logic is moved from route cache to here.
@@ -2332,12 +2328,11 @@ skip_cache:
2332 route cache entry is created eventually. 2328 route cache entry is created eventually.
2333 */ 2329 */
2334 if (ipv4_is_multicast(daddr)) { 2330 if (ipv4_is_multicast(daddr)) {
2335 struct in_device *in_dev; 2331 struct in_device *in_dev = __in_dev_get_rcu(dev);
2336 2332
2337 rcu_read_lock(); 2333 if (in_dev) {
2338 if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
2339 int our = ip_check_mc(in_dev, daddr, saddr, 2334 int our = ip_check_mc(in_dev, daddr, saddr,
2340 ip_hdr(skb)->protocol); 2335 ip_hdr(skb)->protocol);
2341 if (our 2336 if (our
2342#ifdef CONFIG_IP_MROUTE 2337#ifdef CONFIG_IP_MROUTE
2343 || 2338 ||
@@ -2345,15 +2340,18 @@ skip_cache:
2345 IN_DEV_MFORWARD(in_dev)) 2340 IN_DEV_MFORWARD(in_dev))
2346#endif 2341#endif
2347 ) { 2342 ) {
2343 int res = ip_route_input_mc(skb, daddr, saddr,
2344 tos, dev, our);
2348 rcu_read_unlock(); 2345 rcu_read_unlock();
2349 return ip_route_input_mc(skb, daddr, saddr, 2346 return res;
2350 tos, dev, our);
2351 } 2347 }
2352 } 2348 }
2353 rcu_read_unlock(); 2349 rcu_read_unlock();
2354 return -EINVAL; 2350 return -EINVAL;
2355 } 2351 }
2356 return ip_route_input_slow(skb, daddr, saddr, tos, dev); 2352 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
2353 rcu_read_unlock();
2354 return res;
2357} 2355}
2358EXPORT_SYMBOL(ip_route_input_common); 2356EXPORT_SYMBOL(ip_route_input_common);
2359 2357
@@ -2415,12 +2413,12 @@ static int __mkroute_output(struct rtable **result,
2415 goto cleanup; 2413 goto cleanup;
2416 } 2414 }
2417 2415
2418 atomic_set(&rth->u.dst.__refcnt, 1); 2416 atomic_set(&rth->dst.__refcnt, 1);
2419 rth->u.dst.flags= DST_HOST; 2417 rth->dst.flags= DST_HOST;
2420 if (IN_DEV_CONF_GET(in_dev, NOXFRM)) 2418 if (IN_DEV_CONF_GET(in_dev, NOXFRM))
2421 rth->u.dst.flags |= DST_NOXFRM; 2419 rth->dst.flags |= DST_NOXFRM;
2422 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2420 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2423 rth->u.dst.flags |= DST_NOPOLICY; 2421 rth->dst.flags |= DST_NOPOLICY;
2424 2422
2425 rth->fl.fl4_dst = oldflp->fl4_dst; 2423 rth->fl.fl4_dst = oldflp->fl4_dst;
2426 rth->fl.fl4_tos = tos; 2424 rth->fl.fl4_tos = tos;
@@ -2432,35 +2430,35 @@ static int __mkroute_output(struct rtable **result,
2432 rth->rt_iif = oldflp->oif ? : dev_out->ifindex; 2430 rth->rt_iif = oldflp->oif ? : dev_out->ifindex;
2433 /* get references to the devices that are to be hold by the routing 2431 /* get references to the devices that are to be hold by the routing
2434 cache entry */ 2432 cache entry */
2435 rth->u.dst.dev = dev_out; 2433 rth->dst.dev = dev_out;
2436 dev_hold(dev_out); 2434 dev_hold(dev_out);
2437 rth->idev = in_dev_get(dev_out); 2435 rth->idev = in_dev_get(dev_out);
2438 rth->rt_gateway = fl->fl4_dst; 2436 rth->rt_gateway = fl->fl4_dst;
2439 rth->rt_spec_dst= fl->fl4_src; 2437 rth->rt_spec_dst= fl->fl4_src;
2440 2438
2441 rth->u.dst.output=ip_output; 2439 rth->dst.output=ip_output;
2442 rth->u.dst.obsolete = -1; 2440 rth->dst.obsolete = -1;
2443 rth->rt_genid = rt_genid(dev_net(dev_out)); 2441 rth->rt_genid = rt_genid(dev_net(dev_out));
2444 2442
2445 RT_CACHE_STAT_INC(out_slow_tot); 2443 RT_CACHE_STAT_INC(out_slow_tot);
2446 2444
2447 if (flags & RTCF_LOCAL) { 2445 if (flags & RTCF_LOCAL) {
2448 rth->u.dst.input = ip_local_deliver; 2446 rth->dst.input = ip_local_deliver;
2449 rth->rt_spec_dst = fl->fl4_dst; 2447 rth->rt_spec_dst = fl->fl4_dst;
2450 } 2448 }
2451 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { 2449 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
2452 rth->rt_spec_dst = fl->fl4_src; 2450 rth->rt_spec_dst = fl->fl4_src;
2453 if (flags & RTCF_LOCAL && 2451 if (flags & RTCF_LOCAL &&
2454 !(dev_out->flags & IFF_LOOPBACK)) { 2452 !(dev_out->flags & IFF_LOOPBACK)) {
2455 rth->u.dst.output = ip_mc_output; 2453 rth->dst.output = ip_mc_output;
2456 RT_CACHE_STAT_INC(out_slow_mc); 2454 RT_CACHE_STAT_INC(out_slow_mc);
2457 } 2455 }
2458#ifdef CONFIG_IP_MROUTE 2456#ifdef CONFIG_IP_MROUTE
2459 if (res->type == RTN_MULTICAST) { 2457 if (res->type == RTN_MULTICAST) {
2460 if (IN_DEV_MFORWARD(in_dev) && 2458 if (IN_DEV_MFORWARD(in_dev) &&
2461 !ipv4_is_local_multicast(oldflp->fl4_dst)) { 2459 !ipv4_is_local_multicast(oldflp->fl4_dst)) {
2462 rth->u.dst.input = ip_mr_input; 2460 rth->dst.input = ip_mr_input;
2463 rth->u.dst.output = ip_mc_output; 2461 rth->dst.output = ip_mc_output;
2464 } 2462 }
2465 } 2463 }
2466#endif 2464#endif
@@ -2715,7 +2713,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2715 2713
2716 rcu_read_lock_bh(); 2714 rcu_read_lock_bh();
2717 for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; 2715 for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
2718 rth = rcu_dereference_bh(rth->u.dst.rt_next)) { 2716 rth = rcu_dereference_bh(rth->dst.rt_next)) {
2719 if (rth->fl.fl4_dst == flp->fl4_dst && 2717 if (rth->fl.fl4_dst == flp->fl4_dst &&
2720 rth->fl.fl4_src == flp->fl4_src && 2718 rth->fl.fl4_src == flp->fl4_src &&
2721 rth->fl.iif == 0 && 2719 rth->fl.iif == 0 &&
@@ -2723,9 +2721,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2723 rth->fl.mark == flp->mark && 2721 rth->fl.mark == flp->mark &&
2724 !((rth->fl.fl4_tos ^ flp->fl4_tos) & 2722 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
2725 (IPTOS_RT_MASK | RTO_ONLINK)) && 2723 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2726 net_eq(dev_net(rth->u.dst.dev), net) && 2724 net_eq(dev_net(rth->dst.dev), net) &&
2727 !rt_is_expired(rth)) { 2725 !rt_is_expired(rth)) {
2728 dst_use(&rth->u.dst, jiffies); 2726 dst_use(&rth->dst, jiffies);
2729 RT_CACHE_STAT_INC(out_hit); 2727 RT_CACHE_STAT_INC(out_hit);
2730 rcu_read_unlock_bh(); 2728 rcu_read_unlock_bh();
2731 *rp = rth; 2729 *rp = rth;
@@ -2738,7 +2736,6 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2738slow_output: 2736slow_output:
2739 return ip_route_output_slow(net, rp, flp); 2737 return ip_route_output_slow(net, rp, flp);
2740} 2738}
2741
2742EXPORT_SYMBOL_GPL(__ip_route_output_key); 2739EXPORT_SYMBOL_GPL(__ip_route_output_key);
2743 2740
2744static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 2741static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -2762,15 +2759,15 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
2762 dst_alloc(&ipv4_dst_blackhole_ops); 2759 dst_alloc(&ipv4_dst_blackhole_ops);
2763 2760
2764 if (rt) { 2761 if (rt) {
2765 struct dst_entry *new = &rt->u.dst; 2762 struct dst_entry *new = &rt->dst;
2766 2763
2767 atomic_set(&new->__refcnt, 1); 2764 atomic_set(&new->__refcnt, 1);
2768 new->__use = 1; 2765 new->__use = 1;
2769 new->input = dst_discard; 2766 new->input = dst_discard;
2770 new->output = dst_discard; 2767 new->output = dst_discard;
2771 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 2768 memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
2772 2769
2773 new->dev = ort->u.dst.dev; 2770 new->dev = ort->dst.dev;
2774 if (new->dev) 2771 if (new->dev)
2775 dev_hold(new->dev); 2772 dev_hold(new->dev);
2776 2773
@@ -2794,7 +2791,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
2794 dst_free(new); 2791 dst_free(new);
2795 } 2792 }
2796 2793
2797 dst_release(&(*rp)->u.dst); 2794 dst_release(&(*rp)->dst);
2798 *rp = rt; 2795 *rp = rt;
2799 return (rt ? 0 : -ENOMEM); 2796 return (rt ? 0 : -ENOMEM);
2800} 2797}
@@ -2822,13 +2819,13 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
2822 2819
2823 return 0; 2820 return 0;
2824} 2821}
2825
2826EXPORT_SYMBOL_GPL(ip_route_output_flow); 2822EXPORT_SYMBOL_GPL(ip_route_output_flow);
2827 2823
2828int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) 2824int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
2829{ 2825{
2830 return ip_route_output_flow(net, rp, flp, NULL, 0); 2826 return ip_route_output_flow(net, rp, flp, NULL, 0);
2831} 2827}
2828EXPORT_SYMBOL(ip_route_output_key);
2832 2829
2833static int rt_fill_info(struct net *net, 2830static int rt_fill_info(struct net *net,
2834 struct sk_buff *skb, u32 pid, u32 seq, int event, 2831 struct sk_buff *skb, u32 pid, u32 seq, int event,
@@ -2864,11 +2861,11 @@ static int rt_fill_info(struct net *net,
2864 r->rtm_src_len = 32; 2861 r->rtm_src_len = 32;
2865 NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); 2862 NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src);
2866 } 2863 }
2867 if (rt->u.dst.dev) 2864 if (rt->dst.dev)
2868 NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); 2865 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2869#ifdef CONFIG_NET_CLS_ROUTE 2866#ifdef CONFIG_NET_CLS_ROUTE
2870 if (rt->u.dst.tclassid) 2867 if (rt->dst.tclassid)
2871 NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); 2868 NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
2872#endif 2869#endif
2873 if (rt->fl.iif) 2870 if (rt->fl.iif)
2874 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); 2871 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
@@ -2878,12 +2875,16 @@ static int rt_fill_info(struct net *net,
2878 if (rt->rt_dst != rt->rt_gateway) 2875 if (rt->rt_dst != rt->rt_gateway)
2879 NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); 2876 NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
2880 2877
2881 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 2878 if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
2882 goto nla_put_failure; 2879 goto nla_put_failure;
2883 2880
2884 error = rt->u.dst.error; 2881 if (rt->fl.mark)
2885 expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; 2882 NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark);
2883
2884 error = rt->dst.error;
2885 expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
2886 if (rt->peer) { 2886 if (rt->peer) {
2887 inet_peer_refcheck(rt->peer);
2887 id = atomic_read(&rt->peer->ip_id_count) & 0xffff; 2888 id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
2888 if (rt->peer->tcp_ts_stamp) { 2889 if (rt->peer->tcp_ts_stamp) {
2889 ts = rt->peer->tcp_ts; 2890 ts = rt->peer->tcp_ts;
@@ -2914,7 +2915,7 @@ static int rt_fill_info(struct net *net,
2914 NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); 2915 NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
2915 } 2916 }
2916 2917
2917 if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage, 2918 if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
2918 expires, error) < 0) 2919 expires, error) < 0)
2919 goto nla_put_failure; 2920 goto nla_put_failure;
2920 2921
@@ -2935,6 +2936,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2935 __be32 src = 0; 2936 __be32 src = 0;
2936 u32 iif; 2937 u32 iif;
2937 int err; 2938 int err;
2939 int mark;
2938 struct sk_buff *skb; 2940 struct sk_buff *skb;
2939 2941
2940 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); 2942 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
@@ -2962,6 +2964,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2962 src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; 2964 src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
2963 dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; 2965 dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
2964 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; 2966 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
2967 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
2965 2968
2966 if (iif) { 2969 if (iif) {
2967 struct net_device *dev; 2970 struct net_device *dev;
@@ -2974,13 +2977,14 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2974 2977
2975 skb->protocol = htons(ETH_P_IP); 2978 skb->protocol = htons(ETH_P_IP);
2976 skb->dev = dev; 2979 skb->dev = dev;
2980 skb->mark = mark;
2977 local_bh_disable(); 2981 local_bh_disable();
2978 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); 2982 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2979 local_bh_enable(); 2983 local_bh_enable();
2980 2984
2981 rt = skb_rtable(skb); 2985 rt = skb_rtable(skb);
2982 if (err == 0 && rt->u.dst.error) 2986 if (err == 0 && rt->dst.error)
2983 err = -rt->u.dst.error; 2987 err = -rt->dst.error;
2984 } else { 2988 } else {
2985 struct flowi fl = { 2989 struct flowi fl = {
2986 .nl_u = { 2990 .nl_u = {
@@ -2991,6 +2995,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2991 }, 2995 },
2992 }, 2996 },
2993 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, 2997 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
2998 .mark = mark,
2994 }; 2999 };
2995 err = ip_route_output_key(net, &rt, &fl); 3000 err = ip_route_output_key(net, &rt, &fl);
2996 } 3001 }
@@ -2998,7 +3003,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2998 if (err) 3003 if (err)
2999 goto errout_free; 3004 goto errout_free;
3000 3005
3001 skb_dst_set(skb, &rt->u.dst); 3006 skb_dst_set(skb, &rt->dst);
3002 if (rtm->rtm_flags & RTM_F_NOTIFY) 3007 if (rtm->rtm_flags & RTM_F_NOTIFY)
3003 rt->rt_flags |= RTCF_NOTIFY; 3008 rt->rt_flags |= RTCF_NOTIFY;
3004 3009
@@ -3034,12 +3039,12 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
3034 continue; 3039 continue;
3035 rcu_read_lock_bh(); 3040 rcu_read_lock_bh();
3036 for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; 3041 for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt;
3037 rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) { 3042 rt = rcu_dereference_bh(rt->dst.rt_next), idx++) {
3038 if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) 3043 if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx)
3039 continue; 3044 continue;
3040 if (rt_is_expired(rt)) 3045 if (rt_is_expired(rt))
3041 continue; 3046 continue;
3042 skb_dst_set_noref(skb, &rt->u.dst); 3047 skb_dst_set_noref(skb, &rt->dst);
3043 if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, 3048 if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
3044 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 3049 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
3045 1, NLM_F_MULTI) <= 0) { 3050 1, NLM_F_MULTI) <= 0) {
@@ -3365,6 +3370,3 @@ void __init ip_static_sysctl_init(void)
3365 register_sysctl_paths(ipv4_path, ipv4_skeleton); 3370 register_sysctl_paths(ipv4_path, ipv4_skeleton);
3366} 3371}
3367#endif 3372#endif
3368
3369EXPORT_SYMBOL(__ip_select_ident);
3370EXPORT_SYMBOL(ip_route_output_key);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 9f6b22206c52..650cace2180d 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -18,8 +18,8 @@
18#include <net/tcp.h> 18#include <net/tcp.h>
19#include <net/route.h> 19#include <net/route.h>
20 20
21/* Timestamps: lowest 9 bits store TCP options */ 21/* Timestamps: lowest bits store TCP options */
22#define TSBITS 9 22#define TSBITS 6
23#define TSMASK (((__u32)1 << TSBITS) - 1) 23#define TSMASK (((__u32)1 << TSBITS) - 1)
24 24
25extern int sysctl_tcp_syncookies; 25extern int sysctl_tcp_syncookies;
@@ -58,7 +58,7 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
58 58
59/* 59/*
60 * when syncookies are in effect and tcp timestamps are enabled we encode 60 * when syncookies are in effect and tcp timestamps are enabled we encode
61 * tcp options in the lowest 9 bits of the timestamp value that will be 61 * tcp options in the lower bits of the timestamp value that will be
62 * sent in the syn-ack. 62 * sent in the syn-ack.
63 * Since subsequent timestamps use the normal tcp_time_stamp value, we 63 * Since subsequent timestamps use the normal tcp_time_stamp value, we
64 * must make sure that the resulting initial timestamp is <= tcp_time_stamp. 64 * must make sure that the resulting initial timestamp is <= tcp_time_stamp.
@@ -70,11 +70,10 @@ __u32 cookie_init_timestamp(struct request_sock *req)
70 u32 options = 0; 70 u32 options = 0;
71 71
72 ireq = inet_rsk(req); 72 ireq = inet_rsk(req);
73 if (ireq->wscale_ok) { 73
74 options = ireq->snd_wscale; 74 options = ireq->wscale_ok ? ireq->snd_wscale : 0xf;
75 options |= ireq->rcv_wscale << 4; 75 options |= ireq->sack_ok << 4;
76 } 76 options |= ireq->ecn_ok << 5;
77 options |= ireq->sack_ok << 8;
78 77
79 ts = ts_now & ~TSMASK; 78 ts = ts_now & ~TSMASK;
80 ts |= options; 79 ts |= options;
@@ -138,23 +137,23 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr,
138} 137}
139 138
140/* 139/*
141 * This table has to be sorted and terminated with (__u16)-1. 140 * MSS Values are taken from the 2009 paper
142 * XXX generate a better table. 141 * 'Measuring TCP Maximum Segment Size' by S. Alcock and R. Nelson:
143 * Unresolved Issues: HIPPI with a 64k MSS is not well supported. 142 * - values 1440 to 1460 accounted for 80% of observed mss values
143 * - values outside the 536-1460 range are rare (<0.2%).
144 *
145 * Table must be sorted.
144 */ 146 */
145static __u16 const msstab[] = { 147static __u16 const msstab[] = {
146 64 - 1, 148 64,
147 256 - 1, 149 512,
148 512 - 1, 150 536,
149 536 - 1, 151 1024,
150 1024 - 1, 152 1440,
151 1440 - 1, 153 1460,
152 1460 - 1, 154 4312,
153 4312 - 1, 155 8960,
154 (__u16)-1
155}; 156};
156/* The number doesn't include the -1 terminator */
157#define NUM_MSS (ARRAY_SIZE(msstab) - 1)
158 157
159/* 158/*
160 * Generate a syncookie. mssp points to the mss, which is returned 159 * Generate a syncookie. mssp points to the mss, which is returned
@@ -169,10 +168,10 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
169 168
170 tcp_synq_overflow(sk); 169 tcp_synq_overflow(sk);
171 170
172 /* XXX sort msstab[] by probability? Binary search? */ 171 for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
173 for (mssind = 0; mss > msstab[mssind + 1]; mssind++) 172 if (mss >= msstab[mssind])
174 ; 173 break;
175 *mssp = msstab[mssind] + 1; 174 *mssp = msstab[mssind];
176 175
177 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); 176 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
178 177
@@ -202,7 +201,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
202 jiffies / (HZ * 60), 201 jiffies / (HZ * 60),
203 COUNTER_TRIES); 202 COUNTER_TRIES);
204 203
205 return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; 204 return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
206} 205}
207 206
208static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, 207static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
@@ -227,26 +226,38 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
227 * additional tcp options in the timestamp. 226 * additional tcp options in the timestamp.
228 * This extracts these options from the timestamp echo. 227 * This extracts these options from the timestamp echo.
229 * 228 *
230 * The lowest 4 bits are for snd_wscale 229 * The lowest 4 bits store snd_wscale.
231 * The next 4 lsb are for rcv_wscale 230 * next 2 bits indicate SACK and ECN support.
232 * The next lsb is for sack_ok 231 *
232 * return false if we decode an option that should not be.
233 */ 233 */
234void cookie_check_timestamp(struct tcp_options_received *tcp_opt) 234bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok)
235{ 235{
236 /* echoed timestamp, 9 lowest bits contain options */ 236 /* echoed timestamp, lowest bits contain options */
237 u32 options = tcp_opt->rcv_tsecr & TSMASK; 237 u32 options = tcp_opt->rcv_tsecr & TSMASK;
238 238
239 tcp_opt->snd_wscale = options & 0xf; 239 if (!tcp_opt->saw_tstamp) {
240 options >>= 4; 240 tcp_clear_options(tcp_opt);
241 tcp_opt->rcv_wscale = options & 0xf; 241 return true;
242 }
243
244 if (!sysctl_tcp_timestamps)
245 return false;
242 246
243 tcp_opt->sack_ok = (options >> 4) & 0x1; 247 tcp_opt->sack_ok = (options >> 4) & 0x1;
248 *ecn_ok = (options >> 5) & 1;
249 if (*ecn_ok && !sysctl_tcp_ecn)
250 return false;
251
252 if (tcp_opt->sack_ok && !sysctl_tcp_sack)
253 return false;
244 254
245 if (tcp_opt->sack_ok) 255 if ((options & 0xf) == 0xf)
246 tcp_sack_reset(tcp_opt); 256 return true; /* no window scaling */
247 257
248 if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale) 258 tcp_opt->wscale_ok = 1;
249 tcp_opt->wscale_ok = 1; 259 tcp_opt->snd_wscale = options & 0xf;
260 return sysctl_tcp_window_scaling != 0;
250} 261}
251EXPORT_SYMBOL(cookie_check_timestamp); 262EXPORT_SYMBOL(cookie_check_timestamp);
252 263
@@ -265,8 +276,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
265 int mss; 276 int mss;
266 struct rtable *rt; 277 struct rtable *rt;
267 __u8 rcv_wscale; 278 __u8 rcv_wscale;
279 bool ecn_ok;
268 280
269 if (!sysctl_tcp_syncookies || !th->ack) 281 if (!sysctl_tcp_syncookies || !th->ack || th->rst)
270 goto out; 282 goto out;
271 283
272 if (tcp_synq_no_recent_overflow(sk) || 284 if (tcp_synq_no_recent_overflow(sk) ||
@@ -281,8 +293,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
281 memset(&tcp_opt, 0, sizeof(tcp_opt)); 293 memset(&tcp_opt, 0, sizeof(tcp_opt));
282 tcp_parse_options(skb, &tcp_opt, &hash_location, 0); 294 tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
283 295
284 if (tcp_opt.saw_tstamp) 296 if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
285 cookie_check_timestamp(&tcp_opt); 297 goto out;
286 298
287 ret = NULL; 299 ret = NULL;
288 req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ 300 req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
@@ -298,9 +310,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
298 ireq->rmt_port = th->source; 310 ireq->rmt_port = th->source;
299 ireq->loc_addr = ip_hdr(skb)->daddr; 311 ireq->loc_addr = ip_hdr(skb)->daddr;
300 ireq->rmt_addr = ip_hdr(skb)->saddr; 312 ireq->rmt_addr = ip_hdr(skb)->saddr;
301 ireq->ecn_ok = 0; 313 ireq->ecn_ok = ecn_ok;
302 ireq->snd_wscale = tcp_opt.snd_wscale; 314 ireq->snd_wscale = tcp_opt.snd_wscale;
303 ireq->rcv_wscale = tcp_opt.rcv_wscale;
304 ireq->sack_ok = tcp_opt.sack_ok; 315 ireq->sack_ok = tcp_opt.sack_ok;
305 ireq->wscale_ok = tcp_opt.wscale_ok; 316 ireq->wscale_ok = tcp_opt.wscale_ok;
306 ireq->tstamp_ok = tcp_opt.saw_tstamp; 317 ireq->tstamp_ok = tcp_opt.saw_tstamp;
@@ -354,15 +365,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
354 } 365 }
355 366
356 /* Try to redo what tcp_v4_send_synack did. */ 367 /* Try to redo what tcp_v4_send_synack did. */
357 req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); 368 req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
358 369
359 tcp_select_initial_window(tcp_full_space(sk), req->mss, 370 tcp_select_initial_window(tcp_full_space(sk), req->mss,
360 &req->rcv_wnd, &req->window_clamp, 371 &req->rcv_wnd, &req->window_clamp,
361 ireq->wscale_ok, &rcv_wscale, 372 ireq->wscale_ok, &rcv_wscale,
362 dst_metric(&rt->u.dst, RTAX_INITRWND)); 373 dst_metric(&rt->dst, RTAX_INITRWND));
363 374
364 ireq->rcv_wscale = rcv_wscale; 375 ireq->rcv_wscale = rcv_wscale;
365 376
366 ret = get_cookie_sock(sk, skb, req, &rt->u.dst); 377 ret = get_cookie_sock(sk, skb, req, &rt->dst);
367out: return ret; 378out: return ret;
368} 379}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 65afeaec15b7..176e11aaea77 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -315,7 +315,6 @@ struct tcp_splice_state {
315 * is strict, actions are advisory and have some latency. 315 * is strict, actions are advisory and have some latency.
316 */ 316 */
317int tcp_memory_pressure __read_mostly; 317int tcp_memory_pressure __read_mostly;
318
319EXPORT_SYMBOL(tcp_memory_pressure); 318EXPORT_SYMBOL(tcp_memory_pressure);
320 319
321void tcp_enter_memory_pressure(struct sock *sk) 320void tcp_enter_memory_pressure(struct sock *sk)
@@ -325,7 +324,6 @@ void tcp_enter_memory_pressure(struct sock *sk)
325 tcp_memory_pressure = 1; 324 tcp_memory_pressure = 1;
326 } 325 }
327} 326}
328
329EXPORT_SYMBOL(tcp_enter_memory_pressure); 327EXPORT_SYMBOL(tcp_enter_memory_pressure);
330 328
331/* Convert seconds to retransmits based on initial and max timeout */ 329/* Convert seconds to retransmits based on initial and max timeout */
@@ -460,6 +458,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
460 } 458 }
461 return mask; 459 return mask;
462} 460}
461EXPORT_SYMBOL(tcp_poll);
463 462
464int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) 463int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
465{ 464{
@@ -508,10 +507,11 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
508 507
509 return put_user(answ, (int __user *)arg); 508 return put_user(answ, (int __user *)arg);
510} 509}
510EXPORT_SYMBOL(tcp_ioctl);
511 511
512static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) 512static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
513{ 513{
514 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 514 TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
515 tp->pushed_seq = tp->write_seq; 515 tp->pushed_seq = tp->write_seq;
516} 516}
517 517
@@ -527,7 +527,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
527 527
528 skb->csum = 0; 528 skb->csum = 0;
529 tcb->seq = tcb->end_seq = tp->write_seq; 529 tcb->seq = tcb->end_seq = tp->write_seq;
530 tcb->flags = TCPCB_FLAG_ACK; 530 tcb->flags = TCPHDR_ACK;
531 tcb->sacked = 0; 531 tcb->sacked = 0;
532 skb_header_release(skb); 532 skb_header_release(skb);
533 tcp_add_write_queue_tail(sk, skb); 533 tcp_add_write_queue_tail(sk, skb);
@@ -676,6 +676,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
676 676
677 return ret; 677 return ret;
678} 678}
679EXPORT_SYMBOL(tcp_splice_read);
679 680
680struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) 681struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
681{ 682{
@@ -816,7 +817,7 @@ new_segment:
816 skb_shinfo(skb)->gso_segs = 0; 817 skb_shinfo(skb)->gso_segs = 0;
817 818
818 if (!copied) 819 if (!copied)
819 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; 820 TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
820 821
821 copied += copy; 822 copied += copy;
822 poffset += copy; 823 poffset += copy;
@@ -857,15 +858,15 @@ out_err:
857 return sk_stream_error(sk, flags, err); 858 return sk_stream_error(sk, flags, err);
858} 859}
859 860
860ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, 861int tcp_sendpage(struct sock *sk, struct page *page, int offset,
861 size_t size, int flags) 862 size_t size, int flags)
862{ 863{
863 ssize_t res; 864 ssize_t res;
864 struct sock *sk = sock->sk;
865 865
866 if (!(sk->sk_route_caps & NETIF_F_SG) || 866 if (!(sk->sk_route_caps & NETIF_F_SG) ||
867 !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) 867 !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
868 return sock_no_sendpage(sock, page, offset, size, flags); 868 return sock_no_sendpage(sk->sk_socket, page, offset, size,
869 flags);
869 870
870 lock_sock(sk); 871 lock_sock(sk);
871 TCP_CHECK_TIMER(sk); 872 TCP_CHECK_TIMER(sk);
@@ -874,6 +875,7 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
874 release_sock(sk); 875 release_sock(sk);
875 return res; 876 return res;
876} 877}
878EXPORT_SYMBOL(tcp_sendpage);
877 879
878#define TCP_PAGE(sk) (sk->sk_sndmsg_page) 880#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
879#define TCP_OFF(sk) (sk->sk_sndmsg_off) 881#define TCP_OFF(sk) (sk->sk_sndmsg_off)
@@ -898,10 +900,9 @@ static inline int select_size(struct sock *sk, int sg)
898 return tmp; 900 return tmp;
899} 901}
900 902
901int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, 903int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
902 size_t size) 904 size_t size)
903{ 905{
904 struct sock *sk = sock->sk;
905 struct iovec *iov; 906 struct iovec *iov;
906 struct tcp_sock *tp = tcp_sk(sk); 907 struct tcp_sock *tp = tcp_sk(sk);
907 struct sk_buff *skb; 908 struct sk_buff *skb;
@@ -1062,7 +1063,7 @@ new_segment:
1062 } 1063 }
1063 1064
1064 if (!copied) 1065 if (!copied)
1065 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; 1066 TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
1066 1067
1067 tp->write_seq += copy; 1068 tp->write_seq += copy;
1068 TCP_SKB_CB(skb)->end_seq += copy; 1069 TCP_SKB_CB(skb)->end_seq += copy;
@@ -1122,6 +1123,7 @@ out_err:
1122 release_sock(sk); 1123 release_sock(sk);
1123 return err; 1124 return err;
1124} 1125}
1126EXPORT_SYMBOL(tcp_sendmsg);
1125 1127
1126/* 1128/*
1127 * Handle reading urgent data. BSD has very simple semantics for 1129 * Handle reading urgent data. BSD has very simple semantics for
@@ -1381,6 +1383,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1381 tcp_cleanup_rbuf(sk, copied); 1383 tcp_cleanup_rbuf(sk, copied);
1382 return copied; 1384 return copied;
1383} 1385}
1386EXPORT_SYMBOL(tcp_read_sock);
1384 1387
1385/* 1388/*
1386 * This routine copies from a sock struct into the user buffer. 1389 * This routine copies from a sock struct into the user buffer.
@@ -1775,6 +1778,7 @@ recv_urg:
1775 err = tcp_recv_urg(sk, msg, len, flags); 1778 err = tcp_recv_urg(sk, msg, len, flags);
1776 goto out; 1779 goto out;
1777} 1780}
1781EXPORT_SYMBOL(tcp_recvmsg);
1778 1782
1779void tcp_set_state(struct sock *sk, int state) 1783void tcp_set_state(struct sock *sk, int state)
1780{ 1784{
@@ -1867,6 +1871,7 @@ void tcp_shutdown(struct sock *sk, int how)
1867 tcp_send_fin(sk); 1871 tcp_send_fin(sk);
1868 } 1872 }
1869} 1873}
1874EXPORT_SYMBOL(tcp_shutdown);
1870 1875
1871void tcp_close(struct sock *sk, long timeout) 1876void tcp_close(struct sock *sk, long timeout)
1872{ 1877{
@@ -1899,6 +1904,10 @@ void tcp_close(struct sock *sk, long timeout)
1899 1904
1900 sk_mem_reclaim(sk); 1905 sk_mem_reclaim(sk);
1901 1906
1907 /* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
1908 if (sk->sk_state == TCP_CLOSE)
1909 goto adjudge_to_death;
1910
1902 /* As outlined in RFC 2525, section 2.17, we send a RST here because 1911 /* As outlined in RFC 2525, section 2.17, we send a RST here because
1903 * data was lost. To witness the awful effects of the old behavior of 1912 * data was lost. To witness the awful effects of the old behavior of
1904 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk 1913 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
@@ -2026,6 +2035,7 @@ out:
2026 local_bh_enable(); 2035 local_bh_enable();
2027 sock_put(sk); 2036 sock_put(sk);
2028} 2037}
2038EXPORT_SYMBOL(tcp_close);
2029 2039
2030/* These states need RST on ABORT according to RFC793 */ 2040/* These states need RST on ABORT according to RFC793 */
2031 2041
@@ -2099,6 +2109,7 @@ int tcp_disconnect(struct sock *sk, int flags)
2099 sk->sk_error_report(sk); 2109 sk->sk_error_report(sk);
2100 return err; 2110 return err;
2101} 2111}
2112EXPORT_SYMBOL(tcp_disconnect);
2102 2113
2103/* 2114/*
2104 * Socket option code for TCP. 2115 * Socket option code for TCP.
@@ -2176,6 +2187,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2176 GFP_KERNEL); 2187 GFP_KERNEL);
2177 if (cvp == NULL) 2188 if (cvp == NULL)
2178 return -ENOMEM; 2189 return -ENOMEM;
2190
2191 kref_init(&cvp->kref);
2179 } 2192 }
2180 lock_sock(sk); 2193 lock_sock(sk);
2181 tp->rx_opt.cookie_in_always = 2194 tp->rx_opt.cookie_in_always =
@@ -2190,12 +2203,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2190 */ 2203 */
2191 kref_put(&tp->cookie_values->kref, 2204 kref_put(&tp->cookie_values->kref,
2192 tcp_cookie_values_release); 2205 tcp_cookie_values_release);
2193 kref_init(&cvp->kref);
2194 tp->cookie_values = cvp;
2195 } else { 2206 } else {
2196 cvp = tp->cookie_values; 2207 cvp = tp->cookie_values;
2197 } 2208 }
2198 } 2209 }
2210
2199 if (cvp != NULL) { 2211 if (cvp != NULL) {
2200 cvp->cookie_desired = ctd.tcpct_cookie_desired; 2212 cvp->cookie_desired = ctd.tcpct_cookie_desired;
2201 2213
@@ -2209,6 +2221,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2209 cvp->s_data_desired = ctd.tcpct_s_data_desired; 2221 cvp->s_data_desired = ctd.tcpct_s_data_desired;
2210 cvp->s_data_constant = 0; /* false */ 2222 cvp->s_data_constant = 0; /* false */
2211 } 2223 }
2224
2225 tp->cookie_values = cvp;
2212 } 2226 }
2213 release_sock(sk); 2227 release_sock(sk);
2214 return err; 2228 return err;
@@ -2397,6 +2411,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
2397 optval, optlen); 2411 optval, optlen);
2398 return do_tcp_setsockopt(sk, level, optname, optval, optlen); 2412 return do_tcp_setsockopt(sk, level, optname, optval, optlen);
2399} 2413}
2414EXPORT_SYMBOL(tcp_setsockopt);
2400 2415
2401#ifdef CONFIG_COMPAT 2416#ifdef CONFIG_COMPAT
2402int compat_tcp_setsockopt(struct sock *sk, int level, int optname, 2417int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
@@ -2407,7 +2422,6 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
2407 optval, optlen); 2422 optval, optlen);
2408 return do_tcp_setsockopt(sk, level, optname, optval, optlen); 2423 return do_tcp_setsockopt(sk, level, optname, optval, optlen);
2409} 2424}
2410
2411EXPORT_SYMBOL(compat_tcp_setsockopt); 2425EXPORT_SYMBOL(compat_tcp_setsockopt);
2412#endif 2426#endif
2413 2427
@@ -2473,7 +2487,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2473 2487
2474 info->tcpi_total_retrans = tp->total_retrans; 2488 info->tcpi_total_retrans = tp->total_retrans;
2475} 2489}
2476
2477EXPORT_SYMBOL_GPL(tcp_get_info); 2490EXPORT_SYMBOL_GPL(tcp_get_info);
2478 2491
2479static int do_tcp_getsockopt(struct sock *sk, int level, 2492static int do_tcp_getsockopt(struct sock *sk, int level,
@@ -2591,6 +2604,12 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2591 return -EFAULT; 2604 return -EFAULT;
2592 return 0; 2605 return 0;
2593 } 2606 }
2607 case TCP_THIN_LINEAR_TIMEOUTS:
2608 val = tp->thin_lto;
2609 break;
2610 case TCP_THIN_DUPACK:
2611 val = tp->thin_dupack;
2612 break;
2594 default: 2613 default:
2595 return -ENOPROTOOPT; 2614 return -ENOPROTOOPT;
2596 } 2615 }
@@ -2612,6 +2631,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2612 optval, optlen); 2631 optval, optlen);
2613 return do_tcp_getsockopt(sk, level, optname, optval, optlen); 2632 return do_tcp_getsockopt(sk, level, optname, optval, optlen);
2614} 2633}
2634EXPORT_SYMBOL(tcp_getsockopt);
2615 2635
2616#ifdef CONFIG_COMPAT 2636#ifdef CONFIG_COMPAT
2617int compat_tcp_getsockopt(struct sock *sk, int level, int optname, 2637int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
@@ -2622,7 +2642,6 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
2622 optval, optlen); 2642 optval, optlen);
2623 return do_tcp_getsockopt(sk, level, optname, optval, optlen); 2643 return do_tcp_getsockopt(sk, level, optname, optval, optlen);
2624} 2644}
2625
2626EXPORT_SYMBOL(compat_tcp_getsockopt); 2645EXPORT_SYMBOL(compat_tcp_getsockopt);
2627#endif 2646#endif
2628 2647
@@ -2859,7 +2878,6 @@ void tcp_free_md5sig_pool(void)
2859 if (pool) 2878 if (pool)
2860 __tcp_free_md5sig_pool(pool); 2879 __tcp_free_md5sig_pool(pool);
2861} 2880}
2862
2863EXPORT_SYMBOL(tcp_free_md5sig_pool); 2881EXPORT_SYMBOL(tcp_free_md5sig_pool);
2864 2882
2865static struct tcp_md5sig_pool * __percpu * 2883static struct tcp_md5sig_pool * __percpu *
@@ -2935,7 +2953,6 @@ retry:
2935 } 2953 }
2936 return pool; 2954 return pool;
2937} 2955}
2938
2939EXPORT_SYMBOL(tcp_alloc_md5sig_pool); 2956EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
2940 2957
2941 2958
@@ -2959,7 +2976,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
2959 spin_unlock(&tcp_md5sig_pool_lock); 2976 spin_unlock(&tcp_md5sig_pool_lock);
2960 2977
2961 if (p) 2978 if (p)
2962 return *per_cpu_ptr(p, smp_processor_id()); 2979 return *this_cpu_ptr(p);
2963 2980
2964 local_bh_enable(); 2981 local_bh_enable();
2965 return NULL; 2982 return NULL;
@@ -2987,7 +3004,6 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
2987 th->check = old_checksum; 3004 th->check = old_checksum;
2988 return err; 3005 return err;
2989} 3006}
2990
2991EXPORT_SYMBOL(tcp_md5_hash_header); 3007EXPORT_SYMBOL(tcp_md5_hash_header);
2992 3008
2993int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, 3009int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
@@ -3000,6 +3016,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
3000 const unsigned head_data_len = skb_headlen(skb) > header_len ? 3016 const unsigned head_data_len = skb_headlen(skb) > header_len ?
3001 skb_headlen(skb) - header_len : 0; 3017 skb_headlen(skb) - header_len : 0;
3002 const struct skb_shared_info *shi = skb_shinfo(skb); 3018 const struct skb_shared_info *shi = skb_shinfo(skb);
3019 struct sk_buff *frag_iter;
3003 3020
3004 sg_init_table(&sg, 1); 3021 sg_init_table(&sg, 1);
3005 3022
@@ -3014,9 +3031,12 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
3014 return 1; 3031 return 1;
3015 } 3032 }
3016 3033
3034 skb_walk_frags(skb, frag_iter)
3035 if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
3036 return 1;
3037
3017 return 0; 3038 return 0;
3018} 3039}
3019
3020EXPORT_SYMBOL(tcp_md5_hash_skb_data); 3040EXPORT_SYMBOL(tcp_md5_hash_skb_data);
3021 3041
3022int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) 3042int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
@@ -3026,7 +3046,6 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
3026 sg_init_one(&sg, key->key, key->keylen); 3046 sg_init_one(&sg, key->key, key->keylen);
3027 return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); 3047 return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
3028} 3048}
3029
3030EXPORT_SYMBOL(tcp_md5_hash_key); 3049EXPORT_SYMBOL(tcp_md5_hash_key);
3031 3050
3032#endif 3051#endif
@@ -3298,16 +3317,3 @@ void __init tcp_init(void)
3298 tcp_secret_retiring = &tcp_secret_two; 3317 tcp_secret_retiring = &tcp_secret_two;
3299 tcp_secret_secondary = &tcp_secret_two; 3318 tcp_secret_secondary = &tcp_secret_two;
3300} 3319}
3301
3302EXPORT_SYMBOL(tcp_close);
3303EXPORT_SYMBOL(tcp_disconnect);
3304EXPORT_SYMBOL(tcp_getsockopt);
3305EXPORT_SYMBOL(tcp_ioctl);
3306EXPORT_SYMBOL(tcp_poll);
3307EXPORT_SYMBOL(tcp_read_sock);
3308EXPORT_SYMBOL(tcp_recvmsg);
3309EXPORT_SYMBOL(tcp_sendmsg);
3310EXPORT_SYMBOL(tcp_splice_read);
3311EXPORT_SYMBOL(tcp_sendpage);
3312EXPORT_SYMBOL(tcp_setsockopt);
3313EXPORT_SYMBOL(tcp_shutdown);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 548d575e6cc6..e663b78a2ef6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -78,10 +78,13 @@ int sysctl_tcp_window_scaling __read_mostly = 1;
78int sysctl_tcp_sack __read_mostly = 1; 78int sysctl_tcp_sack __read_mostly = 1;
79int sysctl_tcp_fack __read_mostly = 1; 79int sysctl_tcp_fack __read_mostly = 1;
80int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; 80int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
81EXPORT_SYMBOL(sysctl_tcp_reordering);
81int sysctl_tcp_ecn __read_mostly = 2; 82int sysctl_tcp_ecn __read_mostly = 2;
83EXPORT_SYMBOL(sysctl_tcp_ecn);
82int sysctl_tcp_dsack __read_mostly = 1; 84int sysctl_tcp_dsack __read_mostly = 1;
83int sysctl_tcp_app_win __read_mostly = 31; 85int sysctl_tcp_app_win __read_mostly = 31;
84int sysctl_tcp_adv_win_scale __read_mostly = 2; 86int sysctl_tcp_adv_win_scale __read_mostly = 2;
87EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
85 88
86int sysctl_tcp_stdurg __read_mostly; 89int sysctl_tcp_stdurg __read_mostly;
87int sysctl_tcp_rfc1337 __read_mostly; 90int sysctl_tcp_rfc1337 __read_mostly;
@@ -419,6 +422,7 @@ void tcp_initialize_rcv_mss(struct sock *sk)
419 422
420 inet_csk(sk)->icsk_ack.rcv_mss = hint; 423 inet_csk(sk)->icsk_ack.rcv_mss = hint;
421} 424}
425EXPORT_SYMBOL(tcp_initialize_rcv_mss);
422 426
423/* Receiver "autotuning" code. 427/* Receiver "autotuning" code.
424 * 428 *
@@ -2938,6 +2942,7 @@ void tcp_simple_retransmit(struct sock *sk)
2938 } 2942 }
2939 tcp_xmit_retransmit_queue(sk); 2943 tcp_xmit_retransmit_queue(sk);
2940} 2944}
2945EXPORT_SYMBOL(tcp_simple_retransmit);
2941 2946
2942/* Process an event, which can update packets-in-flight not trivially. 2947/* Process an event, which can update packets-in-flight not trivially.
2943 * Main goal of this function is to calculate new estimate for left_out, 2948 * Main goal of this function is to calculate new estimate for left_out,
@@ -3286,7 +3291,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3286 * connection startup slow start one packet too 3291 * connection startup slow start one packet too
3287 * quickly. This is severely frowned upon behavior. 3292 * quickly. This is severely frowned upon behavior.
3288 */ 3293 */
3289 if (!(scb->flags & TCPCB_FLAG_SYN)) { 3294 if (!(scb->flags & TCPHDR_SYN)) {
3290 flag |= FLAG_DATA_ACKED; 3295 flag |= FLAG_DATA_ACKED;
3291 } else { 3296 } else {
3292 flag |= FLAG_SYN_ACKED; 3297 flag |= FLAG_SYN_ACKED;
@@ -3858,6 +3863,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3858 } 3863 }
3859 } 3864 }
3860} 3865}
3866EXPORT_SYMBOL(tcp_parse_options);
3861 3867
3862static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) 3868static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
3863{ 3869{
@@ -3924,13 +3930,14 @@ u8 *tcp_parse_md5sig_option(struct tcphdr *th)
3924 if (opsize < 2 || opsize > length) 3930 if (opsize < 2 || opsize > length)
3925 return NULL; 3931 return NULL;
3926 if (opcode == TCPOPT_MD5SIG) 3932 if (opcode == TCPOPT_MD5SIG)
3927 return ptr; 3933 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
3928 } 3934 }
3929 ptr += opsize - 2; 3935 ptr += opsize - 2;
3930 length -= opsize; 3936 length -= opsize;
3931 } 3937 }
3932 return NULL; 3938 return NULL;
3933} 3939}
3940EXPORT_SYMBOL(tcp_parse_md5sig_option);
3934#endif 3941#endif
3935 3942
3936static inline void tcp_store_ts_recent(struct tcp_sock *tp) 3943static inline void tcp_store_ts_recent(struct tcp_sock *tp)
@@ -5432,6 +5439,7 @@ discard:
5432 __kfree_skb(skb); 5439 __kfree_skb(skb);
5433 return 0; 5440 return 0;
5434} 5441}
5442EXPORT_SYMBOL(tcp_rcv_established);
5435 5443
5436static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, 5444static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5437 struct tcphdr *th, unsigned len) 5445 struct tcphdr *th, unsigned len)
@@ -5931,14 +5939,4 @@ discard:
5931 } 5939 }
5932 return 0; 5940 return 0;
5933} 5941}
5934
5935EXPORT_SYMBOL(sysctl_tcp_ecn);
5936EXPORT_SYMBOL(sysctl_tcp_reordering);
5937EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
5938EXPORT_SYMBOL(tcp_parse_options);
5939#ifdef CONFIG_TCP_MD5SIG
5940EXPORT_SYMBOL(tcp_parse_md5sig_option);
5941#endif
5942EXPORT_SYMBOL(tcp_rcv_established);
5943EXPORT_SYMBOL(tcp_rcv_state_process); 5942EXPORT_SYMBOL(tcp_rcv_state_process);
5944EXPORT_SYMBOL(tcp_initialize_rcv_mss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fe193e53af44..020766292bb0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -84,6 +84,7 @@
84 84
85int sysctl_tcp_tw_reuse __read_mostly; 85int sysctl_tcp_tw_reuse __read_mostly;
86int sysctl_tcp_low_latency __read_mostly; 86int sysctl_tcp_low_latency __read_mostly;
87EXPORT_SYMBOL(sysctl_tcp_low_latency);
87 88
88 89
89#ifdef CONFIG_TCP_MD5SIG 90#ifdef CONFIG_TCP_MD5SIG
@@ -100,6 +101,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
100#endif 101#endif
101 102
102struct inet_hashinfo tcp_hashinfo; 103struct inet_hashinfo tcp_hashinfo;
104EXPORT_SYMBOL(tcp_hashinfo);
103 105
104static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) 106static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
105{ 107{
@@ -139,7 +141,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
139 141
140 return 0; 142 return 0;
141} 143}
142
143EXPORT_SYMBOL_GPL(tcp_twsk_unique); 144EXPORT_SYMBOL_GPL(tcp_twsk_unique);
144 145
145/* This will initiate an outgoing connection. */ 146/* This will initiate an outgoing connection. */
@@ -204,10 +205,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
204 * TIME-WAIT * and initialize rx_opt.ts_recent from it, 205 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
205 * when trying new connection. 206 * when trying new connection.
206 */ 207 */
207 if (peer != NULL && 208 if (peer) {
208 (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { 209 inet_peer_refcheck(peer);
209 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; 210 if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
210 tp->rx_opt.ts_recent = peer->tcp_ts; 211 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
212 tp->rx_opt.ts_recent = peer->tcp_ts;
213 }
211 } 214 }
212 } 215 }
213 216
@@ -237,7 +240,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
237 240
238 /* OK, now commit destination to socket. */ 241 /* OK, now commit destination to socket. */
239 sk->sk_gso_type = SKB_GSO_TCPV4; 242 sk->sk_gso_type = SKB_GSO_TCPV4;
240 sk_setup_caps(sk, &rt->u.dst); 243 sk_setup_caps(sk, &rt->dst);
241 244
242 if (!tp->write_seq) 245 if (!tp->write_seq)
243 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, 246 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
@@ -265,6 +268,7 @@ failure:
265 inet->inet_dport = 0; 268 inet->inet_dport = 0;
266 return err; 269 return err;
267} 270}
271EXPORT_SYMBOL(tcp_v4_connect);
268 272
269/* 273/*
270 * This routine does path mtu discovery as defined in RFC1191. 274 * This routine does path mtu discovery as defined in RFC1191.
@@ -543,6 +547,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
543 547
544 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); 548 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
545} 549}
550EXPORT_SYMBOL(tcp_v4_send_check);
546 551
547int tcp_v4_gso_send_check(struct sk_buff *skb) 552int tcp_v4_gso_send_check(struct sk_buff *skb)
548{ 553{
@@ -793,19 +798,20 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
793 kfree(inet_rsk(req)->opt); 798 kfree(inet_rsk(req)->opt);
794} 799}
795 800
796#ifdef CONFIG_SYN_COOKIES 801static void syn_flood_warning(const struct sk_buff *skb)
797static void syn_flood_warning(struct sk_buff *skb)
798{ 802{
799 static unsigned long warntime; 803 const char *msg;
800 804
801 if (time_after(jiffies, (warntime + HZ * 60))) { 805#ifdef CONFIG_SYN_COOKIES
802 warntime = jiffies; 806 if (sysctl_tcp_syncookies)
803 printk(KERN_INFO 807 msg = "Sending cookies";
804 "possible SYN flooding on port %d. Sending cookies.\n", 808 else
805 ntohs(tcp_hdr(skb)->dest));
806 }
807}
808#endif 809#endif
810 msg = "Dropping request";
811
812 pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
813 ntohs(tcp_hdr(skb)->dest), msg);
814}
809 815
810/* 816/*
811 * Save and compile IPv4 options into the request_sock if needed. 817 * Save and compile IPv4 options into the request_sock if needed.
@@ -857,7 +863,6 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
857{ 863{
858 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr); 864 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
859} 865}
860
861EXPORT_SYMBOL(tcp_v4_md5_lookup); 866EXPORT_SYMBOL(tcp_v4_md5_lookup);
862 867
863static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, 868static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
@@ -924,7 +929,6 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
924 } 929 }
925 return 0; 930 return 0;
926} 931}
927
928EXPORT_SYMBOL(tcp_v4_md5_do_add); 932EXPORT_SYMBOL(tcp_v4_md5_do_add);
929 933
930static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, 934static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
@@ -962,7 +966,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
962 } 966 }
963 return -ENOENT; 967 return -ENOENT;
964} 968}
965
966EXPORT_SYMBOL(tcp_v4_md5_do_del); 969EXPORT_SYMBOL(tcp_v4_md5_do_del);
967 970
968static void tcp_v4_clear_md5_list(struct sock *sk) 971static void tcp_v4_clear_md5_list(struct sock *sk)
@@ -1135,7 +1138,6 @@ clear_hash_noput:
1135 memset(md5_hash, 0, 16); 1138 memset(md5_hash, 0, 16);
1136 return 1; 1139 return 1;
1137} 1140}
1138
1139EXPORT_SYMBOL(tcp_v4_md5_hash_skb); 1141EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1140 1142
1141static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) 1143static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
@@ -1243,6 +1245,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1243 * evidently real one. 1245 * evidently real one.
1244 */ 1246 */
1245 if (inet_csk_reqsk_queue_is_full(sk) && !isn) { 1247 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1248 if (net_ratelimit())
1249 syn_flood_warning(skb);
1246#ifdef CONFIG_SYN_COOKIES 1250#ifdef CONFIG_SYN_COOKIES
1247 if (sysctl_tcp_syncookies) { 1251 if (sysctl_tcp_syncookies) {
1248 want_cookie = 1; 1252 want_cookie = 1;
@@ -1323,15 +1327,12 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1323 if (security_inet_conn_request(sk, skb, req)) 1327 if (security_inet_conn_request(sk, skb, req))
1324 goto drop_and_free; 1328 goto drop_and_free;
1325 1329
1326 if (!want_cookie) 1330 if (!want_cookie || tmp_opt.tstamp_ok)
1327 TCP_ECN_create_request(req, tcp_hdr(skb)); 1331 TCP_ECN_create_request(req, tcp_hdr(skb));
1328 1332
1329 if (want_cookie) { 1333 if (want_cookie) {
1330#ifdef CONFIG_SYN_COOKIES
1331 syn_flood_warning(skb);
1332 req->cookie_ts = tmp_opt.tstamp_ok;
1333#endif
1334 isn = cookie_v4_init_sequence(sk, skb, &req->mss); 1334 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1335 req->cookie_ts = tmp_opt.tstamp_ok;
1335 } else if (!isn) { 1336 } else if (!isn) {
1336 struct inet_peer *peer = NULL; 1337 struct inet_peer *peer = NULL;
1337 1338
@@ -1349,6 +1350,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1349 (dst = inet_csk_route_req(sk, req)) != NULL && 1350 (dst = inet_csk_route_req(sk, req)) != NULL &&
1350 (peer = rt_get_peer((struct rtable *)dst)) != NULL && 1351 (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1351 peer->v4daddr == saddr) { 1352 peer->v4daddr == saddr) {
1353 inet_peer_refcheck(peer);
1352 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && 1354 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1353 (s32)(peer->tcp_ts - req->ts_recent) > 1355 (s32)(peer->tcp_ts - req->ts_recent) >
1354 TCP_PAWS_WINDOW) { 1356 TCP_PAWS_WINDOW) {
@@ -1393,6 +1395,7 @@ drop_and_free:
1393drop: 1395drop:
1394 return 0; 1396 return 0;
1395} 1397}
1398EXPORT_SYMBOL(tcp_v4_conn_request);
1396 1399
1397 1400
1398/* 1401/*
@@ -1478,6 +1481,7 @@ exit:
1478 dst_release(dst); 1481 dst_release(dst);
1479 return NULL; 1482 return NULL;
1480} 1483}
1484EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1481 1485
1482static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 1486static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1483{ 1487{
@@ -1504,7 +1508,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1504 } 1508 }
1505 1509
1506#ifdef CONFIG_SYN_COOKIES 1510#ifdef CONFIG_SYN_COOKIES
1507 if (!th->rst && !th->syn && th->ack) 1511 if (!th->syn)
1508 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); 1512 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1509#endif 1513#endif
1510 return sk; 1514 return sk;
@@ -1607,6 +1611,7 @@ csum_err:
1607 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 1611 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1608 goto discard; 1612 goto discard;
1609} 1613}
1614EXPORT_SYMBOL(tcp_v4_do_rcv);
1610 1615
1611/* 1616/*
1612 * From tcp_input.c 1617 * From tcp_input.c
@@ -1793,6 +1798,7 @@ int tcp_v4_remember_stamp(struct sock *sk)
1793 1798
1794 return 0; 1799 return 0;
1795} 1800}
1801EXPORT_SYMBOL(tcp_v4_remember_stamp);
1796 1802
1797int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) 1803int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1798{ 1804{
@@ -1832,6 +1838,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
1832 .compat_getsockopt = compat_ip_getsockopt, 1838 .compat_getsockopt = compat_ip_getsockopt,
1833#endif 1839#endif
1834}; 1840};
1841EXPORT_SYMBOL(ipv4_specific);
1835 1842
1836#ifdef CONFIG_TCP_MD5SIG 1843#ifdef CONFIG_TCP_MD5SIG
1837static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { 1844static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
@@ -1960,7 +1967,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
1960 1967
1961 percpu_counter_dec(&tcp_sockets_allocated); 1968 percpu_counter_dec(&tcp_sockets_allocated);
1962} 1969}
1963
1964EXPORT_SYMBOL(tcp_v4_destroy_sock); 1970EXPORT_SYMBOL(tcp_v4_destroy_sock);
1965 1971
1966#ifdef CONFIG_PROC_FS 1972#ifdef CONFIG_PROC_FS
@@ -1978,6 +1984,11 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1978 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; 1984 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1979} 1985}
1980 1986
1987/*
1988 * Get next listener socket follow cur. If cur is NULL, get first socket
1989 * starting from bucket given in st->bucket; when st->bucket is zero the
1990 * very first socket in the hash table is returned.
1991 */
1981static void *listening_get_next(struct seq_file *seq, void *cur) 1992static void *listening_get_next(struct seq_file *seq, void *cur)
1982{ 1993{
1983 struct inet_connection_sock *icsk; 1994 struct inet_connection_sock *icsk;
@@ -1988,14 +1999,15 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
1988 struct net *net = seq_file_net(seq); 1999 struct net *net = seq_file_net(seq);
1989 2000
1990 if (!sk) { 2001 if (!sk) {
1991 st->bucket = 0; 2002 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1992 ilb = &tcp_hashinfo.listening_hash[0];
1993 spin_lock_bh(&ilb->lock); 2003 spin_lock_bh(&ilb->lock);
1994 sk = sk_nulls_head(&ilb->head); 2004 sk = sk_nulls_head(&ilb->head);
2005 st->offset = 0;
1995 goto get_sk; 2006 goto get_sk;
1996 } 2007 }
1997 ilb = &tcp_hashinfo.listening_hash[st->bucket]; 2008 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1998 ++st->num; 2009 ++st->num;
2010 ++st->offset;
1999 2011
2000 if (st->state == TCP_SEQ_STATE_OPENREQ) { 2012 if (st->state == TCP_SEQ_STATE_OPENREQ) {
2001 struct request_sock *req = cur; 2013 struct request_sock *req = cur;
@@ -2010,6 +2022,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
2010 } 2022 }
2011 req = req->dl_next; 2023 req = req->dl_next;
2012 } 2024 }
2025 st->offset = 0;
2013 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) 2026 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2014 break; 2027 break;
2015get_req: 2028get_req:
@@ -2045,6 +2058,7 @@ start_req:
2045 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2058 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2046 } 2059 }
2047 spin_unlock_bh(&ilb->lock); 2060 spin_unlock_bh(&ilb->lock);
2061 st->offset = 0;
2048 if (++st->bucket < INET_LHTABLE_SIZE) { 2062 if (++st->bucket < INET_LHTABLE_SIZE) {
2049 ilb = &tcp_hashinfo.listening_hash[st->bucket]; 2063 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2050 spin_lock_bh(&ilb->lock); 2064 spin_lock_bh(&ilb->lock);
@@ -2058,7 +2072,12 @@ out:
2058 2072
2059static void *listening_get_idx(struct seq_file *seq, loff_t *pos) 2073static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2060{ 2074{
2061 void *rc = listening_get_next(seq, NULL); 2075 struct tcp_iter_state *st = seq->private;
2076 void *rc;
2077
2078 st->bucket = 0;
2079 st->offset = 0;
2080 rc = listening_get_next(seq, NULL);
2062 2081
2063 while (rc && *pos) { 2082 while (rc && *pos) {
2064 rc = listening_get_next(seq, rc); 2083 rc = listening_get_next(seq, rc);
@@ -2073,13 +2092,18 @@ static inline int empty_bucket(struct tcp_iter_state *st)
2073 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); 2092 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2074} 2093}
2075 2094
2095/*
2096 * Get first established socket starting from bucket given in st->bucket.
2097 * If st->bucket is zero, the very first socket in the hash is returned.
2098 */
2076static void *established_get_first(struct seq_file *seq) 2099static void *established_get_first(struct seq_file *seq)
2077{ 2100{
2078 struct tcp_iter_state *st = seq->private; 2101 struct tcp_iter_state *st = seq->private;
2079 struct net *net = seq_file_net(seq); 2102 struct net *net = seq_file_net(seq);
2080 void *rc = NULL; 2103 void *rc = NULL;
2081 2104
2082 for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { 2105 st->offset = 0;
2106 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2083 struct sock *sk; 2107 struct sock *sk;
2084 struct hlist_nulls_node *node; 2108 struct hlist_nulls_node *node;
2085 struct inet_timewait_sock *tw; 2109 struct inet_timewait_sock *tw;
@@ -2124,6 +2148,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
2124 struct net *net = seq_file_net(seq); 2148 struct net *net = seq_file_net(seq);
2125 2149
2126 ++st->num; 2150 ++st->num;
2151 ++st->offset;
2127 2152
2128 if (st->state == TCP_SEQ_STATE_TIME_WAIT) { 2153 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2129 tw = cur; 2154 tw = cur;
@@ -2140,6 +2165,7 @@ get_tw:
2140 st->state = TCP_SEQ_STATE_ESTABLISHED; 2165 st->state = TCP_SEQ_STATE_ESTABLISHED;
2141 2166
2142 /* Look for next non empty bucket */ 2167 /* Look for next non empty bucket */
2168 st->offset = 0;
2143 while (++st->bucket <= tcp_hashinfo.ehash_mask && 2169 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2144 empty_bucket(st)) 2170 empty_bucket(st))
2145 ; 2171 ;
@@ -2167,7 +2193,11 @@ out:
2167 2193
2168static void *established_get_idx(struct seq_file *seq, loff_t pos) 2194static void *established_get_idx(struct seq_file *seq, loff_t pos)
2169{ 2195{
2170 void *rc = established_get_first(seq); 2196 struct tcp_iter_state *st = seq->private;
2197 void *rc;
2198
2199 st->bucket = 0;
2200 rc = established_get_first(seq);
2171 2201
2172 while (rc && pos) { 2202 while (rc && pos) {
2173 rc = established_get_next(seq, rc); 2203 rc = established_get_next(seq, rc);
@@ -2192,24 +2222,72 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2192 return rc; 2222 return rc;
2193} 2223}
2194 2224
2225static void *tcp_seek_last_pos(struct seq_file *seq)
2226{
2227 struct tcp_iter_state *st = seq->private;
2228 int offset = st->offset;
2229 int orig_num = st->num;
2230 void *rc = NULL;
2231
2232 switch (st->state) {
2233 case TCP_SEQ_STATE_OPENREQ:
2234 case TCP_SEQ_STATE_LISTENING:
2235 if (st->bucket >= INET_LHTABLE_SIZE)
2236 break;
2237 st->state = TCP_SEQ_STATE_LISTENING;
2238 rc = listening_get_next(seq, NULL);
2239 while (offset-- && rc)
2240 rc = listening_get_next(seq, rc);
2241 if (rc)
2242 break;
2243 st->bucket = 0;
2244 /* Fallthrough */
2245 case TCP_SEQ_STATE_ESTABLISHED:
2246 case TCP_SEQ_STATE_TIME_WAIT:
2247 st->state = TCP_SEQ_STATE_ESTABLISHED;
2248 if (st->bucket > tcp_hashinfo.ehash_mask)
2249 break;
2250 rc = established_get_first(seq);
2251 while (offset-- && rc)
2252 rc = established_get_next(seq, rc);
2253 }
2254
2255 st->num = orig_num;
2256
2257 return rc;
2258}
2259
2195static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) 2260static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2196{ 2261{
2197 struct tcp_iter_state *st = seq->private; 2262 struct tcp_iter_state *st = seq->private;
2263 void *rc;
2264
2265 if (*pos && *pos == st->last_pos) {
2266 rc = tcp_seek_last_pos(seq);
2267 if (rc)
2268 goto out;
2269 }
2270
2198 st->state = TCP_SEQ_STATE_LISTENING; 2271 st->state = TCP_SEQ_STATE_LISTENING;
2199 st->num = 0; 2272 st->num = 0;
2200 return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2273 st->bucket = 0;
2274 st->offset = 0;
2275 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2276
2277out:
2278 st->last_pos = *pos;
2279 return rc;
2201} 2280}
2202 2281
2203static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2282static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2204{ 2283{
2284 struct tcp_iter_state *st = seq->private;
2205 void *rc = NULL; 2285 void *rc = NULL;
2206 struct tcp_iter_state *st;
2207 2286
2208 if (v == SEQ_START_TOKEN) { 2287 if (v == SEQ_START_TOKEN) {
2209 rc = tcp_get_idx(seq, 0); 2288 rc = tcp_get_idx(seq, 0);
2210 goto out; 2289 goto out;
2211 } 2290 }
2212 st = seq->private;
2213 2291
2214 switch (st->state) { 2292 switch (st->state) {
2215 case TCP_SEQ_STATE_OPENREQ: 2293 case TCP_SEQ_STATE_OPENREQ:
@@ -2217,6 +2295,8 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2217 rc = listening_get_next(seq, v); 2295 rc = listening_get_next(seq, v);
2218 if (!rc) { 2296 if (!rc) {
2219 st->state = TCP_SEQ_STATE_ESTABLISHED; 2297 st->state = TCP_SEQ_STATE_ESTABLISHED;
2298 st->bucket = 0;
2299 st->offset = 0;
2220 rc = established_get_first(seq); 2300 rc = established_get_first(seq);
2221 } 2301 }
2222 break; 2302 break;
@@ -2227,6 +2307,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2227 } 2307 }
2228out: 2308out:
2229 ++*pos; 2309 ++*pos;
2310 st->last_pos = *pos;
2230 return rc; 2311 return rc;
2231} 2312}
2232 2313
@@ -2265,6 +2346,7 @@ static int tcp_seq_open(struct inode *inode, struct file *file)
2265 2346
2266 s = ((struct seq_file *)file->private_data)->private; 2347 s = ((struct seq_file *)file->private_data)->private;
2267 s->family = afinfo->family; 2348 s->family = afinfo->family;
2349 s->last_pos = 0;
2268 return 0; 2350 return 0;
2269} 2351}
2270 2352
@@ -2288,11 +2370,13 @@ int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2288 rc = -ENOMEM; 2370 rc = -ENOMEM;
2289 return rc; 2371 return rc;
2290} 2372}
2373EXPORT_SYMBOL(tcp_proc_register);
2291 2374
2292void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) 2375void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2293{ 2376{
2294 proc_net_remove(net, afinfo->name); 2377 proc_net_remove(net, afinfo->name);
2295} 2378}
2379EXPORT_SYMBOL(tcp_proc_unregister);
2296 2380
2297static void get_openreq4(struct sock *sk, struct request_sock *req, 2381static void get_openreq4(struct sock *sk, struct request_sock *req,
2298 struct seq_file *f, int i, int uid, int *len) 2382 struct seq_file *f, int i, int uid, int *len)
@@ -2516,6 +2600,8 @@ struct proto tcp_prot = {
2516 .setsockopt = tcp_setsockopt, 2600 .setsockopt = tcp_setsockopt,
2517 .getsockopt = tcp_getsockopt, 2601 .getsockopt = tcp_getsockopt,
2518 .recvmsg = tcp_recvmsg, 2602 .recvmsg = tcp_recvmsg,
2603 .sendmsg = tcp_sendmsg,
2604 .sendpage = tcp_sendpage,
2519 .backlog_rcv = tcp_v4_do_rcv, 2605 .backlog_rcv = tcp_v4_do_rcv,
2520 .hash = inet_hash, 2606 .hash = inet_hash,
2521 .unhash = inet_unhash, 2607 .unhash = inet_unhash,
@@ -2534,11 +2620,13 @@ struct proto tcp_prot = {
2534 .twsk_prot = &tcp_timewait_sock_ops, 2620 .twsk_prot = &tcp_timewait_sock_ops,
2535 .rsk_prot = &tcp_request_sock_ops, 2621 .rsk_prot = &tcp_request_sock_ops,
2536 .h.hashinfo = &tcp_hashinfo, 2622 .h.hashinfo = &tcp_hashinfo,
2623 .no_autobind = true,
2537#ifdef CONFIG_COMPAT 2624#ifdef CONFIG_COMPAT
2538 .compat_setsockopt = compat_tcp_setsockopt, 2625 .compat_setsockopt = compat_tcp_setsockopt,
2539 .compat_getsockopt = compat_tcp_getsockopt, 2626 .compat_getsockopt = compat_tcp_getsockopt,
2540#endif 2627#endif
2541}; 2628};
2629EXPORT_SYMBOL(tcp_prot);
2542 2630
2543 2631
2544static int __net_init tcp_sk_init(struct net *net) 2632static int __net_init tcp_sk_init(struct net *net)
@@ -2569,20 +2657,3 @@ void __init tcp_v4_init(void)
2569 if (register_pernet_subsys(&tcp_sk_ops)) 2657 if (register_pernet_subsys(&tcp_sk_ops))
2570 panic("Failed to create the TCP control socket.\n"); 2658 panic("Failed to create the TCP control socket.\n");
2571} 2659}
2572
2573EXPORT_SYMBOL(ipv4_specific);
2574EXPORT_SYMBOL(tcp_hashinfo);
2575EXPORT_SYMBOL(tcp_prot);
2576EXPORT_SYMBOL(tcp_v4_conn_request);
2577EXPORT_SYMBOL(tcp_v4_connect);
2578EXPORT_SYMBOL(tcp_v4_do_rcv);
2579EXPORT_SYMBOL(tcp_v4_remember_stamp);
2580EXPORT_SYMBOL(tcp_v4_send_check);
2581EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
2582
2583#ifdef CONFIG_PROC_FS
2584EXPORT_SYMBOL(tcp_proc_register);
2585EXPORT_SYMBOL(tcp_proc_unregister);
2586#endif
2587EXPORT_SYMBOL(sysctl_tcp_low_latency);
2588
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 794c2e122a41..f25b56cb85cb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -47,7 +47,6 @@ struct inet_timewait_death_row tcp_death_row = {
47 .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, 47 .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
48 (unsigned long)&tcp_death_row), 48 (unsigned long)&tcp_death_row),
49}; 49};
50
51EXPORT_SYMBOL_GPL(tcp_death_row); 50EXPORT_SYMBOL_GPL(tcp_death_row);
52 51
53static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) 52static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
@@ -262,6 +261,7 @@ kill:
262 inet_twsk_put(tw); 261 inet_twsk_put(tw);
263 return TCP_TW_SUCCESS; 262 return TCP_TW_SUCCESS;
264} 263}
264EXPORT_SYMBOL(tcp_timewait_state_process);
265 265
266/* 266/*
267 * Move a socket to time-wait or dead fin-wait-2 state. 267 * Move a socket to time-wait or dead fin-wait-2 state.
@@ -362,7 +362,6 @@ void tcp_twsk_destructor(struct sock *sk)
362 tcp_free_md5sig_pool(); 362 tcp_free_md5sig_pool();
363#endif 363#endif
364} 364}
365
366EXPORT_SYMBOL_GPL(tcp_twsk_destructor); 365EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
367 366
368static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, 367static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
@@ -510,6 +509,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
510 } 509 }
511 return newsk; 510 return newsk;
512} 511}
512EXPORT_SYMBOL(tcp_create_openreq_child);
513 513
514/* 514/*
515 * Process an incoming packet for SYN_RECV sockets represented 515 * Process an incoming packet for SYN_RECV sockets represented
@@ -706,6 +706,7 @@ embryonic_reset:
706 inet_csk_reqsk_queue_drop(sk, req, prev); 706 inet_csk_reqsk_queue_drop(sk, req, prev);
707 return NULL; 707 return NULL;
708} 708}
709EXPORT_SYMBOL(tcp_check_req);
709 710
710/* 711/*
711 * Queue segment on the new socket if the new socket is active, 712 * Queue segment on the new socket if the new socket is active,
@@ -737,8 +738,4 @@ int tcp_child_process(struct sock *parent, struct sock *child,
737 sock_put(child); 738 sock_put(child);
738 return ret; 739 return ret;
739} 740}
740
741EXPORT_SYMBOL(tcp_check_req);
742EXPORT_SYMBOL(tcp_child_process); 741EXPORT_SYMBOL(tcp_child_process);
743EXPORT_SYMBOL(tcp_create_openreq_child);
744EXPORT_SYMBOL(tcp_timewait_state_process);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7ed9dc1042d1..de3bd8458588 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -247,6 +247,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
247 /* Set the clamp no higher than max representable value */ 247 /* Set the clamp no higher than max representable value */
248 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); 248 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
249} 249}
250EXPORT_SYMBOL(tcp_select_initial_window);
250 251
251/* Chose a new window to advertise, update state in tcp_sock for the 252/* Chose a new window to advertise, update state in tcp_sock for the
252 * socket, and return result with RFC1323 scaling applied. The return 253 * socket, and return result with RFC1323 scaling applied. The return
@@ -294,9 +295,9 @@ static u16 tcp_select_window(struct sock *sk)
294/* Packet ECN state for a SYN-ACK */ 295/* Packet ECN state for a SYN-ACK */
295static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) 296static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
296{ 297{
297 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; 298 TCP_SKB_CB(skb)->flags &= ~TCPHDR_CWR;
298 if (!(tp->ecn_flags & TCP_ECN_OK)) 299 if (!(tp->ecn_flags & TCP_ECN_OK))
299 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; 300 TCP_SKB_CB(skb)->flags &= ~TCPHDR_ECE;
300} 301}
301 302
302/* Packet ECN state for a SYN. */ 303/* Packet ECN state for a SYN. */
@@ -306,7 +307,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
306 307
307 tp->ecn_flags = 0; 308 tp->ecn_flags = 0;
308 if (sysctl_tcp_ecn == 1) { 309 if (sysctl_tcp_ecn == 1) {
309 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR; 310 TCP_SKB_CB(skb)->flags |= TCPHDR_ECE | TCPHDR_CWR;
310 tp->ecn_flags = TCP_ECN_OK; 311 tp->ecn_flags = TCP_ECN_OK;
311 } 312 }
312} 313}
@@ -361,7 +362,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
361 skb_shinfo(skb)->gso_type = 0; 362 skb_shinfo(skb)->gso_type = 0;
362 363
363 TCP_SKB_CB(skb)->seq = seq; 364 TCP_SKB_CB(skb)->seq = seq;
364 if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN)) 365 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
365 seq++; 366 seq++;
366 TCP_SKB_CB(skb)->end_seq = seq; 367 TCP_SKB_CB(skb)->end_seq = seq;
367} 368}
@@ -820,7 +821,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
820 tcb = TCP_SKB_CB(skb); 821 tcb = TCP_SKB_CB(skb);
821 memset(&opts, 0, sizeof(opts)); 822 memset(&opts, 0, sizeof(opts));
822 823
823 if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) 824 if (unlikely(tcb->flags & TCPHDR_SYN))
824 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); 825 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
825 else 826 else
826 tcp_options_size = tcp_established_options(sk, skb, &opts, 827 tcp_options_size = tcp_established_options(sk, skb, &opts,
@@ -843,7 +844,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
843 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | 844 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
844 tcb->flags); 845 tcb->flags);
845 846
846 if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { 847 if (unlikely(tcb->flags & TCPHDR_SYN)) {
847 /* RFC1323: The window in SYN & SYN/ACK segments 848 /* RFC1323: The window in SYN & SYN/ACK segments
848 * is never scaled. 849 * is never scaled.
849 */ 850 */
@@ -866,7 +867,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
866 } 867 }
867 868
868 tcp_options_write((__be32 *)(th + 1), tp, &opts); 869 tcp_options_write((__be32 *)(th + 1), tp, &opts);
869 if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0)) 870 if (likely((tcb->flags & TCPHDR_SYN) == 0))
870 TCP_ECN_send(sk, skb, tcp_header_size); 871 TCP_ECN_send(sk, skb, tcp_header_size);
871 872
872#ifdef CONFIG_TCP_MD5SIG 873#ifdef CONFIG_TCP_MD5SIG
@@ -880,7 +881,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
880 881
881 icsk->icsk_af_ops->send_check(sk, skb); 882 icsk->icsk_af_ops->send_check(sk, skb);
882 883
883 if (likely(tcb->flags & TCPCB_FLAG_ACK)) 884 if (likely(tcb->flags & TCPHDR_ACK))
884 tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); 885 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
885 886
886 if (skb->len != tcp_header_size) 887 if (skb->len != tcp_header_size)
@@ -1023,7 +1024,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1023 1024
1024 /* PSH and FIN should only be set in the second packet. */ 1025 /* PSH and FIN should only be set in the second packet. */
1025 flags = TCP_SKB_CB(skb)->flags; 1026 flags = TCP_SKB_CB(skb)->flags;
1026 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); 1027 TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1027 TCP_SKB_CB(buff)->flags = flags; 1028 TCP_SKB_CB(buff)->flags = flags;
1028 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; 1029 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1029 1030
@@ -1189,6 +1190,7 @@ void tcp_mtup_init(struct sock *sk)
1189 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss); 1190 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
1190 icsk->icsk_mtup.probe_size = 0; 1191 icsk->icsk_mtup.probe_size = 0;
1191} 1192}
1193EXPORT_SYMBOL(tcp_mtup_init);
1192 1194
1193/* This function synchronize snd mss to current pmtu/exthdr set. 1195/* This function synchronize snd mss to current pmtu/exthdr set.
1194 1196
@@ -1232,6 +1234,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1232 1234
1233 return mss_now; 1235 return mss_now;
1234} 1236}
1237EXPORT_SYMBOL(tcp_sync_mss);
1235 1238
1236/* Compute the current effective MSS, taking SACKs and IP options, 1239/* Compute the current effective MSS, taking SACKs and IP options,
1237 * and even PMTU discovery events into account. 1240 * and even PMTU discovery events into account.
@@ -1328,8 +1331,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
1328 u32 in_flight, cwnd; 1331 u32 in_flight, cwnd;
1329 1332
1330 /* Don't be strict about the congestion window for the final FIN. */ 1333 /* Don't be strict about the congestion window for the final FIN. */
1331 if ((TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && 1334 if ((TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1)
1332 tcp_skb_pcount(skb) == 1)
1333 return 1; 1335 return 1;
1334 1336
1335 in_flight = tcp_packets_in_flight(tp); 1337 in_flight = tcp_packets_in_flight(tp);
@@ -1398,7 +1400,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
1398 * Nagle can be ignored during F-RTO too (see RFC4138). 1400 * Nagle can be ignored during F-RTO too (see RFC4138).
1399 */ 1401 */
1400 if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || 1402 if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
1401 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) 1403 (TCP_SKB_CB(skb)->flags & TCPHDR_FIN))
1402 return 1; 1404 return 1;
1403 1405
1404 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) 1406 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
@@ -1461,7 +1463,7 @@ int tcp_may_send_now(struct sock *sk)
1461 * packet has never been sent out before (and thus is not cloned). 1463 * packet has never been sent out before (and thus is not cloned).
1462 */ 1464 */
1463static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, 1465static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1464 unsigned int mss_now) 1466 unsigned int mss_now, gfp_t gfp)
1465{ 1467{
1466 struct sk_buff *buff; 1468 struct sk_buff *buff;
1467 int nlen = skb->len - len; 1469 int nlen = skb->len - len;
@@ -1471,7 +1473,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1471 if (skb->len != skb->data_len) 1473 if (skb->len != skb->data_len)
1472 return tcp_fragment(sk, skb, len, mss_now); 1474 return tcp_fragment(sk, skb, len, mss_now);
1473 1475
1474 buff = sk_stream_alloc_skb(sk, 0, GFP_ATOMIC); 1476 buff = sk_stream_alloc_skb(sk, 0, gfp);
1475 if (unlikely(buff == NULL)) 1477 if (unlikely(buff == NULL))
1476 return -ENOMEM; 1478 return -ENOMEM;
1477 1479
@@ -1487,7 +1489,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1487 1489
1488 /* PSH and FIN should only be set in the second packet. */ 1490 /* PSH and FIN should only be set in the second packet. */
1489 flags = TCP_SKB_CB(skb)->flags; 1491 flags = TCP_SKB_CB(skb)->flags;
1490 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); 1492 TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1491 TCP_SKB_CB(buff)->flags = flags; 1493 TCP_SKB_CB(buff)->flags = flags;
1492 1494
1493 /* This packet was never sent out yet, so no SACK bits. */ 1495 /* This packet was never sent out yet, so no SACK bits. */
@@ -1518,7 +1520,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1518 const struct inet_connection_sock *icsk = inet_csk(sk); 1520 const struct inet_connection_sock *icsk = inet_csk(sk);
1519 u32 send_win, cong_win, limit, in_flight; 1521 u32 send_win, cong_win, limit, in_flight;
1520 1522
1521 if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) 1523 if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
1522 goto send_now; 1524 goto send_now;
1523 1525
1524 if (icsk->icsk_ca_state != TCP_CA_Open) 1526 if (icsk->icsk_ca_state != TCP_CA_Open)
@@ -1644,7 +1646,7 @@ static int tcp_mtu_probe(struct sock *sk)
1644 1646
1645 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; 1647 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1646 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; 1648 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
1647 TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK; 1649 TCP_SKB_CB(nskb)->flags = TCPHDR_ACK;
1648 TCP_SKB_CB(nskb)->sacked = 0; 1650 TCP_SKB_CB(nskb)->sacked = 0;
1649 nskb->csum = 0; 1651 nskb->csum = 0;
1650 nskb->ip_summed = skb->ip_summed; 1652 nskb->ip_summed = skb->ip_summed;
@@ -1669,7 +1671,7 @@ static int tcp_mtu_probe(struct sock *sk)
1669 sk_wmem_free_skb(sk, skb); 1671 sk_wmem_free_skb(sk, skb);
1670 } else { 1672 } else {
1671 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & 1673 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
1672 ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); 1674 ~(TCPHDR_FIN|TCPHDR_PSH);
1673 if (!skb_shinfo(skb)->nr_frags) { 1675 if (!skb_shinfo(skb)->nr_frags) {
1674 skb_pull(skb, copy); 1676 skb_pull(skb, copy);
1675 if (skb->ip_summed != CHECKSUM_PARTIAL) 1677 if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -1769,7 +1771,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1769 cwnd_quota); 1771 cwnd_quota);
1770 1772
1771 if (skb->len > limit && 1773 if (skb->len > limit &&
1772 unlikely(tso_fragment(sk, skb, limit, mss_now))) 1774 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
1773 break; 1775 break;
1774 1776
1775 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1777 TCP_SKB_CB(skb)->when = tcp_time_stamp;
@@ -2020,7 +2022,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2020 2022
2021 if (!sysctl_tcp_retrans_collapse) 2023 if (!sysctl_tcp_retrans_collapse)
2022 return; 2024 return;
2023 if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) 2025 if (TCP_SKB_CB(skb)->flags & TCPHDR_SYN)
2024 return; 2026 return;
2025 2027
2026 tcp_for_write_queue_from_safe(skb, tmp, sk) { 2028 tcp_for_write_queue_from_safe(skb, tmp, sk) {
@@ -2112,7 +2114,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2112 * since it is cheap to do so and saves bytes on the network. 2114 * since it is cheap to do so and saves bytes on the network.
2113 */ 2115 */
2114 if (skb->len > 0 && 2116 if (skb->len > 0 &&
2115 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && 2117 (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) &&
2116 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { 2118 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
2117 if (!pskb_trim(skb, 0)) { 2119 if (!pskb_trim(skb, 0)) {
2118 /* Reuse, even though it does some unnecessary work */ 2120 /* Reuse, even though it does some unnecessary work */
@@ -2304,7 +2306,7 @@ void tcp_send_fin(struct sock *sk)
2304 mss_now = tcp_current_mss(sk); 2306 mss_now = tcp_current_mss(sk);
2305 2307
2306 if (tcp_send_head(sk) != NULL) { 2308 if (tcp_send_head(sk) != NULL) {
2307 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; 2309 TCP_SKB_CB(skb)->flags |= TCPHDR_FIN;
2308 TCP_SKB_CB(skb)->end_seq++; 2310 TCP_SKB_CB(skb)->end_seq++;
2309 tp->write_seq++; 2311 tp->write_seq++;
2310 } else { 2312 } else {
@@ -2321,7 +2323,7 @@ void tcp_send_fin(struct sock *sk)
2321 skb_reserve(skb, MAX_TCP_HEADER); 2323 skb_reserve(skb, MAX_TCP_HEADER);
2322 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ 2324 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
2323 tcp_init_nondata_skb(skb, tp->write_seq, 2325 tcp_init_nondata_skb(skb, tp->write_seq,
2324 TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); 2326 TCPHDR_ACK | TCPHDR_FIN);
2325 tcp_queue_skb(sk, skb); 2327 tcp_queue_skb(sk, skb);
2326 } 2328 }
2327 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); 2329 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
@@ -2346,7 +2348,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2346 /* Reserve space for headers and prepare control bits. */ 2348 /* Reserve space for headers and prepare control bits. */
2347 skb_reserve(skb, MAX_TCP_HEADER); 2349 skb_reserve(skb, MAX_TCP_HEADER);
2348 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), 2350 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2349 TCPCB_FLAG_ACK | TCPCB_FLAG_RST); 2351 TCPHDR_ACK | TCPHDR_RST);
2350 /* Send it off. */ 2352 /* Send it off. */
2351 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2353 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2352 if (tcp_transmit_skb(sk, skb, 0, priority)) 2354 if (tcp_transmit_skb(sk, skb, 0, priority))
@@ -2366,11 +2368,11 @@ int tcp_send_synack(struct sock *sk)
2366 struct sk_buff *skb; 2368 struct sk_buff *skb;
2367 2369
2368 skb = tcp_write_queue_head(sk); 2370 skb = tcp_write_queue_head(sk);
2369 if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) { 2371 if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPHDR_SYN)) {
2370 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); 2372 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
2371 return -EFAULT; 2373 return -EFAULT;
2372 } 2374 }
2373 if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) { 2375 if (!(TCP_SKB_CB(skb)->flags & TCPHDR_ACK)) {
2374 if (skb_cloned(skb)) { 2376 if (skb_cloned(skb)) {
2375 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); 2377 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2376 if (nskb == NULL) 2378 if (nskb == NULL)
@@ -2384,7 +2386,7 @@ int tcp_send_synack(struct sock *sk)
2384 skb = nskb; 2386 skb = nskb;
2385 } 2387 }
2386 2388
2387 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK; 2389 TCP_SKB_CB(skb)->flags |= TCPHDR_ACK;
2388 TCP_ECN_send_synack(tcp_sk(sk), skb); 2390 TCP_ECN_send_synack(tcp_sk(sk), skb);
2389 } 2391 }
2390 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2392 TCP_SKB_CB(skb)->when = tcp_time_stamp;
@@ -2463,7 +2465,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2463 * not even correctly set) 2465 * not even correctly set)
2464 */ 2466 */
2465 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, 2467 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
2466 TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); 2468 TCPHDR_SYN | TCPHDR_ACK);
2467 2469
2468 if (OPTION_COOKIE_EXTENSION & opts.options) { 2470 if (OPTION_COOKIE_EXTENSION & opts.options) {
2469 if (s_data_desired) { 2471 if (s_data_desired) {
@@ -2518,6 +2520,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2518 2520
2519 return skb; 2521 return skb;
2520} 2522}
2523EXPORT_SYMBOL(tcp_make_synack);
2521 2524
2522/* Do all connect socket setups that can be done AF independent. */ 2525/* Do all connect socket setups that can be done AF independent. */
2523static void tcp_connect_init(struct sock *sk) 2526static void tcp_connect_init(struct sock *sk)
@@ -2595,7 +2598,7 @@ int tcp_connect(struct sock *sk)
2595 skb_reserve(buff, MAX_TCP_HEADER); 2598 skb_reserve(buff, MAX_TCP_HEADER);
2596 2599
2597 tp->snd_nxt = tp->write_seq; 2600 tp->snd_nxt = tp->write_seq;
2598 tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN); 2601 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
2599 TCP_ECN_send_syn(sk, buff); 2602 TCP_ECN_send_syn(sk, buff);
2600 2603
2601 /* Send it off. */ 2604 /* Send it off. */
@@ -2620,6 +2623,7 @@ int tcp_connect(struct sock *sk)
2620 inet_csk(sk)->icsk_rto, TCP_RTO_MAX); 2623 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
2621 return 0; 2624 return 0;
2622} 2625}
2626EXPORT_SYMBOL(tcp_connect);
2623 2627
2624/* Send out a delayed ack, the caller does the policy checking 2628/* Send out a delayed ack, the caller does the policy checking
2625 * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check() 2629 * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check()
@@ -2701,7 +2705,7 @@ void tcp_send_ack(struct sock *sk)
2701 2705
2702 /* Reserve space for headers and prepare control bits. */ 2706 /* Reserve space for headers and prepare control bits. */
2703 skb_reserve(buff, MAX_TCP_HEADER); 2707 skb_reserve(buff, MAX_TCP_HEADER);
2704 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK); 2708 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
2705 2709
2706 /* Send it off, this clears delayed acks for us. */ 2710 /* Send it off, this clears delayed acks for us. */
2707 TCP_SKB_CB(buff)->when = tcp_time_stamp; 2711 TCP_SKB_CB(buff)->when = tcp_time_stamp;
@@ -2735,7 +2739,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
2735 * end to send an ack. Don't queue or clone SKB, just 2739 * end to send an ack. Don't queue or clone SKB, just
2736 * send it. 2740 * send it.
2737 */ 2741 */
2738 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK); 2742 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
2739 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2743 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2740 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); 2744 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
2741} 2745}
@@ -2765,13 +2769,13 @@ int tcp_write_wakeup(struct sock *sk)
2765 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || 2769 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
2766 skb->len > mss) { 2770 skb->len > mss) {
2767 seg_size = min(seg_size, mss); 2771 seg_size = min(seg_size, mss);
2768 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 2772 TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
2769 if (tcp_fragment(sk, skb, seg_size, mss)) 2773 if (tcp_fragment(sk, skb, seg_size, mss))
2770 return -1; 2774 return -1;
2771 } else if (!tcp_skb_pcount(skb)) 2775 } else if (!tcp_skb_pcount(skb))
2772 tcp_set_skb_tso_segs(sk, skb, mss); 2776 tcp_set_skb_tso_segs(sk, skb, mss);
2773 2777
2774 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 2778 TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
2775 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2779 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2776 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2780 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2777 if (!err) 2781 if (!err)
@@ -2824,10 +2828,3 @@ void tcp_send_probe0(struct sock *sk)
2824 TCP_RTO_MAX); 2828 TCP_RTO_MAX);
2825 } 2829 }
2826} 2830}
2827
2828EXPORT_SYMBOL(tcp_select_initial_window);
2829EXPORT_SYMBOL(tcp_connect);
2830EXPORT_SYMBOL(tcp_make_synack);
2831EXPORT_SYMBOL(tcp_simple_retransmit);
2832EXPORT_SYMBOL(tcp_sync_mss);
2833EXPORT_SYMBOL(tcp_mtup_init);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 440a5c6004f6..808bb920c9f5 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -41,7 +41,6 @@ void tcp_init_xmit_timers(struct sock *sk)
41 inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, 41 inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
42 &tcp_keepalive_timer); 42 &tcp_keepalive_timer);
43} 43}
44
45EXPORT_SYMBOL(tcp_init_xmit_timers); 44EXPORT_SYMBOL(tcp_init_xmit_timers);
46 45
47static void tcp_write_err(struct sock *sk) 46static void tcp_write_err(struct sock *sk)
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 3b3813cc80b9..59186ca7808a 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -48,7 +48,6 @@ err:
48 48
49 return ret; 49 return ret;
50} 50}
51
52EXPORT_SYMBOL(xfrm4_tunnel_register); 51EXPORT_SYMBOL(xfrm4_tunnel_register);
53 52
54int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) 53int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
@@ -72,7 +71,6 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
72 71
73 return ret; 72 return ret;
74} 73}
75
76EXPORT_SYMBOL(xfrm4_tunnel_deregister); 74EXPORT_SYMBOL(xfrm4_tunnel_deregister);
77 75
78static int tunnel4_rcv(struct sk_buff *skb) 76static int tunnel4_rcv(struct sk_buff *skb)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index eec4ff456e33..32e0bef60d0a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -914,7 +914,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
914 !sock_flag(sk, SOCK_BROADCAST)) 914 !sock_flag(sk, SOCK_BROADCAST))
915 goto out; 915 goto out;
916 if (connected) 916 if (connected)
917 sk_dst_set(sk, dst_clone(&rt->u.dst)); 917 sk_dst_set(sk, dst_clone(&rt->dst));
918 } 918 }
919 919
920 if (msg->msg_flags&MSG_CONFIRM) 920 if (msg->msg_flags&MSG_CONFIRM)
@@ -978,7 +978,7 @@ out:
978 return err; 978 return err;
979 979
980do_confirm: 980do_confirm:
981 dst_confirm(&rt->u.dst); 981 dst_confirm(&rt->dst);
982 if (!(msg->msg_flags&MSG_PROBE) || len) 982 if (!(msg->msg_flags&MSG_PROBE) || len)
983 goto back_from_confirm; 983 goto back_from_confirm;
984 err = 0; 984 err = 0;
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 6610bf76369f..ab76aa928fa9 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -58,6 +58,7 @@ struct proto udplite_prot = {
58 .compat_getsockopt = compat_udp_getsockopt, 58 .compat_getsockopt = compat_udp_getsockopt,
59#endif 59#endif
60}; 60};
61EXPORT_SYMBOL(udplite_prot);
61 62
62static struct inet_protosw udplite4_protosw = { 63static struct inet_protosw udplite4_protosw = {
63 .type = SOCK_DGRAM, 64 .type = SOCK_DGRAM,
@@ -127,5 +128,3 @@ out_unregister_proto:
127out_register_err: 128out_register_err:
128 printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); 129 printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__);
129} 130}
130
131EXPORT_SYMBOL(udplite_prot);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index ad8fbb871aa0..06814b6216dc 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -163,5 +163,4 @@ int xfrm4_rcv(struct sk_buff *skb)
163{ 163{
164 return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); 164 return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
165} 165}
166
167EXPORT_SYMBOL(xfrm4_rcv); 166EXPORT_SYMBOL(xfrm4_rcv);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 23883a48ebfb..869078d4eeb9 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -37,7 +37,7 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
37 fl.fl4_src = saddr->a4; 37 fl.fl4_src = saddr->a4;
38 38
39 err = __ip_route_output_key(net, &rt, &fl); 39 err = __ip_route_output_key(net, &rt, &fl);
40 dst = &rt->u.dst; 40 dst = &rt->dst;
41 if (err) 41 if (err)
42 dst = ERR_PTR(err); 42 dst = ERR_PTR(err);
43 return dst; 43 return dst;