Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/af_inet.c                                    |   68
-rw-r--r--  net/ipv4/arp.c                                        |   58
-rw-r--r--  net/ipv4/cipso_ipv4.c                                 |    1
-rw-r--r--  net/ipv4/devinet.c                                    |   35
-rw-r--r--  net/ipv4/fib_frontend.c                               |   20
-rw-r--r--  net/ipv4/fib_hash.c                                   |    5
-rw-r--r--  net/ipv4/fib_rules.c                                  |    2
-rw-r--r--  net/ipv4/fib_trie.c                                   |  236
-rw-r--r--  net/ipv4/icmp.c                                       |  191
-rw-r--r--  net/ipv4/igmp.c                                       |   45
-rw-r--r--  net/ipv4/inet_connection_sock.c                       |   38
-rw-r--r--  net/ipv4/inet_fragment.c                              |   10
-rw-r--r--  net/ipv4/inet_hashtables.c                            |   24
-rw-r--r--  net/ipv4/inet_timewait_sock.c                         |    4
-rw-r--r--  net/ipv4/ip_forward.c                                 |    2
-rw-r--r--  net/ipv4/ip_fragment.c                                |   24
-rw-r--r--  net/ipv4/ip_gre.c                                     |    6
-rw-r--r--  net/ipv4/ip_input.c                                   |   19
-rw-r--r--  net/ipv4/ip_options.c                                 |   63
-rw-r--r--  net/ipv4/ip_output.c                                  |   28
-rw-r--r--  net/ipv4/ip_sockglue.c                                |   13
-rw-r--r--  net/ipv4/ipconfig.c                                   |   11
-rw-r--r--  net/ipv4/ipip.c                                       |    2
-rw-r--r--  net/ipv4/ipmr.c                                       |   12
-rw-r--r--  net/ipv4/ipvs/ip_vs_proto_tcp.c                       |    2
-rw-r--r--  net/ipv4/ipvs/ip_vs_proto_udp.c                       |    2
-rw-r--r--  net/ipv4/ipvs/ip_vs_sync.c                            |    4
-rw-r--r--  net/ipv4/netfilter/arp_tables.c                       |   23
-rw-r--r--  net/ipv4/netfilter/ip_queue.c                         |    2
-rw-r--r--  net/ipv4/netfilter/ip_tables.c                        |   18
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c                    |    2
-rw-r--r--  net/ipv4/netfilter/ipt_MASQUERADE.c                   |   18
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c |    2
-rw-r--r--  net/ipv4/netfilter/nf_nat_helper.c                    |    4
-rw-r--r--  net/ipv4/netfilter/nf_nat_sip.c                       |  556
-rw-r--r--  net/ipv4/netfilter/nf_nat_snmp_basic.c                |   12
-rw-r--r--  net/ipv4/proc.c                                       |   71
-rw-r--r--  net/ipv4/raw.c                                        |   47
-rw-r--r--  net/ipv4/route.c                                      |  252
-rw-r--r--  net/ipv4/syncookies.c                                 |  102
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c                            |  162
-rw-r--r--  net/ipv4/tcp.c                                        |   18
-rw-r--r--  net/ipv4/tcp_cubic.c                                  |   35
-rw-r--r--  net/ipv4/tcp_input.c                                  |   51
-rw-r--r--  net/ipv4/tcp_ipv4.c                                   |  191
-rw-r--r--  net/ipv4/tcp_minisocks.c                              |   36
-rw-r--r--  net/ipv4/tcp_output.c                                 |   11
-rw-r--r--  net/ipv4/tcp_timer.c                                  |    5
-rw-r--r--  net/ipv4/udp.c                                        |  134
-rw-r--r--  net/ipv4/udp_impl.h                                   |    6
-rw-r--r--  net/ipv4/udplite.c                                    |   62
-rw-r--r--  net/ipv4/xfrm4_policy.c                               |    2
52 files changed, 1661 insertions(+), 1086 deletions(-)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 0d109504ed86..72ae8ed5a3d7 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -243,6 +243,23 @@ void build_ehash_secret(void)
 }
 EXPORT_SYMBOL(build_ehash_secret);
 
+static inline int inet_netns_ok(struct net *net, int protocol)
+{
+	int hash;
+	struct net_protocol *ipprot;
+
+	if (net == &init_net)
+		return 1;
+
+	hash = protocol & (MAX_INET_PROTOS - 1);
+	ipprot = rcu_dereference(inet_protos[hash]);
+
+	if (ipprot == NULL)
+		/* raw IP is OK */
+		return 1;
+	return ipprot->netns_ok;
+}
+
 /*
  *	Create an inet socket.
  */
@@ -259,9 +276,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol)
 	int try_loading_module = 0;
 	int err;
 
-	if (net != &init_net)
-		return -EAFNOSUPPORT;
-
 	if (sock->type != SOCK_RAW &&
 	    sock->type != SOCK_DGRAM &&
 	    !inet_ehash_secret)
@@ -320,6 +334,10 @@ lookup_protocol:
 	if (answer->capability > 0 && !capable(answer->capability))
 		goto out_rcu_unlock;
 
+	err = -EAFNOSUPPORT;
+	if (!inet_netns_ok(net, protocol))
+		goto out_rcu_unlock;
+
 	sock->ops = answer->ops;
 	answer_prot = answer->prot;
 	answer_no_check = answer->no_check;
@@ -446,7 +464,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (addr_len < sizeof(struct sockaddr_in))
 		goto out;
 
-	chk_addr_ret = inet_addr_type(&init_net, addr->sin_addr.s_addr);
+	chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
 
 	/* Not specified by any standard per-se, however it breaks too
 	 * many applications when removed. It is unfortunate since
@@ -784,6 +802,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
 	struct sock *sk = sock->sk;
 	int err = 0;
+	struct net *net = sock_net(sk);
 
 	switch (cmd) {
 	case SIOCGSTAMP:
@@ -795,12 +814,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case SIOCADDRT:
 	case SIOCDELRT:
 	case SIOCRTMSG:
-		err = ip_rt_ioctl(sk->sk_net, cmd, (void __user *)arg);
+		err = ip_rt_ioctl(net, cmd, (void __user *)arg);
 		break;
 	case SIOCDARP:
 	case SIOCGARP:
 	case SIOCSARP:
-		err = arp_ioctl(sk->sk_net, cmd, (void __user *)arg);
+		err = arp_ioctl(net, cmd, (void __user *)arg);
 		break;
 	case SIOCGIFADDR:
 	case SIOCSIFADDR:
@@ -813,7 +832,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case SIOCSIFPFLAGS:
 	case SIOCGIFPFLAGS:
 	case SIOCSIFFLAGS:
-		err = devinet_ioctl(cmd, (void __user *)arg);
+		err = devinet_ioctl(net, cmd, (void __user *)arg);
 		break;
 	default:
 		if (sk->sk_prot->ioctl)
@@ -1059,7 +1078,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 	if (sysctl_ip_dynaddr > 1) {
 		printk(KERN_INFO "%s(): shifting inet->"
 		       "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n",
-		       __FUNCTION__,
+		       __func__,
 		       NIPQUAD(old_saddr),
 		       NIPQUAD(new_saddr));
 	}
@@ -1113,7 +1132,7 @@ int inet_sk_rebuild_header(struct sock *sk)
 	};
 
 	security_sk_classify_flow(sk, &fl);
-	err = ip_route_output_flow(&init_net, &rt, &fl, sk, 0);
+	err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
}
 	if (!err)
 		sk_setup_caps(sk, &rt->u.dst);
@@ -1231,6 +1250,29 @@ out:
 	return segs;
 }
 
+int inet_ctl_sock_create(struct sock **sk, unsigned short family,
+			 unsigned short type, unsigned char protocol,
+			 struct net *net)
+{
+	struct socket *sock;
+	int rc = sock_create_kern(family, type, protocol, &sock);
+
+	if (rc == 0) {
+		*sk = sock->sk;
+		(*sk)->sk_allocation = GFP_ATOMIC;
+		/*
+		 * Unhash it so that IP input processing does not even see it,
+		 * we do not wish this socket to see incoming packets.
+		 */
+		(*sk)->sk_prot->unhash(*sk);
+
+		sk_change_net(*sk, net);
+	}
+	return rc;
+}
+
+EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
+
 unsigned long snmp_fold_field(void *mib[], int offt)
 {
 	unsigned long res = 0;
@@ -1283,17 +1325,20 @@ static struct net_protocol tcp_protocol = {
 	.gso_send_check = tcp_v4_gso_send_check,
 	.gso_segment	= tcp_tso_segment,
 	.no_policy	= 1,
+	.netns_ok	= 1,
 };
 
 static struct net_protocol udp_protocol = {
 	.handler	= udp_rcv,
 	.err_handler	= udp_err,
 	.no_policy	= 1,
+	.netns_ok	= 1,
 };
 
 static struct net_protocol icmp_protocol = {
 	.handler	= icmp_rcv,
 	.no_policy	= 1,
+	.netns_ok	= 1,
 };
 
 static int __init init_ipv4_mibs(void)
@@ -1414,7 +1459,7 @@ static int __init inet_init(void)
 
 	ip_init();
 
-	tcp_v4_init(&inet_family_ops);
+	tcp_v4_init();
 
 	/* Setup TCP slab cache for open requests. */
 	tcp_init();
@@ -1429,7 +1474,8 @@ static int __init inet_init(void)
 	 *	Set the ICMP layer up
 	 */
 
-	icmp_init(&inet_family_ops);
+	if (icmp_init() < 0)
+		panic("Failed to create the ICMP control socket.\n");
 
 	/*
 	 *	Initialise the multicast router
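The new inet_ctl_sock_create() helper above packages a pattern that protocols had been open-coding: create a kernel-side socket, switch it to GFP_ATOMIC allocation, unhash it so the input path can never deliver packets to it, and pin it to a namespace. A minimal sketch of a caller, assuming a hypothetical pernet init hook (foo_net_init and the foo_sk field are illustrative, not part of this diff):

	/* Hypothetical caller; only inet_ctl_sock_create() itself
	 * comes from the patch above. */
	static int __net_init foo_net_init(struct net *net)
	{
		/* The returned sock is unhashed, so it can only transmit;
		 * incoming packets are never steered to it. */
		return inet_ctl_sock_create(&net->ipv4.foo_sk, PF_INET,
					    SOCK_RAW, IPPROTO_RAW, net);
	}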
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 8e17f65f4002..3ce2e137e7bc 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -242,7 +242,7 @@ static int arp_constructor(struct neighbour *neigh)
 			return -EINVAL;
 	}
 
-	neigh->type = inet_addr_type(&init_net, addr);
+	neigh->type = inet_addr_type(dev_net(dev), addr);
 
 	parms = in_dev->arp_parms;
 	__neigh_parms_put(neigh->parms);
@@ -341,14 +341,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
 	default:
 	case 0:		/* By default announce any local IP */
-		if (skb && inet_addr_type(&init_net, ip_hdr(skb)->saddr) == RTN_LOCAL)
+		if (skb && inet_addr_type(dev_net(dev), ip_hdr(skb)->saddr) == RTN_LOCAL)
 			saddr = ip_hdr(skb)->saddr;
 		break;
 	case 1:		/* Restrict announcements of saddr in same subnet */
 		if (!skb)
 			break;
 		saddr = ip_hdr(skb)->saddr;
-		if (inet_addr_type(&init_net, saddr) == RTN_LOCAL) {
+		if (inet_addr_type(dev_net(dev), saddr) == RTN_LOCAL) {
 			/* saddr should be known to target */
 			if (inet_addr_onlink(in_dev, target, saddr))
 				break;
@@ -424,7 +424,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 	int flag = 0;
 	/*unsigned long now; */
 
-	if (ip_route_output_key(&init_net, &rt, &fl) < 0)
+	if (ip_route_output_key(dev_net(dev), &rt, &fl) < 0)
 		return 1;
 	if (rt->u.dst.dev != dev) {
 		NET_INC_STATS_BH(LINUX_MIB_ARPFILTER);
@@ -475,9 +475,9 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
 		return 1;
 	}
 
-	paddr = ((struct rtable*)skb->dst)->rt_gateway;
+	paddr = skb->rtable->rt_gateway;
 
-	if (arp_set_predefined(inet_addr_type(&init_net, paddr), haddr, paddr, dev))
+	if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev))
 		return 0;
 
 	n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
@@ -570,14 +570,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
 	 *	Allocate a buffer
 	 */
 
-	skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4)
-				+ LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+	skb = alloc_skb(arp_hdr_len(dev) + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
 	if (skb == NULL)
 		return NULL;
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 	skb_reset_network_header(skb);
-	arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4));
+	arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev));
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_ARP);
 	if (src_hw == NULL)
@@ -710,6 +709,7 @@ static int arp_process(struct sk_buff *skb)
 	u16 dev_type = dev->type;
 	int addr_type;
 	struct neighbour *n;
+	struct net *net = dev_net(dev);
 
 	/* arp_rcv below verifies the ARP header and verifies the device
 	 * is ARP'able.
@@ -805,7 +805,7 @@ static int arp_process(struct sk_buff *skb)
 	/* Special case: IPv4 duplicate address detection packet (RFC2131) */
 	if (sip == 0) {
 		if (arp->ar_op == htons(ARPOP_REQUEST) &&
-		    inet_addr_type(&init_net, tip) == RTN_LOCAL &&
+		    inet_addr_type(net, tip) == RTN_LOCAL &&
 		    !arp_ignore(in_dev, sip, tip))
 			arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
 				 dev->dev_addr, sha);
@@ -815,7 +815,7 @@ static int arp_process(struct sk_buff *skb)
 	if (arp->ar_op == htons(ARPOP_REQUEST) &&
 	    ip_route_input(skb, tip, sip, 0, dev) == 0) {
 
-		rt = (struct rtable*)skb->dst;
+		rt = skb->rtable;
 		addr_type = rt->rt_type;
 
 		if (addr_type == RTN_LOCAL) {
@@ -835,7 +835,7 @@ static int arp_process(struct sk_buff *skb)
 			goto out;
 		} else if (IN_DEV_FORWARD(in_dev)) {
 			if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev &&
-			    (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &init_net, &tip, dev, 0))) {
+			    (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
 				n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
 				if (n)
 					neigh_release(n);
@@ -858,14 +858,14 @@ static int arp_process(struct sk_buff *skb)
 
 	n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
 
-	if (IPV4_DEVCONF_ALL(dev->nd_net, ARP_ACCEPT)) {
+	if (IPV4_DEVCONF_ALL(dev_net(dev), ARP_ACCEPT)) {
 		/* Unsolicited ARP is not accepted by default.
 		   It is possible, that this option should be enabled for some
 		   devices (strip is candidate)
 		 */
 		if (n == NULL &&
 		    arp->ar_op == htons(ARPOP_REPLY) &&
-		    inet_addr_type(&init_net, sip) == RTN_UNICAST)
+		    inet_addr_type(net, sip) == RTN_UNICAST)
 			n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
 	}
 
@@ -912,13 +912,8 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
 {
 	struct arphdr *arp;
 
-	if (dev->nd_net != &init_net)
-		goto freeskb;
-
 	/* ARP header, plus 2 device addresses, plus 2 IP addresses. */
-	if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
-				 (2 * dev->addr_len) +
-				 (2 * sizeof(u32)))))
+	if (!pskb_may_pull(skb, arp_hdr_len(dev)))
 		goto freeskb;
 
 	arp = arp_hdr(skb);
@@ -1201,9 +1196,6 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo
 {
 	struct net_device *dev = ptr;
 
-	if (dev->nd_net != &init_net)
-		return NOTIFY_DONE;
-
 	switch (event) {
 	case NETDEV_CHANGEADDR:
 		neigh_changeaddr(&arp_tbl, dev);
@@ -1385,13 +1377,29 @@ static const struct file_operations arp_seq_fops = {
 	.release	= seq_release_net,
 };
 
-static int __init arp_proc_init(void)
+
+static int __net_init arp_net_init(struct net *net)
 {
-	if (!proc_net_fops_create(&init_net, "arp", S_IRUGO, &arp_seq_fops))
+	if (!proc_net_fops_create(net, "arp", S_IRUGO, &arp_seq_fops))
 		return -ENOMEM;
 	return 0;
 }
 
+static void __net_exit arp_net_exit(struct net *net)
+{
+	proc_net_remove(net, "arp");
+}
+
+static struct pernet_operations arp_net_ops = {
+	.init = arp_net_init,
+	.exit = arp_net_exit,
+};
+
+static int __init arp_proc_init(void)
+{
+	return register_pernet_subsys(&arp_net_ops);
+}
+
 #else /* CONFIG_PROC_FS */
 
 static int __init arp_proc_init(void)
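The arp_proc_init() rewrite is the standard pernet conversion: instead of creating /proc/net/arp once in init_net, a pernet_operations pair creates and removes the entry as namespaces come and go, and register_pernet_subsys() also runs .init for namespaces that already exist at registration time. The same skeleton, reduced to a template with placeholder names ("foo" and foo_seq_fops are illustrative, not from this diff):

	static const struct file_operations foo_seq_fops;	/* assumed defined elsewhere */

	static int __net_init foo_net_init(struct net *net)
	{
		if (!proc_net_fops_create(net, "foo", S_IRUGO, &foo_seq_fops))
			return -ENOMEM;
		return 0;
	}

	static void __net_exit foo_net_exit(struct net *net)
	{
		proc_net_remove(net, "foo");
	}

	static struct pernet_operations foo_net_ops = {
		.init = foo_net_init,
		.exit = foo_net_exit,
	};

	static int __init foo_proc_init(void)
	{
		return register_pernet_subsys(&foo_net_ops);
	}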
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 8cd357f41283..4637ded3dba8 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1800,7 +1800,6 @@ int cipso_v4_sock_setattr(struct sock *sk,
 	}
 	memcpy(opt->__data, buf, buf_len);
 	opt->optlen = opt_len;
-	opt->is_data = 1;
 	opt->cipso = sizeof(struct iphdr);
 	kfree(buf);
 	buf = NULL;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 87490f7bb0f7..6848e4760f34 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -165,7 +165,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
 	if (!in_dev)
 		goto out;
 	INIT_RCU_HEAD(&in_dev->rcu_head);
-	memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt,
+	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
 	       sizeof(in_dev->cnf));
 	in_dev->cnf.sysctl = NULL;
 	in_dev->dev = dev;
@@ -437,7 +437,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 
 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct net *net = skb->sk->sk_net;
+	struct net *net = sock_net(skb->sk);
 	struct nlattr *tb[IFA_MAX+1];
 	struct in_device *in_dev;
 	struct ifaddrmsg *ifm;
@@ -446,9 +446,6 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
 
 	ASSERT_RTNL();
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 	if (err < 0)
 		goto errout;
@@ -555,14 +552,11 @@ errout:
 
 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-	struct net *net = skb->sk->sk_net;
+	struct net *net = sock_net(skb->sk);
 	struct in_ifaddr *ifa;
 
 	ASSERT_RTNL();
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	ifa = rtm_to_ifaddr(net, nlh);
 	if (IS_ERR(ifa))
 		return PTR_ERR(ifa);
@@ -595,7 +589,7 @@ static __inline__ int inet_abc_len(__be32 addr)
 }
 
 
-int devinet_ioctl(unsigned int cmd, void __user *arg)
+int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 {
 	struct ifreq ifr;
 	struct sockaddr_in sin_orig;
@@ -624,7 +618,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
 		*colon = 0;
 
 #ifdef CONFIG_KMOD
-	dev_load(&init_net, ifr.ifr_name);
+	dev_load(net, ifr.ifr_name);
 #endif
 
 	switch (cmd) {
@@ -665,7 +659,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
 	rtnl_lock();
 
 	ret = -ENODEV;
-	if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
+	if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
 		goto done;
 
 	if (colon)
@@ -878,6 +872,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
 {
 	__be32 addr = 0;
 	struct in_device *in_dev;
+	struct net *net = dev_net(dev);
 
 	rcu_read_lock();
 	in_dev = __in_dev_get_rcu(dev);
@@ -906,7 +901,7 @@ no_in_dev:
 	 */
 	read_lock(&dev_base_lock);
 	rcu_read_lock();
-	for_each_netdev(&init_net, dev) {
+	for_each_netdev(net, dev) {
 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
 			continue;
 
@@ -979,7 +974,7 @@ __be32 inet_confirm_addr(struct in_device *in_dev,
 	if (scope != RT_SCOPE_LINK)
 		return confirm_addr_indev(in_dev, dst, local, scope);
 
-	net = in_dev->dev->nd_net;
+	net = dev_net(in_dev->dev);
 	read_lock(&dev_base_lock);
 	rcu_read_lock();
 	for_each_netdev(net, dev) {
@@ -1045,9 +1040,6 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 	struct net_device *dev = ptr;
 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
 
-	if (dev->nd_net != &init_net)
-		return NOTIFY_DONE;
-
 	ASSERT_RTNL();
 
 	if (!in_dev) {
@@ -1166,16 +1158,13 @@ nla_put_failure:
 
 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct net *net = skb->sk->sk_net;
+	struct net *net = sock_net(skb->sk);
 	int idx, ip_idx;
 	struct net_device *dev;
 	struct in_device *in_dev;
 	struct in_ifaddr *ifa;
 	int s_ip_idx, s_idx = cb->args[0];
 
-	if (net != &init_net)
-		return 0;
-
 	s_ip_idx = ip_idx = cb->args[1];
 	idx = 0;
 	for_each_netdev(net, dev) {
@@ -1214,7 +1203,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
 	int err = -ENOBUFS;
 	struct net *net;
 
-	net = ifa->ifa_dev->dev->nd_net;
+	net = dev_net(ifa->ifa_dev->dev);
 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
 	if (skb == NULL)
 		goto errout;
@@ -1528,7 +1517,7 @@ static void devinet_sysctl_register(struct in_device *idev)
 {
 	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
 			      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
-	__devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
+	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
 			idev->dev->ifindex, &idev->cnf);
 }
 
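Most of the devinet.c and arp.c churn is the mechanical switch from poking dev->nd_net and sk->sk_net directly to the dev_net()/sock_net() accessors. The point is that the accessor can compile the namespace away entirely when CONFIG_NET_NS is off; a sketch of its shape in this era of include/linux/netdevice.h (reconstructed from memory, not part of this diff):

	/* Sketch of the accessor this series converts callers to; with
	 * CONFIG_NET_NS disabled, every lookup collapses to the
	 * compile-time constant &init_net. */
	static inline struct net *dev_net(const struct net_device *dev)
	{
	#ifdef CONFIG_NET_NS
		return dev->nd_net;
	#else
		return &init_net;
	#endif
	}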
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 86ff2711fc95..0f1557a4ac7a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -257,7 +257,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 	if (in_dev == NULL)
 		goto e_inval;
 
-	net = dev->nd_net;
+	net = dev_net(dev);
 	if (fib_lookup(net, &fl, &res))
 		goto last_resort;
 	if (res.type != RTN_UNICAST)
@@ -583,7 +583,7 @@ errout:
 
 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct net *net = skb->sk->sk_net;
+	struct net *net = sock_net(skb->sk);
 	struct fib_config cfg;
 	struct fib_table *tb;
 	int err;
@@ -605,7 +605,7 @@ errout:
 
 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
-	struct net *net = skb->sk->sk_net;
+	struct net *net = sock_net(skb->sk);
 	struct fib_config cfg;
 	struct fib_table *tb;
 	int err;
@@ -627,7 +627,7 @@ errout:
 
 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct net *net = skb->sk->sk_net;
+	struct net *net = sock_net(skb->sk);
 	unsigned int h, s_h;
 	unsigned int e = 0, s_e;
 	struct fib_table *tb;
@@ -674,7 +674,7 @@ out:
 
 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
 {
-	struct net *net = ifa->ifa_dev->dev->nd_net;
+	struct net *net = dev_net(ifa->ifa_dev->dev);
 	struct fib_table *tb;
 	struct fib_config cfg = {
 		.fc_protocol = RTPROT_KERNEL,
@@ -801,15 +801,15 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
 
 		/* Check, that this local address finally disappeared. */
-		if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) {
+		if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
 			/* And the last, but not the least thing.
 			   We must flush stray FIB entries.
 
 			   First of all, we scan fib_info list searching
 			   for stray nexthop entries, then ignite fib_flush.
 			*/
-			if (fib_sync_down_addr(dev->nd_net, ifa->ifa_local))
-				fib_flush(dev->nd_net);
+			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
+				fib_flush(dev_net(dev));
 		}
 	}
 #undef LOCAL_OK
@@ -857,7 +857,7 @@ static void nl_fib_input(struct sk_buff *skb)
 	struct fib_table *tb;
 	u32 pid;
 
-	net = skb->sk->sk_net;
+	net = sock_net(skb->sk);
 	nlh = nlmsg_hdr(skb);
 	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
 	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
@@ -899,7 +899,7 @@ static void nl_fib_lookup_exit(struct net *net)
 static void fib_disable_ip(struct net_device *dev, int force)
 {
 	if (fib_sync_down_dev(dev, force))
-		fib_flush(dev->nd_net);
+		fib_flush(dev_net(dev));
 	rt_cache_flush(0);
 	arp_ifdown(dev);
 }
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 8d58d85dfac6..02088deb0461 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -821,7 +821,7 @@ static struct fib_alias *fib_get_first(struct seq_file *seq)
 	struct fib_table *main_table;
 	struct fn_hash *table;
 
-	main_table = fib_get_table(iter->p.net, RT_TABLE_MAIN);
+	main_table = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
 	table = (struct fn_hash *)main_table->tb_data;
 
 	iter->bucket = 0;
@@ -959,11 +959,10 @@ static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
 static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(fib_hash_lock)
 {
-	struct fib_iter_state *iter = seq->private;
 	void *v = NULL;
 
 	read_lock(&fib_hash_lock);
-	if (fib_get_table(iter->p.net, RT_TABLE_MAIN))
+	if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN))
 		v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 	return v;
 }
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 19274d01afa4..1fb56876be54 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -137,7 +137,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 			       struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
 			       struct nlattr **tb)
 {
-	struct net *net = skb->sk->sk_net;
+	struct net *net = sock_net(skb->sk);
 	int err = -EINVAL;
 	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
 
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f6cdc012eec5..1ada5a6b03ea 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -122,7 +122,10 @@ struct tnode {
 	unsigned char bits;		/* 2log(KEYLENGTH) bits needed */
 	unsigned int full_children;	/* KEYLENGTH bits needed */
 	unsigned int empty_children;	/* KEYLENGTH bits needed */
-	struct rcu_head rcu;
+	union {
+		struct rcu_head rcu;
+		struct work_struct work;
+	};
 	struct node *child[0];
 };
 
@@ -160,7 +163,6 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
 static struct node *resize(struct trie *t, struct tnode *tn);
 static struct tnode *inflate(struct trie *t, struct tnode *tn);
 static struct tnode *halve(struct trie *t, struct tnode *tn);
-static void tnode_free(struct tnode *tn);
 
 static struct kmem_cache *fn_alias_kmem __read_mostly;
 static struct kmem_cache *trie_leaf_kmem __read_mostly;
@@ -334,6 +336,11 @@ static void __leaf_free_rcu(struct rcu_head *head)
 	kmem_cache_free(trie_leaf_kmem, l);
 }
 
+static inline void free_leaf(struct leaf *l)
+{
+	call_rcu_bh(&l->rcu, __leaf_free_rcu);
+}
+
 static void __leaf_info_free_rcu(struct rcu_head *head)
 {
 	kfree(container_of(head, struct leaf_info, rcu));
@@ -346,16 +353,16 @@ static inline void free_leaf_info(struct leaf_info *leaf)
 
 static struct tnode *tnode_alloc(size_t size)
 {
-	struct page *pages;
-
 	if (size <= PAGE_SIZE)
 		return kzalloc(size, GFP_KERNEL);
+	else
+		return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+}
 
-	pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size));
-	if (!pages)
-		return NULL;
-
-	return page_address(pages);
+static void __tnode_vfree(struct work_struct *arg)
+{
+	struct tnode *tn = container_of(arg, struct tnode, work);
+	vfree(tn);
 }
 
 static void __tnode_free_rcu(struct rcu_head *head)
@@ -366,16 +373,17 @@ static void __tnode_free_rcu(struct rcu_head *head)
 
 	if (size <= PAGE_SIZE)
 		kfree(tn);
-	else
-		free_pages((unsigned long)tn, get_order(size));
+	else {
+		INIT_WORK(&tn->work, __tnode_vfree);
+		schedule_work(&tn->work);
+	}
 }
 
 static inline void tnode_free(struct tnode *tn)
 {
-	if (IS_LEAF(tn)) {
-		struct leaf *l = (struct leaf *) tn;
-		call_rcu_bh(&l->rcu, __leaf_free_rcu);
-	} else
+	if (IS_LEAF(tn))
+		free_leaf((struct leaf *) tn);
+	else
 		call_rcu(&tn->rcu, __tnode_free_rcu);
 }
 
@@ -1086,7 +1094,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
 		li = leaf_info_new(plen);
 
 		if (!li) {
-			tnode_free((struct tnode *) l);
+			free_leaf(l);
 			return NULL;
 		}
 
@@ -1122,7 +1130,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
 
 			if (!tn) {
 				free_leaf_info(li);
-				tnode_free((struct tnode *) l);
+				free_leaf(l);
 				return NULL;
 			}
 
@@ -1578,7 +1586,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
 	} else
 		rcu_assign_pointer(t->trie, NULL);
 
-	tnode_free((struct tnode *) l);
+	free_leaf(l);
 }
 
 /*
@@ -1665,7 +1673,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
 	return 0;
 }
 
-static int trie_flush_list(struct trie *t, struct list_head *head)
+static int trie_flush_list(struct list_head *head)
 {
 	struct fib_alias *fa, *fa_node;
 	int found = 0;
@@ -1683,7 +1691,7 @@ static int trie_flush_list(struct trie *t, struct list_head *head)
 	return found;
 }
 
-static int trie_flush_leaf(struct trie *t, struct leaf *l)
+static int trie_flush_leaf(struct leaf *l)
 {
 	int found = 0;
 	struct hlist_head *lih = &l->list;
@@ -1691,7 +1699,7 @@ static int trie_flush_leaf(struct trie *t, struct leaf *l)
 	struct leaf_info *li = NULL;
 
 	hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
-		found += trie_flush_list(t, &li->falh);
+		found += trie_flush_list(&li->falh);
 
 		if (list_empty(&li->falh)) {
 			hlist_del_rcu(&li->hlist);
@@ -1782,7 +1790,7 @@ static int fn_trie_flush(struct fib_table *tb)
 	int found = 0;
 
 	for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
-		found += trie_flush_leaf(t, l);
+		found += trie_flush_leaf(l);
 
 		if (ll && hlist_empty(&ll->list))
 			trie_leaf_remove(t, ll);
@@ -2029,9 +2037,8 @@ struct fib_table *fib_hash_table(u32 id)
 /* Depth first Trie walk iterator */
 struct fib_trie_iter {
 	struct seq_net_private p;
-	struct trie *trie_local, *trie_main;
+	struct fib_table *tb;
 	struct tnode *tnode;
-	struct trie *trie;
 	unsigned index;
 	unsigned depth;
 };
@@ -2084,31 +2091,26 @@ rescan:
 static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
 				       struct trie *t)
 {
-	struct node *n ;
+	struct node *n;
 
 	if (!t)
 		return NULL;
 
 	n = rcu_dereference(t->trie);
-
-	if (!iter)
+	if (!n)
 		return NULL;
 
-	if (n) {
-		if (IS_TNODE(n)) {
-			iter->tnode = (struct tnode *) n;
-			iter->trie = t;
-			iter->index = 0;
-			iter->depth = 1;
-		} else {
-			iter->tnode = NULL;
-			iter->trie = t;
-			iter->index = 0;
-			iter->depth = 0;
-		}
-		return n;
+	if (IS_TNODE(n)) {
+		iter->tnode = (struct tnode *) n;
+		iter->index = 0;
+		iter->depth = 1;
+	} else {
+		iter->tnode = NULL;
+		iter->index = 0;
+		iter->depth = 0;
 	}
-	return NULL;
+
+	return n;
 }
 
@@ -2119,8 +2121,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
 	memset(s, 0, sizeof(*s));
 
 	rcu_read_lock();
-	for (n = fib_trie_get_first(&iter, t); n;
-	     n = fib_trie_get_next(&iter)) {
+	for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
 		if (IS_LEAF(n)) {
 			struct leaf *l = (struct leaf *)n;
 			struct leaf_info *li;
@@ -2209,36 +2210,48 @@ static void trie_show_usage(struct seq_file *seq,
 }
 #endif /*  CONFIG_IP_FIB_TRIE_STATS */
 
-static void fib_trie_show(struct seq_file *seq, const char *name,
-			  struct trie *trie)
+static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
 {
-	struct trie_stat stat;
-
-	trie_collect_stats(trie, &stat);
-	seq_printf(seq, "%s:\n", name);
-	trie_show_stats(seq, &stat);
-#ifdef CONFIG_IP_FIB_TRIE_STATS
-	trie_show_usage(seq, &trie->stats);
-#endif
+	if (tb->tb_id == RT_TABLE_LOCAL)
+		seq_puts(seq, "Local:\n");
+	else if (tb->tb_id == RT_TABLE_MAIN)
+		seq_puts(seq, "Main:\n");
+	else
+		seq_printf(seq, "Id %d:\n", tb->tb_id);
 }
 
+
 static int fib_triestat_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = (struct net *)seq->private;
-	struct fib_table *tb;
+	unsigned int h;
 
 	seq_printf(seq,
 		   "Basic info: size of leaf:"
 		   " %Zd bytes, size of tnode: %Zd bytes.\n",
 		   sizeof(struct leaf), sizeof(struct tnode));
 
-	tb = fib_get_table(net, RT_TABLE_LOCAL);
-	if (tb)
-		fib_trie_show(seq, "Local", (struct trie *) tb->tb_data);
+	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+		struct hlist_node *node;
+		struct fib_table *tb;
+
+		hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
+			struct trie *t = (struct trie *) tb->tb_data;
+			struct trie_stat stat;
+
+			if (!t)
+				continue;
 
-	tb = fib_get_table(net, RT_TABLE_MAIN);
-	if (tb)
-		fib_trie_show(seq, "Main", (struct trie *) tb->tb_data);
+			fib_table_print(seq, tb);
+
+			trie_collect_stats(t, &stat);
+			trie_show_stats(seq, &stat);
+#ifdef CONFIG_IP_FIB_TRIE_STATS
+			trie_show_usage(seq, &t->stats);
+#endif
+		}
+	}
 
 	return 0;
 }
@@ -2274,67 +2287,79 @@ static const struct file_operations fib_triestat_fops = {
 	.release = fib_triestat_seq_release,
 };
 
-static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
-				     loff_t pos)
+static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
 {
+	struct fib_trie_iter *iter = seq->private;
+	struct net *net = seq_file_net(seq);
 	loff_t idx = 0;
-	struct node *n;
+	unsigned int h;
 
-	for (n = fib_trie_get_first(iter, iter->trie_local);
-	     n; ++idx, n = fib_trie_get_next(iter)) {
-		if (pos == idx)
-			return n;
-	}
+	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+		struct hlist_node *node;
+		struct fib_table *tb;
 
-	for (n = fib_trie_get_first(iter, iter->trie_main);
-	     n; ++idx, n = fib_trie_get_next(iter)) {
-		if (pos == idx)
-			return n;
+		hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
+			struct node *n;
+
+			for (n = fib_trie_get_first(iter,
+						    (struct trie *) tb->tb_data);
+			     n; n = fib_trie_get_next(iter))
+				if (pos == idx++) {
+					iter->tb = tb;
+					return n;
+				}
+		}
 	}
+
 	return NULL;
 }
 
 static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(RCU)
 {
-	struct fib_trie_iter *iter = seq->private;
-	struct fib_table *tb;
-
-	if (!iter->trie_local) {
-		tb = fib_get_table(iter->p.net, RT_TABLE_LOCAL);
-		if (tb)
-			iter->trie_local = (struct trie *) tb->tb_data;
-	}
-	if (!iter->trie_main) {
-		tb = fib_get_table(iter->p.net, RT_TABLE_MAIN);
-		if (tb)
-			iter->trie_main = (struct trie *) tb->tb_data;
-	}
 	rcu_read_lock();
-	if (*pos == 0)
-		return SEQ_START_TOKEN;
-	return fib_trie_get_idx(iter, *pos - 1);
+	return fib_trie_get_idx(seq, *pos);
 }
 
 static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct fib_trie_iter *iter = seq->private;
-	void *l = v;
+	struct net *net = seq_file_net(seq);
+	struct fib_table *tb = iter->tb;
+	struct hlist_node *tb_node;
+	unsigned int h;
+	struct node *n;
 
 	++*pos;
-	if (v == SEQ_START_TOKEN)
-		return fib_trie_get_idx(iter, 0);
+	/* next node in same table */
+	n = fib_trie_get_next(iter);
+	if (n)
+		return n;
 
-	v = fib_trie_get_next(iter);
-	BUG_ON(v == l);
-	if (v)
-		return v;
+	/* walk rest of this hash chain */
+	h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
+	while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) {
+		tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
+		n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
+		if (n)
+			goto found;
+	}
 
-	/* continue scan in next trie */
-	if (iter->trie == iter->trie_local)
-		return fib_trie_get_first(iter, iter->trie_main);
+	/* new hash chain */
+	while (++h < FIB_TABLE_HASHSZ) {
+		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+		hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) {
+			n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
+			if (n)
+				goto found;
+		}
+	}
 	return NULL;
+
+found:
+	iter->tb = tb;
+	return n;
 }
 
 static void fib_trie_seq_stop(struct seq_file *seq, void *v)
@@ -2391,15 +2416,8 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 	const struct fib_trie_iter *iter = seq->private;
 	struct node *n = v;
 
-	if (v == SEQ_START_TOKEN)
-		return 0;
-
-	if (!node_parent_rcu(n)) {
-		if (iter->trie == iter->trie_local)
-			seq_puts(seq, "<local>:\n");
-		else
-			seq_puts(seq, "<main>:\n");
-	}
+	if (!node_parent_rcu(n))
+		fib_table_print(seq, iter->tb);
 
 	if (IS_TNODE(n)) {
 		struct tnode *tn = (struct tnode *) n;
@@ -2502,7 +2520,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
 	struct fib_table *tb;
 
 	rcu_read_lock();
-	tb = fib_get_table(iter->p.net, RT_TABLE_MAIN);
+	tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
 	if (!tb)
 		return NULL;
 
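Two fib_trie changes are worth calling out. tnode_alloc() now falls back to __vmalloc() instead of alloc_pages(), and because call_rcu() callbacks run in softirq context, where vfree() is not allowed, __tnode_free_rcu() bounces large nodes to process context through a work item that shares storage with the rcu_head in a union. Once the grace period has elapsed the node is unreachable, so reusing its own memory for the work item is safe. The generic shape of the trick ("blob" is a placeholder type, not from this diff):

	struct blob {
		union {
			struct rcu_head rcu;
			struct work_struct work;
		};
		/* ... payload ... */
	};

	static void blob_vfree_work(struct work_struct *w)
	{
		vfree(container_of(w, struct blob, work));	/* process context: vfree OK */
	}

	static void blob_free_rcu(struct rcu_head *head)
	{
		struct blob *b = container_of(head, struct blob, rcu);

		/* softirq context: vfree() is forbidden here, so defer it */
		INIT_WORK(&b->work, blob_vfree_work);
		schedule_work(&b->work);
	}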
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 40508babad8c..3e14d9cd29b3 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -93,6 +93,7 @@
 #include <asm/uaccess.h>
 #include <net/checksum.h>
 #include <net/xfrm.h>
+#include <net/inet_common.h>
 
 /*
  *	Build xmit assembly blocks
@@ -188,29 +189,6 @@ struct icmp_err icmp_err_convert[] = {
 	},
 };
 
-/* Control parameters for ECHO replies. */
-int sysctl_icmp_echo_ignore_all __read_mostly;
-int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1;
-
-/* Control parameter - ignore bogus broadcast responses? */
-int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1;
-
-/*
- *	Configurable global rate limit.
- *
- *	ratelimit defines tokens/packet consumed for dst->rate_token bucket
- *	ratemask defines which icmp types are ratelimited by setting
- *	it's bit position.
- *
- *	default:
- *	dest unreachable (3), source quench (4),
- *	time exceeded (11), parameter problem (12)
- */
-
-int sysctl_icmp_ratelimit __read_mostly = 1 * HZ;
-int sysctl_icmp_ratemask __read_mostly = 0x1818;
-int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly;
-
 /*
  *	ICMP control array. This specifies what to do with each ICMP.
  */
@@ -229,14 +207,16 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
  *
  *	On SMP we have one ICMP socket per-cpu.
  */
-static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL;
-#define icmp_socket	__get_cpu_var(__icmp_socket)
+static struct sock *icmp_sk(struct net *net)
+{
+	return net->ipv4.icmp_sk[smp_processor_id()];
+}
 
-static inline int icmp_xmit_lock(void)
+static inline int icmp_xmit_lock(struct sock *sk)
 {
 	local_bh_disable();
 
-	if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) {
+	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
 		/* This can happen if the output path signals a
 		 * dst_link_failure() for an outgoing ICMP packet.
 		 */
@@ -246,9 +226,9 @@ static inline int icmp_xmit_lock(void)
 	return 0;
 }
 
-static inline void icmp_xmit_unlock(void)
+static inline void icmp_xmit_unlock(struct sock *sk)
 {
-	spin_unlock_bh(&icmp_socket->sk->sk_lock.slock);
+	spin_unlock_bh(&sk->sk_lock.slock);
 }
 
 /*
@@ -291,7 +271,8 @@ int xrlim_allow(struct dst_entry *dst, int timeout)
 	return rc;
 }
 
-static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code)
+static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
+		int type, int code)
 {
 	struct dst_entry *dst = &rt->u.dst;
 	int rc = 1;
@@ -308,8 +289,8 @@ static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code)
 		goto out;
 
 	/* Limit if icmp type is enabled in ratemask. */
-	if ((1 << type) & sysctl_icmp_ratemask)
-		rc = xrlim_allow(dst, sysctl_icmp_ratelimit);
+	if ((1 << type) & net->ipv4.sysctl_icmp_ratemask)
+		rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit);
 out:
 	return rc;
 }
@@ -346,19 +327,21 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
 static void icmp_push_reply(struct icmp_bxm *icmp_param,
 			    struct ipcm_cookie *ipc, struct rtable *rt)
 {
+	struct sock *sk;
 	struct sk_buff *skb;
 
-	if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
+	sk = icmp_sk(dev_net(rt->u.dst.dev));
+	if (ip_append_data(sk, icmp_glue_bits, icmp_param,
 			   icmp_param->data_len+icmp_param->head_len,
 			   icmp_param->head_len,
 			   ipc, rt, MSG_DONTWAIT) < 0)
-		ip_flush_pending_frames(icmp_socket->sk);
-	else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
+		ip_flush_pending_frames(sk);
+	else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
 		struct icmphdr *icmph = icmp_hdr(skb);
 		__wsum csum = 0;
 		struct sk_buff *skb1;
 
-		skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) {
+		skb_queue_walk(&sk->sk_write_queue, skb1) {
 			csum = csum_add(csum, skb1->csum);
 		}
 		csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
@@ -366,7 +349,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 						 icmp_param->head_len, csum);
 		icmph->checksum = csum_fold(csum);
 		skb->ip_summed = CHECKSUM_NONE;
-		ip_push_pending_frames(icmp_socket->sk);
+		ip_push_pending_frames(sk);
 	}
 }
 
@@ -376,16 +359,17 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 
 static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 {
-	struct sock *sk = icmp_socket->sk;
-	struct inet_sock *inet = inet_sk(sk);
 	struct ipcm_cookie ipc;
-	struct rtable *rt = (struct rtable *)skb->dst;
+	struct rtable *rt = skb->rtable;
+	struct net *net = dev_net(rt->u.dst.dev);
+	struct sock *sk = icmp_sk(net);
+	struct inet_sock *inet = inet_sk(sk);
 	__be32 daddr;
 
 	if (ip_options_echo(&icmp_param->replyopts, skb))
 		return;
 
-	if (icmp_xmit_lock())
+	if (icmp_xmit_lock(sk))
 		return;
 
 	icmp_param->data.icmph.checksum = 0;
@@ -405,15 +389,15 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 						.tos = RT_TOS(ip_hdr(skb)->tos) } },
 				    .proto = IPPROTO_ICMP };
 		security_skb_classify_flow(skb, &fl);
-		if (ip_route_output_key(rt->u.dst.dev->nd_net, &rt, &fl))
+		if (ip_route_output_key(net, &rt, &fl))
 			goto out_unlock;
 	}
-	if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type,
+	if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type,
 			       icmp_param->data.icmph.code))
 		icmp_push_reply(icmp_param, &ipc, rt);
 	ip_rt_put(rt);
 out_unlock:
-	icmp_xmit_unlock();
+	icmp_xmit_unlock(sk);
 }
 
 
@@ -433,15 +417,17 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	struct iphdr *iph;
 	int room;
 	struct icmp_bxm icmp_param;
-	struct rtable *rt = (struct rtable *)skb_in->dst;
+	struct rtable *rt = skb_in->rtable;
 	struct ipcm_cookie ipc;
 	__be32 saddr;
 	u8  tos;
 	struct net *net;
+	struct sock *sk;
 
 	if (!rt)
 		goto out;
-	net = rt->u.dst.dev->nd_net;
+	net = dev_net(rt->u.dst.dev);
+	sk = icmp_sk(net);
 
 	/*
 	 *	Find the original header. It is expected to be valid, of course.
@@ -505,7 +491,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 		}
 	}
 
-	if (icmp_xmit_lock())
+	if (icmp_xmit_lock(sk))
 		return;
 
 	/*
@@ -516,7 +502,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	if (!(rt->rt_flags & RTCF_LOCAL)) {
 		struct net_device *dev = NULL;
 
-		if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr)
+		if (rt->fl.iif &&
+			net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
 			dev = dev_get_by_index(net, rt->fl.iif);
 
 		if (dev) {
@@ -544,7 +531,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	icmp_param.data.icmph.checksum	 = 0;
 	icmp_param.skb	  = skb_in;
 	icmp_param.offset = skb_network_offset(skb_in);
-	inet_sk(icmp_socket->sk)->tos = tos;
+	inet_sk(sk)->tos = tos;
 	ipc.addr = iph->saddr;
 	ipc.opt = &icmp_param.replyopts;
 
@@ -609,7 +596,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 					  RT_TOS(tos), rt2->u.dst.dev);
 
 			dst_release(&rt2->u.dst);
-			rt2 = (struct rtable *)skb_in->dst;
+			rt2 = skb_in->rtable;
 			skb_in->dst = odst;
 		}
 
@@ -634,7 +621,7 @@ relookup_failed:
 	}
 
 route_done:
-	if (!icmpv4_xrlim_allow(rt, type, code))
+	if (!icmpv4_xrlim_allow(net, rt, type, code))
 		goto ende;
 
 	/* RFC says return as much as we can without exceeding 576 bytes. */
@@ -654,7 +641,7 @@ route_done:
 ende:
 	ip_rt_put(rt);
 out_unlock:
-	icmp_xmit_unlock();
+	icmp_xmit_unlock(sk);
 out:;
 }
 
@@ -672,7 +659,7 @@ static void icmp_unreach(struct sk_buff *skb)
 	u32 info = 0;
 	struct net *net;
 
-	net = skb->dst->dev->nd_net;
+	net = dev_net(skb->dst->dev);
 
 	/*
 	 *	Incomplete header ?
@@ -740,7 +727,7 @@ static void icmp_unreach(struct sk_buff *skb)
 	 *	get the other vendor to fix their kit.
 	 */
 
-	if (!sysctl_icmp_ignore_bogus_error_responses &&
+	if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
 	    inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
 		if (net_ratelimit())
 			printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP "
@@ -835,7 +822,10 @@ out_err:
 
 static void icmp_echo(struct sk_buff *skb)
 {
-	if (!sysctl_icmp_echo_ignore_all) {
+	struct net *net;
+
+	net = dev_net(skb->dst->dev);
+	if (!net->ipv4.sysctl_icmp_echo_ignore_all) {
 		struct icmp_bxm icmp_param;
 
 		icmp_param.data.icmph	   = *icmp_hdr(skb);
@@ -938,7 +928,7 @@ static void icmp_address(struct sk_buff *skb)
 
 static void icmp_address_reply(struct sk_buff *skb)
 {
-	struct rtable *rt = (struct rtable *)skb->dst;
+	struct rtable *rt = skb->rtable;
 	struct net_device *dev = skb->dev;
 	struct in_device *in_dev;
 	struct in_ifaddr *ifa;
@@ -983,7 +973,7 @@ static void icmp_discard(struct sk_buff *skb)
 int icmp_rcv(struct sk_buff *skb)
 {
 	struct icmphdr *icmph;
-	struct rtable *rt = (struct rtable *)skb->dst;
+	struct rtable *rt = skb->rtable;
 
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 		int nh;
@@ -1038,6 +1028,9 @@ int icmp_rcv(struct sk_buff *skb)
 	 */
 
 	if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
+		struct net *net;
+
+		net = dev_net(rt->u.dst.dev);
 		/*
 		 *	RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be
 		 *	silently ignored (we let user decide with a sysctl).
@@ -1046,7 +1039,7 @@ int icmp_rcv(struct sk_buff *skb)
1046 */ 1039 */
1047 if ((icmph->type == ICMP_ECHO || 1040 if ((icmph->type == ICMP_ECHO ||
1048 icmph->type == ICMP_TIMESTAMP) && 1041 icmph->type == ICMP_TIMESTAMP) &&
1049 sysctl_icmp_echo_ignore_broadcasts) { 1042 net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {
1050 goto error; 1043 goto error;
1051 } 1044 }
1052 if (icmph->type != ICMP_ECHO && 1045 if (icmph->type != ICMP_ECHO &&
@@ -1141,38 +1134,84 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
1141 }, 1134 },
1142}; 1135};
1143 1136
1144void __init icmp_init(struct net_proto_family *ops) 1137static void __net_exit icmp_sk_exit(struct net *net)
1145{ 1138{
1146 struct inet_sock *inet;
1147 int i; 1139 int i;
1148 1140
1149 for_each_possible_cpu(i) { 1141 for_each_possible_cpu(i)
1150 int err; 1142 inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]);
1143 kfree(net->ipv4.icmp_sk);
1144 net->ipv4.icmp_sk = NULL;
1145}
1146
1147int __net_init icmp_sk_init(struct net *net)
1148{
1149 int i, err;
1151 1150
1152 err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, 1151 net->ipv4.icmp_sk =
1153 &per_cpu(__icmp_socket, i)); 1152 kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
1153 if (net->ipv4.icmp_sk == NULL)
1154 return -ENOMEM;
1154 1155
1156 for_each_possible_cpu(i) {
1157 struct sock *sk;
1158
1159 err = inet_ctl_sock_create(&sk, PF_INET,
1160 SOCK_RAW, IPPROTO_ICMP, net);
1155 if (err < 0) 1161 if (err < 0)
1156 panic("Failed to create the ICMP control socket.\n"); 1162 goto fail;
1157 1163
1158 per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC; 1164 net->ipv4.icmp_sk[i] = sk;
1159 1165
1160 /* Enough space for 2 64K ICMP packets, including 1166 /* Enough space for 2 64K ICMP packets, including
1161 * sk_buff struct overhead. 1167 * sk_buff struct overhead.
1162 */ 1168 */
1163 per_cpu(__icmp_socket, i)->sk->sk_sndbuf = 1169 sk->sk_sndbuf =
1164 (2 * ((64 * 1024) + sizeof(struct sk_buff))); 1170 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
1165 1171
1166 inet = inet_sk(per_cpu(__icmp_socket, i)->sk); 1172 inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
1167 inet->uc_ttl = -1;
1168 inet->pmtudisc = IP_PMTUDISC_DONT;
1169
1170 /* Unhash it so that IP input processing does not even
1171 * see it, we do not wish this socket to see incoming
1172 * packets.
1173 */
1174 per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk);
1175 } 1173 }
1174
1175 /* Control parameters for ECHO replies. */
1176 net->ipv4.sysctl_icmp_echo_ignore_all = 0;
1177 net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1;
1178
1179 /* Control parameter - ignore bogus broadcast responses? */
1180 net->ipv4.sysctl_icmp_ignore_bogus_error_responses = 1;
1181
1182 /*
1183 * Configurable global rate limit.
1184 *
1185 * ratelimit defines tokens/packet consumed for dst->rate_token
1186 * bucket; ratemask defines which icmp types are ratelimited by
1187 * setting its bit position.
1188 *
1189 * default:
1190 * dest unreachable (3), source quench (4),
1191 * time exceeded (11), parameter problem (12)
1192 */
1193
1194 net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
1195 net->ipv4.sysctl_icmp_ratemask = 0x1818;
1196 net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
1197
1198 return 0;
1199
1200fail:
1201 for_each_possible_cpu(i)
1202 inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]);
1203 kfree(net->ipv4.icmp_sk);
1204 return err;
1205}
1206
1207static struct pernet_operations __net_initdata icmp_sk_ops = {
1208 .init = icmp_sk_init,
1209 .exit = icmp_sk_exit,
1210};
1211
1212int __init icmp_init(void)
1213{
1214 return register_pernet_device(&icmp_sk_ops);
1176} 1215}
1177 1216
1178EXPORT_SYMBOL(icmp_err_convert); 1217EXPORT_SYMBOL(icmp_err_convert);
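Editorial note: the 0x1818 default set just above encodes exactly the four ICMP types the comment lists, each rate-limited type owning the bit at its own type number. A standalone C sketch (userspace; type values hard-coded from RFC 792 rather than pulled from the kernel headers) reproduces the constant:

#include <stdio.h>

/* ICMP type numbers per RFC 792; they correspond to the kernel's
 * ICMP_DEST_UNREACH, ICMP_SOURCE_QUENCH, ICMP_TIME_EXCEEDED and
 * ICMP_PARAMETERPROB definitions. */
enum {
	DEST_UNREACH  = 3,
	SOURCE_QUENCH = 4,
	TIME_EXCEEDED = 11,
	PARAMETERPROB = 12,
};

int main(void)
{
	/* Each rate-limited type contributes the bit at its own position. */
	unsigned int ratemask = (1u << DEST_UNREACH) |
				(1u << SOURCE_QUENCH) |
				(1u << TIME_EXCEEDED) |
				(1u << PARAMETERPROB);

	printf("ratemask = %#x\n", ratemask);	/* prints 0x1818 */
	return 0;
}

The sk_sndbuf sizing earlier in icmp_sk_init() is the same kind of arithmetic: room for two maximal 64K ICMP packets plus the sk_buff overhead each one carries.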
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 732cd07e6071..6250f4239b61 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -130,12 +130,12 @@
130 */ 130 */
131 131
132#define IGMP_V1_SEEN(in_dev) \ 132#define IGMP_V1_SEEN(in_dev) \
133 (IPV4_DEVCONF_ALL(in_dev->dev->nd_net, FORCE_IGMP_VERSION) == 1 || \ 133 (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \
134 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ 134 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
135 ((in_dev)->mr_v1_seen && \ 135 ((in_dev)->mr_v1_seen && \
136 time_before(jiffies, (in_dev)->mr_v1_seen))) 136 time_before(jiffies, (in_dev)->mr_v1_seen)))
137#define IGMP_V2_SEEN(in_dev) \ 137#define IGMP_V2_SEEN(in_dev) \
138 (IPV4_DEVCONF_ALL(in_dev->dev->nd_net, FORCE_IGMP_VERSION) == 2 || \ 138 (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \
139 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ 139 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
140 ((in_dev)->mr_v2_seen && \ 140 ((in_dev)->mr_v2_seen && \
141 time_before(jiffies, (in_dev)->mr_v2_seen))) 141 time_before(jiffies, (in_dev)->mr_v2_seen)))
@@ -948,7 +948,7 @@ int igmp_rcv(struct sk_buff *skb)
948 case IGMPV2_HOST_MEMBERSHIP_REPORT: 948 case IGMPV2_HOST_MEMBERSHIP_REPORT:
949 case IGMPV3_HOST_MEMBERSHIP_REPORT: 949 case IGMPV3_HOST_MEMBERSHIP_REPORT:
950 /* Is it our report looped back? */ 950 /* Is it our report looped back? */
951 if (((struct rtable*)skb->dst)->fl.iif == 0) 951 if (skb->rtable->fl.iif == 0)
952 break; 952 break;
953 /* don't rely on MC router hearing unicast reports */ 953 /* don't rely on MC router hearing unicast reports */
954 if (skb->pkt_type == PACKET_MULTICAST || 954 if (skb->pkt_type == PACKET_MULTICAST ||
@@ -1198,6 +1198,9 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1198 1198
1199 ASSERT_RTNL(); 1199 ASSERT_RTNL();
1200 1200
1201 if (dev_net(in_dev->dev) != &init_net)
1202 return;
1203
1201 for (im=in_dev->mc_list; im; im=im->next) { 1204 for (im=in_dev->mc_list; im; im=im->next) {
1202 if (im->multiaddr == addr) { 1205 if (im->multiaddr == addr) {
1203 im->users++; 1206 im->users++;
@@ -1277,6 +1280,9 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
1277 1280
1278 ASSERT_RTNL(); 1281 ASSERT_RTNL();
1279 1282
1283 if (dev_net(in_dev->dev) != &init_net)
1284 return;
1285
1280 for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { 1286 for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
1281 if (i->multiaddr==addr) { 1287 if (i->multiaddr==addr) {
1282 if (--i->users == 0) { 1288 if (--i->users == 0) {
@@ -1304,6 +1310,9 @@ void ip_mc_down(struct in_device *in_dev)
1304 1310
1305 ASSERT_RTNL(); 1311 ASSERT_RTNL();
1306 1312
1313 if (dev_net(in_dev->dev) != &init_net)
1314 return;
1315
1307 for (i=in_dev->mc_list; i; i=i->next) 1316 for (i=in_dev->mc_list; i; i=i->next)
1308 igmp_group_dropped(i); 1317 igmp_group_dropped(i);
1309 1318
@@ -1324,6 +1333,9 @@ void ip_mc_init_dev(struct in_device *in_dev)
1324{ 1333{
1325 ASSERT_RTNL(); 1334 ASSERT_RTNL();
1326 1335
1336 if (dev_net(in_dev->dev) != &init_net)
1337 return;
1338
1327 in_dev->mc_tomb = NULL; 1339 in_dev->mc_tomb = NULL;
1328#ifdef CONFIG_IP_MULTICAST 1340#ifdef CONFIG_IP_MULTICAST
1329 in_dev->mr_gq_running = 0; 1341 in_dev->mr_gq_running = 0;
@@ -1347,6 +1359,9 @@ void ip_mc_up(struct in_device *in_dev)
1347 1359
1348 ASSERT_RTNL(); 1360 ASSERT_RTNL();
1349 1361
1362 if (dev_net(in_dev->dev) != &init_net)
1363 return;
1364
1350 ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); 1365 ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
1351 1366
1352 for (i=in_dev->mc_list; i; i=i->next) 1367 for (i=in_dev->mc_list; i; i=i->next)
@@ -1363,6 +1378,9 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
1363 1378
1364 ASSERT_RTNL(); 1379 ASSERT_RTNL();
1365 1380
1381 if (dev_net(in_dev->dev) != &init_net)
1382 return;
1383
1366 /* Deactivate timers */ 1384 /* Deactivate timers */
1367 ip_mc_down(in_dev); 1385 ip_mc_down(in_dev);
1368 1386
@@ -1744,6 +1762,9 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
1744 if (!ipv4_is_multicast(addr)) 1762 if (!ipv4_is_multicast(addr))
1745 return -EINVAL; 1763 return -EINVAL;
1746 1764
1765 if (sock_net(sk) != &init_net)
1766 return -EPROTONOSUPPORT;
1767
1747 rtnl_lock(); 1768 rtnl_lock();
1748 1769
1749 in_dev = ip_mc_find_dev(imr); 1770 in_dev = ip_mc_find_dev(imr);
@@ -1812,6 +1833,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
1812 u32 ifindex; 1833 u32 ifindex;
1813 int ret = -EADDRNOTAVAIL; 1834 int ret = -EADDRNOTAVAIL;
1814 1835
1836 if (sock_net(sk) != &init_net)
1837 return -EPROTONOSUPPORT;
1838
1815 rtnl_lock(); 1839 rtnl_lock();
1816 in_dev = ip_mc_find_dev(imr); 1840 in_dev = ip_mc_find_dev(imr);
1817 ifindex = imr->imr_ifindex; 1841 ifindex = imr->imr_ifindex;
@@ -1857,6 +1881,9 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
1857 if (!ipv4_is_multicast(addr)) 1881 if (!ipv4_is_multicast(addr))
1858 return -EINVAL; 1882 return -EINVAL;
1859 1883
1884 if (sock_net(sk) != &init_net)
1885 return -EPROTONOSUPPORT;
1886
1860 rtnl_lock(); 1887 rtnl_lock();
1861 1888
1862 imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; 1889 imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr;
@@ -1990,6 +2017,9 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
1990 msf->imsf_fmode != MCAST_EXCLUDE) 2017 msf->imsf_fmode != MCAST_EXCLUDE)
1991 return -EINVAL; 2018 return -EINVAL;
1992 2019
2020 if (sock_net(sk) != &init_net)
2021 return -EPROTONOSUPPORT;
2022
1993 rtnl_lock(); 2023 rtnl_lock();
1994 2024
1995 imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; 2025 imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
@@ -2070,6 +2100,9 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
2070 if (!ipv4_is_multicast(addr)) 2100 if (!ipv4_is_multicast(addr))
2071 return -EINVAL; 2101 return -EINVAL;
2072 2102
2103 if (sock_net(sk) != &init_net)
2104 return -EPROTONOSUPPORT;
2105
2073 rtnl_lock(); 2106 rtnl_lock();
2074 2107
2075 imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; 2108 imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
@@ -2132,6 +2165,9 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
2132 if (!ipv4_is_multicast(addr)) 2165 if (!ipv4_is_multicast(addr))
2133 return -EINVAL; 2166 return -EINVAL;
2134 2167
2168 if (sock_net(sk) != &init_net)
2169 return -EPROTONOSUPPORT;
2170
2135 rtnl_lock(); 2171 rtnl_lock();
2136 2172
2137 err = -EADDRNOTAVAIL; 2173 err = -EADDRNOTAVAIL;
@@ -2216,6 +2252,9 @@ void ip_mc_drop_socket(struct sock *sk)
2216 if (inet->mc_list == NULL) 2252 if (inet->mc_list == NULL)
2217 return; 2253 return;
2218 2254
2255 if (sock_net(sk) != &init_net)
2256 return;
2257
2219 rtnl_lock(); 2258 rtnl_lock();
2220 while ((iml = inet->mc_list) != NULL) { 2259 while ((iml = inet->mc_list) != NULL) {
2221 struct in_device *in_dev; 2260 struct in_device *in_dev;
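Editorial note: every igmp.c entry point above gains the same interim guard until multicast is made namespace-aware. A sketch of the idiom with hypothetical function names (the two return conventions are the point, not the bodies):

/* Sketch of the interim netns guard: device-driven paths return
 * silently, while socket-driven paths report -EPROTONOSUPPORT so a
 * caller in a child namespace gets an explicit error. */
static void some_device_path(struct in_device *in_dev)
{
	if (dev_net(in_dev->dev) != &init_net)
		return;		/* ignore events from other namespaces */
	/* ... real work ... */
}

static int some_socket_path(struct sock *sk)
{
	if (sock_net(sk) != &init_net)
		return -EPROTONOSUPPORT;
	/* ... real work ... */
	return 0;
}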
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b189278c7bc1..8d70cfbacb78 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -80,12 +80,12 @@ EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
80 */ 80 */
81int inet_csk_get_port(struct sock *sk, unsigned short snum) 81int inet_csk_get_port(struct sock *sk, unsigned short snum)
82{ 82{
83 struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; 83 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
84 struct inet_bind_hashbucket *head; 84 struct inet_bind_hashbucket *head;
85 struct hlist_node *node; 85 struct hlist_node *node;
86 struct inet_bind_bucket *tb; 86 struct inet_bind_bucket *tb;
87 int ret; 87 int ret;
88 struct net *net = sk->sk_net; 88 struct net *net = sock_net(sk);
89 89
90 local_bh_disable(); 90 local_bh_disable();
91 if (!snum) { 91 if (!snum) {
@@ -133,8 +133,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
133 goto tb_not_found; 133 goto tb_not_found;
134tb_found: 134tb_found:
135 if (!hlist_empty(&tb->owners)) { 135 if (!hlist_empty(&tb->owners)) {
136 if (sk->sk_reuse > 1)
137 goto success;
138 if (tb->fastreuse > 0 && 136 if (tb->fastreuse > 0 &&
139 sk->sk_reuse && sk->sk_state != TCP_LISTEN) { 137 sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
140 goto success; 138 goto success;
@@ -333,7 +331,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
333 .dport = ireq->rmt_port } } }; 331 .dport = ireq->rmt_port } } };
334 332
335 security_req_classify_flow(req, &fl); 333 security_req_classify_flow(req, &fl);
336 if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) { 334 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) {
337 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 335 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
338 return NULL; 336 return NULL;
339 } 337 }
@@ -414,8 +412,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
414 struct inet_connection_sock *icsk = inet_csk(parent); 412 struct inet_connection_sock *icsk = inet_csk(parent);
415 struct request_sock_queue *queue = &icsk->icsk_accept_queue; 413 struct request_sock_queue *queue = &icsk->icsk_accept_queue;
416 struct listen_sock *lopt = queue->listen_opt; 414 struct listen_sock *lopt = queue->listen_opt;
417 int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 415 int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
418 int thresh = max_retries;
419 unsigned long now = jiffies; 416 unsigned long now = jiffies;
420 struct request_sock **reqp, *req; 417 struct request_sock **reqp, *req;
421 int i, budget; 418 int i, budget;
@@ -451,9 +448,6 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
451 } 448 }
452 } 449 }
453 450
454 if (queue->rskq_defer_accept)
455 max_retries = queue->rskq_defer_accept;
456
457 budget = 2 * (lopt->nr_table_entries / (timeout / interval)); 451 budget = 2 * (lopt->nr_table_entries / (timeout / interval));
458 i = lopt->clock_hand; 452 i = lopt->clock_hand;
459 453
@@ -461,9 +455,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
461 reqp=&lopt->syn_table[i]; 455 reqp=&lopt->syn_table[i];
462 while ((req = *reqp) != NULL) { 456 while ((req = *reqp) != NULL) {
463 if (time_after_eq(now, req->expires)) { 457 if (time_after_eq(now, req->expires)) {
464 if ((req->retrans < thresh || 458 if (req->retrans < thresh &&
465 (inet_rsk(req)->acked && req->retrans < max_retries)) 459 !req->rsk_ops->rtx_syn_ack(parent, req)) {
466 && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
467 unsigned long timeo; 460 unsigned long timeo;
468 461
469 if (req->retrans++ == 0) 462 if (req->retrans++ == 0)
@@ -656,25 +649,6 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
656 649
657EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); 650EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
658 651
659int inet_csk_ctl_sock_create(struct socket **sock, unsigned short family,
660 unsigned short type, unsigned char protocol)
661{
662 int rc = sock_create_kern(family, type, protocol, sock);
663
664 if (rc == 0) {
665 (*sock)->sk->sk_allocation = GFP_ATOMIC;
666 inet_sk((*sock)->sk)->uc_ttl = -1;
667 /*
668 * Unhash it so that IP input processing does not even see it,
669 * we do not wish this socket to see incoming packets.
670 */
671 (*sock)->sk->sk_prot->unhash((*sock)->sk);
672 }
673 return rc;
674}
675
676EXPORT_SYMBOL_GPL(inet_csk_ctl_sock_create);
677
678#ifdef CONFIG_COMPAT 652#ifdef CONFIG_COMPAT
679int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, 653int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
680 char __user *optval, int __user *optlen) 654 char __user *optval, int __user *optlen)
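Editorial note: the sk->sk_prot->hashinfo dereferences in this file (and in inet_hashtables.c below) become sk->sk_prot->h.hashinfo because the per-protocol lookup-table pointer now sits in a union inside struct proto. A sketch of that slot, with the member set assumed from the headers of this kernel generation:

/* Sketch: one union slot in struct proto, shared by the different
 * lookup-table shapes the inet protocols use. */
union proto_hash_sketch {
	struct inet_hashinfo	*hashinfo;	/* TCP-style protocols */
	struct hlist_head	*udp_hash;	/* UDP and UDP-Lite */
	struct raw_hashinfo	*raw_hash;	/* raw sockets */
};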
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index a0a3c78cb5e0..4ed429bd5951 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -107,10 +107,10 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
107 if (del_timer(&fq->timer)) 107 if (del_timer(&fq->timer))
108 atomic_dec(&fq->refcnt); 108 atomic_dec(&fq->refcnt);
109 109
110 if (!(fq->last_in & COMPLETE)) { 110 if (!(fq->last_in & INET_FRAG_COMPLETE)) {
111 fq_unlink(fq, f); 111 fq_unlink(fq, f);
112 atomic_dec(&fq->refcnt); 112 atomic_dec(&fq->refcnt);
113 fq->last_in |= COMPLETE; 113 fq->last_in |= INET_FRAG_COMPLETE;
114 } 114 }
115} 115}
116 116
@@ -134,7 +134,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
134 struct sk_buff *fp; 134 struct sk_buff *fp;
135 struct netns_frags *nf; 135 struct netns_frags *nf;
136 136
137 BUG_TRAP(q->last_in & COMPLETE); 137 BUG_TRAP(q->last_in & INET_FRAG_COMPLETE);
138 BUG_TRAP(del_timer(&q->timer) == 0); 138 BUG_TRAP(del_timer(&q->timer) == 0);
139 139
140 /* Release all fragment data. */ 140 /* Release all fragment data. */
@@ -177,7 +177,7 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f)
177 read_unlock(&f->lock); 177 read_unlock(&f->lock);
178 178
179 spin_lock(&q->lock); 179 spin_lock(&q->lock);
180 if (!(q->last_in & COMPLETE)) 180 if (!(q->last_in & INET_FRAG_COMPLETE))
181 inet_frag_kill(q, f); 181 inet_frag_kill(q, f);
182 spin_unlock(&q->lock); 182 spin_unlock(&q->lock);
183 183
@@ -209,7 +209,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
209 if (qp->net == nf && f->match(qp, arg)) { 209 if (qp->net == nf && f->match(qp, arg)) {
210 atomic_inc(&qp->refcnt); 210 atomic_inc(&qp->refcnt);
211 write_unlock(&f->lock); 211 write_unlock(&f->lock);
212 qp_in->last_in |= COMPLETE; 212 qp_in->last_in |= INET_FRAG_COMPLETE;
213 inet_frag_put(qp_in, f); 213 inet_frag_put(qp_in, f);
214 return qp; 214 return qp;
215 } 215 }
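Editorial note: the COMPLETE/FIRST_IN/LAST_IN renames here and in ip_fragment.c below touch plain flag bits on inet_frag_queue->last_in. A sketch of the namespaced definitions, with the values assumed to carry over unchanged from the old private macros:

/* Sketch: reassembly-queue state bits, prefixed so the several users
 * of the shared inet_fragment code cannot collide on bare names. */
#define INET_FRAG_COMPLETE	4	/* queue finished: reassembled or killed */
#define INET_FRAG_FIRST_IN	2	/* fragment at offset 0 has arrived */
#define INET_FRAG_LAST_IN	1	/* fragment without MF set has arrived */

As the ip_frag_queue() hunk below shows, reassembly only fires once both FIRST_IN and LAST_IN are set and the accumulated data (q.meat) matches the expected total length (q.len).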
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 1aba606f6bbb..32ca2f8b581c 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -68,7 +68,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
68 */ 68 */
69static void __inet_put_port(struct sock *sk) 69static void __inet_put_port(struct sock *sk)
70{ 70{
71 struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; 71 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
72 const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); 72 const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
73 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; 73 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
74 struct inet_bind_bucket *tb; 74 struct inet_bind_bucket *tb;
@@ -139,7 +139,7 @@ static struct sock *inet_lookup_listener_slow(struct net *net,
139 sk_for_each(sk, node, head) { 139 sk_for_each(sk, node, head) {
140 const struct inet_sock *inet = inet_sk(sk); 140 const struct inet_sock *inet = inet_sk(sk);
141 141
142 if (sk->sk_net == net && inet->num == hnum && 142 if (net_eq(sock_net(sk), net) && inet->num == hnum &&
143 !ipv6_only_sock(sk)) { 143 !ipv6_only_sock(sk)) {
144 const __be32 rcv_saddr = inet->rcv_saddr; 144 const __be32 rcv_saddr = inet->rcv_saddr;
145 int score = sk->sk_family == PF_INET ? 1 : 0; 145 int score = sk->sk_family == PF_INET ? 1 : 0;
@@ -182,7 +182,7 @@ struct sock *__inet_lookup_listener(struct net *net,
182 if (inet->num == hnum && !sk->sk_node.next && 182 if (inet->num == hnum && !sk->sk_node.next &&
183 (!inet->rcv_saddr || inet->rcv_saddr == daddr) && 183 (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
184 (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && 184 (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
185 !sk->sk_bound_dev_if && sk->sk_net == net) 185 !sk->sk_bound_dev_if && net_eq(sock_net(sk), net))
186 goto sherry_cache; 186 goto sherry_cache;
187 sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); 187 sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif);
188 } 188 }
@@ -254,7 +254,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
254 struct sock *sk2; 254 struct sock *sk2;
255 const struct hlist_node *node; 255 const struct hlist_node *node;
256 struct inet_timewait_sock *tw; 256 struct inet_timewait_sock *tw;
257 struct net *net = sk->sk_net; 257 struct net *net = sock_net(sk);
258 258
259 prefetch(head->chain.first); 259 prefetch(head->chain.first);
260 write_lock(lock); 260 write_lock(lock);
@@ -288,7 +288,7 @@ unique:
288 sk->sk_hash = hash; 288 sk->sk_hash = hash;
289 BUG_TRAP(sk_unhashed(sk)); 289 BUG_TRAP(sk_unhashed(sk));
290 __sk_add_node(sk, &head->chain); 290 __sk_add_node(sk, &head->chain);
291 sock_prot_inuse_add(sk->sk_prot, 1); 291 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
292 write_unlock(lock); 292 write_unlock(lock);
293 293
294 if (twp) { 294 if (twp) {
@@ -318,7 +318,7 @@ static inline u32 inet_sk_port_offset(const struct sock *sk)
318 318
319void __inet_hash_nolisten(struct sock *sk) 319void __inet_hash_nolisten(struct sock *sk)
320{ 320{
321 struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; 321 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
322 struct hlist_head *list; 322 struct hlist_head *list;
323 rwlock_t *lock; 323 rwlock_t *lock;
324 struct inet_ehash_bucket *head; 324 struct inet_ehash_bucket *head;
@@ -332,14 +332,14 @@ void __inet_hash_nolisten(struct sock *sk)
332 332
333 write_lock(lock); 333 write_lock(lock);
334 __sk_add_node(sk, list); 334 __sk_add_node(sk, list);
335 sock_prot_inuse_add(sk->sk_prot, 1); 335 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
336 write_unlock(lock); 336 write_unlock(lock);
337} 337}
338EXPORT_SYMBOL_GPL(__inet_hash_nolisten); 338EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
339 339
340static void __inet_hash(struct sock *sk) 340static void __inet_hash(struct sock *sk)
341{ 341{
342 struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; 342 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
343 struct hlist_head *list; 343 struct hlist_head *list;
344 rwlock_t *lock; 344 rwlock_t *lock;
345 345
@@ -354,7 +354,7 @@ static void __inet_hash(struct sock *sk)
354 354
355 inet_listen_wlock(hashinfo); 355 inet_listen_wlock(hashinfo);
356 __sk_add_node(sk, list); 356 __sk_add_node(sk, list);
357 sock_prot_inuse_add(sk->sk_prot, 1); 357 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
358 write_unlock(lock); 358 write_unlock(lock);
359 wake_up(&hashinfo->lhash_wait); 359 wake_up(&hashinfo->lhash_wait);
360} 360}
@@ -372,7 +372,7 @@ EXPORT_SYMBOL_GPL(inet_hash);
372void inet_unhash(struct sock *sk) 372void inet_unhash(struct sock *sk)
373{ 373{
374 rwlock_t *lock; 374 rwlock_t *lock;
375 struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; 375 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
376 376
377 if (sk_unhashed(sk)) 377 if (sk_unhashed(sk))
378 goto out; 378 goto out;
@@ -387,7 +387,7 @@ void inet_unhash(struct sock *sk)
387 } 387 }
388 388
389 if (__sk_del_node_init(sk)) 389 if (__sk_del_node_init(sk))
390 sock_prot_inuse_add(sk->sk_prot, -1); 390 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
391 write_unlock_bh(lock); 391 write_unlock_bh(lock);
392out: 392out:
393 if (sk->sk_state == TCP_LISTEN) 393 if (sk->sk_state == TCP_LISTEN)
@@ -406,7 +406,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
406 struct inet_bind_hashbucket *head; 406 struct inet_bind_hashbucket *head;
407 struct inet_bind_bucket *tb; 407 struct inet_bind_bucket *tb;
408 int ret; 408 int ret;
409 struct net *net = sk->sk_net; 409 struct net *net = sock_net(sk);
410 410
411 if (!snum) { 411 if (!snum) {
412 int i, remaining, low, high, port; 412 int i, remaining, low, high, port;
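Editorial note: the lookup paths stop comparing sk->sk_net directly and go through net_eq(sock_net(sk), net), which lets the namespace check vanish on kernels built without namespace support. A simplified sketch of the accessor pair (details trimmed; treat it as an approximation of net_namespace.h, not a copy):

/* Sketch: with CONFIG_NET_NS=n every socket implicitly lives in
 * init_net, so net_eq() folds to constant 1 and the compiler drops
 * the pointer compare from the lookup fast paths. */
static inline struct net *sock_net(const struct sock *sk)
{
#ifdef CONFIG_NET_NS
	return sk->sk_net;
#else
	return &init_net;
#endif
}

static inline int net_eq(const struct net *net1, const struct net *net2)
{
#ifdef CONFIG_NET_NS
	return net1 == net2;
#else
	return 1;
#endif
}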
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 717c411a5c6b..a74137866fbc 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -91,7 +91,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
91 91
92 /* Step 2: Remove SK from established hash. */ 92 /* Step 2: Remove SK from established hash. */
93 if (__sk_del_node_init(sk)) 93 if (__sk_del_node_init(sk))
94 sock_prot_inuse_add(sk->sk_prot, -1); 94 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
95 95
96 /* Step 3: Hash TW into TIMEWAIT chain. */ 96 /* Step 3: Hash TW into TIMEWAIT chain. */
97 inet_twsk_add_node(tw, &ehead->twchain); 97 inet_twsk_add_node(tw, &ehead->twchain);
@@ -124,7 +124,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
124 tw->tw_hash = sk->sk_hash; 124 tw->tw_hash = sk->sk_hash;
125 tw->tw_ipv6only = 0; 125 tw->tw_ipv6only = 0;
126 tw->tw_prot = sk->sk_prot_creator; 126 tw->tw_prot = sk->sk_prot_creator;
127 tw->tw_net = sk->sk_net; 127 twsk_net_set(tw, sock_net(sk));
128 atomic_set(&tw->tw_refcnt, 1); 128 atomic_set(&tw->tw_refcnt, 1);
129 inet_twsk_dead_node_init(tw); 129 inet_twsk_dead_node_init(tw);
130 __module_get(tw->tw_prot->owner); 130 __module_get(tw->tw_prot->owner);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index a4506c8cfef0..4813c39b438b 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -80,7 +80,7 @@ int ip_forward(struct sk_buff *skb)
80 if (!xfrm4_route_forward(skb)) 80 if (!xfrm4_route_forward(skb))
81 goto drop; 81 goto drop;
82 82
83 rt = (struct rtable*)skb->dst; 83 rt = skb->rtable;
84 84
85 if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 85 if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
86 goto sr_failed; 86 goto sr_failed;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 3b2e5adca838..02ae470fe60c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -194,7 +194,7 @@ static void ip_expire(unsigned long arg)
194 194
195 spin_lock(&qp->q.lock); 195 spin_lock(&qp->q.lock);
196 196
197 if (qp->q.last_in & COMPLETE) 197 if (qp->q.last_in & INET_FRAG_COMPLETE)
198 goto out; 198 goto out;
199 199
200 ipq_kill(qp); 200 ipq_kill(qp);
@@ -202,10 +202,13 @@ static void ip_expire(unsigned long arg)
202 IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT); 202 IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
203 IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); 203 IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
204 204
205 if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) { 205 if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
206 struct sk_buff *head = qp->q.fragments; 206 struct sk_buff *head = qp->q.fragments;
207 struct net *net;
208
209 net = container_of(qp->q.net, struct net, ipv4.frags);
207 /* Send an ICMP "Fragment Reassembly Timeout" message. */ 210 /* Send an ICMP "Fragment Reassembly Timeout" message. */
208 if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) { 211 if ((head->dev = dev_get_by_index(net, qp->iif)) != NULL) {
209 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); 212 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
210 dev_put(head->dev); 213 dev_put(head->dev);
211 } 214 }
@@ -298,7 +301,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
298 int ihl, end; 301 int ihl, end;
299 int err = -ENOENT; 302 int err = -ENOENT;
300 303
301 if (qp->q.last_in & COMPLETE) 304 if (qp->q.last_in & INET_FRAG_COMPLETE)
302 goto err; 305 goto err;
303 306
304 if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && 307 if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
@@ -324,9 +327,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
324 * or have different end, the segment is corrupted. 327 * or have different end, the segment is corrupted.
325 */ 328 */
326 if (end < qp->q.len || 329 if (end < qp->q.len ||
327 ((qp->q.last_in & LAST_IN) && end != qp->q.len)) 330 ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len))
328 goto err; 331 goto err;
329 qp->q.last_in |= LAST_IN; 332 qp->q.last_in |= INET_FRAG_LAST_IN;
330 qp->q.len = end; 333 qp->q.len = end;
331 } else { 334 } else {
332 if (end&7) { 335 if (end&7) {
@@ -336,7 +339,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
336 } 339 }
337 if (end > qp->q.len) { 340 if (end > qp->q.len) {
338 /* Some bits beyond end -> corruption. */ 341 /* Some bits beyond end -> corruption. */
339 if (qp->q.last_in & LAST_IN) 342 if (qp->q.last_in & INET_FRAG_LAST_IN)
340 goto err; 343 goto err;
341 qp->q.len = end; 344 qp->q.len = end;
342 } 345 }
@@ -435,9 +438,10 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
435 qp->q.meat += skb->len; 438 qp->q.meat += skb->len;
436 atomic_add(skb->truesize, &qp->q.net->mem); 439 atomic_add(skb->truesize, &qp->q.net->mem);
437 if (offset == 0) 440 if (offset == 0)
438 qp->q.last_in |= FIRST_IN; 441 qp->q.last_in |= INET_FRAG_FIRST_IN;
439 442
440 if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len) 443 if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
444 qp->q.meat == qp->q.len)
441 return ip_frag_reasm(qp, prev, dev); 445 return ip_frag_reasm(qp, prev, dev);
442 446
443 write_lock(&ip4_frags.lock); 447 write_lock(&ip4_frags.lock);
@@ -568,7 +572,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
568 572
569 IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); 573 IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
570 574
571 net = skb->dev ? skb->dev->nd_net : skb->dst->dev->nd_net; 575 net = skb->dev ? dev_net(skb->dev) : dev_net(skb->dst->dev);
572 /* Start by cleaning up the memory. */ 576 /* Start by cleaning up the memory. */
573 if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh) 577 if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
574 ip_evictor(net); 578 ip_evictor(net);
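Editorial note: the ip_expire() hunk above recovers struct net from the embedded netns_frags with container_of(), since the timer callback only holds the inner pointer. A standalone C demonstration of the same pointer arithmetic (toy struct names, simplified form of the kernel macro):

#include <stddef.h>
#include <stdio.h>

/* Simplified container_of: step back from a pointer to an embedded
 * member to the structure that embeds it. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Toy stand-ins for struct netns_frags nested inside struct net. */
struct frags_toy { int high_thresh; };
struct net_toy   { int id; struct frags_toy frags; };

int main(void)
{
	struct net_toy net = { .id = 42 };
	struct frags_toy *inner = &net.frags;

	/* Same recovery ip_expire() performs on qp->q.net. */
	struct net_toy *outer = container_of(inner, struct net_toy, frags);

	printf("id = %d\n", outer->id);	/* prints 42 */
	return 0;
}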
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index e7821ba7a9a0..50972b397a9a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -619,7 +619,7 @@ static int ipgre_rcv(struct sk_buff *skb)
619#ifdef CONFIG_NET_IPGRE_BROADCAST 619#ifdef CONFIG_NET_IPGRE_BROADCAST
620 if (ipv4_is_multicast(iph->daddr)) { 620 if (ipv4_is_multicast(iph->daddr)) {
621 /* Looped back packet, drop it! */ 621 /* Looped back packet, drop it! */
622 if (((struct rtable*)skb->dst)->fl.iif == 0) 622 if (skb->rtable->fl.iif == 0)
623 goto drop; 623 goto drop;
624 tunnel->stat.multicast++; 624 tunnel->stat.multicast++;
625 skb->pkt_type = PACKET_BROADCAST; 625 skb->pkt_type = PACKET_BROADCAST;
@@ -699,7 +699,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
699 } 699 }
700 700
701 if (skb->protocol == htons(ETH_P_IP)) { 701 if (skb->protocol == htons(ETH_P_IP)) {
702 rt = (struct rtable*)skb->dst; 702 rt = skb->rtable;
703 if ((dst = rt->rt_gateway) == 0) 703 if ((dst = rt->rt_gateway) == 0)
704 goto tx_error_icmp; 704 goto tx_error_icmp;
705 } 705 }
@@ -1190,7 +1190,7 @@ static int ipgre_close(struct net_device *dev)
1190 struct ip_tunnel *t = netdev_priv(dev); 1190 struct ip_tunnel *t = netdev_priv(dev);
1191 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { 1191 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1192 struct in_device *in_dev; 1192 struct in_device *in_dev;
1193 in_dev = inetdev_by_index(dev->nd_net, t->mlink); 1193 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1194 if (in_dev) { 1194 if (in_dev) {
1195 ip_mc_dec_group(in_dev, t->parms.iph.daddr); 1195 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1196 in_dev_put(in_dev); 1196 in_dev_put(in_dev);
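Editorial note: the fl.iif == 0 test in the ipgre_rcv() hunk, like the ones in igmp_rcv() above and ipmr.c below, keys off a route property: routes created for output carry no input interface, so a packet met on the receive path whose attached route has iif == 0 is our own transmission looped back. Written as a helper it would read (hypothetical name, not in the tree):

/* Sketch: a packet whose route records no input interface was
 * generated locally; meeting it on the receive path means the stack
 * looped our own transmission back to us. */
static inline int looped_back(const struct sk_buff *skb)
{
	return skb->rtable->fl.iif == 0;
}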
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 65631391d479..4be00959b748 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -160,6 +160,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
160 struct ip_ra_chain *ra; 160 struct ip_ra_chain *ra;
161 u8 protocol = ip_hdr(skb)->protocol; 161 u8 protocol = ip_hdr(skb)->protocol;
162 struct sock *last = NULL; 162 struct sock *last = NULL;
163 struct net_device *dev = skb->dev;
163 164
164 read_lock(&ip_ra_lock); 165 read_lock(&ip_ra_lock);
165 for (ra = ip_ra_chain; ra; ra = ra->next) { 166 for (ra = ip_ra_chain; ra; ra = ra->next) {
@@ -170,7 +171,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
170 */ 171 */
171 if (sk && inet_sk(sk)->num == protocol && 172 if (sk && inet_sk(sk)->num == protocol &&
172 (!sk->sk_bound_dev_if || 173 (!sk->sk_bound_dev_if ||
173 sk->sk_bound_dev_if == skb->dev->ifindex)) { 174 sk->sk_bound_dev_if == dev->ifindex) &&
175 sock_net(sk) == dev_net(dev)) {
174 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 176 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
175 if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { 177 if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) {
176 read_unlock(&ip_ra_lock); 178 read_unlock(&ip_ra_lock);
@@ -197,6 +199,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
197 199
198static int ip_local_deliver_finish(struct sk_buff *skb) 200static int ip_local_deliver_finish(struct sk_buff *skb)
199{ 201{
202 struct net *net = dev_net(skb->dev);
203
200 __skb_pull(skb, ip_hdrlen(skb)); 204 __skb_pull(skb, ip_hdrlen(skb));
201 205
202 /* Point into the IP datagram, just past the header. */ 206 /* Point into the IP datagram, just past the header. */
@@ -212,7 +216,8 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
212 raw = raw_local_deliver(skb, protocol); 216 raw = raw_local_deliver(skb, protocol);
213 217
214 hash = protocol & (MAX_INET_PROTOS - 1); 218 hash = protocol & (MAX_INET_PROTOS - 1);
215 if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { 219 ipprot = rcu_dereference(inet_protos[hash]);
220 if (ipprot != NULL && (net == &init_net || ipprot->netns_ok)) {
216 int ret; 221 int ret;
217 222
218 if (!ipprot->no_policy) { 223 if (!ipprot->no_policy) {
@@ -283,13 +288,14 @@ static inline int ip_rcv_options(struct sk_buff *skb)
283 } 288 }
284 289
285 iph = ip_hdr(skb); 290 iph = ip_hdr(skb);
291 opt = &(IPCB(skb)->opt);
292 opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
286 293
287 if (ip_options_compile(NULL, skb)) { 294 if (ip_options_compile(dev_net(dev), opt, skb)) {
288 IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); 295 IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
289 goto drop; 296 goto drop;
290 } 297 }
291 298
292 opt = &(IPCB(skb)->opt);
293 if (unlikely(opt->srr)) { 299 if (unlikely(opt->srr)) {
294 struct in_device *in_dev = in_dev_get(dev); 300 struct in_device *in_dev = in_dev_get(dev);
295 if (in_dev) { 301 if (in_dev) {
@@ -351,7 +357,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
351 if (iph->ihl > 5 && ip_rcv_options(skb)) 357 if (iph->ihl > 5 && ip_rcv_options(skb))
352 goto drop; 358 goto drop;
353 359
354 rt = (struct rtable*)skb->dst; 360 rt = skb->rtable;
355 if (rt->rt_type == RTN_MULTICAST) 361 if (rt->rt_type == RTN_MULTICAST)
356 IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS); 362 IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS);
357 else if (rt->rt_type == RTN_BROADCAST) 363 else if (rt->rt_type == RTN_BROADCAST)
@@ -372,9 +378,6 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
372 struct iphdr *iph; 378 struct iphdr *iph;
373 u32 len; 379 u32 len;
374 380
375 if (dev->nd_net != &init_net)
376 goto drop;
377
378 /* When the interface is in promisc. mode, drop all the crap 381 /* When the interface is in promisc. mode, drop all the crap
379 * that it receives, do not try to analyse it. 382 * that it receives, do not try to analyse it.
380 */ 383 */
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 4d315158fd3c..d107543d3f81 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -45,7 +45,6 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
45 memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options)); 45 memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options));
46 memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen); 46 memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
47 opt = &(IPCB(skb)->opt); 47 opt = &(IPCB(skb)->opt);
48 opt->is_data = 0;
49 48
50 if (opt->srr) 49 if (opt->srr)
51 memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4); 50 memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4);
@@ -95,8 +94,6 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
95 94
96 memset(dopt, 0, sizeof(struct ip_options)); 95 memset(dopt, 0, sizeof(struct ip_options));
97 96
98 dopt->is_data = 1;
99
100 sopt = &(IPCB(skb)->opt); 97 sopt = &(IPCB(skb)->opt);
101 98
102 if (sopt->optlen == 0) { 99 if (sopt->optlen == 0) {
@@ -107,10 +104,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
107 sptr = skb_network_header(skb); 104 sptr = skb_network_header(skb);
108 dptr = dopt->__data; 105 dptr = dopt->__data;
109 106
110 if (skb->dst) 107 daddr = skb->rtable->rt_spec_dst;
111 daddr = ((struct rtable*)skb->dst)->rt_spec_dst;
112 else
113 daddr = ip_hdr(skb)->daddr;
114 108
115 if (sopt->rr) { 109 if (sopt->rr) {
116 optlen = sptr[sopt->rr+1]; 110 optlen = sptr[sopt->rr+1];
@@ -151,7 +145,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
151 __be32 addr; 145 __be32 addr;
152 146
153 memcpy(&addr, sptr+soffset-1, 4); 147 memcpy(&addr, sptr+soffset-1, 4);
154 if (inet_addr_type(&init_net, addr) != RTN_LOCAL) { 148 if (inet_addr_type(dev_net(skb->dst->dev), addr) != RTN_LOCAL) {
155 dopt->ts_needtime = 1; 149 dopt->ts_needtime = 1;
156 soffset += 8; 150 soffset += 8;
157 } 151 }
@@ -254,26 +248,22 @@ void ip_options_fragment(struct sk_buff * skb)
254 * If opt == NULL, then skb->data should point to IP header. 248 * If opt == NULL, then skb->data should point to IP header.
255 */ 249 */
256 250
257int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) 251int ip_options_compile(struct net *net,
252 struct ip_options * opt, struct sk_buff * skb)
258{ 253{
259 int l; 254 int l;
260 unsigned char * iph; 255 unsigned char * iph;
261 unsigned char * optptr; 256 unsigned char * optptr;
262 int optlen; 257 int optlen;
263 unsigned char * pp_ptr = NULL; 258 unsigned char * pp_ptr = NULL;
264 struct rtable *rt = skb ? (struct rtable*)skb->dst : NULL; 259 struct rtable *rt = NULL;
265 260
266 if (!opt) { 261 if (skb != NULL) {
267 opt = &(IPCB(skb)->opt); 262 rt = skb->rtable;
268 iph = skb_network_header(skb); 263 optptr = (unsigned char *)&(ip_hdr(skb)[1]);
269 opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr); 264 } else
270 optptr = iph + sizeof(struct iphdr); 265 optptr = opt->__data;
271 opt->is_data = 0; 266 iph = optptr - sizeof(struct iphdr);
272 } else {
273 optptr = opt->is_data ? opt->__data :
274 (unsigned char *)&(ip_hdr(skb)[1]);
275 iph = optptr - sizeof(struct iphdr);
276 }
277 267
278 for (l = opt->optlen; l > 0; ) { 268 for (l = opt->optlen; l > 0; ) {
279 switch (*optptr) { 269 switch (*optptr) {
@@ -400,7 +390,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
400 { 390 {
401 __be32 addr; 391 __be32 addr;
402 memcpy(&addr, &optptr[optptr[2]-1], 4); 392 memcpy(&addr, &optptr[optptr[2]-1], 4);
403 if (inet_addr_type(&init_net, addr) == RTN_UNICAST) 393 if (inet_addr_type(net, addr) == RTN_UNICAST)
404 break; 394 break;
405 if (skb) 395 if (skb)
406 timeptr = (__be32*)&optptr[optptr[2]+3]; 396 timeptr = (__be32*)&optptr[optptr[2]+3];
@@ -517,14 +507,13 @@ static struct ip_options *ip_options_get_alloc(const int optlen)
517 GFP_KERNEL); 507 GFP_KERNEL);
518} 508}
519 509
520static int ip_options_get_finish(struct ip_options **optp, 510static int ip_options_get_finish(struct net *net, struct ip_options **optp,
521 struct ip_options *opt, int optlen) 511 struct ip_options *opt, int optlen)
522{ 512{
523 while (optlen & 3) 513 while (optlen & 3)
524 opt->__data[optlen++] = IPOPT_END; 514 opt->__data[optlen++] = IPOPT_END;
525 opt->optlen = optlen; 515 opt->optlen = optlen;
526 opt->is_data = 1; 516 if (optlen && ip_options_compile(net, opt, NULL)) {
527 if (optlen && ip_options_compile(opt, NULL)) {
528 kfree(opt); 517 kfree(opt);
529 return -EINVAL; 518 return -EINVAL;
530 } 519 }
@@ -533,7 +522,8 @@ static int ip_options_get_finish(struct ip_options **optp,
533 return 0; 522 return 0;
534} 523}
535 524
536int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *data, int optlen) 525int ip_options_get_from_user(struct net *net, struct ip_options **optp,
526 unsigned char __user *data, int optlen)
537{ 527{
538 struct ip_options *opt = ip_options_get_alloc(optlen); 528 struct ip_options *opt = ip_options_get_alloc(optlen);
539 529
@@ -543,10 +533,11 @@ int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *dat
543 kfree(opt); 533 kfree(opt);
544 return -EFAULT; 534 return -EFAULT;
545 } 535 }
546 return ip_options_get_finish(optp, opt, optlen); 536 return ip_options_get_finish(net, optp, opt, optlen);
547} 537}
548 538
549int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen) 539int ip_options_get(struct net *net, struct ip_options **optp,
540 unsigned char *data, int optlen)
550{ 541{
551 struct ip_options *opt = ip_options_get_alloc(optlen); 542 struct ip_options *opt = ip_options_get_alloc(optlen);
552 543
@@ -554,14 +545,14 @@ int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen)
554 return -ENOMEM; 545 return -ENOMEM;
555 if (optlen) 546 if (optlen)
556 memcpy(opt->__data, data, optlen); 547 memcpy(opt->__data, data, optlen);
557 return ip_options_get_finish(optp, opt, optlen); 548 return ip_options_get_finish(net, optp, opt, optlen);
558} 549}
559 550
560void ip_forward_options(struct sk_buff *skb) 551void ip_forward_options(struct sk_buff *skb)
561{ 552{
562 struct ip_options * opt = &(IPCB(skb)->opt); 553 struct ip_options * opt = &(IPCB(skb)->opt);
563 unsigned char * optptr; 554 unsigned char * optptr;
564 struct rtable *rt = (struct rtable*)skb->dst; 555 struct rtable *rt = skb->rtable;
565 unsigned char *raw = skb_network_header(skb); 556 unsigned char *raw = skb_network_header(skb);
566 557
567 if (opt->rr_needaddr) { 558 if (opt->rr_needaddr) {
@@ -609,7 +600,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
609 __be32 nexthop; 600 __be32 nexthop;
610 struct iphdr *iph = ip_hdr(skb); 601 struct iphdr *iph = ip_hdr(skb);
611 unsigned char *optptr = skb_network_header(skb) + opt->srr; 602 unsigned char *optptr = skb_network_header(skb) + opt->srr;
612 struct rtable *rt = (struct rtable*)skb->dst; 603 struct rtable *rt = skb->rtable;
613 struct rtable *rt2; 604 struct rtable *rt2;
614 int err; 605 int err;
615 606
@@ -634,13 +625,13 @@ int ip_options_rcv_srr(struct sk_buff *skb)
634 } 625 }
635 memcpy(&nexthop, &optptr[srrptr-1], 4); 626 memcpy(&nexthop, &optptr[srrptr-1], 4);
636 627
637 rt = (struct rtable*)skb->dst; 628 rt = skb->rtable;
638 skb->dst = NULL; 629 skb->rtable = NULL;
639 err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); 630 err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
640 rt2 = (struct rtable*)skb->dst; 631 rt2 = skb->rtable;
641 if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { 632 if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
642 ip_rt_put(rt2); 633 ip_rt_put(rt2);
643 skb->dst = &rt->u.dst; 634 skb->rtable = rt;
644 return -EINVAL; 635 return -EINVAL;
645 } 636 }
646 ip_rt_put(rt); 637 ip_rt_put(rt);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 341779e685d9..08349267ceb4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -142,7 +142,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
142 __be32 saddr, __be32 daddr, struct ip_options *opt) 142 __be32 saddr, __be32 daddr, struct ip_options *opt)
143{ 143{
144 struct inet_sock *inet = inet_sk(sk); 144 struct inet_sock *inet = inet_sk(sk);
145 struct rtable *rt = (struct rtable *)skb->dst; 145 struct rtable *rt = skb->rtable;
146 struct iphdr *iph; 146 struct iphdr *iph;
147 147
148 /* Build the IP header. */ 148 /* Build the IP header. */
@@ -240,7 +240,7 @@ static int ip_finish_output(struct sk_buff *skb)
240int ip_mc_output(struct sk_buff *skb) 240int ip_mc_output(struct sk_buff *skb)
241{ 241{
242 struct sock *sk = skb->sk; 242 struct sock *sk = skb->sk;
243 struct rtable *rt = (struct rtable*)skb->dst; 243 struct rtable *rt = skb->rtable;
244 struct net_device *dev = rt->u.dst.dev; 244 struct net_device *dev = rt->u.dst.dev;
245 245
246 /* 246 /*
@@ -321,7 +321,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
321 /* Skip all of this if the packet is already routed, 321 /* Skip all of this if the packet is already routed,
322 * f.e. by something like SCTP. 322 * f.e. by something like SCTP.
323 */ 323 */
324 rt = (struct rtable *) skb->dst; 324 rt = skb->rtable;
325 if (rt != NULL) 325 if (rt != NULL)
326 goto packet_routed; 326 goto packet_routed;
327 327
@@ -351,7 +351,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
351 * itself out. 351 * itself out.
352 */ 352 */
353 security_sk_classify_flow(sk, &fl); 353 security_sk_classify_flow(sk, &fl);
354 if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) 354 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
355 goto no_route; 355 goto no_route;
356 } 356 }
357 sk_setup_caps(sk, &rt->u.dst); 357 sk_setup_caps(sk, &rt->u.dst);
@@ -441,7 +441,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
441 unsigned int mtu, hlen, left, len, ll_rs, pad; 441 unsigned int mtu, hlen, left, len, ll_rs, pad;
442 int offset; 442 int offset;
443 __be16 not_last_frag; 443 __be16 not_last_frag;
444 struct rtable *rt = (struct rtable*)skb->dst; 444 struct rtable *rt = skb->rtable;
445 int err = 0; 445 int err = 0;
446 446
447 dev = rt->u.dst.dev; 447 dev = rt->u.dst.dev;
@@ -825,7 +825,7 @@ int ip_append_data(struct sock *sk,
825 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? 825 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
826 rt->u.dst.dev->mtu : 826 rt->u.dst.dev->mtu :
827 dst_mtu(rt->u.dst.path); 827 dst_mtu(rt->u.dst.path);
828 inet->cork.rt = rt; 828 inet->cork.dst = &rt->u.dst;
829 inet->cork.length = 0; 829 inet->cork.length = 0;
830 sk->sk_sndmsg_page = NULL; 830 sk->sk_sndmsg_page = NULL;
831 sk->sk_sndmsg_off = 0; 831 sk->sk_sndmsg_off = 0;
@@ -834,7 +834,7 @@ int ip_append_data(struct sock *sk,
834 transhdrlen += exthdrlen; 834 transhdrlen += exthdrlen;
835 } 835 }
836 } else { 836 } else {
837 rt = inet->cork.rt; 837 rt = (struct rtable *)inet->cork.dst;
838 if (inet->cork.flags & IPCORK_OPT) 838 if (inet->cork.flags & IPCORK_OPT)
839 opt = inet->cork.opt; 839 opt = inet->cork.opt;
840 840
@@ -1083,7 +1083,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1083 if (skb_queue_empty(&sk->sk_write_queue)) 1083 if (skb_queue_empty(&sk->sk_write_queue))
1084 return -EINVAL; 1084 return -EINVAL;
1085 1085
1086 rt = inet->cork.rt; 1086 rt = (struct rtable *)inet->cork.dst;
1087 if (inet->cork.flags & IPCORK_OPT) 1087 if (inet->cork.flags & IPCORK_OPT)
1088 opt = inet->cork.opt; 1088 opt = inet->cork.opt;
1089 1089
@@ -1208,10 +1208,8 @@ static void ip_cork_release(struct inet_sock *inet)
1208 inet->cork.flags &= ~IPCORK_OPT; 1208 inet->cork.flags &= ~IPCORK_OPT;
1209 kfree(inet->cork.opt); 1209 kfree(inet->cork.opt);
1210 inet->cork.opt = NULL; 1210 inet->cork.opt = NULL;
1211 if (inet->cork.rt) { 1211 dst_release(inet->cork.dst);
1212 ip_rt_put(inet->cork.rt); 1212 inet->cork.dst = NULL;
1213 inet->cork.rt = NULL;
1214 }
1215} 1213}
1216 1214
1217/* 1215/*
@@ -1224,7 +1222,7 @@ int ip_push_pending_frames(struct sock *sk)
1224 struct sk_buff **tail_skb; 1222 struct sk_buff **tail_skb;
1225 struct inet_sock *inet = inet_sk(sk); 1223 struct inet_sock *inet = inet_sk(sk);
1226 struct ip_options *opt = NULL; 1224 struct ip_options *opt = NULL;
1227 struct rtable *rt = inet->cork.rt; 1225 struct rtable *rt = (struct rtable *)inet->cork.dst;
1228 struct iphdr *iph; 1226 struct iphdr *iph;
1229 __be16 df = 0; 1227 __be16 df = 0;
1230 __u8 ttl; 1228 __u8 ttl;
@@ -1357,7 +1355,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1357 } replyopts; 1355 } replyopts;
1358 struct ipcm_cookie ipc; 1356 struct ipcm_cookie ipc;
1359 __be32 daddr; 1357 __be32 daddr;
1360 struct rtable *rt = (struct rtable*)skb->dst; 1358 struct rtable *rt = skb->rtable;
1361 1359
1362 if (ip_options_echo(&replyopts.opt, skb)) 1360 if (ip_options_echo(&replyopts.opt, skb))
1363 return; 1361 return;
@@ -1384,7 +1382,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1384 .dport = tcp_hdr(skb)->source } }, 1382 .dport = tcp_hdr(skb)->source } },
1385 .proto = sk->sk_protocol }; 1383 .proto = sk->sk_protocol };
1386 security_skb_classify_flow(skb, &fl); 1384 security_skb_classify_flow(skb, &fl);
1387 if (ip_route_output_key(sk->sk_net, &rt, &fl)) 1385 if (ip_route_output_key(sock_net(sk), &rt, &fl))
1388 return; 1386 return;
1389 } 1387 }
1390 1388
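Editorial note: the (struct rtable *)inet->cork.dst casts above depend on the dst entry being the first member of struct rtable, so the two pointer types alias the same address. A toy userspace demonstration of why the round trip is safe (toy struct names, not the kernel's):

#include <assert.h>
#include <stddef.h>

struct dst_entry_toy { int refcnt; };

/* Toy mirror of struct rtable: the dst entry sits first, making a
 * dst_entry pointer and an rtable pointer interchangeable. */
struct rtable_toy {
	union {
		struct dst_entry_toy dst;	/* must stay first */
	} u;
	unsigned int rt_flags;
};

int main(void)
{
	struct rtable_toy rt = { .rt_flags = 0 };
	struct dst_entry_toy *dst = &rt.u.dst;

	/* The cast back, as in (struct rtable *)inet->cork.dst: */
	struct rtable_toy *rt2 = (struct rtable_toy *)dst;

	assert(rt2 == &rt);
	assert(offsetof(struct rtable_toy, u.dst) == 0);
	return 0;
}

The switch from ip_rt_put(cork.rt) to dst_release(cork.dst) also lets ip_cork_release() drop its explicit guard, since dst_release() tolerates a NULL pointer.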
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index c2921d01e925..d8adfd4972e2 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -57,7 +57,7 @@
57static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) 57static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
58{ 58{
59 struct in_pktinfo info; 59 struct in_pktinfo info;
60 struct rtable *rt = (struct rtable *)skb->dst; 60 struct rtable *rt = skb->rtable;
61 61
62 info.ipi_addr.s_addr = ip_hdr(skb)->daddr; 62 info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
63 if (rt) { 63 if (rt) {
@@ -163,7 +163,7 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
163 ip_cmsg_recv_security(msg, skb); 163 ip_cmsg_recv_security(msg, skb);
164} 164}
165 165
166int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc) 166int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
167{ 167{
168 int err; 168 int err;
169 struct cmsghdr *cmsg; 169 struct cmsghdr *cmsg;
@@ -176,7 +176,7 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc)
176 switch (cmsg->cmsg_type) { 176 switch (cmsg->cmsg_type) {
177 case IP_RETOPTS: 177 case IP_RETOPTS:
178 err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); 178 err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
179 err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40); 179 err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40);
180 if (err) 180 if (err)
181 return err; 181 return err;
182 break; 182 break;
@@ -449,7 +449,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
449 struct ip_options * opt = NULL; 449 struct ip_options * opt = NULL;
450 if (optlen > 40 || optlen < 0) 450 if (optlen > 40 || optlen < 0)
451 goto e_inval; 451 goto e_inval;
452 err = ip_options_get_from_user(&opt, optval, optlen); 452 err = ip_options_get_from_user(sock_net(sk), &opt,
453 optval, optlen);
453 if (err) 454 if (err)
454 break; 455 break;
455 if (inet->is_icsk) { 456 if (inet->is_icsk) {
@@ -589,13 +590,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
589 err = 0; 590 err = 0;
590 break; 591 break;
591 } 592 }
592 dev = ip_dev_find(&init_net, mreq.imr_address.s_addr); 593 dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
593 if (dev) { 594 if (dev) {
594 mreq.imr_ifindex = dev->ifindex; 595 mreq.imr_ifindex = dev->ifindex;
595 dev_put(dev); 596 dev_put(dev);
596 } 597 }
597 } else 598 } else
598 dev = __dev_get_by_index(&init_net, mreq.imr_ifindex); 599 dev = __dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
599 600
600 601
601 err = -EADDRNOTAVAIL; 602 err = -EADDRNOTAVAIL;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 7c992fbbc2c3..08e8fb60d315 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -292,7 +292,7 @@ static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)
292 292
293 mm_segment_t oldfs = get_fs(); 293 mm_segment_t oldfs = get_fs();
294 set_fs(get_ds()); 294 set_fs(get_ds());
295 res = devinet_ioctl(cmd, (struct ifreq __user *) arg); 295 res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
296 set_fs(oldfs); 296 set_fs(oldfs);
297 return res; 297 return res;
298} 298}
@@ -434,7 +434,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
434 unsigned char *sha, *tha; /* s for "source", t for "target" */ 434 unsigned char *sha, *tha; /* s for "source", t for "target" */
435 struct ic_device *d; 435 struct ic_device *d;
436 436
437 if (dev->nd_net != &init_net) 437 if (dev_net(dev) != &init_net)
438 goto drop; 438 goto drop;
439 439
440 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) 440 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
@@ -460,10 +460,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
460 if (rarp->ar_pro != htons(ETH_P_IP)) 460 if (rarp->ar_pro != htons(ETH_P_IP))
461 goto drop; 461 goto drop;
462 462
463 if (!pskb_may_pull(skb, 463 if (!pskb_may_pull(skb, arp_hdr_len(dev)))
464 sizeof(struct arphdr) +
465 (2 * dev->addr_len) +
466 (2 * 4)))
467 goto drop; 464 goto drop;
468 465
469 /* OK, it is all there and looks valid, process... */ 466 /* OK, it is all there and looks valid, process... */
@@ -857,7 +854,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
857 struct ic_device *d; 854 struct ic_device *d;
858 int len, ext_len; 855 int len, ext_len;
859 856
860 if (dev->nd_net != &init_net) 857 if (dev_net(dev) != &init_net)
861 goto drop; 858 goto drop;
862 859
863 /* Perform verifications before taking the lock. */ 860 /* Perform verifications before taking the lock. */
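Editorial note: ic_rarp_recv() swaps its open-coded minimum length for the arp_hdr_len() helper; the helper computes the same value, roughly as follows (body assumed from the include/linux/if_arp.h of this era, equivalent to the expression it replaces):

/* Sketch: smallest valid ARP/RARP packet for a device -- the fixed
 * header plus two hardware addresses and two IPv4 addresses. */
static inline unsigned int arp_hdr_len(struct net_device *dev)
{
	return sizeof(struct arphdr) +
	       2 * dev->addr_len +	/* sender + target hardware address */
	       2 * sizeof(u32);		/* sender + target IPv4 address */
}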
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index dbaed69de06a..894bce96284a 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -528,7 +528,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
528 528
529 if (!dst) { 529 if (!dst) {
530 /* NBMA tunnel */ 530 /* NBMA tunnel */
531 if ((rt = (struct rtable*)skb->dst) == NULL) { 531 if ((rt = skb->rtable) == NULL) {
532 tunnel->stat.tx_fifo_errors++; 532 tunnel->stat.tx_fifo_errors++;
533 goto tx_error; 533 goto tx_error;
534 } 534 }
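
The (struct rtable *)skb->dst casts disappear because sk_buff now exposes the route directly. A sketch of the layout this assumes, with rtable aliasing dst in a union so no space is added:

    struct sk_buff {
            /* ... */
            union {
                    struct dst_entry *dst;
                    struct rtable    *rtable; /* valid when dst is an IPv4 route */
            };
            /* ... */
    };
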
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a94f52c207a7..11700a4dcd95 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -849,7 +849,7 @@ static void mrtsock_destruct(struct sock *sk)
849{ 849{
850 rtnl_lock(); 850 rtnl_lock();
851 if (sk == mroute_socket) { 851 if (sk == mroute_socket) {
852 IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)--; 852 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;
853 853
854 write_lock_bh(&mrt_lock); 854 write_lock_bh(&mrt_lock);
855 mroute_socket=NULL; 855 mroute_socket=NULL;
@@ -898,7 +898,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
898 mroute_socket=sk; 898 mroute_socket=sk;
899 write_unlock_bh(&mrt_lock); 899 write_unlock_bh(&mrt_lock);
900 900
901 IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)++; 901 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
902 } 902 }
903 rtnl_unlock(); 903 rtnl_unlock();
904 return ret; 904 return ret;
@@ -1089,7 +1089,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
1089 struct vif_device *v; 1089 struct vif_device *v;
1090 int ct; 1090 int ct;
1091 1091
1092 if (dev->nd_net != &init_net) 1092 if (dev_net(dev) != &init_net)
1093 return NOTIFY_DONE; 1093 return NOTIFY_DONE;
1094 1094
1095 if (event != NETDEV_UNREGISTER) 1095 if (event != NETDEV_UNREGISTER)
@@ -1283,7 +1283,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1283 if (vif_table[vif].dev != skb->dev) { 1283 if (vif_table[vif].dev != skb->dev) {
1284 int true_vifi; 1284 int true_vifi;
1285 1285
1286 if (((struct rtable*)skb->dst)->fl.iif == 0) { 1286 if (skb->rtable->fl.iif == 0) {
1287 /* It is our own packet, looped back. 1287 /* It is our own packet, looped back.
1288 Very complicated situation... 1288 Very complicated situation...
1289 1289
@@ -1357,7 +1357,7 @@ dont_forward:
1357int ip_mr_input(struct sk_buff *skb) 1357int ip_mr_input(struct sk_buff *skb)
1358{ 1358{
1359 struct mfc_cache *cache; 1359 struct mfc_cache *cache;
1360 int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL; 1360 int local = skb->rtable->rt_flags&RTCF_LOCAL;
1361 1361
1362 /* Packet is looped back after forward, it should not be 1362 /* Packet is looped back after forward, it should not be
1363 forwarded second time, but still can be delivered locally. 1363 forwarded second time, but still can be delivered locally.
@@ -1594,7 +1594,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1594{ 1594{
1595 int err; 1595 int err;
1596 struct mfc_cache *cache; 1596 struct mfc_cache *cache;
1597 struct rtable *rt = (struct rtable*)skb->dst; 1597 struct rtable *rt = skb->rtable;
1598 1598
1599 read_lock(&mrt_lock); 1599 read_lock(&mrt_lock);
1600 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); 1600 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 12dc0d640b6d..620e40ff79a9 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -550,7 +550,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
550 550
551 IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" 551 IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
552 "%u.%u.%u.%u:%u to app %s on port %u\n", 552 "%u.%u.%u.%u:%u to app %s on port %u\n",
553 __FUNCTION__, 553 __func__,
554 NIPQUAD(cp->caddr), ntohs(cp->cport), 554 NIPQUAD(cp->caddr), ntohs(cp->cport),
555 NIPQUAD(cp->vaddr), ntohs(cp->vport), 555 NIPQUAD(cp->vaddr), ntohs(cp->vport),
556 inc->name, ntohs(inc->port)); 556 inc->name, ntohs(inc->port));
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 1fa7b330b9ac..1caa2908373f 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -344,7 +344,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
344 344
345 IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" 345 IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
346 "%u.%u.%u.%u:%u to app %s on port %u\n", 346 "%u.%u.%u.%u:%u to app %s on port %u\n",
347 __FUNCTION__, 347 __func__,
348 NIPQUAD(cp->caddr), ntohs(cp->cport), 348 NIPQUAD(cp->caddr), ntohs(cp->cport),
349 NIPQUAD(cp->vaddr), ntohs(cp->vport), 349 NIPQUAD(cp->vaddr), ntohs(cp->vport),
350 inc->name, ntohs(inc->port)); 350 inc->name, ntohs(inc->port));
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 948378d0a755..69c56663cc9a 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -916,7 +916,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
916 if (!tinfo) 916 if (!tinfo)
917 return -ENOMEM; 917 return -ENOMEM;
918 918
919 IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current)); 919 IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
920 IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n", 920 IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
921 sizeof(struct ip_vs_sync_conn)); 921 sizeof(struct ip_vs_sync_conn));
922 922
@@ -956,7 +956,7 @@ int stop_sync_thread(int state)
956 (state == IP_VS_STATE_BACKUP && !sync_backup_pid)) 956 (state == IP_VS_STATE_BACKUP && !sync_backup_pid))
957 return -ESRCH; 957 return -ESRCH;
958 958
959 IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current)); 959 IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
960 IP_VS_INFO("stopping sync thread %d ...\n", 960 IP_VS_INFO("stopping sync thread %d ...\n",
961 (state == IP_VS_STATE_MASTER) ? 961 (state == IP_VS_STATE_MASTER) ?
962 sync_master_pid : sync_backup_pid); 962 sync_master_pid : sync_backup_pid);
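
The __FUNCTION__ to __func__ replacements in the IPVS files are purely cosmetic: __FUNCTION__ is a GCC extension, while __func__ is standard C99 with the same expansion, for example:

    static void demo(void)
    {
            /* Prints "demo: called"; only __func__ is standard C. */
            printk(KERN_DEBUG "%s: called\n", __func__);
    }
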
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index a7591ce344d2..1563f29b5117 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -52,7 +52,7 @@ MODULE_DESCRIPTION("arptables core");
52do { \ 52do { \
53 if (!(x)) \ 53 if (!(x)) \
54 printk("ARP_NF_ASSERT: %s:%s:%u\n", \ 54 printk("ARP_NF_ASSERT: %s:%s:%u\n", \
55 __FUNCTION__, __FILE__, __LINE__); \ 55 __func__, __FILE__, __LINE__); \
56} while(0) 56} while(0)
57#else 57#else
58#define ARP_NF_ASSERT(x) 58#define ARP_NF_ASSERT(x)
@@ -233,10 +233,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
233 void *table_base; 233 void *table_base;
234 struct xt_table_info *private; 234 struct xt_table_info *private;
235 235
236 /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ 236 if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
237 if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
238 (2 * skb->dev->addr_len) +
239 (2 * sizeof(u32)))))
240 return NF_DROP; 237 return NF_DROP;
241 238
242 indev = in ? in->name : nulldevname; 239 indev = in ? in->name : nulldevname;
@@ -1499,11 +1496,11 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
1499 1496
1500 switch (cmd) { 1497 switch (cmd) {
1501 case ARPT_SO_SET_REPLACE: 1498 case ARPT_SO_SET_REPLACE:
1502 ret = compat_do_replace(sk->sk_net, user, len); 1499 ret = compat_do_replace(sock_net(sk), user, len);
1503 break; 1500 break;
1504 1501
1505 case ARPT_SO_SET_ADD_COUNTERS: 1502 case ARPT_SO_SET_ADD_COUNTERS:
1506 ret = do_add_counters(sk->sk_net, user, len, 1); 1503 ret = do_add_counters(sock_net(sk), user, len, 1);
1507 break; 1504 break;
1508 1505
1509 default: 1506 default:
@@ -1647,10 +1644,10 @@ static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user,
1647 1644
1648 switch (cmd) { 1645 switch (cmd) {
1649 case ARPT_SO_GET_INFO: 1646 case ARPT_SO_GET_INFO:
1650 ret = get_info(sk->sk_net, user, len, 1); 1647 ret = get_info(sock_net(sk), user, len, 1);
1651 break; 1648 break;
1652 case ARPT_SO_GET_ENTRIES: 1649 case ARPT_SO_GET_ENTRIES:
1653 ret = compat_get_entries(sk->sk_net, user, len); 1650 ret = compat_get_entries(sock_net(sk), user, len);
1654 break; 1651 break;
1655 default: 1652 default:
1656 ret = do_arpt_get_ctl(sk, cmd, user, len); 1653 ret = do_arpt_get_ctl(sk, cmd, user, len);
@@ -1668,11 +1665,11 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
1668 1665
1669 switch (cmd) { 1666 switch (cmd) {
1670 case ARPT_SO_SET_REPLACE: 1667 case ARPT_SO_SET_REPLACE:
1671 ret = do_replace(sk->sk_net, user, len); 1668 ret = do_replace(sock_net(sk), user, len);
1672 break; 1669 break;
1673 1670
1674 case ARPT_SO_SET_ADD_COUNTERS: 1671 case ARPT_SO_SET_ADD_COUNTERS:
1675 ret = do_add_counters(sk->sk_net, user, len, 0); 1672 ret = do_add_counters(sock_net(sk), user, len, 0);
1676 break; 1673 break;
1677 1674
1678 default: 1675 default:
@@ -1692,11 +1689,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1692 1689
1693 switch (cmd) { 1690 switch (cmd) {
1694 case ARPT_SO_GET_INFO: 1691 case ARPT_SO_GET_INFO:
1695 ret = get_info(sk->sk_net, user, len, 0); 1692 ret = get_info(sock_net(sk), user, len, 0);
1696 break; 1693 break;
1697 1694
1698 case ARPT_SO_GET_ENTRIES: 1695 case ARPT_SO_GET_ENTRIES:
1699 ret = get_entries(sk->sk_net, user, len); 1696 ret = get_entries(sock_net(sk), user, len);
1700 break; 1697 break;
1701 1698
1702 case ARPT_SO_GET_REVISION_TARGET: { 1699 case ARPT_SO_GET_REVISION_TARGET: {
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 4dc162894cb2..719be29f7506 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -481,7 +481,7 @@ ipq_rcv_dev_event(struct notifier_block *this,
481{ 481{
482 struct net_device *dev = ptr; 482 struct net_device *dev = ptr;
483 483
484 if (dev->nd_net != &init_net) 484 if (dev_net(dev) != &init_net)
485 return NOTIFY_DONE; 485 return NOTIFY_DONE;
486 486
487 /* Drop any packets associated with the downed device */ 487 /* Drop any packets associated with the downed device */
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 600737f122d2..a819d191e1aa 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -53,7 +53,7 @@ MODULE_DESCRIPTION("IPv4 packet filter");
53do { \ 53do { \
54 if (!(x)) \ 54 if (!(x)) \
55 printk("IP_NF_ASSERT: %s:%s:%u\n", \ 55 printk("IP_NF_ASSERT: %s:%s:%u\n", \
56 __FUNCTION__, __FILE__, __LINE__); \ 56 __func__, __FILE__, __LINE__); \
57} while(0) 57} while(0)
58#else 58#else
59#define IP_NF_ASSERT(x) 59#define IP_NF_ASSERT(x)
@@ -1852,11 +1852,11 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
1852 1852
1853 switch (cmd) { 1853 switch (cmd) {
1854 case IPT_SO_SET_REPLACE: 1854 case IPT_SO_SET_REPLACE:
1855 ret = compat_do_replace(sk->sk_net, user, len); 1855 ret = compat_do_replace(sock_net(sk), user, len);
1856 break; 1856 break;
1857 1857
1858 case IPT_SO_SET_ADD_COUNTERS: 1858 case IPT_SO_SET_ADD_COUNTERS:
1859 ret = do_add_counters(sk->sk_net, user, len, 1); 1859 ret = do_add_counters(sock_net(sk), user, len, 1);
1860 break; 1860 break;
1861 1861
1862 default: 1862 default:
@@ -1963,10 +1963,10 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1963 1963
1964 switch (cmd) { 1964 switch (cmd) {
1965 case IPT_SO_GET_INFO: 1965 case IPT_SO_GET_INFO:
1966 ret = get_info(sk->sk_net, user, len, 1); 1966 ret = get_info(sock_net(sk), user, len, 1);
1967 break; 1967 break;
1968 case IPT_SO_GET_ENTRIES: 1968 case IPT_SO_GET_ENTRIES:
1969 ret = compat_get_entries(sk->sk_net, user, len); 1969 ret = compat_get_entries(sock_net(sk), user, len);
1970 break; 1970 break;
1971 default: 1971 default:
1972 ret = do_ipt_get_ctl(sk, cmd, user, len); 1972 ret = do_ipt_get_ctl(sk, cmd, user, len);
@@ -1985,11 +1985,11 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1985 1985
1986 switch (cmd) { 1986 switch (cmd) {
1987 case IPT_SO_SET_REPLACE: 1987 case IPT_SO_SET_REPLACE:
1988 ret = do_replace(sk->sk_net, user, len); 1988 ret = do_replace(sock_net(sk), user, len);
1989 break; 1989 break;
1990 1990
1991 case IPT_SO_SET_ADD_COUNTERS: 1991 case IPT_SO_SET_ADD_COUNTERS:
1992 ret = do_add_counters(sk->sk_net, user, len, 0); 1992 ret = do_add_counters(sock_net(sk), user, len, 0);
1993 break; 1993 break;
1994 1994
1995 default: 1995 default:
@@ -2010,11 +2010,11 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2010 2010
2011 switch (cmd) { 2011 switch (cmd) {
2012 case IPT_SO_GET_INFO: 2012 case IPT_SO_GET_INFO:
2013 ret = get_info(sk->sk_net, user, len, 0); 2013 ret = get_info(sock_net(sk), user, len, 0);
2014 break; 2014 break;
2015 2015
2016 case IPT_SO_GET_ENTRIES: 2016 case IPT_SO_GET_ENTRIES:
2017 ret = get_entries(sk->sk_net, user, len); 2017 ret = get_entries(sock_net(sk), user, len);
2018 break; 2018 break;
2019 2019
2020 case IPT_SO_GET_REVISION_MATCH: 2020 case IPT_SO_GET_REVISION_MATCH:
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index a12dd329e208..380d8daac72b 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -333,7 +333,7 @@ clusterip_tg(struct sk_buff *skb, const struct net_device *in,
333 } 333 }
334 334
335#ifdef DEBUG 335#ifdef DEBUG
336 DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 336 NF_CT_DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
337#endif 337#endif
338 pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); 338 pr_debug("hash=%u ct_hash=%u ", hash, ct->mark);
339 if (!clusterip_responsible(cipinfo->config, hash)) { 339 if (!clusterip_responsible(cipinfo->config, hash)) {
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index d80fee8327e4..84c26dd27d81 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -77,7 +77,7 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in,
77 return NF_ACCEPT; 77 return NF_ACCEPT;
78 78
79 mr = targinfo; 79 mr = targinfo;
80 rt = (struct rtable *)skb->dst; 80 rt = skb->rtable;
81 newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); 81 newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
82 if (!newsrc) { 82 if (!newsrc) {
83 printk("MASQUERADE: %s ate my IP address\n", out->name); 83 printk("MASQUERADE: %s ate my IP address\n", out->name);
@@ -120,7 +120,7 @@ static int masq_device_event(struct notifier_block *this,
120{ 120{
121 const struct net_device *dev = ptr; 121 const struct net_device *dev = ptr;
122 122
123 if (dev->nd_net != &init_net) 123 if (dev_net(dev) != &init_net)
124 return NOTIFY_DONE; 124 return NOTIFY_DONE;
125 125
126 if (event == NETDEV_DOWN) { 126 if (event == NETDEV_DOWN) {
@@ -139,18 +139,8 @@ static int masq_inet_event(struct notifier_block *this,
139 unsigned long event, 139 unsigned long event,
140 void *ptr) 140 void *ptr)
141{ 141{
142 const struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; 142 struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
143 143 return masq_device_event(this, event, dev);
144 if (event == NETDEV_DOWN) {
145 /* IP address was deleted. Search entire table for
146 conntracks which were associated with that device,
147 and forget them. */
148 NF_CT_ASSERT(dev->ifindex != 0);
149
150 nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
151 }
152
153 return NOTIFY_DONE;
154} 144}
155 145
156static struct notifier_block masq_dev_notifier = { 146static struct notifier_block masq_dev_notifier = {
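
masq_inet_event() can delegate to masq_device_event() because both paths end up flushing the conntrack entries masqueraded via the affected interface. The comparison callback handed to nf_ct_iterate_cleanup() is assumed to look roughly like this (device_cmp lives elsewhere in this file):

    static int device_cmp(struct nf_conn *ct, void *ifindex)
    {
            const struct nf_conn_nat *nat = nfct_nat(ct);

            if (!nat)
                    return 0;
            return nat->masq_index == (int)(long)ifindex;
    }
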
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index f500b0fdaef4..e60b885d2dcd 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -379,7 +379,7 @@ static const struct file_operations ct_cpu_seq_fops = {
379 .open = ct_cpu_seq_open, 379 .open = ct_cpu_seq_open,
380 .read = seq_read, 380 .read = seq_read,
381 .llseek = seq_lseek, 381 .llseek = seq_lseek,
382 .release = seq_release_private, 382 .release = seq_release,
383}; 383};
384 384
385int __init nf_conntrack_ipv4_compat_init(void) 385int __init nf_conntrack_ipv4_compat_init(void)
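
This one-liner restores the usual seq_file pairing: a plain seq_open() allocates no iterator, so the matching release is seq_release(); the *_private variants allocate and free private state as a pair. A sketch of the convention, assuming no private iterator state:

    /* open with seq_open()         -> release with seq_release()
     * open with seq_open_private() -> release with seq_release_private() */
    static int ct_cpu_seq_open(struct inode *inode, struct file *file)
    {
            return seq_open(file, &ct_cpu_seq_ops);
    }
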
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index ca57f47bbd25..2fca727aa8ba 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -139,7 +139,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
139 const char *rep_buffer, 139 const char *rep_buffer,
140 unsigned int rep_len) 140 unsigned int rep_len)
141{ 141{
142 struct rtable *rt = (struct rtable *)skb->dst; 142 struct rtable *rt = skb->rtable;
143 struct iphdr *iph; 143 struct iphdr *iph;
144 struct tcphdr *tcph; 144 struct tcphdr *tcph;
145 int oldlen, datalen; 145 int oldlen, datalen;
@@ -217,7 +217,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
217 const char *rep_buffer, 217 const char *rep_buffer,
218 unsigned int rep_len) 218 unsigned int rep_len)
219{ 219{
220 struct rtable *rt = (struct rtable *)skb->dst; 220 struct rtable *rt = skb->rtable;
221 struct iphdr *iph; 221 struct iphdr *iph;
222 struct udphdr *udph; 222 struct udphdr *udph;
223 int datalen, oldlen; 223 int datalen, oldlen;
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index b4c8d4968bb2..4334d5cabc5b 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -2,6 +2,8 @@
2 * 2 *
3 * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> 3 * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
4 * based on RR's ip_nat_ftp.c and other modules. 4 * based on RR's ip_nat_ftp.c and other modules.
5 * (C) 2007 United Security Providers
6 * (C) 2007, 2008 Patrick McHardy <kaber@trash.net>
5 * 7 *
6 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
@@ -26,275 +28,461 @@ MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
26MODULE_DESCRIPTION("SIP NAT helper"); 28MODULE_DESCRIPTION("SIP NAT helper");
27MODULE_ALIAS("ip_nat_sip"); 29MODULE_ALIAS("ip_nat_sip");
28 30
29struct addr_map {
30 struct {
31 char src[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
32 char dst[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
33 unsigned int srclen, srciplen;
34 unsigned int dstlen, dstiplen;
35 } addr[IP_CT_DIR_MAX];
36};
37 31
38static void addr_map_init(const struct nf_conn *ct, struct addr_map *map) 32static unsigned int mangle_packet(struct sk_buff *skb,
33 const char **dptr, unsigned int *datalen,
34 unsigned int matchoff, unsigned int matchlen,
35 const char *buffer, unsigned int buflen)
39{ 36{
40 const struct nf_conntrack_tuple *t; 37 enum ip_conntrack_info ctinfo;
41 enum ip_conntrack_dir dir; 38 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
42 unsigned int n; 39
43 40 if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen,
44 for (dir = 0; dir < IP_CT_DIR_MAX; dir++) { 41 buffer, buflen))
45 t = &ct->tuplehash[dir].tuple; 42 return 0;
46 43
47 n = sprintf(map->addr[dir].src, "%u.%u.%u.%u", 44 /* Reload data pointer and adjust datalen value */
48 NIPQUAD(t->src.u3.ip)); 45 *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
49 map->addr[dir].srciplen = n; 46 *datalen += buflen - matchlen;
50 n += sprintf(map->addr[dir].src + n, ":%u", 47 return 1;
51 ntohs(t->src.u.udp.port));
52 map->addr[dir].srclen = n;
53
54 n = sprintf(map->addr[dir].dst, "%u.%u.%u.%u",
55 NIPQUAD(t->dst.u3.ip));
56 map->addr[dir].dstiplen = n;
57 n += sprintf(map->addr[dir].dst + n, ":%u",
58 ntohs(t->dst.u.udp.port));
59 map->addr[dir].dstlen = n;
60 }
61} 48}
62 49
63static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo, 50static int map_addr(struct sk_buff *skb,
64 struct nf_conn *ct, const char **dptr, size_t dlen, 51 const char **dptr, unsigned int *datalen,
65 enum sip_header_pos pos, struct addr_map *map) 52 unsigned int matchoff, unsigned int matchlen,
53 union nf_inet_addr *addr, __be16 port)
66{ 54{
55 enum ip_conntrack_info ctinfo;
56 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
67 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 57 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
68 unsigned int matchlen, matchoff, addrlen; 58 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
69 char *addr; 59 unsigned int buflen;
70 60 __be32 newaddr;
71 if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0) 61 __be16 newport;
62
63 if (ct->tuplehash[dir].tuple.src.u3.ip == addr->ip &&
64 ct->tuplehash[dir].tuple.src.u.udp.port == port) {
65 newaddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
66 newport = ct->tuplehash[!dir].tuple.dst.u.udp.port;
67 } else if (ct->tuplehash[dir].tuple.dst.u3.ip == addr->ip &&
68 ct->tuplehash[dir].tuple.dst.u.udp.port == port) {
69 newaddr = ct->tuplehash[!dir].tuple.src.u3.ip;
70 newport = ct->tuplehash[!dir].tuple.src.u.udp.port;
71 } else
72 return 1; 72 return 1;
73 73
74 if ((matchlen == map->addr[dir].srciplen || 74 if (newaddr == addr->ip && newport == port)
75 matchlen == map->addr[dir].srclen) &&
76 memcmp(*dptr + matchoff, map->addr[dir].src, matchlen) == 0) {
77 addr = map->addr[!dir].dst;
78 addrlen = map->addr[!dir].dstlen;
79 } else if ((matchlen == map->addr[dir].dstiplen ||
80 matchlen == map->addr[dir].dstlen) &&
81 memcmp(*dptr + matchoff, map->addr[dir].dst, matchlen) == 0) {
82 addr = map->addr[!dir].src;
83 addrlen = map->addr[!dir].srclen;
84 } else
85 return 1; 75 return 1;
86 76
87 if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, 77 buflen = sprintf(buffer, "%u.%u.%u.%u:%u",
88 matchoff, matchlen, addr, addrlen)) 78 NIPQUAD(newaddr), ntohs(newport));
89 return 0;
90 *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
91 return 1;
92 79
80 return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
81 buffer, buflen);
93} 82}
94 83
95static unsigned int ip_nat_sip(struct sk_buff *skb, 84static int map_sip_addr(struct sk_buff *skb,
96 enum ip_conntrack_info ctinfo, 85 const char **dptr, unsigned int *datalen,
97 struct nf_conn *ct, 86 enum sip_header_types type)
98 const char **dptr)
99{ 87{
100 enum sip_header_pos pos; 88 enum ip_conntrack_info ctinfo;
101 struct addr_map map; 89 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
102 int dataoff, datalen; 90 unsigned int matchlen, matchoff;
91 union nf_inet_addr addr;
92 __be16 port;
103 93
104 dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); 94 if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL,
105 datalen = skb->len - dataoff; 95 &matchoff, &matchlen, &addr, &port) <= 0)
106 if (datalen < sizeof("SIP/2.0") - 1) 96 return 1;
107 return NF_ACCEPT; 97 return map_addr(skb, dptr, datalen, matchoff, matchlen, &addr, port);
98}
108 99
109 addr_map_init(ct, &map); 100static unsigned int ip_nat_sip(struct sk_buff *skb,
101 const char **dptr, unsigned int *datalen)
102{
103 enum ip_conntrack_info ctinfo;
104 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
105 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
106 unsigned int dataoff, matchoff, matchlen;
107 union nf_inet_addr addr;
108 __be16 port;
109 int request, in_header;
110 110
111 /* Basic rules: requests and responses. */ 111 /* Basic rules: requests and responses. */
112 if (strncmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) != 0) { 112 if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) {
113 /* 10.2: Constructing the REGISTER Request: 113 if (ct_sip_parse_request(ct, *dptr, *datalen,
114 * 114 &matchoff, &matchlen,
115 * The "userinfo" and "@" components of the SIP URI MUST NOT 115 &addr, &port) > 0 &&
116 * be present. 116 !map_addr(skb, dptr, datalen, matchoff, matchlen,
117 */ 117 &addr, port))
118 if (datalen >= sizeof("REGISTER") - 1 && 118 return NF_DROP;
119 strncmp(*dptr, "REGISTER", sizeof("REGISTER") - 1) == 0) 119 request = 1;
120 pos = POS_REG_REQ_URI; 120 } else
121 else 121 request = 0;
122 pos = POS_REQ_URI; 122
123 123 /* Translate topmost Via header and parameters */
124 if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, pos, &map)) 124 if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
125 SIP_HDR_VIA, NULL, &matchoff, &matchlen,
126 &addr, &port) > 0) {
127 unsigned int matchend, poff, plen, buflen, n;
128 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
129
130 /* We're only interested in headers related to this
131 * connection */
132 if (request) {
133 if (addr.ip != ct->tuplehash[dir].tuple.src.u3.ip ||
134 port != ct->tuplehash[dir].tuple.src.u.udp.port)
135 goto next;
136 } else {
137 if (addr.ip != ct->tuplehash[dir].tuple.dst.u3.ip ||
138 port != ct->tuplehash[dir].tuple.dst.u.udp.port)
139 goto next;
140 }
141
142 if (!map_addr(skb, dptr, datalen, matchoff, matchlen,
143 &addr, port))
125 return NF_DROP; 144 return NF_DROP;
145
146 matchend = matchoff + matchlen;
147
 148 /* The maddr= parameter (RFC 3261) specifies where to send
149 * the reply. */
150 if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
151 "maddr=", &poff, &plen,
152 &addr) > 0 &&
153 addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
154 addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
155 __be32 ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
156 buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip));
157 if (!mangle_packet(skb, dptr, datalen, poff, plen,
158 buffer, buflen))
159 return NF_DROP;
160 }
161
 162 /* The received= parameter (RFC 3261) contains the address
163 * from which the server received the request. */
164 if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
165 "received=", &poff, &plen,
166 &addr) > 0 &&
167 addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
168 addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
169 __be32 ip = ct->tuplehash[!dir].tuple.src.u3.ip;
170 buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip));
171 if (!mangle_packet(skb, dptr, datalen, poff, plen,
172 buffer, buflen))
173 return NF_DROP;
174 }
175
176 /* The rport= parameter (RFC 3581) contains the port number
177 * from which the server received the request. */
178 if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen,
179 "rport=", &poff, &plen,
180 &n) > 0 &&
181 htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port &&
182 htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) {
183 __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port;
184 buflen = sprintf(buffer, "%u", ntohs(p));
185 if (!mangle_packet(skb, dptr, datalen, poff, plen,
186 buffer, buflen))
187 return NF_DROP;
188 }
126 } 189 }
127 190
128 if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_FROM, &map) || 191next:
129 !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_TO, &map) || 192 /* Translate Contact headers */
130 !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_VIA, &map) || 193 dataoff = 0;
131 !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map)) 194 in_header = 0;
195 while (ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen,
196 SIP_HDR_CONTACT, &in_header,
197 &matchoff, &matchlen,
198 &addr, &port) > 0) {
199 if (!map_addr(skb, dptr, datalen, matchoff, matchlen,
200 &addr, port))
201 return NF_DROP;
202 }
203
204 if (!map_sip_addr(skb, dptr, datalen, SIP_HDR_FROM) ||
205 !map_sip_addr(skb, dptr, datalen, SIP_HDR_TO))
132 return NF_DROP; 206 return NF_DROP;
133 return NF_ACCEPT; 207 return NF_ACCEPT;
134} 208}
135 209
136static unsigned int mangle_sip_packet(struct sk_buff *skb, 210/* Handles expected signalling connections and media streams */
137 enum ip_conntrack_info ctinfo, 211static void ip_nat_sip_expected(struct nf_conn *ct,
138 struct nf_conn *ct, 212 struct nf_conntrack_expect *exp)
139 const char **dptr, size_t dlen,
140 char *buffer, int bufflen,
141 enum sip_header_pos pos)
142{ 213{
143 unsigned int matchlen, matchoff; 214 struct nf_nat_range range;
144 215
145 if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0) 216 /* This must be a fresh one. */
146 return 0; 217 BUG_ON(ct->status & IPS_NAT_DONE_MASK);
147 218
148 if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, 219 /* For DST manip, map port here to where it's expected. */
149 matchoff, matchlen, buffer, bufflen)) 220 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
150 return 0; 221 range.min = range.max = exp->saved_proto;
222 range.min_ip = range.max_ip = exp->saved_ip;
223 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
151 224
152 /* We need to reload this. Thanks Patrick. */ 225 /* Change src to where master sends to, but only if the connection
153 *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); 226 * actually came from the same source. */
154 return 1; 227 if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip ==
228 ct->master->tuplehash[exp->dir].tuple.src.u3.ip) {
229 range.flags = IP_NAT_RANGE_MAP_IPS;
230 range.min_ip = range.max_ip
231 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
232 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
233 }
155} 234}
156 235
157static int mangle_content_len(struct sk_buff *skb, 236static unsigned int ip_nat_sip_expect(struct sk_buff *skb,
158 enum ip_conntrack_info ctinfo, 237 const char **dptr, unsigned int *datalen,
159 struct nf_conn *ct, 238 struct nf_conntrack_expect *exp,
160 const char *dptr) 239 unsigned int matchoff,
240 unsigned int matchlen)
161{ 241{
162 unsigned int dataoff, matchoff, matchlen; 242 enum ip_conntrack_info ctinfo;
163 char buffer[sizeof("65536")]; 243 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
164 int bufflen; 244 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
245 __be32 newip;
246 u_int16_t port;
247 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
248 unsigned buflen;
165 249
166 dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); 250 /* Connection will come from reply */
251 if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip)
252 newip = exp->tuple.dst.u3.ip;
253 else
254 newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
167 255
168 /* Get actual SDP length */ 256 /* If the signalling port matches the connection's source port in the
169 if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, 257 * original direction, try to use the destination port in the opposite
170 &matchlen, POS_SDP_HEADER) > 0) { 258 * direction. */
259 if (exp->tuple.dst.u.udp.port ==
260 ct->tuplehash[dir].tuple.src.u.udp.port)
261 port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port);
262 else
263 port = ntohs(exp->tuple.dst.u.udp.port);
264
265 exp->saved_ip = exp->tuple.dst.u3.ip;
266 exp->tuple.dst.u3.ip = newip;
267 exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
268 exp->dir = !dir;
269 exp->expectfn = ip_nat_sip_expected;
171 270
172 /* since ct_sip_get_info() give us a pointer passing 'v=' 271 for (; port != 0; port++) {
173 we need to add 2 bytes in this count. */ 272 exp->tuple.dst.u.udp.port = htons(port);
174 int c_len = skb->len - dataoff - matchoff + 2; 273 if (nf_ct_expect_related(exp) == 0)
274 break;
275 }
175 276
176 /* Now, update SDP length */ 277 if (port == 0)
177 if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, 278 return NF_DROP;
178 &matchlen, POS_CONTENT) > 0) {
179 279
180 bufflen = sprintf(buffer, "%u", c_len); 280 if (exp->tuple.dst.u3.ip != exp->saved_ip ||
181 return nf_nat_mangle_udp_packet(skb, ct, ctinfo, 281 exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) {
182 matchoff, matchlen, 282 buflen = sprintf(buffer, "%u.%u.%u.%u:%u",
183 buffer, bufflen); 283 NIPQUAD(newip), port);
184 } 284 if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen,
285 buffer, buflen))
286 goto err;
185 } 287 }
186 return 0; 288 return NF_ACCEPT;
289
290err:
291 nf_ct_unexpect_related(exp);
292 return NF_DROP;
187} 293}
188 294
189static unsigned int mangle_sdp(struct sk_buff *skb, 295static int mangle_content_len(struct sk_buff *skb,
190 enum ip_conntrack_info ctinfo, 296 const char **dptr, unsigned int *datalen)
191 struct nf_conn *ct,
192 __be32 newip, u_int16_t port,
193 const char *dptr)
194{ 297{
195 char buffer[sizeof("nnn.nnn.nnn.nnn")]; 298 enum ip_conntrack_info ctinfo;
196 unsigned int dataoff, bufflen; 299 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
300 unsigned int matchoff, matchlen;
301 char buffer[sizeof("65536")];
302 int buflen, c_len;
197 303
198 dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); 304 /* Get actual SDP length */
305 if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
306 SDP_HDR_VERSION, SDP_HDR_UNSPEC,
307 &matchoff, &matchlen) <= 0)
308 return 0;
309 c_len = *datalen - matchoff + strlen("v=");
199 310
200 /* Mangle owner and contact info. */ 311 /* Now, update SDP length */
201 bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); 312 if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CONTENT_LENGTH,
202 if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, 313 &matchoff, &matchlen) <= 0)
203 buffer, bufflen, POS_OWNER_IP4))
204 return 0; 314 return 0;
205 315
206 if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, 316 buflen = sprintf(buffer, "%u", c_len);
207 buffer, bufflen, POS_CONNECTION_IP4)) 317 return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
318 buffer, buflen);
319}
320
321static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr,
322 unsigned int dataoff, unsigned int *datalen,
323 enum sdp_header_types type,
324 enum sdp_header_types term,
325 char *buffer, int buflen)
326{
327 enum ip_conntrack_info ctinfo;
328 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
329 unsigned int matchlen, matchoff;
330
331 if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term,
332 &matchoff, &matchlen) <= 0)
208 return 0; 333 return 0;
334 return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
335 buffer, buflen);
336}
209 337
210 /* Mangle media port. */ 338static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr,
211 bufflen = sprintf(buffer, "%u", port); 339 unsigned int dataoff,
212 if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, 340 unsigned int *datalen,
213 buffer, bufflen, POS_MEDIA)) 341 enum sdp_header_types type,
342 enum sdp_header_types term,
343 const union nf_inet_addr *addr)
344{
345 char buffer[sizeof("nnn.nnn.nnn.nnn")];
346 unsigned int buflen;
347
348 buflen = sprintf(buffer, NIPQUAD_FMT, NIPQUAD(addr->ip));
349 if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term,
350 buffer, buflen))
214 return 0; 351 return 0;
215 352
216 return mangle_content_len(skb, ctinfo, ct, dptr); 353 return mangle_content_len(skb, dptr, datalen);
217} 354}
218 355
219static void ip_nat_sdp_expect(struct nf_conn *ct, 356static unsigned int ip_nat_sdp_port(struct sk_buff *skb,
220 struct nf_conntrack_expect *exp) 357 const char **dptr,
358 unsigned int *datalen,
359 unsigned int matchoff,
360 unsigned int matchlen,
361 u_int16_t port)
221{ 362{
222 struct nf_nat_range range; 363 char buffer[sizeof("nnnnn")];
364 unsigned int buflen;
223 365
224 /* This must be a fresh one. */ 366 buflen = sprintf(buffer, "%u", port);
225 BUG_ON(ct->status & IPS_NAT_DONE_MASK); 367 if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen,
368 buffer, buflen))
369 return 0;
226 370
227 /* Change src to where master sends to */ 371 return mangle_content_len(skb, dptr, datalen);
228 range.flags = IP_NAT_RANGE_MAP_IPS; 372}
229 range.min_ip = range.max_ip
230 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
231 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
232 373
233 /* For DST manip, map port here to where it's expected. */ 374static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr,
234 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); 375 unsigned int dataoff,
235 range.min = range.max = exp->saved_proto; 376 unsigned int *datalen,
236 range.min_ip = range.max_ip = exp->saved_ip; 377 const union nf_inet_addr *addr)
237 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); 378{
379 char buffer[sizeof("nnn.nnn.nnn.nnn")];
380 unsigned int buflen;
381
382 /* Mangle session description owner and contact addresses */
383 buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(addr->ip));
384 if (!mangle_sdp_packet(skb, dptr, dataoff, datalen,
385 SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA,
386 buffer, buflen))
387 return 0;
388
389 if (!mangle_sdp_packet(skb, dptr, dataoff, datalen,
390 SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA,
391 buffer, buflen))
392 return 0;
393
394 return mangle_content_len(skb, dptr, datalen);
238} 395}
239 396
240/* So, this packet has hit the connection tracking matching code. 397/* So, this packet has hit the connection tracking matching code.
241 Mangle it, and change the expectation to match the new version. */ 398 Mangle it, and change the expectation to match the new version. */
242static unsigned int ip_nat_sdp(struct sk_buff *skb, 399static unsigned int ip_nat_sdp_media(struct sk_buff *skb,
243 enum ip_conntrack_info ctinfo, 400 const char **dptr,
244 struct nf_conntrack_expect *exp, 401 unsigned int *datalen,
245 const char *dptr) 402 struct nf_conntrack_expect *rtp_exp,
403 struct nf_conntrack_expect *rtcp_exp,
404 unsigned int mediaoff,
405 unsigned int medialen,
406 union nf_inet_addr *rtp_addr)
246{ 407{
247 struct nf_conn *ct = exp->master; 408 enum ip_conntrack_info ctinfo;
409 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
248 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 410 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
249 __be32 newip;
250 u_int16_t port; 411 u_int16_t port;
251 412
252 /* Connection will come from reply */ 413 /* Connection will come from reply */
253 if (ct->tuplehash[dir].tuple.src.u3.ip == 414 if (ct->tuplehash[dir].tuple.src.u3.ip ==
254 ct->tuplehash[!dir].tuple.dst.u3.ip) 415 ct->tuplehash[!dir].tuple.dst.u3.ip)
255 newip = exp->tuple.dst.u3.ip; 416 rtp_addr->ip = rtp_exp->tuple.dst.u3.ip;
256 else 417 else
257 newip = ct->tuplehash[!dir].tuple.dst.u3.ip; 418 rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
258 419
259 exp->saved_ip = exp->tuple.dst.u3.ip; 420 rtp_exp->saved_ip = rtp_exp->tuple.dst.u3.ip;
260 exp->tuple.dst.u3.ip = newip; 421 rtp_exp->tuple.dst.u3.ip = rtp_addr->ip;
261 exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; 422 rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
262 exp->dir = !dir; 423 rtp_exp->dir = !dir;
263 424 rtp_exp->expectfn = ip_nat_sip_expected;
264 /* When you see the packet, we need to NAT it the same as the 425
265 this one. */ 426 rtcp_exp->saved_ip = rtcp_exp->tuple.dst.u3.ip;
266 exp->expectfn = ip_nat_sdp_expect; 427 rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip;
267 428 rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
268 /* Try to get same port: if not, try to change it. */ 429 rtcp_exp->dir = !dir;
269 for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) { 430 rtcp_exp->expectfn = ip_nat_sip_expected;
270 exp->tuple.dst.u.udp.port = htons(port); 431
271 if (nf_ct_expect_related(exp) == 0) 432 /* Try to get same pair of ports: if not, try to change them. */
433 for (port = ntohs(rtp_exp->tuple.dst.u.udp.port);
434 port != 0; port += 2) {
435 rtp_exp->tuple.dst.u.udp.port = htons(port);
436 if (nf_ct_expect_related(rtp_exp) != 0)
437 continue;
438 rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
439 if (nf_ct_expect_related(rtcp_exp) == 0)
272 break; 440 break;
441 nf_ct_unexpect_related(rtp_exp);
273 } 442 }
274 443
275 if (port == 0) 444 if (port == 0)
276 return NF_DROP; 445 goto err1;
446
447 /* Update media port. */
448 if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port &&
449 !ip_nat_sdp_port(skb, dptr, datalen, mediaoff, medialen, port))
450 goto err2;
277 451
278 if (!mangle_sdp(skb, ctinfo, ct, newip, port, dptr)) {
279 nf_ct_unexpect_related(exp);
280 return NF_DROP;
281 }
282 return NF_ACCEPT; 452 return NF_ACCEPT;
453
454err2:
455 nf_ct_unexpect_related(rtp_exp);
456 nf_ct_unexpect_related(rtcp_exp);
457err1:
458 return NF_DROP;
283} 459}
284 460
285static void __exit nf_nat_sip_fini(void) 461static void __exit nf_nat_sip_fini(void)
286{ 462{
287 rcu_assign_pointer(nf_nat_sip_hook, NULL); 463 rcu_assign_pointer(nf_nat_sip_hook, NULL);
288 rcu_assign_pointer(nf_nat_sdp_hook, NULL); 464 rcu_assign_pointer(nf_nat_sip_expect_hook, NULL);
465 rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL);
466 rcu_assign_pointer(nf_nat_sdp_port_hook, NULL);
467 rcu_assign_pointer(nf_nat_sdp_session_hook, NULL);
468 rcu_assign_pointer(nf_nat_sdp_media_hook, NULL);
289 synchronize_rcu(); 469 synchronize_rcu();
290} 470}
291 471
292static int __init nf_nat_sip_init(void) 472static int __init nf_nat_sip_init(void)
293{ 473{
294 BUG_ON(nf_nat_sip_hook != NULL); 474 BUG_ON(nf_nat_sip_hook != NULL);
295 BUG_ON(nf_nat_sdp_hook != NULL); 475 BUG_ON(nf_nat_sip_expect_hook != NULL);
476 BUG_ON(nf_nat_sdp_addr_hook != NULL);
477 BUG_ON(nf_nat_sdp_port_hook != NULL);
478 BUG_ON(nf_nat_sdp_session_hook != NULL);
479 BUG_ON(nf_nat_sdp_media_hook != NULL);
296 rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); 480 rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip);
297 rcu_assign_pointer(nf_nat_sdp_hook, ip_nat_sdp); 481 rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect);
482 rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr);
483 rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port);
484 rcu_assign_pointer(nf_nat_sdp_session_hook, ip_nat_sdp_session);
485 rcu_assign_pointer(nf_nat_sdp_media_hook, ip_nat_sdp_media);
298 return 0; 486 return 0;
299} 487}
300 488
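
All of these entry points are published as RCU-protected function pointers for the conntrack SIP helper to invoke. A minimal sketch of the caller side, using the hook name registered above (the signature matches ip_nat_sip()):

    unsigned int (*nat_sip)(struct sk_buff *skb, const char **dptr,
                            unsigned int *datalen);
    unsigned int ret = NF_ACCEPT;

    rcu_read_lock();
    nat_sip = rcu_dereference(nf_nat_sip_hook);
    if (nat_sip)
            ret = nat_sip(skb, &dptr, &datalen);
    rcu_read_unlock();
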
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 540ce6ae887c..000e080bac5c 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -50,6 +50,7 @@
50#include <net/udp.h> 50#include <net/udp.h>
51 51
52#include <net/netfilter/nf_nat.h> 52#include <net/netfilter/nf_nat.h>
53#include <net/netfilter/nf_conntrack_expect.h>
53#include <net/netfilter/nf_conntrack_helper.h> 54#include <net/netfilter/nf_conntrack_helper.h>
54#include <net/netfilter/nf_nat_helper.h> 55#include <net/netfilter/nf_nat_helper.h>
55 56
@@ -1267,11 +1268,15 @@ static int help(struct sk_buff *skb, unsigned int protoff,
1267 return ret; 1268 return ret;
1268} 1269}
1269 1270
1271static const struct nf_conntrack_expect_policy snmp_exp_policy = {
1272 .max_expected = 0,
1273 .timeout = 180,
1274};
1275
1270static struct nf_conntrack_helper snmp_helper __read_mostly = { 1276static struct nf_conntrack_helper snmp_helper __read_mostly = {
1271 .max_expected = 0,
1272 .timeout = 180,
1273 .me = THIS_MODULE, 1277 .me = THIS_MODULE,
1274 .help = help, 1278 .help = help,
1279 .expect_policy = &snmp_exp_policy,
1275 .name = "snmp", 1280 .name = "snmp",
1276 .tuple.src.l3num = AF_INET, 1281 .tuple.src.l3num = AF_INET,
1277 .tuple.src.u.udp.port = __constant_htons(SNMP_PORT), 1282 .tuple.src.u.udp.port = __constant_htons(SNMP_PORT),
@@ -1279,10 +1284,9 @@ static struct nf_conntrack_helper snmp_helper __read_mostly = {
1279}; 1284};
1280 1285
1281static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { 1286static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
1282 .max_expected = 0,
1283 .timeout = 180,
1284 .me = THIS_MODULE, 1287 .me = THIS_MODULE,
1285 .help = help, 1288 .help = help,
1289 .expect_policy = &snmp_exp_policy,
1286 .name = "snmp_trap", 1290 .name = "snmp_trap",
1287 .tuple.src.l3num = AF_INET, 1291 .tuple.src.l3num = AF_INET,
1288 .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT), 1292 .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT),
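
The SNMP helper changes track a conntrack API move: expectation limits now live in a shared policy object referenced from each helper rather than in struct nf_conntrack_helper itself. The policy struct is presumably just the two fields that migrated:

    struct nf_conntrack_expect_policy {
            unsigned int max_expected; /* concurrent expectations allowed */
            unsigned int timeout;      /* expectation lifetime (seconds) */
    };
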
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index d63474c6b400..552169b41b16 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -51,24 +51,54 @@
51 */ 51 */
52static int sockstat_seq_show(struct seq_file *seq, void *v) 52static int sockstat_seq_show(struct seq_file *seq, void *v)
53{ 53{
54 struct net *net = seq->private;
55
54 socket_seq_show(seq); 56 socket_seq_show(seq);
55 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", 57 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
56 sock_prot_inuse_get(&tcp_prot), 58 sock_prot_inuse_get(net, &tcp_prot),
57 atomic_read(&tcp_orphan_count), 59 atomic_read(&tcp_orphan_count),
58 tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), 60 tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
59 atomic_read(&tcp_memory_allocated)); 61 atomic_read(&tcp_memory_allocated));
60 seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot), 62 seq_printf(seq, "UDP: inuse %d mem %d\n",
63 sock_prot_inuse_get(net, &udp_prot),
61 atomic_read(&udp_memory_allocated)); 64 atomic_read(&udp_memory_allocated));
62 seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot)); 65 seq_printf(seq, "UDPLITE: inuse %d\n",
63 seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot)); 66 sock_prot_inuse_get(net, &udplite_prot));
67 seq_printf(seq, "RAW: inuse %d\n",
68 sock_prot_inuse_get(net, &raw_prot));
64 seq_printf(seq, "FRAG: inuse %d memory %d\n", 69 seq_printf(seq, "FRAG: inuse %d memory %d\n",
65 ip_frag_nqueues(&init_net), ip_frag_mem(&init_net)); 70 ip_frag_nqueues(net), ip_frag_mem(net));
66 return 0; 71 return 0;
67} 72}
68 73
69static int sockstat_seq_open(struct inode *inode, struct file *file) 74static int sockstat_seq_open(struct inode *inode, struct file *file)
70{ 75{
71 return single_open(file, sockstat_seq_show, NULL); 76 int err;
77 struct net *net;
78
79 err = -ENXIO;
80 net = get_proc_net(inode);
81 if (net == NULL)
82 goto err_net;
83
84 err = single_open(file, sockstat_seq_show, net);
85 if (err < 0)
86 goto err_open;
87
88 return 0;
89
90err_open:
91 put_net(net);
92err_net:
93 return err;
94}
95
96static int sockstat_seq_release(struct inode *inode, struct file *file)
97{
98 struct net *net = ((struct seq_file *)file->private_data)->private;
99
100 put_net(net);
101 return single_release(inode, file);
72} 102}
73 103
74static const struct file_operations sockstat_seq_fops = { 104static const struct file_operations sockstat_seq_fops = {
@@ -76,7 +106,7 @@ static const struct file_operations sockstat_seq_fops = {
76 .open = sockstat_seq_open, 106 .open = sockstat_seq_open,
77 .read = seq_read, 107 .read = seq_read,
78 .llseek = seq_lseek, 108 .llseek = seq_lseek,
79 .release = single_release, 109 .release = sockstat_seq_release,
80}; 110};
81 111
82/* snmp items */ 112/* snmp items */
@@ -423,25 +453,42 @@ static const struct file_operations netstat_seq_fops = {
423 .release = single_release, 453 .release = single_release,
424}; 454};
425 455
456static __net_init int ip_proc_init_net(struct net *net)
457{
458 if (!proc_net_fops_create(net, "sockstat", S_IRUGO, &sockstat_seq_fops))
459 return -ENOMEM;
460 return 0;
461}
462
463static __net_exit void ip_proc_exit_net(struct net *net)
464{
465 proc_net_remove(net, "sockstat");
466}
467
468static __net_initdata struct pernet_operations ip_proc_ops = {
469 .init = ip_proc_init_net,
470 .exit = ip_proc_exit_net,
471};
472
426int __init ip_misc_proc_init(void) 473int __init ip_misc_proc_init(void)
427{ 474{
428 int rc = 0; 475 int rc = 0;
429 476
477 if (register_pernet_subsys(&ip_proc_ops))
478 goto out_pernet;
479
430 if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops)) 480 if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops))
431 goto out_netstat; 481 goto out_netstat;
432 482
433 if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops)) 483 if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops))
434 goto out_snmp; 484 goto out_snmp;
435
436 if (!proc_net_fops_create(&init_net, "sockstat", S_IRUGO, &sockstat_seq_fops))
437 goto out_sockstat;
438out: 485out:
439 return rc; 486 return rc;
440out_sockstat:
441 proc_net_remove(&init_net, "snmp");
442out_snmp: 487out_snmp:
443 proc_net_remove(&init_net, "netstat"); 488 proc_net_remove(&init_net, "netstat");
444out_netstat: 489out_netstat:
490 unregister_pernet_subsys(&ip_proc_ops);
491out_pernet:
445 rc = -ENOMEM; 492 rc = -ENOMEM;
446 goto out; 493 goto out;
447} 494}
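
register_pernet_subsys() is what makes /proc/net/sockstat appear once per namespace: ->init runs for each existing and subsequently created namespace, ->exit on teardown. The open/release pair pins the namespace while the file stays open; get_proc_net() is assumed to resolve the owning namespace of the /proc inode and take a reference, roughly:

    /* Returns the net owning a /proc/net inode with a reference
     * held, or NULL if that namespace is already going away. */
    struct net *get_proc_net(const struct inode *inode)
    {
            return maybe_get_net(PDE_NET(PDE(inode)));
    }
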
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a3002fe65b7f..11d7f753a820 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -81,41 +81,34 @@
81#include <linux/netfilter_ipv4.h> 81#include <linux/netfilter_ipv4.h>
82 82
83static struct raw_hashinfo raw_v4_hashinfo = { 83static struct raw_hashinfo raw_v4_hashinfo = {
84 .lock = __RW_LOCK_UNLOCKED(), 84 .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
85}; 85};
86 86
87void raw_hash_sk(struct sock *sk, struct raw_hashinfo *h) 87void raw_hash_sk(struct sock *sk)
88{ 88{
89 struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
89 struct hlist_head *head; 90 struct hlist_head *head;
90 91
91 head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)]; 92 head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)];
92 93
93 write_lock_bh(&h->lock); 94 write_lock_bh(&h->lock);
94 sk_add_node(sk, head); 95 sk_add_node(sk, head);
95 sock_prot_inuse_add(sk->sk_prot, 1); 96 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
96 write_unlock_bh(&h->lock); 97 write_unlock_bh(&h->lock);
97} 98}
98EXPORT_SYMBOL_GPL(raw_hash_sk); 99EXPORT_SYMBOL_GPL(raw_hash_sk);
99 100
100void raw_unhash_sk(struct sock *sk, struct raw_hashinfo *h) 101void raw_unhash_sk(struct sock *sk)
101{ 102{
103 struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
104
102 write_lock_bh(&h->lock); 105 write_lock_bh(&h->lock);
103 if (sk_del_node_init(sk)) 106 if (sk_del_node_init(sk))
104 sock_prot_inuse_add(sk->sk_prot, -1); 107 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
105 write_unlock_bh(&h->lock); 108 write_unlock_bh(&h->lock);
106} 109}
107EXPORT_SYMBOL_GPL(raw_unhash_sk); 110EXPORT_SYMBOL_GPL(raw_unhash_sk);
108 111
109static void raw_v4_hash(struct sock *sk)
110{
111 raw_hash_sk(sk, &raw_v4_hashinfo);
112}
113
114static void raw_v4_unhash(struct sock *sk)
115{
116 raw_unhash_sk(sk, &raw_v4_hashinfo);
117}
118
119static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, 112static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
120 unsigned short num, __be32 raddr, __be32 laddr, int dif) 113 unsigned short num, __be32 raddr, __be32 laddr, int dif)
121{ 114{
@@ -124,7 +117,7 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
124 sk_for_each_from(sk, node) { 117 sk_for_each_from(sk, node) {
125 struct inet_sock *inet = inet_sk(sk); 118 struct inet_sock *inet = inet_sk(sk);
126 119
127 if (sk->sk_net == net && inet->num == num && 120 if (net_eq(sock_net(sk), net) && inet->num == num &&
128 !(inet->daddr && inet->daddr != raddr) && 121 !(inet->daddr && inet->daddr != raddr) &&
129 !(inet->rcv_saddr && inet->rcv_saddr != laddr) && 122 !(inet->rcv_saddr && inet->rcv_saddr != laddr) &&
130 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) 123 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
@@ -175,7 +168,7 @@ static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
175 if (hlist_empty(head)) 168 if (hlist_empty(head))
176 goto out; 169 goto out;
177 170
178 net = skb->dev->nd_net; 171 net = dev_net(skb->dev);
179 sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, 172 sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
180 iph->saddr, iph->daddr, 173 iph->saddr, iph->daddr,
181 skb->dev->ifindex); 174 skb->dev->ifindex);
@@ -283,7 +276,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
283 raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); 276 raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
284 if (raw_sk != NULL) { 277 if (raw_sk != NULL) {
285 iph = (struct iphdr *)skb->data; 278 iph = (struct iphdr *)skb->data;
286 net = skb->dev->nd_net; 279 net = dev_net(skb->dev);
287 280
288 while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, 281 while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
289 iph->daddr, iph->saddr, 282 iph->daddr, iph->saddr,
@@ -506,7 +499,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
506 ipc.oif = sk->sk_bound_dev_if; 499 ipc.oif = sk->sk_bound_dev_if;
507 500
508 if (msg->msg_controllen) { 501 if (msg->msg_controllen) {
509 err = ip_cmsg_send(msg, &ipc); 502 err = ip_cmsg_send(sock_net(sk), msg, &ipc);
510 if (err) 503 if (err)
511 goto out; 504 goto out;
512 if (ipc.opt) 505 if (ipc.opt)
@@ -560,7 +553,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
560 } 553 }
561 554
562 security_sk_classify_flow(sk, &fl); 555 security_sk_classify_flow(sk, &fl);
563 err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); 556 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
564 } 557 }
565 if (err) 558 if (err)
566 goto done; 559 goto done;
@@ -627,7 +620,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
627 620
628 if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) 621 if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
629 goto out; 622 goto out;
630 chk_addr_ret = inet_addr_type(sk->sk_net, addr->sin_addr.s_addr); 623 chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
631 ret = -EADDRNOTAVAIL; 624 ret = -EADDRNOTAVAIL;
632 if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && 625 if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
633 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) 626 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
@@ -825,8 +818,6 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
825 } 818 }
826} 819}
827 820
828DEFINE_PROTO_INUSE(raw)
829
830struct proto raw_prot = { 821struct proto raw_prot = {
831 .name = "RAW", 822 .name = "RAW",
832 .owner = THIS_MODULE, 823 .owner = THIS_MODULE,
@@ -841,14 +832,14 @@ struct proto raw_prot = {
841 .recvmsg = raw_recvmsg, 832 .recvmsg = raw_recvmsg,
842 .bind = raw_bind, 833 .bind = raw_bind,
843 .backlog_rcv = raw_rcv_skb, 834 .backlog_rcv = raw_rcv_skb,
844 .hash = raw_v4_hash, 835 .hash = raw_hash_sk,
845 .unhash = raw_v4_unhash, 836 .unhash = raw_unhash_sk,
846 .obj_size = sizeof(struct raw_sock), 837 .obj_size = sizeof(struct raw_sock),
838 .h.raw_hash = &raw_v4_hashinfo,
847#ifdef CONFIG_COMPAT 839#ifdef CONFIG_COMPAT
848 .compat_setsockopt = compat_raw_setsockopt, 840 .compat_setsockopt = compat_raw_setsockopt,
849 .compat_getsockopt = compat_raw_getsockopt, 841 .compat_getsockopt = compat_raw_getsockopt,
850#endif 842#endif
851 REF_PROTO_INUSE(raw)
852}; 843};
853 844
854#ifdef CONFIG_PROC_FS 845#ifdef CONFIG_PROC_FS
@@ -862,7 +853,7 @@ static struct sock *raw_get_first(struct seq_file *seq)
862 struct hlist_node *node; 853 struct hlist_node *node;
863 854
864 sk_for_each(sk, node, &state->h->ht[state->bucket]) 855 sk_for_each(sk, node, &state->h->ht[state->bucket])
865 if (sk->sk_net == state->p.net) 856 if (sock_net(sk) == seq_file_net(seq))
866 goto found; 857 goto found;
867 } 858 }
868 sk = NULL; 859 sk = NULL;
@@ -878,7 +869,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
878 sk = sk_next(sk); 869 sk = sk_next(sk);
879try_again: 870try_again:
880 ; 871 ;
881 } while (sk && sk->sk_net != state->p.net); 872 } while (sk && sock_net(sk) != seq_file_net(seq));
882 873
883 if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { 874 if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
884 sk = sk_head(&state->h->ht[state->bucket]); 875 sk = sk_head(&state->h->ht[state->bucket]);
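
The sk->sk_net comparisons become net_eq() so namespace checks can vanish entirely on !CONFIG_NET_NS builds. The helper is a trivial pointer comparison when namespaces are enabled, and constant true otherwise, roughly:

    #ifdef CONFIG_NET_NS
    static inline int net_eq(const struct net *net1, const struct net *net2)
    {
            return net1 == net2;
    }
    #else
    static inline int net_eq(const struct net *net1, const struct net *net2)
    {
            return 1;
    }
    #endif
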
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 7b5e8e1d94be..139799f8a8a1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -118,21 +118,19 @@
118#define RT_GC_TIMEOUT (300*HZ) 118#define RT_GC_TIMEOUT (300*HZ)
119 119
120static int ip_rt_max_size; 120static int ip_rt_max_size;
121static int ip_rt_gc_timeout = RT_GC_TIMEOUT; 121static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
122static int ip_rt_gc_interval = 60 * HZ; 122static int ip_rt_gc_interval __read_mostly = 60 * HZ;
123static int ip_rt_gc_min_interval = HZ / 2; 123static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
124static int ip_rt_redirect_number = 9; 124static int ip_rt_redirect_number __read_mostly = 9;
125static int ip_rt_redirect_load = HZ / 50; 125static int ip_rt_redirect_load __read_mostly = HZ / 50;
126static int ip_rt_redirect_silence = ((HZ / 50) << (9 + 1)); 126static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
127static int ip_rt_error_cost = HZ; 127static int ip_rt_error_cost __read_mostly = HZ;
128static int ip_rt_error_burst = 5 * HZ; 128static int ip_rt_error_burst __read_mostly = 5 * HZ;
129static int ip_rt_gc_elasticity = 8; 129static int ip_rt_gc_elasticity __read_mostly = 8;
130static int ip_rt_mtu_expires = 10 * 60 * HZ; 130static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
131static int ip_rt_min_pmtu = 512 + 20 + 20; 131static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
132static int ip_rt_min_advmss = 256; 132static int ip_rt_min_advmss __read_mostly = 256;
133static int ip_rt_secret_interval = 10 * 60 * HZ; 133static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
134
135#define RTprint(a...) printk(KERN_DEBUG a)
136 134
137static void rt_worker_func(struct work_struct *work); 135static void rt_worker_func(struct work_struct *work);
138static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); 136static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
@@ -252,40 +250,41 @@ static inline void rt_hash_lock_init(void)
252} 250}
253#endif 251#endif
254 252
255static struct rt_hash_bucket *rt_hash_table; 253static struct rt_hash_bucket *rt_hash_table __read_mostly;
256static unsigned rt_hash_mask; 254static unsigned rt_hash_mask __read_mostly;
257static unsigned int rt_hash_log; 255static unsigned int rt_hash_log __read_mostly;
258static atomic_t rt_genid; 256static atomic_t rt_genid __read_mostly;
259 257
260static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); 258static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
261#define RT_CACHE_STAT_INC(field) \ 259#define RT_CACHE_STAT_INC(field) \
262 (__raw_get_cpu_var(rt_cache_stat).field++) 260 (__raw_get_cpu_var(rt_cache_stat).field++)
263 261
264static unsigned int rt_hash_code(u32 daddr, u32 saddr) 262static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx)
265{ 263{
266 return jhash_2words(daddr, saddr, atomic_read(&rt_genid)) 264 return jhash_3words((__force u32)(__be32)(daddr),
265 (__force u32)(__be32)(saddr),
266 idx, atomic_read(&rt_genid))
267 & rt_hash_mask; 267 & rt_hash_mask;
268} 268}
269 269
270#define rt_hash(daddr, saddr, idx) \
271 rt_hash_code((__force u32)(__be32)(daddr),\
272 (__force u32)(__be32)(saddr) ^ ((idx) << 5))
273
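[Editorial note] Two things change in the hash above: the interface index becomes a real third jhash word instead of being XORed into saddr, and rt_genid is mixed in as the hash seed. The payoff of the seed, sketched under the assumption that lookups also compare r->rt_genid (as the iterator hunks below do):

	/* Illustration: flush the whole routing cache by bumping one counter.
	 * New lookups hash to fresh positions and stale entries fail the
	 * rt_genid comparison, so they age out without walking the table. */
	static void rt_cache_invalidate_sketch(void)
	{
		atomic_inc(&rt_genid);
	}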
274#ifdef CONFIG_PROC_FS 270#ifdef CONFIG_PROC_FS
275struct rt_cache_iter_state { 271struct rt_cache_iter_state {
272 struct seq_net_private p;
276 int bucket; 273 int bucket;
277 int genid; 274 int genid;
278}; 275};
279 276
280static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) 277static struct rtable *rt_cache_get_first(struct seq_file *seq)
281{ 278{
279 struct rt_cache_iter_state *st = seq->private;
282 struct rtable *r = NULL; 280 struct rtable *r = NULL;
283 281
284 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { 282 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
285 rcu_read_lock_bh(); 283 rcu_read_lock_bh();
286 r = rcu_dereference(rt_hash_table[st->bucket].chain); 284 r = rcu_dereference(rt_hash_table[st->bucket].chain);
287 while (r) { 285 while (r) {
288 if (r->rt_genid == st->genid) 286 if (dev_net(r->u.dst.dev) == seq_file_net(seq) &&
287 r->rt_genid == st->genid)
289 return r; 288 return r;
290 r = rcu_dereference(r->u.dst.rt_next); 289 r = rcu_dereference(r->u.dst.rt_next);
291 } 290 }
@@ -294,8 +293,10 @@ static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st)
294 return r; 293 return r;
295} 294}
296 295
297static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r) 296static struct rtable *__rt_cache_get_next(struct seq_file *seq,
297 struct rtable *r)
298{ 298{
299 struct rt_cache_iter_state *st = seq->private;
299 r = r->u.dst.rt_next; 300 r = r->u.dst.rt_next;
300 while (!r) { 301 while (!r) {
301 rcu_read_unlock_bh(); 302 rcu_read_unlock_bh();
@@ -307,25 +308,34 @@ static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct r
307 return rcu_dereference(r); 308 return rcu_dereference(r);
308} 309}
309 310
310static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos) 311static struct rtable *rt_cache_get_next(struct seq_file *seq,
312 struct rtable *r)
313{
314 struct rt_cache_iter_state *st = seq->private;
315 while ((r = __rt_cache_get_next(seq, r)) != NULL) {
316 if (dev_net(r->u.dst.dev) != seq_file_net(seq))
317 continue;
318 if (r->rt_genid == st->genid)
319 break;
320 }
321 return r;
322}
323
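[Editorial note] The iterator now filters on namespace with accessor helpers instead of poking at nd_net directly; dev_net() and seq_file_net() compile down to the init_net pointer when CONFIG_NET_NS is off, so callers stay correct either way. The predicate both functions above apply, restated for illustration:

	/* Does this cached route belong to the namespace this seq_file shows? */
	static inline int rt_visible_in(struct rtable *r, struct seq_file *seq)
	{
		return dev_net(r->u.dst.dev) == seq_file_net(seq);
	}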
324static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos)
311{ 325{
312 struct rtable *r = rt_cache_get_first(st); 326 struct rtable *r = rt_cache_get_first(seq);
313 327
314 if (r) 328 if (r)
315 while (pos && (r = rt_cache_get_next(st, r))) { 329 while (pos && (r = rt_cache_get_next(seq, r)))
316 if (r->rt_genid != st->genid)
317 continue;
318 --pos; 330 --pos;
319 }
320 return pos ? NULL : r; 331 return pos ? NULL : r;
321} 332}
322 333
323static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) 334static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
324{ 335{
325 struct rt_cache_iter_state *st = seq->private; 336 struct rt_cache_iter_state *st = seq->private;
326
327 if (*pos) 337 if (*pos)
328 return rt_cache_get_idx(st, *pos - 1); 338 return rt_cache_get_idx(seq, *pos - 1);
329 st->genid = atomic_read(&rt_genid); 339 st->genid = atomic_read(&rt_genid);
330 return SEQ_START_TOKEN; 340 return SEQ_START_TOKEN;
331} 341}
@@ -333,12 +343,11 @@ static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
333static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) 343static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
334{ 344{
335 struct rtable *r; 345 struct rtable *r;
336 struct rt_cache_iter_state *st = seq->private;
337 346
338 if (v == SEQ_START_TOKEN) 347 if (v == SEQ_START_TOKEN)
339 r = rt_cache_get_first(st); 348 r = rt_cache_get_first(seq);
340 else 349 else
341 r = rt_cache_get_next(st, v); 350 r = rt_cache_get_next(seq, v);
342 ++*pos; 351 ++*pos;
343 return r; 352 return r;
344} 353}
@@ -390,7 +399,7 @@ static const struct seq_operations rt_cache_seq_ops = {
390 399
391static int rt_cache_seq_open(struct inode *inode, struct file *file) 400static int rt_cache_seq_open(struct inode *inode, struct file *file)
392{ 401{
393 return seq_open_private(file, &rt_cache_seq_ops, 402 return seq_open_net(inode, file, &rt_cache_seq_ops,
394 sizeof(struct rt_cache_iter_state)); 403 sizeof(struct rt_cache_iter_state));
395} 404}
396 405
@@ -399,7 +408,7 @@ static const struct file_operations rt_cache_seq_fops = {
399 .open = rt_cache_seq_open, 408 .open = rt_cache_seq_open,
400 .read = seq_read, 409 .read = seq_read,
401 .llseek = seq_lseek, 410 .llseek = seq_lseek,
402 .release = seq_release_private, 411 .release = seq_release_net,
403}; 412};
404 413
405 414
@@ -533,7 +542,7 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
533} 542}
534#endif 543#endif
535 544
536static __init int ip_rt_proc_init(struct net *net) 545static int __net_init ip_rt_do_proc_init(struct net *net)
537{ 546{
538 struct proc_dir_entry *pde; 547 struct proc_dir_entry *pde;
539 548
@@ -564,25 +573,43 @@ err2:
564err1: 573err1:
565 return -ENOMEM; 574 return -ENOMEM;
566} 575}
576
577static void __net_exit ip_rt_do_proc_exit(struct net *net)
578{
579 remove_proc_entry("rt_cache", net->proc_net_stat);
580 remove_proc_entry("rt_cache", net->proc_net);
581 remove_proc_entry("rt_acct", net->proc_net);
582}
583
584static struct pernet_operations ip_rt_proc_ops __net_initdata = {
585 .init = ip_rt_do_proc_init,
586 .exit = ip_rt_do_proc_exit,
587};
588
589static int __init ip_rt_proc_init(void)
590{
591 return register_pernet_subsys(&ip_rt_proc_ops);
592}
593
567#else 594#else
568static inline int ip_rt_proc_init(struct net *net) 595static inline int ip_rt_proc_init(void)
569{ 596{
570 return 0; 597 return 0;
571} 598}
572#endif /* CONFIG_PROC_FS */ 599#endif /* CONFIG_PROC_FS */
573 600
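[Editorial note] ip_rt_proc_init() is now a one-time registration; the pernet core then drives ip_rt_do_proc_init()/ip_rt_do_proc_exit() for every namespace, init_net included. The general shape of the pattern, sketched with assumed example_* names:

	/* Pernet boilerplate in miniature: register once, and .init/.exit run
	 * per network namespace as namespaces are created and torn down. */
	static int __net_init example_net_init(struct net *net)
	{
		return 0;	/* per-namespace setup goes here */
	}

	static void __net_exit example_net_exit(struct net *net)
	{
		/* per-namespace teardown */
	}

	static struct pernet_operations example_ops __net_initdata = {
		.init = example_net_init,
		.exit = example_net_exit,
	};

	static int __init example_module_init(void)
	{
		return register_pernet_subsys(&example_ops);
	}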
574static __inline__ void rt_free(struct rtable *rt) 601static inline void rt_free(struct rtable *rt)
575{ 602{
576 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 603 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
577} 604}
578 605
579static __inline__ void rt_drop(struct rtable *rt) 606static inline void rt_drop(struct rtable *rt)
580{ 607{
581 ip_rt_put(rt); 608 ip_rt_put(rt);
582 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 609 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
583} 610}

584 611
585static __inline__ int rt_fast_clean(struct rtable *rth) 612static inline int rt_fast_clean(struct rtable *rth)
586{ 613{
587 /* Kill broadcast/multicast entries very aggressively, if they 614
588 collide in hash table with more useful entries */ 615 collide in hash table with more useful entries */
@@ -590,7 +617,7 @@ static __inline__ int rt_fast_clean(struct rtable *rth)
590 rth->fl.iif && rth->u.dst.rt_next; 617 rth->fl.iif && rth->u.dst.rt_next;
591} 618}
592 619
593static __inline__ int rt_valuable(struct rtable *rth) 620static inline int rt_valuable(struct rtable *rth)
594{ 621{
595 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || 622 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
596 rth->u.dst.expires; 623 rth->u.dst.expires;
@@ -652,7 +679,7 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
652 679
653static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) 680static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
654{ 681{
655 return rt1->u.dst.dev->nd_net == rt2->u.dst.dev->nd_net; 682 return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev);
656} 683}
657 684
658/* 685/*
@@ -1131,10 +1158,12 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1131 __be32 skeys[2] = { saddr, 0 }; 1158 __be32 skeys[2] = { saddr, 0 };
1132 int ikeys[2] = { dev->ifindex, 0 }; 1159 int ikeys[2] = { dev->ifindex, 0 };
1133 struct netevent_redirect netevent; 1160 struct netevent_redirect netevent;
1161 struct net *net;
1134 1162
1135 if (!in_dev) 1163 if (!in_dev)
1136 return; 1164 return;
1137 1165
1166 net = dev_net(dev);
1138 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) 1167 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
1139 || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) 1168 || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw)
1140 || ipv4_is_zeronet(new_gw)) 1169 || ipv4_is_zeronet(new_gw))
@@ -1146,7 +1175,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1146 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) 1175 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
1147 goto reject_redirect; 1176 goto reject_redirect;
1148 } else { 1177 } else {
1149 if (inet_addr_type(&init_net, new_gw) != RTN_UNICAST) 1178 if (inet_addr_type(net, new_gw) != RTN_UNICAST)
1150 goto reject_redirect; 1179 goto reject_redirect;
1151 } 1180 }
1152 1181
@@ -1164,7 +1193,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1164 rth->fl.fl4_src != skeys[i] || 1193 rth->fl.fl4_src != skeys[i] ||
1165 rth->fl.oif != ikeys[k] || 1194 rth->fl.oif != ikeys[k] ||
1166 rth->fl.iif != 0 || 1195 rth->fl.iif != 0 ||
1167 rth->rt_genid != atomic_read(&rt_genid)) { 1196 rth->rt_genid != atomic_read(&rt_genid) ||
1197 !net_eq(dev_net(rth->u.dst.dev), net)) {
1168 rthp = &rth->u.dst.rt_next; 1198 rthp = &rth->u.dst.rt_next;
1169 continue; 1199 continue;
1170 } 1200 }
@@ -1256,7 +1286,7 @@ reject_redirect:
1256 1286
1257static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) 1287static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1258{ 1288{
1259 struct rtable *rt = (struct rtable*)dst; 1289 struct rtable *rt = (struct rtable *)dst;
1260 struct dst_entry *ret = dst; 1290 struct dst_entry *ret = dst;
1261 1291
1262 if (rt) { 1292 if (rt) {
@@ -1297,7 +1327,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1297 1327
1298void ip_rt_send_redirect(struct sk_buff *skb) 1328void ip_rt_send_redirect(struct sk_buff *skb)
1299{ 1329{
1300 struct rtable *rt = (struct rtable*)skb->dst; 1330 struct rtable *rt = skb->rtable;
1301 struct in_device *in_dev = in_dev_get(rt->u.dst.dev); 1331 struct in_device *in_dev = in_dev_get(rt->u.dst.dev);
1302 1332
1303 if (!in_dev) 1333 if (!in_dev)
@@ -1346,7 +1376,7 @@ out:
1346 1376
1347static int ip_error(struct sk_buff *skb) 1377static int ip_error(struct sk_buff *skb)
1348{ 1378{
1349 struct rtable *rt = (struct rtable*)skb->dst; 1379 struct rtable *rt = skb->rtable;
1350 unsigned long now; 1380 unsigned long now;
1351 int code; 1381 int code;
1352 1382
@@ -1388,7 +1418,7 @@ out: kfree_skb(skb);
1388static const unsigned short mtu_plateau[] = 1418static const unsigned short mtu_plateau[] =
1389{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 }; 1419{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };
1390 1420
1391static __inline__ unsigned short guess_mtu(unsigned short old_mtu) 1421static inline unsigned short guess_mtu(unsigned short old_mtu)
1392{ 1422{
1393 int i; 1423 int i;
1394 1424
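[Editorial note] guess_mtu() itself only loses the __inline__ spelling here, but a worked example helps: the plateau table is searched for the largest value strictly below the MTU that just failed, so a 1500-byte path that draws a fragmentation-needed error steps down to 1492, then 576, and so on. A sketch of that walk, with the floor value assumed from the surrounding code:

	/* First plateau entry smaller than the failing MTU wins;
	 * 68 is the assumed IPv4 minimum fallback. */
	static unsigned short guess_mtu_sketch(unsigned short old_mtu)
	{
		int i;

		for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++)
			if (old_mtu > mtu_plateau[i])
				return mtu_plateau[i];
		return 68;
	}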
@@ -1423,7 +1453,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1423 rth->rt_src == iph->saddr && 1453 rth->rt_src == iph->saddr &&
1424 rth->fl.iif == 0 && 1454 rth->fl.iif == 0 &&
1425 !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) && 1455 !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) &&
1426 rth->u.dst.dev->nd_net == net && 1456 net_eq(dev_net(rth->u.dst.dev), net) &&
1427 rth->rt_genid == atomic_read(&rt_genid)) { 1457 rth->rt_genid == atomic_read(&rt_genid)) {
1428 unsigned short mtu = new_mtu; 1458 unsigned short mtu = new_mtu;
1429 1459
@@ -1499,9 +1529,9 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
1499{ 1529{
1500 struct rtable *rt = (struct rtable *) dst; 1530 struct rtable *rt = (struct rtable *) dst;
1501 struct in_device *idev = rt->idev; 1531 struct in_device *idev = rt->idev;
1502 if (dev != dev->nd_net->loopback_dev && idev && idev->dev == dev) { 1532 if (dev != dev_net(dev)->loopback_dev && idev && idev->dev == dev) {
1503 struct in_device *loopback_idev = 1533 struct in_device *loopback_idev =
1504 in_dev_get(dev->nd_net->loopback_dev); 1534 in_dev_get(dev_net(dev)->loopback_dev);
1505 if (loopback_idev) { 1535 if (loopback_idev) {
1506 rt->idev = loopback_idev; 1536 rt->idev = loopback_idev;
1507 in_dev_put(idev); 1537 in_dev_put(idev);
@@ -1515,7 +1545,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
1515 1545
1516 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); 1546 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1517 1547
1518 rt = (struct rtable *) skb->dst; 1548 rt = skb->rtable;
1519 if (rt) 1549 if (rt)
1520 dst_set_expires(&rt->u.dst, 0); 1550 dst_set_expires(&rt->u.dst, 0);
1521} 1551}
@@ -1545,7 +1575,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1545 1575
1546 if (rt->fl.iif == 0) 1576 if (rt->fl.iif == 0)
1547 src = rt->rt_src; 1577 src = rt->rt_src;
1548 else if (fib_lookup(rt->u.dst.dev->nd_net, &rt->fl, &res) == 0) { 1578 else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) {
1549 src = FIB_RES_PREFSRC(res); 1579 src = FIB_RES_PREFSRC(res);
1550 fib_res_put(&res); 1580 fib_res_put(&res);
1551 } else 1581 } else
@@ -1675,7 +1705,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1675 1705
1676 in_dev_put(in_dev); 1706 in_dev_put(in_dev);
1677 hash = rt_hash(daddr, saddr, dev->ifindex); 1707 hash = rt_hash(daddr, saddr, dev->ifindex);
1678 return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst); 1708 return rt_intern_hash(hash, rth, &skb->rtable);
1679 1709
1680e_nobufs: 1710e_nobufs:
1681 in_dev_put(in_dev); 1711 in_dev_put(in_dev);
@@ -1718,11 +1748,11 @@ static void ip_handle_martian_source(struct net_device *dev,
1718#endif 1748#endif
1719} 1749}
1720 1750
1721static inline int __mkroute_input(struct sk_buff *skb, 1751static int __mkroute_input(struct sk_buff *skb,
1722 struct fib_result* res, 1752 struct fib_result *res,
1723 struct in_device *in_dev, 1753 struct in_device *in_dev,
1724 __be32 daddr, __be32 saddr, u32 tos, 1754 __be32 daddr, __be32 saddr, u32 tos,
1725 struct rtable **result) 1755 struct rtable **result)
1726{ 1756{
1727 1757
1728 struct rtable *rth; 1758 struct rtable *rth;
@@ -1814,11 +1844,11 @@ static inline int __mkroute_input(struct sk_buff *skb,
1814 return err; 1844 return err;
1815} 1845}
1816 1846
1817static inline int ip_mkroute_input(struct sk_buff *skb, 1847static int ip_mkroute_input(struct sk_buff *skb,
1818 struct fib_result* res, 1848 struct fib_result *res,
1819 const struct flowi *fl, 1849 const struct flowi *fl,
1820 struct in_device *in_dev, 1850 struct in_device *in_dev,
1821 __be32 daddr, __be32 saddr, u32 tos) 1851 __be32 daddr, __be32 saddr, u32 tos)
1822{ 1852{
1823 struct rtable* rth = NULL; 1853 struct rtable* rth = NULL;
1824 int err; 1854 int err;
@@ -1836,7 +1866,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
1836 1866
1837 /* put it into the cache */ 1867 /* put it into the cache */
1838 hash = rt_hash(daddr, saddr, fl->iif); 1868 hash = rt_hash(daddr, saddr, fl->iif);
1839 return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); 1869 return rt_intern_hash(hash, rth, &skb->rtable);
1840} 1870}
1841 1871
1842/* 1872/*
@@ -1869,7 +1899,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1869 __be32 spec_dst; 1899 __be32 spec_dst;
1870 int err = -EINVAL; 1900 int err = -EINVAL;
1871 int free_res = 0; 1901 int free_res = 0;
1872 struct net * net = dev->nd_net; 1902 struct net * net = dev_net(dev);
1873 1903
1874 /* IP on this device is disabled. */ 1904 /* IP on this device is disabled. */
1875 1905
@@ -1992,7 +2022,7 @@ local_input:
1992 } 2022 }
1993 rth->rt_type = res.type; 2023 rth->rt_type = res.type;
1994 hash = rt_hash(daddr, saddr, fl.iif); 2024 hash = rt_hash(daddr, saddr, fl.iif);
1995 err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); 2025 err = rt_intern_hash(hash, rth, &skb->rtable);
1996 goto done; 2026 goto done;
1997 2027
1998no_route: 2028no_route:
@@ -2040,25 +2070,25 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2040 int iif = dev->ifindex; 2070 int iif = dev->ifindex;
2041 struct net *net; 2071 struct net *net;
2042 2072
2043 net = dev->nd_net; 2073 net = dev_net(dev);
2044 tos &= IPTOS_RT_MASK; 2074 tos &= IPTOS_RT_MASK;
2045 hash = rt_hash(daddr, saddr, iif); 2075 hash = rt_hash(daddr, saddr, iif);
2046 2076
2047 rcu_read_lock(); 2077 rcu_read_lock();
2048 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2078 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2049 rth = rcu_dereference(rth->u.dst.rt_next)) { 2079 rth = rcu_dereference(rth->u.dst.rt_next)) {
2050 if (rth->fl.fl4_dst == daddr && 2080 if (((rth->fl.fl4_dst ^ daddr) |
2051 rth->fl.fl4_src == saddr && 2081 (rth->fl.fl4_src ^ saddr) |
2052 rth->fl.iif == iif && 2082 (rth->fl.iif ^ iif) |
2053 rth->fl.oif == 0 && 2083 rth->fl.oif |
2084 (rth->fl.fl4_tos ^ tos)) == 0 &&
2054 rth->fl.mark == skb->mark && 2085 rth->fl.mark == skb->mark &&
2055 rth->fl.fl4_tos == tos && 2086 net_eq(dev_net(rth->u.dst.dev), net) &&
2056 rth->u.dst.dev->nd_net == net &&
2057 rth->rt_genid == atomic_read(&rt_genid)) { 2087 rth->rt_genid == atomic_read(&rt_genid)) {
2058 dst_use(&rth->u.dst, jiffies); 2088 dst_use(&rth->u.dst, jiffies);
2059 RT_CACHE_STAT_INC(in_hit); 2089 RT_CACHE_STAT_INC(in_hit);
2060 rcu_read_unlock(); 2090 rcu_read_unlock();
2061 skb->dst = (struct dst_entry*)rth; 2091 skb->rtable = rth;
2062 return 0; 2092 return 0;
2063 } 2093 }
2064 RT_CACHE_STAT_INC(in_hlist_search); 2094 RT_CACHE_STAT_INC(in_hlist_search);
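[Editorial note] The rewritten key compare in ip_route_input() folds five equality tests into one branch: each (a ^ b) term is zero exactly when the fields match, and OR-ing the terms yields zero only when all of them do (rth->fl.oif appears bare because an input route must have oif == 0). Spelled out as a predicate over the same fields, for illustration:

	/* One-branch key match, equivalent to the chained == tests it
	 * replaces (sketch; sparse would want __force casts on the |). */
	static inline int in_route_match(const struct rtable *rth, __be32 daddr,
					 __be32 saddr, int iif, u32 tos)
	{
		return ((rth->fl.fl4_dst ^ daddr) |
			(rth->fl.fl4_src ^ saddr) |
			(rth->fl.iif ^ iif) |
			rth->fl.oif |			/* must be zero */
			(rth->fl.fl4_tos ^ tos)) == 0;
	}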
@@ -2100,12 +2130,12 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2100 return ip_route_input_slow(skb, daddr, saddr, tos, dev); 2130 return ip_route_input_slow(skb, daddr, saddr, tos, dev);
2101} 2131}
2102 2132
2103static inline int __mkroute_output(struct rtable **result, 2133static int __mkroute_output(struct rtable **result,
2104 struct fib_result* res, 2134 struct fib_result *res,
2105 const struct flowi *fl, 2135 const struct flowi *fl,
2106 const struct flowi *oldflp, 2136 const struct flowi *oldflp,
2107 struct net_device *dev_out, 2137 struct net_device *dev_out,
2108 unsigned flags) 2138 unsigned flags)
2109{ 2139{
2110 struct rtable *rth; 2140 struct rtable *rth;
2111 struct in_device *in_dev; 2141 struct in_device *in_dev;
@@ -2220,12 +2250,12 @@ static inline int __mkroute_output(struct rtable **result,
2220 return err; 2250 return err;
2221} 2251}
2222 2252
2223static inline int ip_mkroute_output(struct rtable **rp, 2253static int ip_mkroute_output(struct rtable **rp,
2224 struct fib_result* res, 2254 struct fib_result *res,
2225 const struct flowi *fl, 2255 const struct flowi *fl,
2226 const struct flowi *oldflp, 2256 const struct flowi *oldflp,
2227 struct net_device *dev_out, 2257 struct net_device *dev_out,
2228 unsigned flags) 2258 unsigned flags)
2229{ 2259{
2230 struct rtable *rth = NULL; 2260 struct rtable *rth = NULL;
2231 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); 2261 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
@@ -2455,7 +2485,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2455 rth->fl.mark == flp->mark && 2485 rth->fl.mark == flp->mark &&
2456 !((rth->fl.fl4_tos ^ flp->fl4_tos) & 2486 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
2457 (IPTOS_RT_MASK | RTO_ONLINK)) && 2487 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2458 rth->u.dst.dev->nd_net == net && 2488 net_eq(dev_net(rth->u.dst.dev), net) &&
2459 rth->rt_genid == atomic_read(&rt_genid)) { 2489 rth->rt_genid == atomic_read(&rt_genid)) {
2460 dst_use(&rth->u.dst, jiffies); 2490 dst_use(&rth->u.dst, jiffies);
2461 RT_CACHE_STAT_INC(out_hit); 2491 RT_CACHE_STAT_INC(out_hit);
@@ -2487,7 +2517,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2487}; 2517};
2488 2518
2489 2519
2490static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk) 2520static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
2491{ 2521{
2492 struct rtable *ort = *rp; 2522 struct rtable *ort = *rp;
2493 struct rtable *rt = (struct rtable *) 2523 struct rtable *rt = (struct rtable *)
@@ -2547,7 +2577,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
2547 err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, 2577 err = __xfrm_lookup((struct dst_entry **)rp, flp, sk,
2548 flags ? XFRM_LOOKUP_WAIT : 0); 2578 flags ? XFRM_LOOKUP_WAIT : 0);
2549 if (err == -EREMOTE) 2579 if (err == -EREMOTE)
2550 err = ipv4_dst_blackhole(rp, flp, sk); 2580 err = ipv4_dst_blackhole(rp, flp);
2551 2581
2552 return err; 2582 return err;
2553 } 2583 }
@@ -2565,7 +2595,7 @@ int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
2565static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 2595static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2566 int nowait, unsigned int flags) 2596 int nowait, unsigned int flags)
2567{ 2597{
2568 struct rtable *rt = (struct rtable*)skb->dst; 2598 struct rtable *rt = skb->rtable;
2569 struct rtmsg *r; 2599 struct rtmsg *r;
2570 struct nlmsghdr *nlh; 2600 struct nlmsghdr *nlh;
2571 long expires; 2601 long expires;
@@ -2658,7 +2688,7 @@ nla_put_failure:
2658 2688
2659static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2689static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2660{ 2690{
2661 struct net *net = in_skb->sk->sk_net; 2691 struct net *net = sock_net(in_skb->sk);
2662 struct rtmsg *rtm; 2692 struct rtmsg *rtm;
2663 struct nlattr *tb[RTA_MAX+1]; 2693 struct nlattr *tb[RTA_MAX+1];
2664 struct rtable *rt = NULL; 2694 struct rtable *rt = NULL;
@@ -2668,9 +2698,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2668 int err; 2698 int err;
2669 struct sk_buff *skb; 2699 struct sk_buff *skb;
2670 2700
2671 if (net != &init_net)
2672 return -EINVAL;
2673
2674 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); 2701 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2675 if (err < 0) 2702 if (err < 0)
2676 goto errout; 2703 goto errout;
@@ -2700,7 +2727,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2700 if (iif) { 2727 if (iif) {
2701 struct net_device *dev; 2728 struct net_device *dev;
2702 2729
2703 dev = __dev_get_by_index(&init_net, iif); 2730 dev = __dev_get_by_index(net, iif);
2704 if (dev == NULL) { 2731 if (dev == NULL) {
2705 err = -ENODEV; 2732 err = -ENODEV;
2706 goto errout_free; 2733 goto errout_free;
@@ -2712,7 +2739,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2712 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); 2739 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2713 local_bh_enable(); 2740 local_bh_enable();
2714 2741
2715 rt = (struct rtable*) skb->dst; 2742 rt = skb->rtable;
2716 if (err == 0 && rt->u.dst.error) 2743 if (err == 0 && rt->u.dst.error)
2717 err = -rt->u.dst.error; 2744 err = -rt->u.dst.error;
2718 } else { 2745 } else {
@@ -2726,22 +2753,22 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2726 }, 2753 },
2727 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, 2754 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
2728 }; 2755 };
2729 err = ip_route_output_key(&init_net, &rt, &fl); 2756 err = ip_route_output_key(net, &rt, &fl);
2730 } 2757 }
2731 2758
2732 if (err) 2759 if (err)
2733 goto errout_free; 2760 goto errout_free;
2734 2761
2735 skb->dst = &rt->u.dst; 2762 skb->rtable = rt;
2736 if (rtm->rtm_flags & RTM_F_NOTIFY) 2763 if (rtm->rtm_flags & RTM_F_NOTIFY)
2737 rt->rt_flags |= RTCF_NOTIFY; 2764 rt->rt_flags |= RTCF_NOTIFY;
2738 2765
2739 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 2766 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
2740 RTM_NEWROUTE, 0, 0); 2767 RTM_NEWROUTE, 0, 0);
2741 if (err <= 0) 2768 if (err <= 0)
2742 goto errout_free; 2769 goto errout_free;
2743 2770
2744 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); 2771 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2745errout: 2772errout:
2746 return err; 2773 return err;
2747 2774
@@ -2755,6 +2782,9 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2755 struct rtable *rt; 2782 struct rtable *rt;
2756 int h, s_h; 2783 int h, s_h;
2757 int idx, s_idx; 2784 int idx, s_idx;
2785 struct net *net;
2786
2787 net = sock_net(skb->sk);
2758 2788
2759 s_h = cb->args[0]; 2789 s_h = cb->args[0];
2760 if (s_h < 0) 2790 if (s_h < 0)
@@ -2764,7 +2794,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2764 rcu_read_lock_bh(); 2794 rcu_read_lock_bh();
2765 for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; 2795 for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
2766 rt = rcu_dereference(rt->u.dst.rt_next), idx++) { 2796 rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
2767 if (idx < s_idx) 2797 if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
2768 continue; 2798 continue;
2769 if (rt->rt_genid != atomic_read(&rt_genid)) 2799 if (rt->rt_genid != atomic_read(&rt_genid))
2770 continue; 2800 continue;
@@ -3028,7 +3058,9 @@ int __init ip_rt_init(void)
3028 devinet_init(); 3058 devinet_init();
3029 ip_fib_init(); 3059 ip_fib_init();
3030 3060
3031 setup_timer(&rt_secret_timer, rt_secret_rebuild, 0); 3061 rt_secret_timer.function = rt_secret_rebuild;
3062 rt_secret_timer.data = 0;
3063 init_timer_deferrable(&rt_secret_timer);
3032 3064
3033 /* All the timers, started at system startup tend 3065 /* All the timers, started at system startup tend
3034 to synchronize. Perturb it a bit. 3066 to synchronize. Perturb it a bit.
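[Editorial note] On the timer hunk just above: setup_timer() gives way to open-coded assignments so init_timer_deferrable() can be used. A deferrable timer is allowed to fire late, piggybacking on the next hard wakeup; for a secret rotation on the order of ip_rt_secret_interval that is harmless and avoids waking an idle CPU just to reseed the hash. Annotated restatement, for illustration only:

	rt_secret_timer.function = rt_secret_rebuild;	/* what to run */
	rt_secret_timer.data = 0;			/* unused argument */
	/* deferrable: no guaranteed-on-time firing, no idle wakeups */
	init_timer_deferrable(&rt_secret_timer);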
@@ -3040,7 +3072,7 @@ int __init ip_rt_init(void)
3040 ip_rt_secret_interval; 3072 ip_rt_secret_interval;
3041 add_timer(&rt_secret_timer); 3073 add_timer(&rt_secret_timer);
3042 3074
3043 if (ip_rt_proc_init(&init_net)) 3075 if (ip_rt_proc_init())
3044 printk(KERN_ERR "Unable to create route proc files\n"); 3076 printk(KERN_ERR "Unable to create route proc files\n");
3045#ifdef CONFIG_XFRM 3077#ifdef CONFIG_XFRM
3046 xfrm_init(); 3078 xfrm_init();
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f470fe4511db..73ba98921d64 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -10,8 +10,6 @@
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 * 11 *
12 * $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $ 12 * $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $
13 *
14 * Missing: IPv6 support.
15 */ 13 */
16 14
17#include <linux/tcp.h> 15#include <linux/tcp.h>
@@ -21,26 +19,33 @@
21#include <linux/kernel.h> 19#include <linux/kernel.h>
22#include <net/tcp.h> 20#include <net/tcp.h>
23 21
22/* Timestamps: lowest 9 bits store TCP options */
23#define TSBITS 9
24#define TSMASK (((__u32)1 << TSBITS) - 1)
25
24extern int sysctl_tcp_syncookies; 26extern int sysctl_tcp_syncookies;
25 27
26static __u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS]; 28__u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
29EXPORT_SYMBOL(syncookie_secret);
27 30
28static __init int init_syncookies(void) 31static __init int init_syncookies(void)
29{ 32{
30 get_random_bytes(syncookie_secret, sizeof(syncookie_secret)); 33 get_random_bytes(syncookie_secret, sizeof(syncookie_secret));
31 return 0; 34 return 0;
32} 35}
33module_init(init_syncookies); 36__initcall(init_syncookies);
34 37
35#define COOKIEBITS 24 /* Upper bits store count */ 38#define COOKIEBITS 24 /* Upper bits store count */
36#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) 39#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
37 40
41static DEFINE_PER_CPU(__u32, cookie_scratch)[16 + 5 + SHA_WORKSPACE_WORDS];
42
38static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, 43static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
39 u32 count, int c) 44 u32 count, int c)
40{ 45{
41 __u32 tmp[16 + 5 + SHA_WORKSPACE_WORDS]; 46 __u32 *tmp = __get_cpu_var(cookie_scratch);
42 47
43 memcpy(tmp + 3, syncookie_secret[c], sizeof(syncookie_secret[c])); 48 memcpy(tmp + 4, syncookie_secret[c], sizeof(syncookie_secret[c]));
44 tmp[0] = (__force u32)saddr; 49 tmp[0] = (__force u32)saddr;
45 tmp[1] = (__force u32)daddr; 50 tmp[1] = (__force u32)daddr;
46 tmp[2] = ((__force u32)sport << 16) + (__force u32)dport; 51 tmp[2] = ((__force u32)sport << 16) + (__force u32)dport;
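[Editorial note] Two sizing details in the hunk above are easy to miss: the secret shrinks from 16-3 to 16-4 words because the SHA input block now carries four input words ahead of it, and the scratch buffer moves off the stack into per-CPU storage, trading roughly 350 bytes of kernel stack for one workspace per CPU (correctness rests on the assumption that cookie_hash() is not preempted and re-entered on the same CPU mid-use). The implied block layout, with the count word assumed from the truncated context:

	/* tmp[0] = saddr        tmp[1] = daddr
	 * tmp[2] = (sport << 16) | dport
	 * tmp[3] = count                  (assumed, cut off by the hunk)
	 * tmp[4..15]  = syncookie_secret[c]   -- hence 16 - 4 words
	 * tmp[16..20] = 5-word SHA digest; tmp[17] is the word returned
	 * tmp[21..]   = SHA_WORKSPACE_WORDS of scratch */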
@@ -50,6 +55,39 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
50 return tmp[17]; 55 return tmp[17];
51} 56}
52 57
58
59/*
60 * when syncookies are in effect and tcp timestamps are enabled we encode
61 * tcp options in the lowest 9 bits of the timestamp value that will be
62 * sent in the syn-ack.
63 * Since subsequent timestamps use the normal tcp_time_stamp value, we
64 * must make sure that the resulting initial timestamp is <= tcp_time_stamp.
65 */
66__u32 cookie_init_timestamp(struct request_sock *req)
67{
68 struct inet_request_sock *ireq;
69 u32 ts, ts_now = tcp_time_stamp;
70 u32 options = 0;
71
72 ireq = inet_rsk(req);
73 if (ireq->wscale_ok) {
74 options = ireq->snd_wscale;
75 options |= ireq->rcv_wscale << 4;
76 }
77 options |= ireq->sack_ok << 8;
78
79 ts = ts_now & ~TSMASK;
80 ts |= options;
81 if (ts > ts_now) {
82 ts >>= TSBITS;
83 ts--;
84 ts <<= TSBITS;
85 ts |= options;
86 }
87 return ts;
88}
89
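[Editorial note] A worked instance of the clamping above, with assumed inputs: snd_wscale = 7, rcv_wscale = 2 and SACK permitted give options = 7 | (2 << 4) | (1 << 8) = 0x127. With tcp_time_stamp = 100000:

	/* ts = (100000 & ~TSMASK) | 0x127 = 99840 | 295 = 100135 > 100000,
	 * so step one TSBITS block down:
	 *   ts >>= 9  ->  195
	 *   ts--      ->  194
	 *   ts <<= 9  ->  99328
	 *   ts |= 295 ->  99623
	 * 99623 still carries the options in its low 9 bits and now
	 * satisfies ts <= tcp_time_stamp, as the comment requires. */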
90
53static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport, 91static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport,
54 __be16 dport, __u32 sseq, __u32 count, 92 __be16 dport, __u32 sseq, __u32 count,
55 __u32 data) 93 __u32 data)
@@ -184,6 +222,35 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
184 return child; 222 return child;
185} 223}
186 224
225
226/*
227 * when syncookies are in effect and tcp timestamps are enabled we stored
228 * additional tcp options in the timestamp.
229 * This extracts these options from the timestamp echo.
230 *
231 * The lowest 4 bits are for snd_wscale
232 * The next 4 lsb are for rcv_wscale
233 * The next lsb is for sack_ok
234 */
235void cookie_check_timestamp(struct tcp_options_received *tcp_opt)
236{
237 /* echoed timestamp, 9 lowest bits contain options */
238 u32 options = tcp_opt->rcv_tsecr & TSMASK;
239
240 tcp_opt->snd_wscale = options & 0xf;
241 options >>= 4;
242 tcp_opt->rcv_wscale = options & 0xf;
243
244 tcp_opt->sack_ok = (options >> 4) & 0x1;
245
246 if (tcp_opt->sack_ok)
247 tcp_sack_reset(tcp_opt);
248
249 if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale)
250 tcp_opt->wscale_ok = 1;
251}
252EXPORT_SYMBOL(cookie_check_timestamp);
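[Editorial note] And the decode side of the same worked example: when the ACK echoes rcv_tsecr = 99623, the low nine bits recover exactly what cookie_init_timestamp() stored:

	/* options    = 99623 & TSMASK     = 0x127
	 * snd_wscale = 0x127 & 0xf        = 7
	 * rcv_wscale = (0x127 >> 4) & 0xf = 2
	 * sack_ok    = (0x127 >> 8) & 1   = 1
	 * so wscale_ok is set and a SACK-enabled connection is rebuilt even
	 * though the original SYN's options were never kept. */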
253
187struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, 254struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
188 struct ip_options *opt) 255 struct ip_options *opt)
189{ 256{
@@ -197,6 +264,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
197 int mss; 264 int mss;
198 struct rtable *rt; 265 struct rtable *rt;
199 __u8 rcv_wscale; 266 __u8 rcv_wscale;
267 struct tcp_options_received tcp_opt;
200 268
201 if (!sysctl_tcp_syncookies || !th->ack) 269 if (!sysctl_tcp_syncookies || !th->ack)
202 goto out; 270 goto out;
@@ -209,6 +277,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
209 277
210 NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV); 278 NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
211 279
280 /* check for timestamp cookie support */
281 memset(&tcp_opt, 0, sizeof(tcp_opt));
282 tcp_parse_options(skb, &tcp_opt, 0);
283
284 if (tcp_opt.saw_tstamp)
285 cookie_check_timestamp(&tcp_opt);
286
212 ret = NULL; 287 ret = NULL;
213 req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */ 288 req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */
214 if (!req) 289 if (!req)
@@ -227,6 +302,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
227 ireq->loc_addr = ip_hdr(skb)->daddr; 302 ireq->loc_addr = ip_hdr(skb)->daddr;
228 ireq->rmt_addr = ip_hdr(skb)->saddr; 303 ireq->rmt_addr = ip_hdr(skb)->saddr;
229 ireq->opt = NULL; 304 ireq->opt = NULL;
305 ireq->snd_wscale = tcp_opt.snd_wscale;
306 ireq->rcv_wscale = tcp_opt.rcv_wscale;
307 ireq->sack_ok = tcp_opt.sack_ok;
308 ireq->wscale_ok = tcp_opt.wscale_ok;
309 ireq->tstamp_ok = tcp_opt.saw_tstamp;
310 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
230 311
231 /* We threw the options of the initial SYN away, so we hope 312
232 * the ACK carries the same options again (see RFC1122 4.2.3.8) 313 * the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -241,8 +322,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
241 } 322 }
242 } 323 }
243 324
244 ireq->snd_wscale = ireq->rcv_wscale = ireq->tstamp_ok = 0;
245 ireq->wscale_ok = ireq->sack_ok = 0;
246 req->expires = 0UL; 325 req->expires = 0UL;
247 req->retrans = 0; 326 req->retrans = 0;
248 327
@@ -271,11 +350,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
271 } 350 }
272 351
273 /* Try to redo what tcp_v4_send_synack did. */ 352 /* Try to redo what tcp_v4_send_synack did. */
274 req->window_clamp = dst_metric(&rt->u.dst, RTAX_WINDOW); 353 req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW);
354
275 tcp_select_initial_window(tcp_full_space(sk), req->mss, 355 tcp_select_initial_window(tcp_full_space(sk), req->mss,
276 &req->rcv_wnd, &req->window_clamp, 356 &req->rcv_wnd, &req->window_clamp,
277 0, &rcv_wscale); 357 ireq->wscale_ok, &rcv_wscale);
278 /* BTW win scale with syncookies is 0 by definition */ 358
279 ireq->rcv_wscale = rcv_wscale; 359 ireq->rcv_wscale = rcv_wscale;
280 360
281 ret = get_cookie_sock(sk, skb, req, &rt->u.dst); 361 ret = get_cookie_sock(sk, skb, req, &rt->u.dst);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 88286f35d1e2..c437f804ee38 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -404,38 +404,6 @@ static struct ctl_table ipv4_table[] = {
404 .strategy = &ipv4_sysctl_local_port_range, 404 .strategy = &ipv4_sysctl_local_port_range,
405 }, 405 },
406 { 406 {
407 .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL,
408 .procname = "icmp_echo_ignore_all",
409 .data = &sysctl_icmp_echo_ignore_all,
410 .maxlen = sizeof(int),
411 .mode = 0644,
412 .proc_handler = &proc_dointvec
413 },
414 {
415 .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS,
416 .procname = "icmp_echo_ignore_broadcasts",
417 .data = &sysctl_icmp_echo_ignore_broadcasts,
418 .maxlen = sizeof(int),
419 .mode = 0644,
420 .proc_handler = &proc_dointvec
421 },
422 {
423 .ctl_name = NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES,
424 .procname = "icmp_ignore_bogus_error_responses",
425 .data = &sysctl_icmp_ignore_bogus_error_responses,
426 .maxlen = sizeof(int),
427 .mode = 0644,
428 .proc_handler = &proc_dointvec
429 },
430 {
431 .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR,
432 .procname = "icmp_errors_use_inbound_ifaddr",
433 .data = &sysctl_icmp_errors_use_inbound_ifaddr,
434 .maxlen = sizeof(int),
435 .mode = 0644,
436 .proc_handler = &proc_dointvec
437 },
438 {
439 .ctl_name = NET_IPV4_ROUTE, 407 .ctl_name = NET_IPV4_ROUTE,
440 .procname = "route", 408 .procname = "route",
441 .maxlen = 0, 409 .maxlen = 0,
@@ -586,22 +554,6 @@ static struct ctl_table ipv4_table[] = {
586 .proc_handler = &proc_dointvec 554 .proc_handler = &proc_dointvec
587 }, 555 },
588 { 556 {
589 .ctl_name = NET_IPV4_ICMP_RATELIMIT,
590 .procname = "icmp_ratelimit",
591 .data = &sysctl_icmp_ratelimit,
592 .maxlen = sizeof(int),
593 .mode = 0644,
594 .proc_handler = &proc_dointvec
595 },
596 {
597 .ctl_name = NET_IPV4_ICMP_RATEMASK,
598 .procname = "icmp_ratemask",
599 .data = &sysctl_icmp_ratemask,
600 .maxlen = sizeof(int),
601 .mode = 0644,
602 .proc_handler = &proc_dointvec
603 },
604 {
605 .ctl_name = NET_TCP_TW_REUSE, 557 .ctl_name = NET_TCP_TW_REUSE,
606 .procname = "tcp_tw_reuse", 558 .procname = "tcp_tw_reuse",
607 .data = &sysctl_tcp_tw_reuse, 559 .data = &sysctl_tcp_tw_reuse,
@@ -804,6 +756,58 @@ static struct ctl_table ipv4_table[] = {
804 { .ctl_name = 0 } 756 { .ctl_name = 0 }
805}; 757};
806 758
759static struct ctl_table ipv4_net_table[] = {
760 {
761 .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL,
762 .procname = "icmp_echo_ignore_all",
763 .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
764 .maxlen = sizeof(int),
765 .mode = 0644,
766 .proc_handler = &proc_dointvec
767 },
768 {
769 .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS,
770 .procname = "icmp_echo_ignore_broadcasts",
771 .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
772 .maxlen = sizeof(int),
773 .mode = 0644,
774 .proc_handler = &proc_dointvec
775 },
776 {
777 .ctl_name = NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES,
778 .procname = "icmp_ignore_bogus_error_responses",
779 .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
780 .maxlen = sizeof(int),
781 .mode = 0644,
782 .proc_handler = &proc_dointvec
783 },
784 {
785 .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR,
786 .procname = "icmp_errors_use_inbound_ifaddr",
787 .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
788 .maxlen = sizeof(int),
789 .mode = 0644,
790 .proc_handler = &proc_dointvec
791 },
792 {
793 .ctl_name = NET_IPV4_ICMP_RATELIMIT,
794 .procname = "icmp_ratelimit",
795 .data = &init_net.ipv4.sysctl_icmp_ratelimit,
796 .maxlen = sizeof(int),
797 .mode = 0644,
798 .proc_handler = &proc_dointvec
799 },
800 {
801 .ctl_name = NET_IPV4_ICMP_RATEMASK,
802 .procname = "icmp_ratemask",
803 .data = &init_net.ipv4.sysctl_icmp_ratemask,
804 .maxlen = sizeof(int),
805 .mode = 0644,
806 .proc_handler = &proc_dointvec
807 },
808 { }
809};
810
807struct ctl_path net_ipv4_ctl_path[] = { 811struct ctl_path net_ipv4_ctl_path[] = {
808 { .procname = "net", .ctl_name = CTL_NET, }, 812 { .procname = "net", .ctl_name = CTL_NET, },
809 { .procname = "ipv4", .ctl_name = NET_IPV4, }, 813 { .procname = "ipv4", .ctl_name = NET_IPV4, },
@@ -811,12 +815,72 @@ struct ctl_path net_ipv4_ctl_path[] = {
811}; 815};
812EXPORT_SYMBOL_GPL(net_ipv4_ctl_path); 816EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
813 817
818static __net_init int ipv4_sysctl_init_net(struct net *net)
819{
820 struct ctl_table *table;
821
822 table = ipv4_net_table;
823 if (net != &init_net) {
824 table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
825 if (table == NULL)
826 goto err_alloc;
827
828 table[0].data =
829 &net->ipv4.sysctl_icmp_echo_ignore_all;
830 table[1].data =
831 &net->ipv4.sysctl_icmp_echo_ignore_broadcasts;
832 table[2].data =
833 &net->ipv4.sysctl_icmp_ignore_bogus_error_responses;
834 table[3].data =
835 &net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr;
836 table[4].data =
837 &net->ipv4.sysctl_icmp_ratelimit;
838 table[5].data =
839 &net->ipv4.sysctl_icmp_ratemask;
840 }
841
842 net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
843 net_ipv4_ctl_path, table);
844 if (net->ipv4.ipv4_hdr == NULL)
845 goto err_reg;
846
847 return 0;
848
849err_reg:
850 if (net != &init_net)
851 kfree(table);
852err_alloc:
853 return -ENOMEM;
854}
855
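[Editorial note] The init function above is the standard duplicate-and-repoint idiom: the static ipv4_net_table doubles as init_net's live table, while every other namespace gets a kmemdup() copy whose .data pointers are retargeted by index. The fragile part is that the table[N] fixups must stay in lockstep with the row order; condensed restatement, for illustration:

	/* Sketch: copy the template, then aim each row at this net's field.
	 * Row indices must match ipv4_net_table's declaration order. */
	static struct ctl_table *dup_table_for(struct net *net)
	{
		struct ctl_table *t;

		t = kmemdup(ipv4_net_table, sizeof(ipv4_net_table), GFP_KERNEL);
		if (t == NULL)
			return NULL;

		t[0].data = &net->ipv4.sysctl_icmp_echo_ignore_all;
		t[1].data = &net->ipv4.sysctl_icmp_echo_ignore_broadcasts;
		/* ... rows 2-5 likewise ... */
		return t;
	}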
856static __net_exit void ipv4_sysctl_exit_net(struct net *net)
857{
858 struct ctl_table *table;
859
860 table = net->ipv4.ipv4_hdr->ctl_table_arg;
861 unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
862 kfree(table);
863}
864
865static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
866 .init = ipv4_sysctl_init_net,
867 .exit = ipv4_sysctl_exit_net,
868};
869
814static __init int sysctl_ipv4_init(void) 870static __init int sysctl_ipv4_init(void)
815{ 871{
816 struct ctl_table_header *hdr; 872 struct ctl_table_header *hdr;
817 873
818 hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table); 874 hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table);
819 return hdr == NULL ? -ENOMEM : 0; 875 if (hdr == NULL)
876 return -ENOMEM;
877
878 if (register_pernet_subsys(&ipv4_sysctl_ops)) {
879 unregister_sysctl_table(hdr);
880 return -ENOMEM;
881 }
882
883 return 0;
820} 884}
821 885
822__initcall(sysctl_ipv4_init); 886__initcall(sysctl_ipv4_init);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 39b629ac2404..58ac838bf460 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2105,15 +2105,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2105 break; 2105 break;
2106 2106
2107 case TCP_DEFER_ACCEPT: 2107 case TCP_DEFER_ACCEPT:
2108 icsk->icsk_accept_queue.rskq_defer_accept = 0; 2108 if (val < 0) {
2109 if (val > 0) { 2109 err = -EINVAL;
2110 /* Translate value in seconds to number of 2110 } else {
2111 * retransmits */ 2111 if (val > MAX_TCP_ACCEPT_DEFERRED)
2112 while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && 2112 val = MAX_TCP_ACCEPT_DEFERRED;
2113 val > ((TCP_TIMEOUT_INIT / HZ) << 2113 icsk->icsk_accept_queue.rskq_defer_accept = val;
2114 icsk->icsk_accept_queue.rskq_defer_accept))
2115 icsk->icsk_accept_queue.rskq_defer_accept++;
2116 icsk->icsk_accept_queue.rskq_defer_accept++;
2117 } 2114 }
2118 break; 2115 break;
2119 2116
@@ -2295,8 +2292,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2295 val = (val ? : sysctl_tcp_fin_timeout) / HZ; 2292 val = (val ? : sysctl_tcp_fin_timeout) / HZ;
2296 break; 2293 break;
2297 case TCP_DEFER_ACCEPT: 2294 case TCP_DEFER_ACCEPT:
2298 val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 : 2295 val = icsk->icsk_accept_queue.rskq_defer_accept;
2299 ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
2300 break; 2296 break;
2301 case TCP_WINDOW_CLAMP: 2297 case TCP_WINDOW_CLAMP:
2302 val = tp->window_clamp; 2298 val = tp->window_clamp;
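[Editorial note] Taken together with the tcp_input.c change further below, TCP_DEFER_ACCEPT stops being translated into a retransmit count: the option now stores and reports plain seconds, clamped to MAX_TCP_ACCEPT_DEFERRED. The userspace round trip becomes, for example (listen_fd is an assumed listening socket):

	/* Value in is seconds; value out is the possibly clamped seconds,
	 * with no more power-of-two rounding through TCP_TIMEOUT_INIT. */
	int secs = 10, got = 0;
	socklen_t len = sizeof(got);

	setsockopt(listen_fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &secs, sizeof(secs));
	getsockopt(listen_fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &got, &len);
	/* got == 10, assuming 10 <= MAX_TCP_ACCEPT_DEFERRED */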
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 3aa0b23c1ea0..eb5b9854c8c7 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -1,12 +1,13 @@
1/* 1/*
2 * TCP CUBIC: Binary Increase Congestion control for TCP v2.1 2 * TCP CUBIC: Binary Increase Congestion control for TCP v2.2
3 * 3 * Home page:
4 * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC
4 * This is from the implementation of CUBIC TCP in 5 * This is from the implementation of CUBIC TCP in
5 * Injong Rhee, Lisong Xu. 6 * Injong Rhee, Lisong Xu.
6 * "CUBIC: A New TCP-Friendly High-Speed TCP Variant 7 * "CUBIC: A New TCP-Friendly High-Speed TCP Variant
7 * in PFLDnet 2005 8 * in PFLDnet 2005
8 * Available from: 9 * Available from:
9 * http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf 10 * http://netsrv.csc.ncsu.edu/export/cubic-paper.pdf
10 * 11 *
11 * Unless CUBIC is enabled and congestion window is large 12 * Unless CUBIC is enabled and congestion window is large
12 * this behaves the same as the original Reno. 13 * this behaves the same as the original Reno.
@@ -20,15 +21,10 @@
20#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation 21#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation
21 * max_cwnd = snd_cwnd * beta 22 * max_cwnd = snd_cwnd * beta
22 */ 23 */
23#define BICTCP_B 4 /*
24 * In binary search,
25 * go to point (max+min)/N
26 */
27#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ 24#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */
28 25
29static int fast_convergence __read_mostly = 1; 26static int fast_convergence __read_mostly = 1;
30static int max_increment __read_mostly = 16; 27static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */
31static int beta __read_mostly = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
32static int initial_ssthresh __read_mostly; 28static int initial_ssthresh __read_mostly;
33static int bic_scale __read_mostly = 41; 29static int bic_scale __read_mostly = 41;
34static int tcp_friendliness __read_mostly = 1; 30static int tcp_friendliness __read_mostly = 1;
@@ -40,9 +36,7 @@ static u64 cube_factor __read_mostly;
40/* Note parameters that are used for precomputing scale factors are read-only */ 36/* Note parameters that are used for precomputing scale factors are read-only */
41module_param(fast_convergence, int, 0644); 37module_param(fast_convergence, int, 0644);
42MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence"); 38MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
43module_param(max_increment, int, 0644); 39module_param(beta, int, 0644);
44MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search");
45module_param(beta, int, 0444);
46MODULE_PARM_DESC(beta, "beta for multiplicative increase"); 40MODULE_PARM_DESC(beta, "beta for multiplicative increase");
47module_param(initial_ssthresh, int, 0644); 41module_param(initial_ssthresh, int, 0644);
48MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); 42MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
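[Editorial note] The headline tuning change in CUBIC v2.2 is the decrease factor: beta drops from 819 to 717 on the 1024 scale and becomes writable (0644), while the max_increment clamp from the old binary-search heuristic disappears. As a worked value:

	/* beta / BICTCP_BETA_SCALE = 717 / 1024 ~= 0.700  (v2.2)
	 *                            819 / 1024 ~= 0.800  (v2.1)
	 * so on loss max_cwnd = snd_cwnd * ~0.7: the window shrinks to
	 * roughly 70% of its peak instead of 80%. */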
@@ -145,7 +139,7 @@ static u32 cubic_root(u64 a)
145static inline void bictcp_update(struct bictcp *ca, u32 cwnd) 139static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
146{ 140{
147 u64 offs; 141 u64 offs;
148 u32 delta, t, bic_target, min_cnt, max_cnt; 142 u32 delta, t, bic_target, max_cnt;
149 143
150 ca->ack_cnt++; /* count the number of ACKs */ 144 ca->ack_cnt++; /* count the number of ACKs */
151 145
@@ -211,19 +205,6 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
211 ca->cnt = 100 * cwnd; /* very small increment*/ 205 ca->cnt = 100 * cwnd; /* very small increment*/
212 } 206 }
213 207
214 if (ca->delay_min > 0) {
215 /* max increment = Smax * rtt / 0.1 */
216 min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min);
217
218 /* use concave growth when the target is above the origin */
219 if (ca->cnt < min_cnt && t >= ca->bic_K)
220 ca->cnt = min_cnt;
221 }
222
223 /* slow start and low utilization */
224 if (ca->loss_cwnd == 0) /* could be aggressive in slow start */
225 ca->cnt = 50;
226
227 /* TCP Friendly */ 208 /* TCP Friendly */
228 if (tcp_friendliness) { 209 if (tcp_friendliness) {
229 u32 scale = beta_scale; 210 u32 scale = beta_scale;
@@ -391,4 +372,4 @@ module_exit(cubictcp_unregister);
391MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); 372MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
392MODULE_LICENSE("GPL"); 373MODULE_LICENSE("GPL");
393MODULE_DESCRIPTION("CUBIC TCP"); 374MODULE_DESCRIPTION("CUBIC TCP");
394MODULE_VERSION("2.1"); 375MODULE_VERSION("2.2");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5119856017ab..bd0ee8ca8b21 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3592,7 +3592,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
3592 * cases we should never reach this piece of code. 3592 * cases we should never reach this piece of code.
3593 */ 3593 */
3594 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n", 3594 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
3595 __FUNCTION__, sk->sk_state); 3595 __func__, sk->sk_state);
3596 break; 3596 break;
3597 } 3597 }
3598 3598
@@ -3999,7 +3999,7 @@ drop:
3999 u32 end_seq = TCP_SKB_CB(skb)->end_seq; 3999 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4000 4000
4001 if (seq == TCP_SKB_CB(skb1)->end_seq) { 4001 if (seq == TCP_SKB_CB(skb1)->end_seq) {
4002 __skb_append(skb1, skb, &tp->out_of_order_queue); 4002 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4003 4003
4004 if (!tp->rx_opt.num_sacks || 4004 if (!tp->rx_opt.num_sacks ||
4005 tp->selective_acks[0].end_seq != seq) 4005 tp->selective_acks[0].end_seq != seq)
@@ -4482,6 +4482,49 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
4482 } 4482 }
4483} 4483}
4484 4484
4485static int tcp_defer_accept_check(struct sock *sk)
4486{
4487 struct tcp_sock *tp = tcp_sk(sk);
4488
4489 if (tp->defer_tcp_accept.request) {
4490 int queued_data = tp->rcv_nxt - tp->copied_seq;
4491 int hasfin = !skb_queue_empty(&sk->sk_receive_queue) ?
4492 tcp_hdr((struct sk_buff *)
4493 sk->sk_receive_queue.prev)->fin : 0;
4494
4495 if (queued_data && hasfin)
4496 queued_data--;
4497
4498 if (queued_data &&
4499 tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) {
4500 if (sock_flag(sk, SOCK_KEEPOPEN)) {
4501 inet_csk_reset_keepalive_timer(sk,
4502 keepalive_time_when(tp));
4503 } else {
4504 inet_csk_delete_keepalive_timer(sk);
4505 }
4506
4507 inet_csk_reqsk_queue_add(
4508 tp->defer_tcp_accept.listen_sk,
4509 tp->defer_tcp_accept.request,
4510 sk);
4511
4512 tp->defer_tcp_accept.listen_sk->sk_data_ready(
4513 tp->defer_tcp_accept.listen_sk, 0);
4514
4515 sock_put(tp->defer_tcp_accept.listen_sk);
4516 sock_put(sk);
4517 tp->defer_tcp_accept.listen_sk = NULL;
4518 tp->defer_tcp_accept.request = NULL;
4519 } else if (hasfin ||
4520 tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) {
4521 tcp_reset(sk);
4522 return -1;
4523 }
4524 }
4525 return 0;
4526}
4527
4485static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) 4528static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
4486{ 4529{
4487 struct tcp_sock *tp = tcp_sk(sk); 4530 struct tcp_sock *tp = tcp_sk(sk);
@@ -4842,6 +4885,9 @@ step5:
4842 4885
4843 tcp_data_snd_check(sk); 4886 tcp_data_snd_check(sk);
4844 tcp_ack_snd_check(sk); 4887 tcp_ack_snd_check(sk);
4888
4889 if (tcp_defer_accept_check(sk))
4890 return -1;
4845 return 0; 4891 return 0;
4846 4892
4847csum_error: 4893csum_error:
@@ -5361,6 +5407,7 @@ discard:
5361 5407
5362EXPORT_SYMBOL(sysctl_tcp_ecn); 5408EXPORT_SYMBOL(sysctl_tcp_ecn);
5363EXPORT_SYMBOL(sysctl_tcp_reordering); 5409EXPORT_SYMBOL(sysctl_tcp_reordering);
5410EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
5364EXPORT_SYMBOL(tcp_parse_options); 5411EXPORT_SYMBOL(tcp_parse_options);
5365EXPORT_SYMBOL(tcp_rcv_established); 5412EXPORT_SYMBOL(tcp_rcv_established);
5366EXPORT_SYMBOL(tcp_rcv_state_process); 5413EXPORT_SYMBOL(tcp_rcv_state_process);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 00156bf421ca..02519730e0d5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -88,9 +88,6 @@ int sysctl_tcp_low_latency __read_mostly;
88/* Check TCP sequence numbers in ICMP packets. */ 88/* Check TCP sequence numbers in ICMP packets. */
89#define ICMP_MIN_LENGTH 8 89#define ICMP_MIN_LENGTH 8
90 90
91/* Socket used for sending RSTs */
92static struct socket *tcp_socket __read_mostly;
93
94void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); 91void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
95 92
96#ifdef CONFIG_TCP_MD5SIG 93#ifdef CONFIG_TCP_MD5SIG
@@ -353,7 +350,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
353 return; 350 return;
354 } 351 }
355 352
356 sk = inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->daddr, th->dest, 353 sk = inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->daddr, th->dest,
357 iph->saddr, th->source, inet_iif(skb)); 354 iph->saddr, th->source, inet_iif(skb));
358 if (!sk) { 355 if (!sk) {
359 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 356 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
@@ -552,7 +549,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
552 if (th->rst) 549 if (th->rst)
553 return; 550 return;
554 551
555 if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL) 552 if (skb->rtable->rt_type != RTN_LOCAL)
556 return; 553 return;
557 554
558 /* Swap the send and the receive. */ 555 /* Swap the send and the receive. */
@@ -598,7 +595,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
598 sizeof(struct tcphdr), IPPROTO_TCP, 0); 595 sizeof(struct tcphdr), IPPROTO_TCP, 0);
599 arg.csumoffset = offsetof(struct tcphdr, check) / 2; 596 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
600 597
601 ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); 598 ip_send_reply(dev_net(skb->dst->dev)->ipv4.tcp_sock, skb,
599 &arg, arg.iov[0].iov_len);
602 600
603 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); 601 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
604 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); 602 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
@@ -693,7 +691,8 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
693 if (twsk) 691 if (twsk)
694 arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if; 692 arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if;
695 693
696 ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); 694 ip_send_reply(dev_net(skb->dev)->ipv4.tcp_sock, skb,
695 &arg, arg.iov[0].iov_len);
697 696
698 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); 697 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
699} 698}
@@ -723,8 +722,8 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
723 * This still operates on a request_sock only, not on a big 722 * This still operates on a request_sock only, not on a big
724 * socket. 723 * socket.
725 */ 724 */
726static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, 725static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
727 struct dst_entry *dst) 726 struct dst_entry *dst)
728{ 727{
729 const struct inet_request_sock *ireq = inet_rsk(req); 728 const struct inet_request_sock *ireq = inet_rsk(req);
730 int err = -1; 729 int err = -1;
@@ -732,7 +731,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
732 731
733 /* First, grab a route. */ 732 /* First, grab a route. */
734 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) 733 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
735 goto out; 734 return -1;
736 735
737 skb = tcp_make_synack(sk, dst, req); 736 skb = tcp_make_synack(sk, dst, req);
738 737
@@ -751,11 +750,15 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
751 err = net_xmit_eval(err); 750 err = net_xmit_eval(err);
752 } 751 }
753 752
754out:
755 dst_release(dst); 753 dst_release(dst);
756 return err; 754 return err;
757} 755}
758 756
757static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
758{
759 return __tcp_v4_send_synack(sk, req, NULL);
760}
761
759/* 762/*
760 * IPv4 request_sock destructor. 763 * IPv4 request_sock destructor.
761 */ 764 */
@@ -1258,8 +1261,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1258#endif 1261#endif
1259 1262
1260 /* Never answer to SYNs send to broadcast or multicast */ 1263 /* Never answer to SYNs send to broadcast or multicast */
1261 if (((struct rtable *)skb->dst)->rt_flags & 1264 if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1262 (RTCF_BROADCAST | RTCF_MULTICAST))
1263 goto drop; 1265 goto drop;
1264 1266
1265 /* TW buckets are converted to open requests without 1267 /* TW buckets are converted to open requests without
@@ -1297,10 +1299,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1297 1299
1298 tcp_parse_options(skb, &tmp_opt, 0); 1300 tcp_parse_options(skb, &tmp_opt, 0);
1299 1301
1300 if (want_cookie) { 1302 if (want_cookie && !tmp_opt.saw_tstamp)
1301 tcp_clear_options(&tmp_opt); 1303 tcp_clear_options(&tmp_opt);
1302 tmp_opt.saw_tstamp = 0;
1303 }
1304 1304
1305 if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) { 1305 if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
1306 /* Some OSes (unknown ones, but I see them on web server, which 1306 /* Some OSes (unknown ones, but I see them on web server, which
@@ -1328,6 +1328,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1328 if (want_cookie) { 1328 if (want_cookie) {
1329#ifdef CONFIG_SYN_COOKIES 1329#ifdef CONFIG_SYN_COOKIES
1330 syn_flood_warning(skb); 1330 syn_flood_warning(skb);
1331 req->cookie_ts = tmp_opt.tstamp_ok;
1331#endif 1332#endif
1332 isn = cookie_v4_init_sequence(sk, skb, &req->mss); 1333 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1333 } else if (!isn) { 1334 } else if (!isn) {
@@ -1351,8 +1352,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1351 (s32)(peer->tcp_ts - req->ts_recent) > 1352 (s32)(peer->tcp_ts - req->ts_recent) >
1352 TCP_PAWS_WINDOW) { 1353 TCP_PAWS_WINDOW) {
1353 NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); 1354 NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
1354 dst_release(dst); 1355 goto drop_and_release;
1355 goto drop_and_free;
1356 } 1356 }
1357 } 1357 }
1358 /* Kill the following clause, if you dislike this way. */ 1358 /* Kill the following clause, if you dislike this way. */
@@ -1372,24 +1372,21 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1372 "request from %u.%u.%u.%u/%u\n", 1372 "request from %u.%u.%u.%u/%u\n",
1373 NIPQUAD(saddr), 1373 NIPQUAD(saddr),
1374 ntohs(tcp_hdr(skb)->source)); 1374 ntohs(tcp_hdr(skb)->source));
1375 dst_release(dst); 1375 goto drop_and_release;
1376 goto drop_and_free;
1377 } 1376 }
1378 1377
1379 isn = tcp_v4_init_sequence(skb); 1378 isn = tcp_v4_init_sequence(skb);
1380 } 1379 }
1381 tcp_rsk(req)->snt_isn = isn; 1380 tcp_rsk(req)->snt_isn = isn;
1382 1381
1383 if (tcp_v4_send_synack(sk, req, dst)) 1382 if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
1384 goto drop_and_free; 1383 goto drop_and_free;
1385 1384
1386 if (want_cookie) { 1385 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1387 reqsk_free(req);
1388 } else {
1389 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1390 }
1391 return 0; 1386 return 0;
1392 1387
1388drop_and_release:
1389 dst_release(dst);
1393drop_and_free: 1390drop_and_free:
1394 reqsk_free(req); 1391 reqsk_free(req);
1395drop: 1392drop:
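
The new drop_and_release label above sits directly on top of drop_and_free, so every failure path that already holds a route releases it and then falls through to the shared request-free code, replacing the repeated dst_release()-before-goto pairs. A runnable miniature of that stacked-label cleanup idiom (printfs stand in for the real cleanup calls):

#include <stdio.h>

static int handle(int have_route, int fail_late)
{
        printf("got request\n");
        if (!have_route)
                goto drop_and_free;            /* nothing extra to undo */
        if (fail_late)
                goto drop_and_release;         /* must put the route back */
        printf("accepted\n");
        return 0;

drop_and_release:
        printf("dst_release()\n");             /* extra cleanup, then... */
drop_and_free:
        printf("reqsk_free()\n");              /* ...shared cleanup */
        return -1;
}

int main(void)
{
        handle(1, 1);   /* late failure: releases route, frees request */
        handle(0, 0);   /* early failure: frees request only */
        return 0;
}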
@@ -1487,7 +1484,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1487 if (req) 1484 if (req)
1488 return tcp_check_req(sk, skb, req, prev); 1485 return tcp_check_req(sk, skb, req, prev);
1489 1486
1490 nsk = inet_lookup_established(sk->sk_net, &tcp_hashinfo, iph->saddr, 1487 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1491 th->source, iph->daddr, th->dest, inet_iif(skb)); 1488 th->source, iph->daddr, th->dest, inet_iif(skb));
1492 1489
1493 if (nsk) { 1490 if (nsk) {
@@ -1645,7 +1642,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
1645 TCP_SKB_CB(skb)->flags = iph->tos; 1642 TCP_SKB_CB(skb)->flags = iph->tos;
1646 TCP_SKB_CB(skb)->sacked = 0; 1643 TCP_SKB_CB(skb)->sacked = 0;
1647 1644
1648 sk = __inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->saddr, 1645 sk = __inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->saddr,
1649 th->source, iph->daddr, th->dest, inet_iif(skb)); 1646 th->source, iph->daddr, th->dest, inet_iif(skb));
1650 if (!sk) 1647 if (!sk)
1651 goto no_tcp_socket; 1648 goto no_tcp_socket;
@@ -1719,7 +1716,7 @@ do_time_wait:
1719 } 1716 }
1720 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1717 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1721 case TCP_TW_SYN: { 1718 case TCP_TW_SYN: {
1722 struct sock *sk2 = inet_lookup_listener(skb->dev->nd_net, 1719 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1723 &tcp_hashinfo, 1720 &tcp_hashinfo,
1724 iph->daddr, th->dest, 1721 iph->daddr, th->dest,
1725 inet_iif(skb)); 1722 inet_iif(skb));
@@ -1921,6 +1918,14 @@ int tcp_v4_destroy_sock(struct sock *sk)
1921 sk->sk_sndmsg_page = NULL; 1918 sk->sk_sndmsg_page = NULL;
1922 } 1919 }
1923 1920
1921 if (tp->defer_tcp_accept.request) {
1922 reqsk_free(tp->defer_tcp_accept.request);
1923 sock_put(tp->defer_tcp_accept.listen_sk);
1924 sock_put(sk);
1925 tp->defer_tcp_accept.listen_sk = NULL;
1926 tp->defer_tcp_accept.request = NULL;
1927 }
1928
1924 atomic_dec(&tcp_sockets_allocated); 1929 atomic_dec(&tcp_sockets_allocated);
1925 1930
1926 return 0; 1931 return 0;
@@ -1949,6 +1954,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
1949 struct hlist_node *node; 1954 struct hlist_node *node;
1950 struct sock *sk = cur; 1955 struct sock *sk = cur;
1951 struct tcp_iter_state* st = seq->private; 1956 struct tcp_iter_state* st = seq->private;
1957 struct net *net = seq_file_net(seq);
1952 1958
1953 if (!sk) { 1959 if (!sk) {
1954 st->bucket = 0; 1960 st->bucket = 0;
@@ -1965,7 +1971,8 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
1965 req = req->dl_next; 1971 req = req->dl_next;
1966 while (1) { 1972 while (1) {
1967 while (req) { 1973 while (req) {
1968 if (req->rsk_ops->family == st->family) { 1974 if (req->rsk_ops->family == st->family &&
1975 net_eq(sock_net(req->sk), net)) {
1969 cur = req; 1976 cur = req;
1970 goto out; 1977 goto out;
1971 } 1978 }
@@ -1989,7 +1996,7 @@ get_req:
1989 } 1996 }
1990get_sk: 1997get_sk:
1991 sk_for_each_from(sk, node) { 1998 sk_for_each_from(sk, node) {
1992 if (sk->sk_family == st->family) { 1999 if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
1993 cur = sk; 2000 cur = sk;
1994 goto out; 2001 goto out;
1995 } 2002 }
@@ -2028,6 +2035,7 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2028static void *established_get_first(struct seq_file *seq) 2035static void *established_get_first(struct seq_file *seq)
2029{ 2036{
2030 struct tcp_iter_state* st = seq->private; 2037 struct tcp_iter_state* st = seq->private;
2038 struct net *net = seq_file_net(seq);
2031 void *rc = NULL; 2039 void *rc = NULL;
2032 2040
2033 for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { 2041 for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
@@ -2038,7 +2046,8 @@ static void *established_get_first(struct seq_file *seq)
2038 2046
2039 read_lock_bh(lock); 2047 read_lock_bh(lock);
2040 sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { 2048 sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2041 if (sk->sk_family != st->family) { 2049 if (sk->sk_family != st->family ||
2050 !net_eq(sock_net(sk), net)) {
2042 continue; 2051 continue;
2043 } 2052 }
2044 rc = sk; 2053 rc = sk;
@@ -2047,7 +2056,8 @@ static void *established_get_first(struct seq_file *seq)
2047 st->state = TCP_SEQ_STATE_TIME_WAIT; 2056 st->state = TCP_SEQ_STATE_TIME_WAIT;
2048 inet_twsk_for_each(tw, node, 2057 inet_twsk_for_each(tw, node,
2049 &tcp_hashinfo.ehash[st->bucket].twchain) { 2058 &tcp_hashinfo.ehash[st->bucket].twchain) {
2050 if (tw->tw_family != st->family) { 2059 if (tw->tw_family != st->family ||
2060 !net_eq(twsk_net(tw), net)) {
2051 continue; 2061 continue;
2052 } 2062 }
2053 rc = tw; 2063 rc = tw;
@@ -2066,6 +2076,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
2066 struct inet_timewait_sock *tw; 2076 struct inet_timewait_sock *tw;
2067 struct hlist_node *node; 2077 struct hlist_node *node;
2068 struct tcp_iter_state* st = seq->private; 2078 struct tcp_iter_state* st = seq->private;
2079 struct net *net = seq_file_net(seq);
2069 2080
2070 ++st->num; 2081 ++st->num;
2071 2082
@@ -2073,7 +2084,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
2073 tw = cur; 2084 tw = cur;
2074 tw = tw_next(tw); 2085 tw = tw_next(tw);
2075get_tw: 2086get_tw:
2076 while (tw && tw->tw_family != st->family) { 2087 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2077 tw = tw_next(tw); 2088 tw = tw_next(tw);
2078 } 2089 }
2079 if (tw) { 2090 if (tw) {
@@ -2094,7 +2105,7 @@ get_tw:
2094 sk = sk_next(sk); 2105 sk = sk_next(sk);
2095 2106
2096 sk_for_each_from(sk, node) { 2107 sk_for_each_from(sk, node) {
2097 if (sk->sk_family == st->family) 2108 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2098 goto found; 2109 goto found;
2099 } 2110 }
2100 2111
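
Each /proc iterator above gains a second predicate: besides the address family, the socket (or timewait bucket) must belong to the namespace the seq_file was opened in, via net_eq(sock_net(sk), net). A self-contained model of that two-condition scan; the struct names are illustrative, not the kernel's:

#include <stdio.h>

struct net { int id; };
struct sock { int family; struct net *net; struct sock *next; };

static int net_eq(const struct net *a, const struct net *b) { return a == b; }

static struct sock *next_match(struct sock *sk, int family, struct net *net)
{
        for (; sk; sk = sk->next)
                if (sk->family == family && net_eq(sk->net, net))
                        return sk;              /* both predicates must hold */
        return NULL;
}

int main(void)
{
        struct net n1 = {1}, n2 = {2};
        struct sock c = {2, &n2, NULL}, b = {2, &n1, &c}, a = {10, &n1, &b};
        struct sock *hit = next_match(&a, 2, &n1);
        if (hit)
                printf("first AF_INET sock in net %d\n", hit->net->id);
        return 0;
}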
@@ -2200,48 +2211,37 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
2200static int tcp_seq_open(struct inode *inode, struct file *file) 2211static int tcp_seq_open(struct inode *inode, struct file *file)
2201{ 2212{
2202 struct tcp_seq_afinfo *afinfo = PDE(inode)->data; 2213 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2203 struct seq_file *seq;
2204 struct tcp_iter_state *s; 2214 struct tcp_iter_state *s;
2205 int rc; 2215 int err;
2206 2216
2207 if (unlikely(afinfo == NULL)) 2217 if (unlikely(afinfo == NULL))
2208 return -EINVAL; 2218 return -EINVAL;
2209 2219
2210 s = kzalloc(sizeof(*s), GFP_KERNEL); 2220 err = seq_open_net(inode, file, &afinfo->seq_ops,
2211 if (!s) 2221 sizeof(struct tcp_iter_state));
2212 return -ENOMEM; 2222 if (err < 0)
2223 return err;
2224
2225 s = ((struct seq_file *)file->private_data)->private;
2213 s->family = afinfo->family; 2226 s->family = afinfo->family;
2214 s->seq_ops.start = tcp_seq_start; 2227 return 0;
2215 s->seq_ops.next = tcp_seq_next;
2216 s->seq_ops.show = afinfo->seq_show;
2217 s->seq_ops.stop = tcp_seq_stop;
2218
2219 rc = seq_open(file, &s->seq_ops);
2220 if (rc)
2221 goto out_kfree;
2222 seq = file->private_data;
2223 seq->private = s;
2224out:
2225 return rc;
2226out_kfree:
2227 kfree(s);
2228 goto out;
2229} 2228}
2230 2229
2231int tcp_proc_register(struct tcp_seq_afinfo *afinfo) 2230int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2232{ 2231{
2233 int rc = 0; 2232 int rc = 0;
2234 struct proc_dir_entry *p; 2233 struct proc_dir_entry *p;
2235 2234
2236 if (!afinfo) 2235 afinfo->seq_fops.open = tcp_seq_open;
2237 return -EINVAL; 2236 afinfo->seq_fops.read = seq_read;
2238 afinfo->seq_fops->owner = afinfo->owner; 2237 afinfo->seq_fops.llseek = seq_lseek;
2239 afinfo->seq_fops->open = tcp_seq_open; 2238 afinfo->seq_fops.release = seq_release_net;
2240 afinfo->seq_fops->read = seq_read; 2239
2241 afinfo->seq_fops->llseek = seq_lseek; 2240 afinfo->seq_ops.start = tcp_seq_start;
2242 afinfo->seq_fops->release = seq_release_private; 2241 afinfo->seq_ops.next = tcp_seq_next;
2242 afinfo->seq_ops.stop = tcp_seq_stop;
2243 2243
2244 p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops); 2244 p = proc_net_fops_create(net, afinfo->name, S_IRUGO, &afinfo->seq_fops);
2245 if (p) 2245 if (p)
2246 p->data = afinfo; 2246 p->data = afinfo;
2247 else 2247 else
@@ -2249,12 +2249,9 @@ int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
2249 return rc; 2249 return rc;
2250} 2250}
2251 2251
2252void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo) 2252void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2253{ 2253{
2254 if (!afinfo) 2254 proc_net_remove(net, afinfo->name);
2255 return;
2256 proc_net_remove(&init_net, afinfo->name);
2257 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
2258} 2255}
2259 2256
2260static void get_openreq4(struct sock *sk, struct request_sock *req, 2257static void get_openreq4(struct sock *sk, struct request_sock *req,
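
seq_open_net(), used above in place of the hand-rolled kzalloc()+seq_open() sequence, allocates a caller-sized private state and records the namespace in it; this works because struct tcp_iter_state begins with the generic seq_net_private member, so seq_file_net() can read the net pointer without knowing the outer type. A runnable model of that first-member layout trick (simplified signatures, not the kernel's):

#include <stdio.h>
#include <stdlib.h>

struct net { int id; };
struct seq_net_private { struct net *net; };
struct tcp_iter_state  { struct seq_net_private p; int family; };  /* generic part first */

static void *open_net(struct net *net, size_t state_size)
{
        struct seq_net_private *p = calloc(1, state_size);
        if (p)
                p->net = net;    /* generic code only touches the head */
        return p;
}

static struct net *seq_file_net(void *priv)
{
        return ((struct seq_net_private *)priv)->net;
}

int main(void)
{
        struct net n = { .id = 42 };
        struct tcp_iter_state *s = open_net(&n, sizeof(*s));
        s->family = 2;                   /* AF_INET, set by the opener */
        printf("iterating net %d\n", seq_file_net(s)->id);
        free(s);
        return 0;
}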
@@ -2383,28 +2380,43 @@ out:
2383 return 0; 2380 return 0;
2384} 2381}
2385 2382
2386static struct file_operations tcp4_seq_fops;
2387static struct tcp_seq_afinfo tcp4_seq_afinfo = { 2383static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2388 .owner = THIS_MODULE,
2389 .name = "tcp", 2384 .name = "tcp",
2390 .family = AF_INET, 2385 .family = AF_INET,
2391 .seq_show = tcp4_seq_show, 2386 .seq_fops = {
2392 .seq_fops = &tcp4_seq_fops, 2387 .owner = THIS_MODULE,
2388 },
2389 .seq_ops = {
2390 .show = tcp4_seq_show,
2391 },
2392};
2393
2394static int tcp4_proc_init_net(struct net *net)
2395{
2396 return tcp_proc_register(net, &tcp4_seq_afinfo);
2397}
2398
2399static void tcp4_proc_exit_net(struct net *net)
2400{
2401 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2402}
2403
2404static struct pernet_operations tcp4_net_ops = {
2405 .init = tcp4_proc_init_net,
2406 .exit = tcp4_proc_exit_net,
2393}; 2407};
2394 2408
2395int __init tcp4_proc_init(void) 2409int __init tcp4_proc_init(void)
2396{ 2410{
2397 return tcp_proc_register(&tcp4_seq_afinfo); 2411 return register_pernet_subsys(&tcp4_net_ops);
2398} 2412}
2399 2413
2400void tcp4_proc_exit(void) 2414void tcp4_proc_exit(void)
2401{ 2415{
2402 tcp_proc_unregister(&tcp4_seq_afinfo); 2416 unregister_pernet_subsys(&tcp4_net_ops);
2403} 2417}
2404#endif /* CONFIG_PROC_FS */ 2418#endif /* CONFIG_PROC_FS */
2405 2419
2406DEFINE_PROTO_INUSE(tcp)
2407
2408struct proto tcp_prot = { 2420struct proto tcp_prot = {
2409 .name = "TCP", 2421 .name = "TCP",
2410 .owner = THIS_MODULE, 2422 .owner = THIS_MODULE,
@@ -2435,18 +2447,33 @@ struct proto tcp_prot = {
2435 .obj_size = sizeof(struct tcp_sock), 2447 .obj_size = sizeof(struct tcp_sock),
2436 .twsk_prot = &tcp_timewait_sock_ops, 2448 .twsk_prot = &tcp_timewait_sock_ops,
2437 .rsk_prot = &tcp_request_sock_ops, 2449 .rsk_prot = &tcp_request_sock_ops,
2438 .hashinfo = &tcp_hashinfo, 2450 .h.hashinfo = &tcp_hashinfo,
2439#ifdef CONFIG_COMPAT 2451#ifdef CONFIG_COMPAT
2440 .compat_setsockopt = compat_tcp_setsockopt, 2452 .compat_setsockopt = compat_tcp_setsockopt,
2441 .compat_getsockopt = compat_tcp_getsockopt, 2453 .compat_getsockopt = compat_tcp_getsockopt,
2442#endif 2454#endif
2443 REF_PROTO_INUSE(tcp)
2444}; 2455};
2445 2456
2446void __init tcp_v4_init(struct net_proto_family *ops) 2457
2458static int __net_init tcp_sk_init(struct net *net)
2459{
2460 return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2461 PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2462}
2463
2464static void __net_exit tcp_sk_exit(struct net *net)
2465{
2466 inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2467}
2468
2469static struct pernet_operations __net_initdata tcp_sk_ops = {
2470 .init = tcp_sk_init,
2471 .exit = tcp_sk_exit,
2472};
2473
2474void __init tcp_v4_init(void)
2447{ 2475{
2448 if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, 2476 if (register_pernet_device(&tcp_sk_ops))
2449 IPPROTO_TCP) < 0)
2450 panic("Failed to create the TCP control socket.\n"); 2477 panic("Failed to create the TCP control socket.\n");
2451} 2478}
2452 2479
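
tcp_sk_init()/tcp_sk_exit() above hang a per-namespace control socket off struct net and are driven by register_pernet_device(), so each namespace gets its own socket created at setup and destroyed at teardown. A userspace analogue of that init/exit registry pattern; names illustrative only:

#include <stdio.h>

struct net { int id; int tcp_sock_live; };

struct pernet_ops {
        int  (*init)(struct net *net);
        void (*exit)(struct net *net);
};

static int tcp_sk_init(struct net *net)
{
        net->tcp_sock_live = 1;          /* cf. inet_ctl_sock_create() */
        printf("net %d: TCP control socket created\n", net->id);
        return 0;
}

static void tcp_sk_exit(struct net *net)
{
        net->tcp_sock_live = 0;          /* cf. inet_ctl_sock_destroy() */
        printf("net %d: TCP control socket destroyed\n", net->id);
}

static const struct pernet_ops tcp_sk_ops = { tcp_sk_init, tcp_sk_exit };

int main(void)
{
        struct net a = { .id = 0 }, b = { .id = 1 };
        tcp_sk_ops.init(&a);    /* would run when each namespace is created */
        tcp_sk_ops.init(&b);
        tcp_sk_ops.exit(&b);    /* ...and exit at namespace teardown */
        tcp_sk_ops.exit(&a);
        return 0;
}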
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b61b76847ad9..019c8c16e5cc 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -35,6 +35,8 @@
35#endif 35#endif
36 36
37int sysctl_tcp_syncookies __read_mostly = SYNC_INIT; 37int sysctl_tcp_syncookies __read_mostly = SYNC_INIT;
38EXPORT_SYMBOL(sysctl_tcp_syncookies);
39
38int sysctl_tcp_abort_on_overflow __read_mostly; 40int sysctl_tcp_abort_on_overflow __read_mostly;
39 41
40struct inet_timewait_death_row tcp_death_row = { 42struct inet_timewait_death_row tcp_death_row = {
@@ -536,7 +538,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
536 * Enforce "SYN-ACK" according to figure 8, figure 6 538 * Enforce "SYN-ACK" according to figure 8, figure 6
537 * of RFC793, fixed by RFC1122. 539 * of RFC793, fixed by RFC1122.
538 */ 540 */
539 req->rsk_ops->rtx_syn_ack(sk, req, NULL); 541 req->rsk_ops->rtx_syn_ack(sk, req);
540 return NULL; 542 return NULL;
541 } 543 }
542 544
@@ -569,10 +571,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
569 does sequence test, SYN is truncated, and thus we consider 571 does sequence test, SYN is truncated, and thus we consider
570 it a bare ACK. 572 it a bare ACK.
571 573
572 If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this 574 Both ends (listening sockets) accept the new incoming
573 bare ACK. Otherwise, we create an established connection. Both 575 connection and try to talk to each other. 8-)
574 ends (listening sockets) accept the new incoming connection and try
575 to talk to each other. 8-)
576 576
 577	 Note: This case is both harmless and rare. The probability is about the	 577
 578	 same as us discovering intelligent life on another planet tomorrow.	 578
@@ -640,13 +640,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
640 if (!(flg & TCP_FLAG_ACK)) 640 if (!(flg & TCP_FLAG_ACK))
641 return NULL; 641 return NULL;
642 642
643 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
644 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
645 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
646 inet_rsk(req)->acked = 1;
647 return NULL;
648 }
649
650 /* OK, ACK is valid, create big socket and 643 /* OK, ACK is valid, create big socket and
651 * feed this segment to it. It will repeat all 644 * feed this segment to it. It will repeat all
652 * the tests. THIS SEGMENT MUST MOVE SOCKET TO 645 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
@@ -685,7 +678,24 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
685 inet_csk_reqsk_queue_unlink(sk, req, prev); 678 inet_csk_reqsk_queue_unlink(sk, req, prev);
686 inet_csk_reqsk_queue_removed(sk, req); 679 inet_csk_reqsk_queue_removed(sk, req);
687 680
688 inet_csk_reqsk_queue_add(sk, req, child); 681 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
682 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
683
 684	 /* the accept queue handling is done in the established receive slow
 685	 * path, so let's make sure to start there
686 */
687 tcp_sk(child)->pred_flags = 0;
688 sock_hold(sk);
689 sock_hold(child);
690 tcp_sk(child)->defer_tcp_accept.listen_sk = sk;
691 tcp_sk(child)->defer_tcp_accept.request = req;
692
693 inet_csk_reset_keepalive_timer(child,
694 inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ);
695 } else {
696 inet_csk_reqsk_queue_add(sk, req, child);
697 }
698
689 return child; 699 return child;
690 700
691 listen_overflow: 701 listen_overflow:
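
The block above changes the TCP_DEFER_ACCEPT semantics: the final ACK now always creates the child socket, but if the option is armed and the ACK carries no data (end_seq == rcv_isn + 1), the child is parked on a keepalive timer instead of being queued to accept(). A toy model of that decision; purely illustrative, no kernel API:

#include <stdio.h>

struct child { int deferred; };

static void handshake_done(struct child *c, int ack_has_data, int defer_secs)
{
        if (defer_secs && !ack_has_data) {
                c->deferred = 1;   /* cf. defer_tcp_accept + keepalive timer */
                printf("child parked up to %ds awaiting data\n", defer_secs);
        } else {
                c->deferred = 0;
                printf("child queued to the listener's accept queue\n");
        }
}

int main(void)
{
        struct child c1, c2;
        handshake_done(&c1, 0, 30);   /* bare ACK: deferred */
        handshake_done(&c2, 1, 30);   /* ACK with payload: accept()able now */
        return 0;
}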
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d29ef79c00ca..90270cbdf42c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -998,7 +998,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
998 xmit_size_goal = mss_now; 998 xmit_size_goal = mss_now;
999 999
1000 if (doing_tso) { 1000 if (doing_tso) {
1001 xmit_size_goal = (65535 - 1001 xmit_size_goal = ((sk->sk_gso_max_size - 1) -
1002 inet_csk(sk)->icsk_af_ops->net_header_len - 1002 inet_csk(sk)->icsk_af_ops->net_header_len -
1003 inet_csk(sk)->icsk_ext_hdr_len - 1003 inet_csk(sk)->icsk_ext_hdr_len -
1004 tp->tcp_header_len); 1004 tp->tcp_header_len);
@@ -1282,7 +1282,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1282 limit = min(send_win, cong_win); 1282 limit = min(send_win, cong_win);
1283 1283
1284 /* If a full-sized TSO skb can be sent, do it. */ 1284 /* If a full-sized TSO skb can be sent, do it. */
1285 if (limit >= 65536) 1285 if (limit >= sk->sk_gso_max_size)
1286 goto send_now; 1286 goto send_now;
1287 1287
1288 if (sysctl_tcp_tso_win_divisor) { 1288 if (sysctl_tcp_tso_win_divisor) {
@@ -2236,7 +2236,11 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2236 2236
2237 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ 2237 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
2238 th->window = htons(min(req->rcv_wnd, 65535U)); 2238 th->window = htons(min(req->rcv_wnd, 65535U));
2239 2239#ifdef CONFIG_SYN_COOKIES
2240 if (unlikely(req->cookie_ts))
2241 TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
2242 else
2243#endif
2240 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2244 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2241 tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok, 2245 tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
2242 ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale, 2246 ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
@@ -2571,6 +2575,7 @@ void tcp_send_probe0(struct sock *sk)
2571 } 2575 }
2572} 2576}
2573 2577
2578EXPORT_SYMBOL(tcp_select_initial_window);
2574EXPORT_SYMBOL(tcp_connect); 2579EXPORT_SYMBOL(tcp_connect);
2575EXPORT_SYMBOL(tcp_make_synack); 2580EXPORT_SYMBOL(tcp_make_synack);
2576EXPORT_SYMBOL(tcp_simple_retransmit); 2581EXPORT_SYMBOL(tcp_simple_retransmit);
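
Both tcp_current_mss() and tcp_tso_should_defer() above replace the hard-wired 64 KB ceiling with the device's advertised sk_gso_max_size. The size goal is the GSO ceiling minus network, extension, and TCP header space, rounded down to whole segments (tcp_current_mss() does the modulo step a few lines later). The same arithmetic as a runnable function, with sample header sizes:

#include <stdio.h>

static unsigned int xmit_size_goal(unsigned int gso_max_size,
                                   unsigned int net_hdr, unsigned int ext_hdr,
                                   unsigned int tcp_hdr, unsigned int mss)
{
        unsigned int goal = (gso_max_size - 1) - net_hdr - ext_hdr - tcp_hdr;
        goal -= goal % mss;     /* as in tcp_current_mss(): whole segments */
        return goal;
}

int main(void)
{
        /* 64 KB-capable NIC, IPv4, no ext headers, 32-byte TCP hdr, MSS 1448 */
        printf("goal = %u bytes\n", xmit_size_goal(65536, 20, 0, 32, 1448));
        return 0;
}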
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 803d758a2b12..160d16f9f4fc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -481,6 +481,11 @@ static void tcp_keepalive_timer (unsigned long data)
481 goto death; 481 goto death;
482 } 482 }
483 483
484 if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
485 tcp_send_active_reset(sk, GFP_ATOMIC);
486 goto death;
487 }
488
484 if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) 489 if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
485 goto out; 490 goto out;
486 491
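
The keepalive-timer check above is what reclaims a deferred child: if the timer fires while the connection is established and defer_tcp_accept is still pending (no data ever arrived), the child is reset rather than kept alive. A minimal decision sketch, illustrative names only:

#include <stdio.h>

enum { TCP_ESTABLISHED = 1, TCP_CLOSE = 7 };

static void keepalive_fire(int state, int defer_pending)
{
        if (defer_pending && state == TCP_ESTABLISHED) {
                printf("no data arrived: send RST, kill child\n");
                return;                 /* cf. tcp_send_active_reset() */
        }
        printf("fall through to normal keepalive handling\n");
}

int main(void)
{
        keepalive_fire(TCP_ESTABLISHED, 1);
        keepalive_fire(TCP_ESTABLISHED, 0);
        return 0;
}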
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1704c1474ea1..7b7fcacec4a0 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -137,29 +137,28 @@ static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
137 struct hlist_node *node; 137 struct hlist_node *node;
138 138
139 sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) 139 sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
140 if (sk->sk_net == net && sk->sk_hash == num) 140 if (net_eq(sock_net(sk), net) && sk->sk_hash == num)
141 return 1; 141 return 1;
142 return 0; 142 return 0;
143} 143}
144 144
145/** 145/**
146 * __udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 146 * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
147 * 147 *
148 * @sk: socket struct in question 148 * @sk: socket struct in question
149 * @snum: port number to look up 149 * @snum: port number to look up
150 * @udptable: hash list table, must be of UDP_HTABLE_SIZE
151 * @saddr_comp: AF-dependent comparison of bound local IP addresses 150 * @saddr_comp: AF-dependent comparison of bound local IP addresses
152 */ 151 */
153int __udp_lib_get_port(struct sock *sk, unsigned short snum, 152int udp_lib_get_port(struct sock *sk, unsigned short snum,
154 struct hlist_head udptable[],
155 int (*saddr_comp)(const struct sock *sk1, 153 int (*saddr_comp)(const struct sock *sk1,
156 const struct sock *sk2 ) ) 154 const struct sock *sk2 ) )
157{ 155{
156 struct hlist_head *udptable = sk->sk_prot->h.udp_hash;
158 struct hlist_node *node; 157 struct hlist_node *node;
159 struct hlist_head *head; 158 struct hlist_head *head;
160 struct sock *sk2; 159 struct sock *sk2;
161 int error = 1; 160 int error = 1;
162 struct net *net = sk->sk_net; 161 struct net *net = sock_net(sk);
163 162
164 write_lock_bh(&udp_hash_lock); 163 write_lock_bh(&udp_hash_lock);
165 164
@@ -219,7 +218,7 @@ gotit:
219 sk_for_each(sk2, node, head) 218 sk_for_each(sk2, node, head)
220 if (sk2->sk_hash == snum && 219 if (sk2->sk_hash == snum &&
221 sk2 != sk && 220 sk2 != sk &&
222 sk2->sk_net == net && 221 net_eq(sock_net(sk2), net) &&
223 (!sk2->sk_reuse || !sk->sk_reuse) && 222 (!sk2->sk_reuse || !sk->sk_reuse) &&
224 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if 223 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
225 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 224 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
@@ -232,7 +231,7 @@ gotit:
232 if (sk_unhashed(sk)) { 231 if (sk_unhashed(sk)) {
233 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; 232 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
234 sk_add_node(sk, head); 233 sk_add_node(sk, head);
235 sock_prot_inuse_add(sk->sk_prot, 1); 234 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
236 } 235 }
237 error = 0; 236 error = 0;
238fail: 237fail:
@@ -240,13 +239,7 @@ fail:
240 return error; 239 return error;
241} 240}
242 241
243int udp_get_port(struct sock *sk, unsigned short snum, 242static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
244 int (*scmp)(const struct sock *, const struct sock *))
245{
246 return __udp_lib_get_port(sk, snum, udp_hash, scmp);
247}
248
249int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
250{ 243{
251 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 244 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
252 245
@@ -255,9 +248,9 @@ int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
255 inet1->rcv_saddr == inet2->rcv_saddr )); 248 inet1->rcv_saddr == inet2->rcv_saddr ));
256} 249}
257 250
258static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) 251int udp_v4_get_port(struct sock *sk, unsigned short snum)
259{ 252{
260 return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); 253 return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
261} 254}
262 255
 263	/* UDP nearly always wildcards out the wazoo; it makes no sense to try	 256
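
After the change above, udp_lib_get_port() no longer takes the hash table as a parameter; it reads it from sk->sk_prot->h.udp_hash, which is why plain UDP and UDP-Lite (below) can now share udp_v4_get_port(). A self-contained model of binding through the socket's own proto; the types are stand-ins:

#include <stdio.h>

struct proto { const char *name; int *hash; };
struct sock  { const struct proto *prot; };

static int lib_get_port(struct sock *sk, unsigned short snum)
{
        int *table = sk->prot->hash;    /* cf. sk->sk_prot->h.udp_hash */
        table[snum % 8]++;              /* stand-in for the chain insert */
        printf("%s bound port %u\n", sk->prot->name, (unsigned)snum);
        return 0;
}

int main(void)
{
        int udp_hash[8] = {0}, udplite_hash[8] = {0};
        const struct proto udp  = { "UDP", udp_hash };
        const struct proto lite = { "UDP-Lite", udplite_hash };
        struct sock a = { &udp }, b = { &lite };
        lib_get_port(&a, 53);
        lib_get_port(&b, 53);           /* distinct table, no clash */
        return 0;
}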
@@ -276,7 +269,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
276 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { 269 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
277 struct inet_sock *inet = inet_sk(sk); 270 struct inet_sock *inet = inet_sk(sk);
278 271
279 if (sk->sk_net == net && sk->sk_hash == hnum && 272 if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
280 !ipv6_only_sock(sk)) { 273 !ipv6_only_sock(sk)) {
281 int score = (sk->sk_family == PF_INET ? 1 : 0); 274 int score = (sk->sk_family == PF_INET ? 1 : 0);
282 if (inet->rcv_saddr) { 275 if (inet->rcv_saddr) {
@@ -364,7 +357,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
364 int harderr; 357 int harderr;
365 int err; 358 int err;
366 359
367 sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, 360 sk = __udp4_lib_lookup(dev_net(skb->dev), iph->daddr, uh->dest,
368 iph->saddr, uh->source, skb->dev->ifindex, udptable); 361 iph->saddr, uh->source, skb->dev->ifindex, udptable);
369 if (sk == NULL) { 362 if (sk == NULL) {
370 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 363 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
@@ -614,7 +607,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
614 607
615 ipc.oif = sk->sk_bound_dev_if; 608 ipc.oif = sk->sk_bound_dev_if;
616 if (msg->msg_controllen) { 609 if (msg->msg_controllen) {
617 err = ip_cmsg_send(msg, &ipc); 610 err = ip_cmsg_send(sock_net(sk), msg, &ipc);
618 if (err) 611 if (err)
619 return err; 612 return err;
620 if (ipc.opt) 613 if (ipc.opt)
@@ -663,7 +656,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
663 { .sport = inet->sport, 656 { .sport = inet->sport,
664 .dport = dport } } }; 657 .dport = dport } } };
665 security_sk_classify_flow(sk, &fl); 658 security_sk_classify_flow(sk, &fl);
666 err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); 659 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
667 if (err) { 660 if (err) {
668 if (err == -ENETUNREACH) 661 if (err == -ENETUNREACH)
669 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 662 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
@@ -1188,7 +1181,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1188 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) 1181 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
1189 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); 1182 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
1190 1183
1191 sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, 1184 sk = __udp4_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr,
1192 uh->dest, inet_iif(skb), udptable); 1185 uh->dest, inet_iif(skb), udptable);
1193 1186
1194 if (sk != NULL) { 1187 if (sk != NULL) {
@@ -1474,8 +1467,6 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1474 1467
1475} 1468}
1476 1469
1477DEFINE_PROTO_INUSE(udp)
1478
1479struct proto udp_prot = { 1470struct proto udp_prot = {
1480 .name = "UDP", 1471 .name = "UDP",
1481 .owner = THIS_MODULE, 1472 .owner = THIS_MODULE,
@@ -1498,11 +1489,11 @@ struct proto udp_prot = {
1498 .sysctl_wmem = &sysctl_udp_wmem_min, 1489 .sysctl_wmem = &sysctl_udp_wmem_min,
1499 .sysctl_rmem = &sysctl_udp_rmem_min, 1490 .sysctl_rmem = &sysctl_udp_rmem_min,
1500 .obj_size = sizeof(struct udp_sock), 1491 .obj_size = sizeof(struct udp_sock),
1492 .h.udp_hash = udp_hash,
1501#ifdef CONFIG_COMPAT 1493#ifdef CONFIG_COMPAT
1502 .compat_setsockopt = compat_udp_setsockopt, 1494 .compat_setsockopt = compat_udp_setsockopt,
1503 .compat_getsockopt = compat_udp_getsockopt, 1495 .compat_getsockopt = compat_udp_getsockopt,
1504#endif 1496#endif
1505 REF_PROTO_INUSE(udp)
1506}; 1497};
1507 1498
1508/* ------------------------------------------------------------------------ */ 1499/* ------------------------------------------------------------------------ */
@@ -1512,10 +1503,13 @@ static struct sock *udp_get_first(struct seq_file *seq)
1512{ 1503{
1513 struct sock *sk; 1504 struct sock *sk;
1514 struct udp_iter_state *state = seq->private; 1505 struct udp_iter_state *state = seq->private;
1506 struct net *net = seq_file_net(seq);
1515 1507
1516 for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { 1508 for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
1517 struct hlist_node *node; 1509 struct hlist_node *node;
1518 sk_for_each(sk, node, state->hashtable + state->bucket) { 1510 sk_for_each(sk, node, state->hashtable + state->bucket) {
1511 if (!net_eq(sock_net(sk), net))
1512 continue;
1519 if (sk->sk_family == state->family) 1513 if (sk->sk_family == state->family)
1520 goto found; 1514 goto found;
1521 } 1515 }
@@ -1528,12 +1522,13 @@ found:
1528static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) 1522static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
1529{ 1523{
1530 struct udp_iter_state *state = seq->private; 1524 struct udp_iter_state *state = seq->private;
1525 struct net *net = seq_file_net(seq);
1531 1526
1532 do { 1527 do {
1533 sk = sk_next(sk); 1528 sk = sk_next(sk);
1534try_again: 1529try_again:
1535 ; 1530 ;
1536 } while (sk && sk->sk_family != state->family); 1531 } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
1537 1532
1538 if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { 1533 if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
1539 sk = sk_head(state->hashtable + state->bucket); 1534 sk = sk_head(state->hashtable + state->bucket);
@@ -1581,47 +1576,36 @@ static void udp_seq_stop(struct seq_file *seq, void *v)
1581static int udp_seq_open(struct inode *inode, struct file *file) 1576static int udp_seq_open(struct inode *inode, struct file *file)
1582{ 1577{
1583 struct udp_seq_afinfo *afinfo = PDE(inode)->data; 1578 struct udp_seq_afinfo *afinfo = PDE(inode)->data;
1584 struct seq_file *seq; 1579 struct udp_iter_state *s;
1585 int rc = -ENOMEM; 1580 int err;
1586 struct udp_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
1587 1581
1588 if (!s) 1582 err = seq_open_net(inode, file, &afinfo->seq_ops,
1589 goto out; 1583 sizeof(struct udp_iter_state));
1584 if (err < 0)
1585 return err;
1586
1587 s = ((struct seq_file *)file->private_data)->private;
1590 s->family = afinfo->family; 1588 s->family = afinfo->family;
1591 s->hashtable = afinfo->hashtable; 1589 s->hashtable = afinfo->hashtable;
1592 s->seq_ops.start = udp_seq_start; 1590 return err;
1593 s->seq_ops.next = udp_seq_next;
1594 s->seq_ops.show = afinfo->seq_show;
1595 s->seq_ops.stop = udp_seq_stop;
1596
1597 rc = seq_open(file, &s->seq_ops);
1598 if (rc)
1599 goto out_kfree;
1600
1601 seq = file->private_data;
1602 seq->private = s;
1603out:
1604 return rc;
1605out_kfree:
1606 kfree(s);
1607 goto out;
1608} 1591}
1609 1592
1610/* ------------------------------------------------------------------------ */ 1593/* ------------------------------------------------------------------------ */
1611int udp_proc_register(struct udp_seq_afinfo *afinfo) 1594int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
1612{ 1595{
1613 struct proc_dir_entry *p; 1596 struct proc_dir_entry *p;
1614 int rc = 0; 1597 int rc = 0;
1615 1598
1616 if (!afinfo) 1599 afinfo->seq_fops.open = udp_seq_open;
1617 return -EINVAL; 1600 afinfo->seq_fops.read = seq_read;
1618 afinfo->seq_fops->owner = afinfo->owner; 1601 afinfo->seq_fops.llseek = seq_lseek;
1619 afinfo->seq_fops->open = udp_seq_open; 1602 afinfo->seq_fops.release = seq_release_net;
1620 afinfo->seq_fops->read = seq_read; 1603
1621 afinfo->seq_fops->llseek = seq_lseek; 1604 afinfo->seq_ops.start = udp_seq_start;
1622 afinfo->seq_fops->release = seq_release_private; 1605 afinfo->seq_ops.next = udp_seq_next;
1606 afinfo->seq_ops.stop = udp_seq_stop;
1623 1607
1624 p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops); 1608 p = proc_net_fops_create(net, afinfo->name, S_IRUGO, &afinfo->seq_fops);
1625 if (p) 1609 if (p)
1626 p->data = afinfo; 1610 p->data = afinfo;
1627 else 1611 else
@@ -1629,12 +1613,9 @@ int udp_proc_register(struct udp_seq_afinfo *afinfo)
1629 return rc; 1613 return rc;
1630} 1614}
1631 1615
1632void udp_proc_unregister(struct udp_seq_afinfo *afinfo) 1616void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
1633{ 1617{
1634 if (!afinfo) 1618 proc_net_remove(net, afinfo->name);
1635 return;
1636 proc_net_remove(&init_net, afinfo->name);
1637 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
1638} 1619}
1639 1620
1640/* ------------------------------------------------------------------------ */ 1621/* ------------------------------------------------------------------------ */
@@ -1673,24 +1654,41 @@ int udp4_seq_show(struct seq_file *seq, void *v)
1673} 1654}
1674 1655
1675/* ------------------------------------------------------------------------ */ 1656/* ------------------------------------------------------------------------ */
1676static struct file_operations udp4_seq_fops;
1677static struct udp_seq_afinfo udp4_seq_afinfo = { 1657static struct udp_seq_afinfo udp4_seq_afinfo = {
1678 .owner = THIS_MODULE,
1679 .name = "udp", 1658 .name = "udp",
1680 .family = AF_INET, 1659 .family = AF_INET,
1681 .hashtable = udp_hash, 1660 .hashtable = udp_hash,
1682 .seq_show = udp4_seq_show, 1661 .seq_fops = {
1683 .seq_fops = &udp4_seq_fops, 1662 .owner = THIS_MODULE,
1663 },
1664 .seq_ops = {
1665 .show = udp4_seq_show,
1666 },
1667};
1668
1669static int udp4_proc_init_net(struct net *net)
1670{
1671 return udp_proc_register(net, &udp4_seq_afinfo);
1672}
1673
1674static void udp4_proc_exit_net(struct net *net)
1675{
1676 udp_proc_unregister(net, &udp4_seq_afinfo);
1677}
1678
1679static struct pernet_operations udp4_net_ops = {
1680 .init = udp4_proc_init_net,
1681 .exit = udp4_proc_exit_net,
1684}; 1682};
1685 1683
1686int __init udp4_proc_init(void) 1684int __init udp4_proc_init(void)
1687{ 1685{
1688 return udp_proc_register(&udp4_seq_afinfo); 1686 return register_pernet_subsys(&udp4_net_ops);
1689} 1687}
1690 1688
1691void udp4_proc_exit(void) 1689void udp4_proc_exit(void)
1692{ 1690{
1693 udp_proc_unregister(&udp4_seq_afinfo); 1691 unregister_pernet_subsys(&udp4_net_ops);
1694} 1692}
1695#endif /* CONFIG_PROC_FS */ 1693#endif /* CONFIG_PROC_FS */
1696 1694
@@ -1717,12 +1715,12 @@ EXPORT_SYMBOL(udp_disconnect);
1717EXPORT_SYMBOL(udp_hash); 1715EXPORT_SYMBOL(udp_hash);
1718EXPORT_SYMBOL(udp_hash_lock); 1716EXPORT_SYMBOL(udp_hash_lock);
1719EXPORT_SYMBOL(udp_ioctl); 1717EXPORT_SYMBOL(udp_ioctl);
1720EXPORT_SYMBOL(udp_get_port);
1721EXPORT_SYMBOL(udp_prot); 1718EXPORT_SYMBOL(udp_prot);
1722EXPORT_SYMBOL(udp_sendmsg); 1719EXPORT_SYMBOL(udp_sendmsg);
1723EXPORT_SYMBOL(udp_lib_getsockopt); 1720EXPORT_SYMBOL(udp_lib_getsockopt);
1724EXPORT_SYMBOL(udp_lib_setsockopt); 1721EXPORT_SYMBOL(udp_lib_setsockopt);
1725EXPORT_SYMBOL(udp_poll); 1722EXPORT_SYMBOL(udp_poll);
1723EXPORT_SYMBOL(udp_lib_get_port);
1726 1724
1727#ifdef CONFIG_PROC_FS 1725#ifdef CONFIG_PROC_FS
1728EXPORT_SYMBOL(udp_proc_register); 1726EXPORT_SYMBOL(udp_proc_register);
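
The udp_seq_afinfo conversion above (mirroring the TCP one) embeds seq_fops and seq_ops by value rather than pointing at separate static objects, so udp_proc_register() only has to fill in the callbacks shared by every protocol instance. A small analogue of embedding-by-value plus register-time fill-in; illustrative structures, not the kernel's:

#include <stdio.h>

struct seq_ops { void (*start)(void); void (*show)(void); };
struct afinfo  { const char *name; struct seq_ops seq_ops; };  /* by value */

static void shared_start(void) { printf("  start iteration\n"); }
static void udp4_show(void)    { printf("  one /proc/net line\n"); }

static void proc_register(struct afinfo *a)
{
        a->seq_ops.start = shared_start;  /* shared piece filled here */
        printf("registered /proc/net/%s\n", a->name);
        a->seq_ops.start();
        a->seq_ops.show();                /* per-protocol piece was preset */
}

int main(void)
{
        struct afinfo udp4 = { .name = "udp", .seq_ops = { .show = udp4_show } };
        proc_register(&udp4);
        return 0;
}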
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 6c55828e41ba..7288bf7977fb 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -8,11 +8,7 @@
8extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int ); 8extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int );
9extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []); 9extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []);
10 10
11extern int __udp_lib_get_port(struct sock *sk, unsigned short snum, 11extern int udp_v4_get_port(struct sock *sk, unsigned short snum);
12 struct hlist_head udptable[],
13 int (*)(const struct sock*,const struct sock*));
14extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *);
15
16 12
17extern int udp_setsockopt(struct sock *sk, int level, int optname, 13extern int udp_setsockopt(struct sock *sk, int level, int optname,
18 char __user *optval, int optlen); 14 char __user *optval, int optlen);
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 001b881ca36f..72ce26b6c4d3 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -17,17 +17,6 @@ DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly;
17 17
18struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; 18struct hlist_head udplite_hash[UDP_HTABLE_SIZE];
19 19
20int udplite_get_port(struct sock *sk, unsigned short p,
21 int (*c)(const struct sock *, const struct sock *))
22{
23 return __udp_lib_get_port(sk, p, udplite_hash, c);
24}
25
26static int udplite_v4_get_port(struct sock *sk, unsigned short snum)
27{
28 return udplite_get_port(sk, snum, ipv4_rcv_saddr_equal);
29}
30
31static int udplite_rcv(struct sk_buff *skb) 20static int udplite_rcv(struct sk_buff *skb)
32{ 21{
33 return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); 22 return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
@@ -42,10 +31,9 @@ static struct net_protocol udplite_protocol = {
42 .handler = udplite_rcv, 31 .handler = udplite_rcv,
43 .err_handler = udplite_err, 32 .err_handler = udplite_err,
44 .no_policy = 1, 33 .no_policy = 1,
34 .netns_ok = 1,
45}; 35};
46 36
47DEFINE_PROTO_INUSE(udplite)
48
49struct proto udplite_prot = { 37struct proto udplite_prot = {
50 .name = "UDP-Lite", 38 .name = "UDP-Lite",
51 .owner = THIS_MODULE, 39 .owner = THIS_MODULE,
@@ -63,13 +51,13 @@ struct proto udplite_prot = {
63 .backlog_rcv = udp_queue_rcv_skb, 51 .backlog_rcv = udp_queue_rcv_skb,
64 .hash = udp_lib_hash, 52 .hash = udp_lib_hash,
65 .unhash = udp_lib_unhash, 53 .unhash = udp_lib_unhash,
66 .get_port = udplite_v4_get_port, 54 .get_port = udp_v4_get_port,
67 .obj_size = sizeof(struct udp_sock), 55 .obj_size = sizeof(struct udp_sock),
56 .h.udp_hash = udplite_hash,
68#ifdef CONFIG_COMPAT 57#ifdef CONFIG_COMPAT
69 .compat_setsockopt = compat_udp_setsockopt, 58 .compat_setsockopt = compat_udp_setsockopt,
70 .compat_getsockopt = compat_udp_getsockopt, 59 .compat_getsockopt = compat_udp_getsockopt,
71#endif 60#endif
72 REF_PROTO_INUSE(udplite)
73}; 61};
74 62
75static struct inet_protosw udplite4_protosw = { 63static struct inet_protosw udplite4_protosw = {
@@ -83,15 +71,42 @@ static struct inet_protosw udplite4_protosw = {
83}; 71};
84 72
85#ifdef CONFIG_PROC_FS 73#ifdef CONFIG_PROC_FS
86static struct file_operations udplite4_seq_fops;
87static struct udp_seq_afinfo udplite4_seq_afinfo = { 74static struct udp_seq_afinfo udplite4_seq_afinfo = {
88 .owner = THIS_MODULE,
89 .name = "udplite", 75 .name = "udplite",
90 .family = AF_INET, 76 .family = AF_INET,
91 .hashtable = udplite_hash, 77 .hashtable = udplite_hash,
92 .seq_show = udp4_seq_show, 78 .seq_fops = {
93 .seq_fops = &udplite4_seq_fops, 79 .owner = THIS_MODULE,
80 },
81 .seq_ops = {
82 .show = udp4_seq_show,
83 },
84};
85
86static int udplite4_proc_init_net(struct net *net)
87{
88 return udp_proc_register(net, &udplite4_seq_afinfo);
89}
90
91static void udplite4_proc_exit_net(struct net *net)
92{
93 udp_proc_unregister(net, &udplite4_seq_afinfo);
94}
95
96static struct pernet_operations udplite4_net_ops = {
97 .init = udplite4_proc_init_net,
98 .exit = udplite4_proc_exit_net,
94}; 99};
100
101static __init int udplite4_proc_init(void)
102{
103 return register_pernet_subsys(&udplite4_net_ops);
104}
105#else
106static inline int udplite4_proc_init(void)
107{
108 return 0;
109}
95#endif 110#endif
96 111
97void __init udplite4_register(void) 112void __init udplite4_register(void)
@@ -104,18 +119,15 @@ void __init udplite4_register(void)
104 119
105 inet_register_protosw(&udplite4_protosw); 120 inet_register_protosw(&udplite4_protosw);
106 121
107#ifdef CONFIG_PROC_FS 122 if (udplite4_proc_init())
108 if (udp_proc_register(&udplite4_seq_afinfo)) /* udplite4_proc_init() */ 123 printk(KERN_ERR "%s: Cannot register /proc!\n", __func__);
109 printk(KERN_ERR "%s: Cannot register /proc!\n", __FUNCTION__);
110#endif
111 return; 124 return;
112 125
113out_unregister_proto: 126out_unregister_proto:
114 proto_unregister(&udplite_prot); 127 proto_unregister(&udplite_prot);
115out_register_err: 128out_register_err:
116 printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __FUNCTION__); 129 printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__);
117} 130}
118 131
119EXPORT_SYMBOL(udplite_hash); 132EXPORT_SYMBOL(udplite_hash);
120EXPORT_SYMBOL(udplite_prot); 133EXPORT_SYMBOL(udplite_prot);
121EXPORT_SYMBOL(udplite_get_port);
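
udplite4_proc_init() above gets a real body under CONFIG_PROC_FS and a trivial inline stub otherwise, letting udplite4_register() drop its own #ifdef block. The same compile-time stub pattern, runnable with either setting:

#include <stdio.h>

#define CONFIG_PROC_FS 1                /* flip to 0 to build the stub */

#if CONFIG_PROC_FS
static int udplite4_proc_init(void)
{
        printf("registered per-net /proc/net/udplite\n");
        return 0;
}
#else
static inline int udplite4_proc_init(void) { return 0; }
#endif

int main(void)
{
        if (udplite4_proc_init())       /* caller needs no #ifdef of its own */
                printf("Cannot register /proc!\n");
        return 0;
}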
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 10ed70491434..c63de0a72aba 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -221,7 +221,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
221 xdst = (struct xfrm_dst *)dst; 221 xdst = (struct xfrm_dst *)dst;
222 if (xdst->u.rt.idev->dev == dev) { 222 if (xdst->u.rt.idev->dev == dev) {
223 struct in_device *loopback_idev = 223 struct in_device *loopback_idev =
224 in_dev_get(dev->nd_net->loopback_dev); 224 in_dev_get(dev_net(dev)->loopback_dev);
225 BUG_ON(!loopback_idev); 225 BUG_ON(!loopback_idev);
226 226
227 do { 227 do {