aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-04-18 21:02:35 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-04-18 21:02:35 -0400
commit 334d094504c2fe1c44211ecb49146ae6bca8c321 (patch)
tree d3c0f68e4b9f8e3d2ccc39e7dfe5de0534a5fad9 /net/ipv4
parent d1a4be630fb068f251d64b62919f143c49ca8057 (diff)
parent d1643d24c61b725bef399cc1cf2944b4c9c23177 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.26
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.26: (1090 commits)
  [NET]: Fix and allocate less memory for ->priv'less netdevices
  [IPV6]: Fix dangling references on error in fib6_add().
  [NETLABEL]: Fix NULL deref in netlbl_unlabel_staticlist_gen() if ifindex not found
  [PKT_SCHED]: Fix datalen check in tcf_simp_init().
  [INET]: Uninline the __inet_inherit_port call.
  [INET]: Drop the inet_inherit_port() call.
  SCTP: Initialize partial_bytes_acked to 0, when all of the data is acked.
  [netdrvr] forcedeth: internal simplifications; changelog removal
  phylib: factor out get_phy_id from within get_phy_device
  PHY: add BCM5464 support to broadcom PHY driver
  cxgb3: Fix __must_check warning with dev_dbg.
  tc35815: Statistics cleanup
  natsemi: fix MMIO for PPC 44x platforms
  [TIPC]: Cleanup of TIPC reference table code
  [TIPC]: Optimized initialization of TIPC reference table
  [TIPC]: Remove inlining of reference table locking routines
  e1000: convert uint16_t style integers to u16
  ixgb: convert uint16_t style integers to u16
  sb1000.c: make const arrays static
  sb1000.c: stop inlining largish static functions
  ...
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/af_inet.c 70
-rw-r--r-- net/ipv4/arp.c 62
-rw-r--r-- net/ipv4/cipso_ipv4.c 1
-rw-r--r-- net/ipv4/devinet.c 35
-rw-r--r-- net/ipv4/fib_frontend.c 20
-rw-r--r-- net/ipv4/fib_hash.c 5
-rw-r--r-- net/ipv4/fib_rules.c 2
-rw-r--r-- net/ipv4/fib_semantics.c 3
-rw-r--r-- net/ipv4/fib_trie.c 240
-rw-r--r-- net/ipv4/icmp.c 203
-rw-r--r-- net/ipv4/igmp.c 45
-rw-r--r-- net/ipv4/inet_connection_sock.c 45
-rw-r--r-- net/ipv4/inet_fragment.c 10
-rw-r--r-- net/ipv4/inet_hashtables.c 43
-rw-r--r-- net/ipv4/inet_timewait_sock.c 5
-rw-r--r-- net/ipv4/ip_forward.c 2
-rw-r--r-- net/ipv4/ip_fragment.c 26
-rw-r--r-- net/ipv4/ip_gre.c 228
-rw-r--r-- net/ipv4/ip_input.c 21
-rw-r--r-- net/ipv4/ip_options.c 63
-rw-r--r-- net/ipv4/ip_output.c 28
-rw-r--r-- net/ipv4/ip_sockglue.c 13
-rw-r--r-- net/ipv4/ipcomp.c 2
-rw-r--r-- net/ipv4/ipconfig.c 39
-rw-r--r-- net/ipv4/ipip.c 220
-rw-r--r-- net/ipv4/ipmr.c 12
-rw-r--r-- net/ipv4/ipvs/ip_vs_proto_tcp.c 2
-rw-r--r-- net/ipv4/ipvs/ip_vs_proto_udp.c 2
-rw-r--r-- net/ipv4/ipvs/ip_vs_sync.c 4
-rw-r--r-- net/ipv4/netfilter.c 37
-rw-r--r-- net/ipv4/netfilter/Kconfig 15
-rw-r--r-- net/ipv4/netfilter/Makefile 5
-rw-r--r-- net/ipv4/netfilter/arp_tables.c 89
-rw-r--r-- net/ipv4/netfilter/arpt_mangle.c 12
-rw-r--r-- net/ipv4/netfilter/arptable_filter.c 7
-rw-r--r-- net/ipv4/netfilter/ip_queue.c 2
-rw-r--r-- net/ipv4/netfilter/ip_tables.c 53
-rw-r--r-- net/ipv4/netfilter/ipt_CLUSTERIP.c 14
-rw-r--r-- net/ipv4/netfilter/ipt_ECN.c 2
-rw-r--r-- net/ipv4/netfilter/ipt_LOG.c 9
-rw-r--r-- net/ipv4/netfilter/ipt_MASQUERADE.c 18
-rw-r--r-- net/ipv4/netfilter/ipt_REJECT.c 6
-rw-r--r-- net/ipv4/netfilter/ipt_recent.c 6
-rw-r--r-- net/ipv4/netfilter/iptable_filter.c 21
-rw-r--r-- net/ipv4/netfilter/iptable_mangle.c 51
-rw-r--r-- net/ipv4/netfilter/iptable_raw.c 8
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 70
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 15
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c 27
-rw-r--r-- net/ipv4/netfilter/nf_nat_core.c 61
-rw-r--r-- net/ipv4/netfilter/nf_nat_helper.c 5
-rw-r--r-- net/ipv4/netfilter/nf_nat_pptp.c 2
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_common.c 120
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_dccp.c 108
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_gre.c 45
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_icmp.c 19
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_sctp.c 96
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_tcp.c 80
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_udp.c 77
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_udplite.c 99
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_unknown.c 25
-rw-r--r-- net/ipv4/netfilter/nf_nat_rule.c 25
-rw-r--r-- net/ipv4/netfilter/nf_nat_sip.c 556
-rw-r--r-- net/ipv4/netfilter/nf_nat_snmp_basic.c 29
-rw-r--r-- net/ipv4/netfilter/nf_nat_standalone.c 76
-rw-r--r-- net/ipv4/proc.c 71
-rw-r--r-- net/ipv4/raw.c 47
-rw-r--r-- net/ipv4/route.c 278
-rw-r--r-- net/ipv4/syncookies.c 102
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c 162
-rw-r--r-- net/ipv4/tcp.c 18
-rw-r--r-- net/ipv4/tcp_cubic.c 35
-rw-r--r-- net/ipv4/tcp_input.c 76
-rw-r--r-- net/ipv4/tcp_ipv4.c 193
-rw-r--r-- net/ipv4/tcp_minisocks.c 36
-rw-r--r-- net/ipv4/tcp_output.c 13
-rw-r--r-- net/ipv4/tcp_probe.c 2
-rw-r--r-- net/ipv4/tcp_timer.c 19
-rw-r--r-- net/ipv4/tunnel4.c 2
-rw-r--r-- net/ipv4/udp.c 138
-rw-r--r-- net/ipv4/udp_impl.h 6
-rw-r--r-- net/ipv4/udplite.c 62
-rw-r--r-- net/ipv4/xfrm4_policy.c 2
83 files changed, 2819 insertions, 1784 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 0d109504ed86..f2b5270efdaa 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -243,6 +243,23 @@ void build_ehash_secret(void)
243} 243}
244EXPORT_SYMBOL(build_ehash_secret); 244EXPORT_SYMBOL(build_ehash_secret);
245 245
246static inline int inet_netns_ok(struct net *net, int protocol)
247{
248 int hash;
249 struct net_protocol *ipprot;
250
251 if (net == &init_net)
252 return 1;
253
254 hash = protocol & (MAX_INET_PROTOS - 1);
255 ipprot = rcu_dereference(inet_protos[hash]);
256
257 if (ipprot == NULL)
258 /* raw IP is OK */
259 return 1;
260 return ipprot->netns_ok;
261}
262
246/* 263/*
247 * Create an inet socket. 264 * Create an inet socket.
248 */ 265 */
@@ -259,9 +276,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol)
259 int try_loading_module = 0; 276 int try_loading_module = 0;
260 int err; 277 int err;
261 278
262 if (net != &init_net)
263 return -EAFNOSUPPORT;
264
265 if (sock->type != SOCK_RAW && 279 if (sock->type != SOCK_RAW &&
266 sock->type != SOCK_DGRAM && 280 sock->type != SOCK_DGRAM &&
267 !inet_ehash_secret) 281 !inet_ehash_secret)
@@ -320,6 +334,10 @@ lookup_protocol:
320 if (answer->capability > 0 && !capable(answer->capability)) 334 if (answer->capability > 0 && !capable(answer->capability))
321 goto out_rcu_unlock; 335 goto out_rcu_unlock;
322 336
337 err = -EAFNOSUPPORT;
338 if (!inet_netns_ok(net, protocol))
339 goto out_rcu_unlock;
340
323 sock->ops = answer->ops; 341 sock->ops = answer->ops;
324 answer_prot = answer->prot; 342 answer_prot = answer->prot;
325 answer_no_check = answer->no_check; 343 answer_no_check = answer->no_check;
@@ -446,7 +464,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
446 if (addr_len < sizeof(struct sockaddr_in)) 464 if (addr_len < sizeof(struct sockaddr_in))
447 goto out; 465 goto out;
448 466
449 chk_addr_ret = inet_addr_type(&init_net, addr->sin_addr.s_addr); 467 chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
450 468
451 /* Not specified by any standard per-se, however it breaks too 469 /* Not specified by any standard per-se, however it breaks too
452 * many applications when removed. It is unfortunate since 470 * many applications when removed. It is unfortunate since
@@ -784,6 +802,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
784{ 802{
785 struct sock *sk = sock->sk; 803 struct sock *sk = sock->sk;
786 int err = 0; 804 int err = 0;
805 struct net *net = sock_net(sk);
787 806
788 switch (cmd) { 807 switch (cmd) {
789 case SIOCGSTAMP: 808 case SIOCGSTAMP:
@@ -795,12 +814,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
795 case SIOCADDRT: 814 case SIOCADDRT:
796 case SIOCDELRT: 815 case SIOCDELRT:
797 case SIOCRTMSG: 816 case SIOCRTMSG:
798 err = ip_rt_ioctl(sk->sk_net, cmd, (void __user *)arg); 817 err = ip_rt_ioctl(net, cmd, (void __user *)arg);
799 break; 818 break;
800 case SIOCDARP: 819 case SIOCDARP:
801 case SIOCGARP: 820 case SIOCGARP:
802 case SIOCSARP: 821 case SIOCSARP:
803 err = arp_ioctl(sk->sk_net, cmd, (void __user *)arg); 822 err = arp_ioctl(net, cmd, (void __user *)arg);
804 break; 823 break;
805 case SIOCGIFADDR: 824 case SIOCGIFADDR:
806 case SIOCSIFADDR: 825 case SIOCSIFADDR:
@@ -813,7 +832,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
813 case SIOCSIFPFLAGS: 832 case SIOCSIFPFLAGS:
814 case SIOCGIFPFLAGS: 833 case SIOCGIFPFLAGS:
815 case SIOCSIFFLAGS: 834 case SIOCSIFFLAGS:
816 err = devinet_ioctl(cmd, (void __user *)arg); 835 err = devinet_ioctl(net, cmd, (void __user *)arg);
817 break; 836 break;
818 default: 837 default:
819 if (sk->sk_prot->ioctl) 838 if (sk->sk_prot->ioctl)
@@ -1058,8 +1077,8 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1058 1077
1059 if (sysctl_ip_dynaddr > 1) { 1078 if (sysctl_ip_dynaddr > 1) {
1060 printk(KERN_INFO "%s(): shifting inet->" 1079 printk(KERN_INFO "%s(): shifting inet->"
1061 "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", 1080 "saddr from " NIPQUAD_FMT " to " NIPQUAD_FMT "\n",
1062 __FUNCTION__, 1081 __func__,
1063 NIPQUAD(old_saddr), 1082 NIPQUAD(old_saddr),
1064 NIPQUAD(new_saddr)); 1083 NIPQUAD(new_saddr));
1065 } 1084 }
@@ -1113,7 +1132,7 @@ int inet_sk_rebuild_header(struct sock *sk)
1113 }; 1132 };
1114 1133
1115 security_sk_classify_flow(sk, &fl); 1134 security_sk_classify_flow(sk, &fl);
1116 err = ip_route_output_flow(&init_net, &rt, &fl, sk, 0); 1135 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
1117} 1136}
1118 if (!err) 1137 if (!err)
1119 sk_setup_caps(sk, &rt->u.dst); 1138 sk_setup_caps(sk, &rt->u.dst);
@@ -1231,6 +1250,29 @@ out:
1231 return segs; 1250 return segs;
1232} 1251}
1233 1252
1253int inet_ctl_sock_create(struct sock **sk, unsigned short family,
1254 unsigned short type, unsigned char protocol,
1255 struct net *net)
1256{
1257 struct socket *sock;
1258 int rc = sock_create_kern(family, type, protocol, &sock);
1259
1260 if (rc == 0) {
1261 *sk = sock->sk;
1262 (*sk)->sk_allocation = GFP_ATOMIC;
1263 /*
1264 * Unhash it so that IP input processing does not even see it,
1265 * we do not wish this socket to see incoming packets.
1266 */
1267 (*sk)->sk_prot->unhash(*sk);
1268
1269 sk_change_net(*sk, net);
1270 }
1271 return rc;
1272}
1273
1274EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
1275
1234unsigned long snmp_fold_field(void *mib[], int offt) 1276unsigned long snmp_fold_field(void *mib[], int offt)
1235{ 1277{
1236 unsigned long res = 0; 1278 unsigned long res = 0;
@@ -1283,17 +1325,20 @@ static struct net_protocol tcp_protocol = {
1283 .gso_send_check = tcp_v4_gso_send_check, 1325 .gso_send_check = tcp_v4_gso_send_check,
1284 .gso_segment = tcp_tso_segment, 1326 .gso_segment = tcp_tso_segment,
1285 .no_policy = 1, 1327 .no_policy = 1,
1328 .netns_ok = 1,
1286}; 1329};
1287 1330
1288static struct net_protocol udp_protocol = { 1331static struct net_protocol udp_protocol = {
1289 .handler = udp_rcv, 1332 .handler = udp_rcv,
1290 .err_handler = udp_err, 1333 .err_handler = udp_err,
1291 .no_policy = 1, 1334 .no_policy = 1,
1335 .netns_ok = 1,
1292}; 1336};
1293 1337
1294static struct net_protocol icmp_protocol = { 1338static struct net_protocol icmp_protocol = {
1295 .handler = icmp_rcv, 1339 .handler = icmp_rcv,
1296 .no_policy = 1, 1340 .no_policy = 1,
1341 .netns_ok = 1,
1297}; 1342};
1298 1343
1299static int __init init_ipv4_mibs(void) 1344static int __init init_ipv4_mibs(void)
@@ -1414,7 +1459,7 @@ static int __init inet_init(void)
1414 1459
1415 ip_init(); 1460 ip_init();
1416 1461
1417 tcp_v4_init(&inet_family_ops); 1462 tcp_v4_init();
1418 1463
1419 /* Setup TCP slab cache for open requests. */ 1464 /* Setup TCP slab cache for open requests. */
1420 tcp_init(); 1465 tcp_init();
@@ -1429,7 +1474,8 @@ static int __init inet_init(void)
1429 * Set the ICMP layer up 1474 * Set the ICMP layer up
1430 */ 1475 */
1431 1476
1432 icmp_init(&inet_family_ops); 1477 if (icmp_init() < 0)
1478 panic("Failed to create the ICMP control socket.\n");
1433 1479
1434 /* 1480 /*
1435 * Initialise the multicast router 1481 * Initialise the multicast router
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 8e17f65f4002..68b72a7a1806 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -242,7 +242,7 @@ static int arp_constructor(struct neighbour *neigh)
242 return -EINVAL; 242 return -EINVAL;
243 } 243 }
244 244
245 neigh->type = inet_addr_type(&init_net, addr); 245 neigh->type = inet_addr_type(dev_net(dev), addr);
246 246
247 parms = in_dev->arp_parms; 247 parms = in_dev->arp_parms;
248 __neigh_parms_put(neigh->parms); 248 __neigh_parms_put(neigh->parms);
@@ -341,14 +341,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
341 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { 341 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
342 default: 342 default:
343 case 0: /* By default announce any local IP */ 343 case 0: /* By default announce any local IP */
344 if (skb && inet_addr_type(&init_net, ip_hdr(skb)->saddr) == RTN_LOCAL) 344 if (skb && inet_addr_type(dev_net(dev), ip_hdr(skb)->saddr) == RTN_LOCAL)
345 saddr = ip_hdr(skb)->saddr; 345 saddr = ip_hdr(skb)->saddr;
346 break; 346 break;
347 case 1: /* Restrict announcements of saddr in same subnet */ 347 case 1: /* Restrict announcements of saddr in same subnet */
348 if (!skb) 348 if (!skb)
349 break; 349 break;
350 saddr = ip_hdr(skb)->saddr; 350 saddr = ip_hdr(skb)->saddr;
351 if (inet_addr_type(&init_net, saddr) == RTN_LOCAL) { 351 if (inet_addr_type(dev_net(dev), saddr) == RTN_LOCAL) {
352 /* saddr should be known to target */ 352 /* saddr should be known to target */
353 if (inet_addr_onlink(in_dev, target, saddr)) 353 if (inet_addr_onlink(in_dev, target, saddr))
354 break; 354 break;
@@ -424,7 +424,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
424 int flag = 0; 424 int flag = 0;
425 /*unsigned long now; */ 425 /*unsigned long now; */
426 426
427 if (ip_route_output_key(&init_net, &rt, &fl) < 0) 427 if (ip_route_output_key(dev_net(dev), &rt, &fl) < 0)
428 return 1; 428 return 1;
429 if (rt->u.dst.dev != dev) { 429 if (rt->u.dst.dev != dev) {
430 NET_INC_STATS_BH(LINUX_MIB_ARPFILTER); 430 NET_INC_STATS_BH(LINUX_MIB_ARPFILTER);
@@ -475,9 +475,9 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
475 return 1; 475 return 1;
476 } 476 }
477 477
478 paddr = ((struct rtable*)skb->dst)->rt_gateway; 478 paddr = skb->rtable->rt_gateway;
479 479
480 if (arp_set_predefined(inet_addr_type(&init_net, paddr), haddr, paddr, dev)) 480 if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev))
481 return 0; 481 return 0;
482 482
483 n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); 483 n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
@@ -570,14 +570,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
570 * Allocate a buffer 570 * Allocate a buffer
571 */ 571 */
572 572
573 skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4) 573 skb = alloc_skb(arp_hdr_len(dev) + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
574 + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
575 if (skb == NULL) 574 if (skb == NULL)
576 return NULL; 575 return NULL;
577 576
578 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 577 skb_reserve(skb, LL_RESERVED_SPACE(dev));
579 skb_reset_network_header(skb); 578 skb_reset_network_header(skb);
580 arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4)); 579 arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev));
581 skb->dev = dev; 580 skb->dev = dev;
582 skb->protocol = htons(ETH_P_ARP); 581 skb->protocol = htons(ETH_P_ARP);
583 if (src_hw == NULL) 582 if (src_hw == NULL)
@@ -710,6 +709,7 @@ static int arp_process(struct sk_buff *skb)
710 u16 dev_type = dev->type; 709 u16 dev_type = dev->type;
711 int addr_type; 710 int addr_type;
712 struct neighbour *n; 711 struct neighbour *n;
712 struct net *net = dev_net(dev);
713 713
714 /* arp_rcv below verifies the ARP header and verifies the device 714 /* arp_rcv below verifies the ARP header and verifies the device
715 * is ARP'able. 715 * is ARP'able.
@@ -805,7 +805,7 @@ static int arp_process(struct sk_buff *skb)
805 /* Special case: IPv4 duplicate address detection packet (RFC2131) */ 805 /* Special case: IPv4 duplicate address detection packet (RFC2131) */
806 if (sip == 0) { 806 if (sip == 0) {
807 if (arp->ar_op == htons(ARPOP_REQUEST) && 807 if (arp->ar_op == htons(ARPOP_REQUEST) &&
808 inet_addr_type(&init_net, tip) == RTN_LOCAL && 808 inet_addr_type(net, tip) == RTN_LOCAL &&
809 !arp_ignore(in_dev, sip, tip)) 809 !arp_ignore(in_dev, sip, tip))
810 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, 810 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
811 dev->dev_addr, sha); 811 dev->dev_addr, sha);
@@ -815,7 +815,7 @@ static int arp_process(struct sk_buff *skb)
815 if (arp->ar_op == htons(ARPOP_REQUEST) && 815 if (arp->ar_op == htons(ARPOP_REQUEST) &&
816 ip_route_input(skb, tip, sip, 0, dev) == 0) { 816 ip_route_input(skb, tip, sip, 0, dev) == 0) {
817 817
818 rt = (struct rtable*)skb->dst; 818 rt = skb->rtable;
819 addr_type = rt->rt_type; 819 addr_type = rt->rt_type;
820 820
821 if (addr_type == RTN_LOCAL) { 821 if (addr_type == RTN_LOCAL) {
@@ -835,7 +835,7 @@ static int arp_process(struct sk_buff *skb)
835 goto out; 835 goto out;
836 } else if (IN_DEV_FORWARD(in_dev)) { 836 } else if (IN_DEV_FORWARD(in_dev)) {
837 if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && 837 if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev &&
838 (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &init_net, &tip, dev, 0))) { 838 (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
839 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 839 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
840 if (n) 840 if (n)
841 neigh_release(n); 841 neigh_release(n);
@@ -858,14 +858,14 @@ static int arp_process(struct sk_buff *skb)
858 858
859 n = __neigh_lookup(&arp_tbl, &sip, dev, 0); 859 n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
860 860
861 if (IPV4_DEVCONF_ALL(dev->nd_net, ARP_ACCEPT)) { 861 if (IPV4_DEVCONF_ALL(dev_net(dev), ARP_ACCEPT)) {
862 /* Unsolicited ARP is not accepted by default. 862 /* Unsolicited ARP is not accepted by default.
863 It is possible, that this option should be enabled for some 863 It is possible, that this option should be enabled for some
864 devices (strip is candidate) 864 devices (strip is candidate)
865 */ 865 */
866 if (n == NULL && 866 if (n == NULL &&
867 arp->ar_op == htons(ARPOP_REPLY) && 867 arp->ar_op == htons(ARPOP_REPLY) &&
868 inet_addr_type(&init_net, sip) == RTN_UNICAST) 868 inet_addr_type(net, sip) == RTN_UNICAST)
869 n = __neigh_lookup(&arp_tbl, &sip, dev, 1); 869 n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
870 } 870 }
871 871
@@ -912,13 +912,8 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
912{ 912{
913 struct arphdr *arp; 913 struct arphdr *arp;
914 914
915 if (dev->nd_net != &init_net)
916 goto freeskb;
917
918 /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ 915 /* ARP header, plus 2 device addresses, plus 2 IP addresses. */
919 if (!pskb_may_pull(skb, (sizeof(struct arphdr) + 916 if (!pskb_may_pull(skb, arp_hdr_len(dev)))
920 (2 * dev->addr_len) +
921 (2 * sizeof(u32)))))
922 goto freeskb; 917 goto freeskb;
923 918
924 arp = arp_hdr(skb); 919 arp = arp_hdr(skb);
@@ -1201,9 +1196,6 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo
1201{ 1196{
1202 struct net_device *dev = ptr; 1197 struct net_device *dev = ptr;
1203 1198
1204 if (dev->nd_net != &init_net)
1205 return NOTIFY_DONE;
1206
1207 switch (event) { 1199 switch (event) {
1208 case NETDEV_CHANGEADDR: 1200 case NETDEV_CHANGEADDR:
1209 neigh_changeaddr(&arp_tbl, dev); 1201 neigh_changeaddr(&arp_tbl, dev);
@@ -1318,7 +1310,7 @@ static void arp_format_neigh_entry(struct seq_file *seq,
1318#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) 1310#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
1319 } 1311 }
1320#endif 1312#endif
1321 sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(*(u32*)n->primary_key)); 1313 sprintf(tbuf, NIPQUAD_FMT, NIPQUAD(*(u32*)n->primary_key));
1322 seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", 1314 seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
1323 tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name); 1315 tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name);
1324 read_unlock(&n->lock); 1316 read_unlock(&n->lock);
@@ -1331,7 +1323,7 @@ static void arp_format_pneigh_entry(struct seq_file *seq,
1331 int hatype = dev ? dev->type : 0; 1323 int hatype = dev ? dev->type : 0;
1332 char tbuf[16]; 1324 char tbuf[16];
1333 1325
1334 sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(*(u32*)n->key)); 1326 sprintf(tbuf, NIPQUAD_FMT, NIPQUAD(*(u32*)n->key));
1335 seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", 1327 seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
1336 tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00", 1328 tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00",
1337 dev ? dev->name : "*"); 1329 dev ? dev->name : "*");
@@ -1385,13 +1377,29 @@ static const struct file_operations arp_seq_fops = {
1385 .release = seq_release_net, 1377 .release = seq_release_net,
1386}; 1378};
1387 1379
1388static int __init arp_proc_init(void) 1380
1381static int __net_init arp_net_init(struct net *net)
1389{ 1382{
1390 if (!proc_net_fops_create(&init_net, "arp", S_IRUGO, &arp_seq_fops)) 1383 if (!proc_net_fops_create(net, "arp", S_IRUGO, &arp_seq_fops))
1391 return -ENOMEM; 1384 return -ENOMEM;
1392 return 0; 1385 return 0;
1393} 1386}
1394 1387
1388static void __net_exit arp_net_exit(struct net *net)
1389{
1390 proc_net_remove(net, "arp");
1391}
1392
1393static struct pernet_operations arp_net_ops = {
1394 .init = arp_net_init,
1395 .exit = arp_net_exit,
1396};
1397
1398static int __init arp_proc_init(void)
1399{
1400 return register_pernet_subsys(&arp_net_ops);
1401}
1402
1395#else /* CONFIG_PROC_FS */ 1403#else /* CONFIG_PROC_FS */
1396 1404
1397static int __init arp_proc_init(void) 1405static int __init arp_proc_init(void)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 8cd357f41283..4637ded3dba8 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1800,7 +1800,6 @@ int cipso_v4_sock_setattr(struct sock *sk,
1800 } 1800 }
1801 memcpy(opt->__data, buf, buf_len); 1801 memcpy(opt->__data, buf, buf_len);
1802 opt->optlen = opt_len; 1802 opt->optlen = opt_len;
1803 opt->is_data = 1;
1804 opt->cipso = sizeof(struct iphdr); 1803 opt->cipso = sizeof(struct iphdr);
1805 kfree(buf); 1804 kfree(buf);
1806 buf = NULL; 1805 buf = NULL;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 87490f7bb0f7..6848e4760f34 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -165,7 +165,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
165 if (!in_dev) 165 if (!in_dev)
166 goto out; 166 goto out;
167 INIT_RCU_HEAD(&in_dev->rcu_head); 167 INIT_RCU_HEAD(&in_dev->rcu_head);
168 memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt, 168 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
169 sizeof(in_dev->cnf)); 169 sizeof(in_dev->cnf));
170 in_dev->cnf.sysctl = NULL; 170 in_dev->cnf.sysctl = NULL;
171 in_dev->dev = dev; 171 in_dev->dev = dev;
@@ -437,7 +437,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
437 437
438static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 438static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
439{ 439{
440 struct net *net = skb->sk->sk_net; 440 struct net *net = sock_net(skb->sk);
441 struct nlattr *tb[IFA_MAX+1]; 441 struct nlattr *tb[IFA_MAX+1];
442 struct in_device *in_dev; 442 struct in_device *in_dev;
443 struct ifaddrmsg *ifm; 443 struct ifaddrmsg *ifm;
@@ -446,9 +446,6 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
446 446
447 ASSERT_RTNL(); 447 ASSERT_RTNL();
448 448
449 if (net != &init_net)
450 return -EINVAL;
451
452 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); 449 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
453 if (err < 0) 450 if (err < 0)
454 goto errout; 451 goto errout;
@@ -555,14 +552,11 @@ errout:
555 552
556static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 553static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
557{ 554{
558 struct net *net = skb->sk->sk_net; 555 struct net *net = sock_net(skb->sk);
559 struct in_ifaddr *ifa; 556 struct in_ifaddr *ifa;
560 557
561 ASSERT_RTNL(); 558 ASSERT_RTNL();
562 559
563 if (net != &init_net)
564 return -EINVAL;
565
566 ifa = rtm_to_ifaddr(net, nlh); 560 ifa = rtm_to_ifaddr(net, nlh);
567 if (IS_ERR(ifa)) 561 if (IS_ERR(ifa))
568 return PTR_ERR(ifa); 562 return PTR_ERR(ifa);
@@ -595,7 +589,7 @@ static __inline__ int inet_abc_len(__be32 addr)
595} 589}
596 590
597 591
598int devinet_ioctl(unsigned int cmd, void __user *arg) 592int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
599{ 593{
600 struct ifreq ifr; 594 struct ifreq ifr;
601 struct sockaddr_in sin_orig; 595 struct sockaddr_in sin_orig;
@@ -624,7 +618,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
624 *colon = 0; 618 *colon = 0;
625 619
626#ifdef CONFIG_KMOD 620#ifdef CONFIG_KMOD
627 dev_load(&init_net, ifr.ifr_name); 621 dev_load(net, ifr.ifr_name);
628#endif 622#endif
629 623
630 switch (cmd) { 624 switch (cmd) {
@@ -665,7 +659,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
665 rtnl_lock(); 659 rtnl_lock();
666 660
667 ret = -ENODEV; 661 ret = -ENODEV;
668 if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL) 662 if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
669 goto done; 663 goto done;
670 664
671 if (colon) 665 if (colon)
@@ -878,6 +872,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
878{ 872{
879 __be32 addr = 0; 873 __be32 addr = 0;
880 struct in_device *in_dev; 874 struct in_device *in_dev;
875 struct net *net = dev_net(dev);
881 876
882 rcu_read_lock(); 877 rcu_read_lock();
883 in_dev = __in_dev_get_rcu(dev); 878 in_dev = __in_dev_get_rcu(dev);
@@ -906,7 +901,7 @@ no_in_dev:
906 */ 901 */
907 read_lock(&dev_base_lock); 902 read_lock(&dev_base_lock);
908 rcu_read_lock(); 903 rcu_read_lock();
909 for_each_netdev(&init_net, dev) { 904 for_each_netdev(net, dev) {
910 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) 905 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
911 continue; 906 continue;
912 907
@@ -979,7 +974,7 @@ __be32 inet_confirm_addr(struct in_device *in_dev,
979 if (scope != RT_SCOPE_LINK) 974 if (scope != RT_SCOPE_LINK)
980 return confirm_addr_indev(in_dev, dst, local, scope); 975 return confirm_addr_indev(in_dev, dst, local, scope);
981 976
982 net = in_dev->dev->nd_net; 977 net = dev_net(in_dev->dev);
983 read_lock(&dev_base_lock); 978 read_lock(&dev_base_lock);
984 rcu_read_lock(); 979 rcu_read_lock();
985 for_each_netdev(net, dev) { 980 for_each_netdev(net, dev) {
@@ -1045,9 +1040,6 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1045 struct net_device *dev = ptr; 1040 struct net_device *dev = ptr;
1046 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1041 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1047 1042
1048 if (dev->nd_net != &init_net)
1049 return NOTIFY_DONE;
1050
1051 ASSERT_RTNL(); 1043 ASSERT_RTNL();
1052 1044
1053 if (!in_dev) { 1045 if (!in_dev) {
@@ -1166,16 +1158,13 @@ nla_put_failure:
1166 1158
1167static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) 1159static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1168{ 1160{
1169 struct net *net = skb->sk->sk_net; 1161 struct net *net = sock_net(skb->sk);
1170 int idx, ip_idx; 1162 int idx, ip_idx;
1171 struct net_device *dev; 1163 struct net_device *dev;
1172 struct in_device *in_dev; 1164 struct in_device *in_dev;
1173 struct in_ifaddr *ifa; 1165 struct in_ifaddr *ifa;
1174 int s_ip_idx, s_idx = cb->args[0]; 1166 int s_ip_idx, s_idx = cb->args[0];
1175 1167
1176 if (net != &init_net)
1177 return 0;
1178
1179 s_ip_idx = ip_idx = cb->args[1]; 1168 s_ip_idx = ip_idx = cb->args[1];
1180 idx = 0; 1169 idx = 0;
1181 for_each_netdev(net, dev) { 1170 for_each_netdev(net, dev) {
@@ -1214,7 +1203,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1214 int err = -ENOBUFS; 1203 int err = -ENOBUFS;
1215 struct net *net; 1204 struct net *net;
1216 1205
1217 net = ifa->ifa_dev->dev->nd_net; 1206 net = dev_net(ifa->ifa_dev->dev);
1218 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); 1207 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1219 if (skb == NULL) 1208 if (skb == NULL)
1220 goto errout; 1209 goto errout;
@@ -1528,7 +1517,7 @@ static void devinet_sysctl_register(struct in_device *idev)
1528{ 1517{
1529 neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4, 1518 neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1530 NET_IPV4_NEIGH, "ipv4", NULL, NULL); 1519 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1531 __devinet_sysctl_register(idev->dev->nd_net, idev->dev->name, 1520 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1532 idev->dev->ifindex, &idev->cnf); 1521 idev->dev->ifindex, &idev->cnf);
1533} 1522}
1534 1523
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 86ff2711fc95..0f1557a4ac7a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -257,7 +257,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
257 if (in_dev == NULL) 257 if (in_dev == NULL)
258 goto e_inval; 258 goto e_inval;
259 259
260 net = dev->nd_net; 260 net = dev_net(dev);
261 if (fib_lookup(net, &fl, &res)) 261 if (fib_lookup(net, &fl, &res))
262 goto last_resort; 262 goto last_resort;
263 if (res.type != RTN_UNICAST) 263 if (res.type != RTN_UNICAST)
@@ -583,7 +583,7 @@ errout:
583 583
584static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 584static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
585{ 585{
586 struct net *net = skb->sk->sk_net; 586 struct net *net = sock_net(skb->sk);
587 struct fib_config cfg; 587 struct fib_config cfg;
588 struct fib_table *tb; 588 struct fib_table *tb;
589 int err; 589 int err;
@@ -605,7 +605,7 @@ errout:
605 605
606static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 606static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
607{ 607{
608 struct net *net = skb->sk->sk_net; 608 struct net *net = sock_net(skb->sk);
609 struct fib_config cfg; 609 struct fib_config cfg;
610 struct fib_table *tb; 610 struct fib_table *tb;
611 int err; 611 int err;
@@ -627,7 +627,7 @@ errout:
627 627
628static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 628static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
629{ 629{
630 struct net *net = skb->sk->sk_net; 630 struct net *net = sock_net(skb->sk);
631 unsigned int h, s_h; 631 unsigned int h, s_h;
632 unsigned int e = 0, s_e; 632 unsigned int e = 0, s_e;
633 struct fib_table *tb; 633 struct fib_table *tb;
@@ -674,7 +674,7 @@ out:
674 674
675static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) 675static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
676{ 676{
677 struct net *net = ifa->ifa_dev->dev->nd_net; 677 struct net *net = dev_net(ifa->ifa_dev->dev);
678 struct fib_table *tb; 678 struct fib_table *tb;
679 struct fib_config cfg = { 679 struct fib_config cfg = {
680 .fc_protocol = RTPROT_KERNEL, 680 .fc_protocol = RTPROT_KERNEL,
@@ -801,15 +801,15 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
801 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 801 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
802 802
803 /* Check, that this local address finally disappeared. */ 803 /* Check, that this local address finally disappeared. */
804 if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) { 804 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
805 /* And the last, but not the least thing. 805 /* And the last, but not the least thing.
806 We must flush stray FIB entries. 806 We must flush stray FIB entries.
807 807
808 First of all, we scan fib_info list searching 808 First of all, we scan fib_info list searching
809 for stray nexthop entries, then ignite fib_flush. 809 for stray nexthop entries, then ignite fib_flush.
810 */ 810 */
811 if (fib_sync_down_addr(dev->nd_net, ifa->ifa_local)) 811 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
812 fib_flush(dev->nd_net); 812 fib_flush(dev_net(dev));
813 } 813 }
814 } 814 }
815#undef LOCAL_OK 815#undef LOCAL_OK
@@ -857,7 +857,7 @@ static void nl_fib_input(struct sk_buff *skb)
857 struct fib_table *tb; 857 struct fib_table *tb;
858 u32 pid; 858 u32 pid;
859 859
860 net = skb->sk->sk_net; 860 net = sock_net(skb->sk);
861 nlh = nlmsg_hdr(skb); 861 nlh = nlmsg_hdr(skb);
862 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || 862 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
863 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) 863 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
@@ -899,7 +899,7 @@ static void nl_fib_lookup_exit(struct net *net)
899static void fib_disable_ip(struct net_device *dev, int force) 899static void fib_disable_ip(struct net_device *dev, int force)
900{ 900{
901 if (fib_sync_down_dev(dev, force)) 901 if (fib_sync_down_dev(dev, force))
902 fib_flush(dev->nd_net); 902 fib_flush(dev_net(dev));
903 rt_cache_flush(0); 903 rt_cache_flush(0);
904 arp_ifdown(dev); 904 arp_ifdown(dev);
905} 905}
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 8d58d85dfac6..02088deb0461 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -821,7 +821,7 @@ static struct fib_alias *fib_get_first(struct seq_file *seq)
821 struct fib_table *main_table; 821 struct fib_table *main_table;
822 struct fn_hash *table; 822 struct fn_hash *table;
823 823
824 main_table = fib_get_table(iter->p.net, RT_TABLE_MAIN); 824 main_table = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
825 table = (struct fn_hash *)main_table->tb_data; 825 table = (struct fn_hash *)main_table->tb_data;
826 826
827 iter->bucket = 0; 827 iter->bucket = 0;
@@ -959,11 +959,10 @@ static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
959static void *fib_seq_start(struct seq_file *seq, loff_t *pos) 959static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
960 __acquires(fib_hash_lock) 960 __acquires(fib_hash_lock)
961{ 961{
962 struct fib_iter_state *iter = seq->private;
963 void *v = NULL; 962 void *v = NULL;
964 963
965 read_lock(&fib_hash_lock); 964 read_lock(&fib_hash_lock);
966 if (fib_get_table(iter->p.net, RT_TABLE_MAIN)) 965 if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN))
967 v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 966 v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
968 return v; 967 return v;
969} 968}
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 19274d01afa4..1fb56876be54 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -137,7 +137,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
137 struct nlmsghdr *nlh, struct fib_rule_hdr *frh, 137 struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
138 struct nlattr **tb) 138 struct nlattr **tb)
139{ 139{
140 struct net *net = skb->sk->sk_net; 140 struct net *net = sock_net(skb->sk);
141 int err = -EINVAL; 141 int err = -EINVAL;
142 struct fib4_rule *rule4 = (struct fib4_rule *) rule; 142 struct fib4_rule *rule4 = (struct fib4_rule *) rule;
143 143
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index a13c84763d4c..3b83c34019fc 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -152,6 +152,7 @@ void free_fib_info(struct fib_info *fi)
152 nh->nh_dev = NULL; 152 nh->nh_dev = NULL;
153 } endfor_nexthops(fi); 153 } endfor_nexthops(fi);
154 fib_info_cnt--; 154 fib_info_cnt--;
155 release_net(fi->fib_net);
155 kfree(fi); 156 kfree(fi);
156} 157}
157 158
@@ -730,7 +731,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
730 goto failure; 731 goto failure;
731 fib_info_cnt++; 732 fib_info_cnt++;
732 733
733 fi->fib_net = net; 734 fi->fib_net = hold_net(net);
734 fi->fib_protocol = cfg->fc_protocol; 735 fi->fib_protocol = cfg->fc_protocol;
735 fi->fib_flags = cfg->fc_flags; 736 fi->fib_flags = cfg->fc_flags;
736 fi->fib_priority = cfg->fc_priority; 737 fi->fib_priority = cfg->fc_priority;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f6cdc012eec5..ea294fffb9ce 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -122,7 +122,10 @@ struct tnode {
122 unsigned char bits; /* 2log(KEYLENGTH) bits needed */ 122 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
123 unsigned int full_children; /* KEYLENGTH bits needed */ 123 unsigned int full_children; /* KEYLENGTH bits needed */
124 unsigned int empty_children; /* KEYLENGTH bits needed */ 124 unsigned int empty_children; /* KEYLENGTH bits needed */
125 struct rcu_head rcu; 125 union {
126 struct rcu_head rcu;
127 struct work_struct work;
128 };
126 struct node *child[0]; 129 struct node *child[0];
127}; 130};
128 131
@@ -160,7 +163,6 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
160static struct node *resize(struct trie *t, struct tnode *tn); 163static struct node *resize(struct trie *t, struct tnode *tn);
161static struct tnode *inflate(struct trie *t, struct tnode *tn); 164static struct tnode *inflate(struct trie *t, struct tnode *tn);
162static struct tnode *halve(struct trie *t, struct tnode *tn); 165static struct tnode *halve(struct trie *t, struct tnode *tn);
163static void tnode_free(struct tnode *tn);
164 166
165static struct kmem_cache *fn_alias_kmem __read_mostly; 167static struct kmem_cache *fn_alias_kmem __read_mostly;
166static struct kmem_cache *trie_leaf_kmem __read_mostly; 168static struct kmem_cache *trie_leaf_kmem __read_mostly;
@@ -334,6 +336,11 @@ static void __leaf_free_rcu(struct rcu_head *head)
334 kmem_cache_free(trie_leaf_kmem, l); 336 kmem_cache_free(trie_leaf_kmem, l);
335} 337}
336 338
339static inline void free_leaf(struct leaf *l)
340{
341 call_rcu_bh(&l->rcu, __leaf_free_rcu);
342}
343
337static void __leaf_info_free_rcu(struct rcu_head *head) 344static void __leaf_info_free_rcu(struct rcu_head *head)
338{ 345{
339 kfree(container_of(head, struct leaf_info, rcu)); 346 kfree(container_of(head, struct leaf_info, rcu));
@@ -346,16 +353,16 @@ static inline void free_leaf_info(struct leaf_info *leaf)
346 353
347static struct tnode *tnode_alloc(size_t size) 354static struct tnode *tnode_alloc(size_t size)
348{ 355{
349 struct page *pages;
350
351 if (size <= PAGE_SIZE) 356 if (size <= PAGE_SIZE)
352 return kzalloc(size, GFP_KERNEL); 357 return kzalloc(size, GFP_KERNEL);
358 else
359 return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
360}
353 361
354 pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size)); 362static void __tnode_vfree(struct work_struct *arg)
355 if (!pages) 363{
356 return NULL; 364 struct tnode *tn = container_of(arg, struct tnode, work);
357 365 vfree(tn);
358 return page_address(pages);
359} 366}
360 367
361static void __tnode_free_rcu(struct rcu_head *head) 368static void __tnode_free_rcu(struct rcu_head *head)
@@ -366,16 +373,17 @@ static void __tnode_free_rcu(struct rcu_head *head)
366 373
367 if (size <= PAGE_SIZE) 374 if (size <= PAGE_SIZE)
368 kfree(tn); 375 kfree(tn);
369 else 376 else {
370 free_pages((unsigned long)tn, get_order(size)); 377 INIT_WORK(&tn->work, __tnode_vfree);
378 schedule_work(&tn->work);
379 }
371} 380}
372 381
373static inline void tnode_free(struct tnode *tn) 382static inline void tnode_free(struct tnode *tn)
374{ 383{
375 if (IS_LEAF(tn)) { 384 if (IS_LEAF(tn))
376 struct leaf *l = (struct leaf *) tn; 385 free_leaf((struct leaf *) tn);
377 call_rcu_bh(&l->rcu, __leaf_free_rcu); 386 else
378 } else
379 call_rcu(&tn->rcu, __tnode_free_rcu); 387 call_rcu(&tn->rcu, __tnode_free_rcu);
380} 388}
381 389
@@ -1086,7 +1094,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1086 li = leaf_info_new(plen); 1094 li = leaf_info_new(plen);
1087 1095
1088 if (!li) { 1096 if (!li) {
1089 tnode_free((struct tnode *) l); 1097 free_leaf(l);
1090 return NULL; 1098 return NULL;
1091 } 1099 }
1092 1100
@@ -1122,7 +1130,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1122 1130
1123 if (!tn) { 1131 if (!tn) {
1124 free_leaf_info(li); 1132 free_leaf_info(li);
1125 tnode_free((struct tnode *) l); 1133 free_leaf(l);
1126 return NULL; 1134 return NULL;
1127 } 1135 }
1128 1136
@@ -1578,7 +1586,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
1578 } else 1586 } else
1579 rcu_assign_pointer(t->trie, NULL); 1587 rcu_assign_pointer(t->trie, NULL);
1580 1588
1581 tnode_free((struct tnode *) l); 1589 free_leaf(l);
1582} 1590}
1583 1591
1584/* 1592/*
@@ -1665,7 +1673,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
1665 return 0; 1673 return 0;
1666} 1674}
1667 1675
1668static int trie_flush_list(struct trie *t, struct list_head *head) 1676static int trie_flush_list(struct list_head *head)
1669{ 1677{
1670 struct fib_alias *fa, *fa_node; 1678 struct fib_alias *fa, *fa_node;
1671 int found = 0; 1679 int found = 0;
@@ -1683,7 +1691,7 @@ static int trie_flush_list(struct trie *t, struct list_head *head)
1683 return found; 1691 return found;
1684} 1692}
1685 1693
1686static int trie_flush_leaf(struct trie *t, struct leaf *l) 1694static int trie_flush_leaf(struct leaf *l)
1687{ 1695{
1688 int found = 0; 1696 int found = 0;
1689 struct hlist_head *lih = &l->list; 1697 struct hlist_head *lih = &l->list;
@@ -1691,7 +1699,7 @@ static int trie_flush_leaf(struct trie *t, struct leaf *l)
1691 struct leaf_info *li = NULL; 1699 struct leaf_info *li = NULL;
1692 1700
1693 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { 1701 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
1694 found += trie_flush_list(t, &li->falh); 1702 found += trie_flush_list(&li->falh);
1695 1703
1696 if (list_empty(&li->falh)) { 1704 if (list_empty(&li->falh)) {
1697 hlist_del_rcu(&li->hlist); 1705 hlist_del_rcu(&li->hlist);
@@ -1782,7 +1790,7 @@ static int fn_trie_flush(struct fib_table *tb)
1782 int found = 0; 1790 int found = 0;
1783 1791
1784 for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) { 1792 for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
1785 found += trie_flush_leaf(t, l); 1793 found += trie_flush_leaf(l);
1786 1794
1787 if (ll && hlist_empty(&ll->list)) 1795 if (ll && hlist_empty(&ll->list))
1788 trie_leaf_remove(t, ll); 1796 trie_leaf_remove(t, ll);
@@ -2029,9 +2037,8 @@ struct fib_table *fib_hash_table(u32 id)
2029/* Depth first Trie walk iterator */ 2037/* Depth first Trie walk iterator */
2030struct fib_trie_iter { 2038struct fib_trie_iter {
2031 struct seq_net_private p; 2039 struct seq_net_private p;
2032 struct trie *trie_local, *trie_main; 2040 struct fib_table *tb;
2033 struct tnode *tnode; 2041 struct tnode *tnode;
2034 struct trie *trie;
2035 unsigned index; 2042 unsigned index;
2036 unsigned depth; 2043 unsigned depth;
2037}; 2044};
@@ -2084,31 +2091,26 @@ rescan:
2084static struct node *fib_trie_get_first(struct fib_trie_iter *iter, 2091static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2085 struct trie *t) 2092 struct trie *t)
2086{ 2093{
2087 struct node *n ; 2094 struct node *n;
2088 2095
2089 if (!t) 2096 if (!t)
2090 return NULL; 2097 return NULL;
2091 2098
2092 n = rcu_dereference(t->trie); 2099 n = rcu_dereference(t->trie);
2093 2100 if (!n)
2094 if (!iter)
2095 return NULL; 2101 return NULL;
2096 2102
2097 if (n) { 2103 if (IS_TNODE(n)) {
2098 if (IS_TNODE(n)) { 2104 iter->tnode = (struct tnode *) n;
2099 iter->tnode = (struct tnode *) n; 2105 iter->index = 0;
2100 iter->trie = t; 2106 iter->depth = 1;
2101 iter->index = 0; 2107 } else {
2102 iter->depth = 1; 2108 iter->tnode = NULL;
2103 } else { 2109 iter->index = 0;
2104 iter->tnode = NULL; 2110 iter->depth = 0;
2105 iter->trie = t;
2106 iter->index = 0;
2107 iter->depth = 0;
2108 }
2109 return n;
2110 } 2111 }
2111 return NULL; 2112
2113 return n;
2112} 2114}
2113 2115
2114static void trie_collect_stats(struct trie *t, struct trie_stat *s) 2116static void trie_collect_stats(struct trie *t, struct trie_stat *s)
@@ -2119,8 +2121,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2119 memset(s, 0, sizeof(*s)); 2121 memset(s, 0, sizeof(*s));
2120 2122
2121 rcu_read_lock(); 2123 rcu_read_lock();
2122 for (n = fib_trie_get_first(&iter, t); n; 2124 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
2123 n = fib_trie_get_next(&iter)) {
2124 if (IS_LEAF(n)) { 2125 if (IS_LEAF(n)) {
2125 struct leaf *l = (struct leaf *)n; 2126 struct leaf *l = (struct leaf *)n;
2126 struct leaf_info *li; 2127 struct leaf_info *li;
@@ -2209,36 +2210,48 @@ static void trie_show_usage(struct seq_file *seq,
2209} 2210}
2210#endif /* CONFIG_IP_FIB_TRIE_STATS */ 2211#endif /* CONFIG_IP_FIB_TRIE_STATS */
2211 2212
2212static void fib_trie_show(struct seq_file *seq, const char *name, 2213static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
2213 struct trie *trie)
2214{ 2214{
2215 struct trie_stat stat; 2215 if (tb->tb_id == RT_TABLE_LOCAL)
2216 2216 seq_puts(seq, "Local:\n");
2217 trie_collect_stats(trie, &stat); 2217 else if (tb->tb_id == RT_TABLE_MAIN)
2218 seq_printf(seq, "%s:\n", name); 2218 seq_puts(seq, "Main:\n");
2219 trie_show_stats(seq, &stat); 2219 else
2220#ifdef CONFIG_IP_FIB_TRIE_STATS 2220 seq_printf(seq, "Id %d:\n", tb->tb_id);
2221 trie_show_usage(seq, &trie->stats);
2222#endif
2223} 2221}
2224 2222
2223
2225static int fib_triestat_seq_show(struct seq_file *seq, void *v) 2224static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2226{ 2225{
2227 struct net *net = (struct net *)seq->private; 2226 struct net *net = (struct net *)seq->private;
2228 struct fib_table *tb; 2227 unsigned int h;
2229 2228
2230 seq_printf(seq, 2229 seq_printf(seq,
2231 "Basic info: size of leaf:" 2230 "Basic info: size of leaf:"
2232 " %Zd bytes, size of tnode: %Zd bytes.\n", 2231 " %Zd bytes, size of tnode: %Zd bytes.\n",
2233 sizeof(struct leaf), sizeof(struct tnode)); 2232 sizeof(struct leaf), sizeof(struct tnode));
2234 2233
2235 tb = fib_get_table(net, RT_TABLE_LOCAL); 2234 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2236 if (tb) 2235 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2237 fib_trie_show(seq, "Local", (struct trie *) tb->tb_data); 2236 struct hlist_node *node;
2237 struct fib_table *tb;
2238
2239 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2240 struct trie *t = (struct trie *) tb->tb_data;
2241 struct trie_stat stat;
2242
2243 if (!t)
2244 continue;
2238 2245
2239 tb = fib_get_table(net, RT_TABLE_MAIN); 2246 fib_table_print(seq, tb);
2240 if (tb) 2247
2241 fib_trie_show(seq, "Main", (struct trie *) tb->tb_data); 2248 trie_collect_stats(t, &stat);
2249 trie_show_stats(seq, &stat);
2250#ifdef CONFIG_IP_FIB_TRIE_STATS
2251 trie_show_usage(seq, &t->stats);
2252#endif
2253 }
2254 }
2242 2255
2243 return 0; 2256 return 0;
2244} 2257}
@@ -2274,67 +2287,79 @@ static const struct file_operations fib_triestat_fops = {
2274 .release = fib_triestat_seq_release, 2287 .release = fib_triestat_seq_release,
2275}; 2288};
2276 2289
2277static struct node *fib_trie_get_idx(struct fib_trie_iter *iter, 2290static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
2278 loff_t pos)
2279{ 2291{
2292 struct fib_trie_iter *iter = seq->private;
2293 struct net *net = seq_file_net(seq);
2280 loff_t idx = 0; 2294 loff_t idx = 0;
2281 struct node *n; 2295 unsigned int h;
2282 2296
2283 for (n = fib_trie_get_first(iter, iter->trie_local); 2297 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2284 n; ++idx, n = fib_trie_get_next(iter)) { 2298 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2285 if (pos == idx) 2299 struct hlist_node *node;
2286 return n; 2300 struct fib_table *tb;
2287 }
2288 2301
2289 for (n = fib_trie_get_first(iter, iter->trie_main); 2302 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2290 n; ++idx, n = fib_trie_get_next(iter)) { 2303 struct node *n;
2291 if (pos == idx) 2304
2292 return n; 2305 for (n = fib_trie_get_first(iter,
2306 (struct trie *) tb->tb_data);
2307 n; n = fib_trie_get_next(iter))
2308 if (pos == idx++) {
2309 iter->tb = tb;
2310 return n;
2311 }
2312 }
2293 } 2313 }
2314
2294 return NULL; 2315 return NULL;
2295} 2316}
2296 2317
2297static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2318static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2298 __acquires(RCU) 2319 __acquires(RCU)
2299{ 2320{
2300 struct fib_trie_iter *iter = seq->private;
2301 struct fib_table *tb;
2302
2303 if (!iter->trie_local) {
2304 tb = fib_get_table(iter->p.net, RT_TABLE_LOCAL);
2305 if (tb)
2306 iter->trie_local = (struct trie *) tb->tb_data;
2307 }
2308 if (!iter->trie_main) {
2309 tb = fib_get_table(iter->p.net, RT_TABLE_MAIN);
2310 if (tb)
2311 iter->trie_main = (struct trie *) tb->tb_data;
2312 }
2313 rcu_read_lock(); 2321 rcu_read_lock();
2314 if (*pos == 0) 2322 return fib_trie_get_idx(seq, *pos);
2315 return SEQ_START_TOKEN;
2316 return fib_trie_get_idx(iter, *pos - 1);
2317} 2323}
2318 2324
2319static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2325static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2320{ 2326{
2321 struct fib_trie_iter *iter = seq->private; 2327 struct fib_trie_iter *iter = seq->private;
2322 void *l = v; 2328 struct net *net = seq_file_net(seq);
2329 struct fib_table *tb = iter->tb;
2330 struct hlist_node *tb_node;
2331 unsigned int h;
2332 struct node *n;
2323 2333
2324 ++*pos; 2334 ++*pos;
2325 if (v == SEQ_START_TOKEN) 2335 /* next node in same table */
2326 return fib_trie_get_idx(iter, 0); 2336 n = fib_trie_get_next(iter);
2327 2337 if (n)
2328 v = fib_trie_get_next(iter); 2338 return n;
2329 BUG_ON(v == l);
2330 if (v)
2331 return v;
2332 2339
2333 /* continue scan in next trie */ 2340 /* walk rest of this hash chain */
2334 if (iter->trie == iter->trie_local) 2341 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
2335 return fib_trie_get_first(iter, iter->trie_main); 2342 while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) {
2343 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2344 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2345 if (n)
2346 goto found;
2347 }
2336 2348
2349 /* new hash chain */
2350 while (++h < FIB_TABLE_HASHSZ) {
2351 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2352 hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) {
2353 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2354 if (n)
2355 goto found;
2356 }
2357 }
2337 return NULL; 2358 return NULL;
2359
2360found:
2361 iter->tb = tb;
2362 return n;
2338} 2363}
2339 2364
2340static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2365static void fib_trie_seq_stop(struct seq_file *seq, void *v)
@@ -2391,22 +2416,15 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2391 const struct fib_trie_iter *iter = seq->private; 2416 const struct fib_trie_iter *iter = seq->private;
2392 struct node *n = v; 2417 struct node *n = v;
2393 2418
2394 if (v == SEQ_START_TOKEN) 2419 if (!node_parent_rcu(n))
2395 return 0; 2420 fib_table_print(seq, iter->tb);
2396
2397 if (!node_parent_rcu(n)) {
2398 if (iter->trie == iter->trie_local)
2399 seq_puts(seq, "<local>:\n");
2400 else
2401 seq_puts(seq, "<main>:\n");
2402 }
2403 2421
2404 if (IS_TNODE(n)) { 2422 if (IS_TNODE(n)) {
2405 struct tnode *tn = (struct tnode *) n; 2423 struct tnode *tn = (struct tnode *) n;
2406 __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); 2424 __be32 prf = htonl(mask_pfx(tn->key, tn->pos));
2407 2425
2408 seq_indent(seq, iter->depth-1); 2426 seq_indent(seq, iter->depth-1);
2409 seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n", 2427 seq_printf(seq, " +-- " NIPQUAD_FMT "/%d %d %d %d\n",
2410 NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, 2428 NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
2411 tn->empty_children); 2429 tn->empty_children);
2412 2430
@@ -2417,7 +2435,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2417 __be32 val = htonl(l->key); 2435 __be32 val = htonl(l->key);
2418 2436
2419 seq_indent(seq, iter->depth); 2437 seq_indent(seq, iter->depth);
2420 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); 2438 seq_printf(seq, " |-- " NIPQUAD_FMT "\n", NIPQUAD(val));
2421 2439
2422 hlist_for_each_entry_rcu(li, node, &l->list, hlist) { 2440 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
2423 struct fib_alias *fa; 2441 struct fib_alias *fa;
@@ -2502,7 +2520,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
2502 struct fib_table *tb; 2520 struct fib_table *tb;
2503 2521
2504 rcu_read_lock(); 2522 rcu_read_lock();
2505 tb = fib_get_table(iter->p.net, RT_TABLE_MAIN); 2523 tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
2506 if (!tb) 2524 if (!tb)
2507 return NULL; 2525 return NULL;
2508 2526
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 40508babad8c..f064031f2031 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -93,6 +93,7 @@
93#include <asm/uaccess.h> 93#include <asm/uaccess.h>
94#include <net/checksum.h> 94#include <net/checksum.h>
95#include <net/xfrm.h> 95#include <net/xfrm.h>
96#include <net/inet_common.h>
96 97
97/* 98/*
98 * Build xmit assembly blocks 99 * Build xmit assembly blocks
@@ -188,29 +189,6 @@ struct icmp_err icmp_err_convert[] = {
188 }, 189 },
189}; 190};
190 191
191/* Control parameters for ECHO replies. */
192int sysctl_icmp_echo_ignore_all __read_mostly;
193int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1;
194
195/* Control parameter - ignore bogus broadcast responses? */
196int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1;
197
198/*
199 * Configurable global rate limit.
200 *
201 * ratelimit defines tokens/packet consumed for dst->rate_token bucket
202 * ratemask defines which icmp types are ratelimited by setting
203 * it's bit position.
204 *
205 * default:
206 * dest unreachable (3), source quench (4),
207 * time exceeded (11), parameter problem (12)
208 */
209
210int sysctl_icmp_ratelimit __read_mostly = 1 * HZ;
211int sysctl_icmp_ratemask __read_mostly = 0x1818;
212int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly;
213
214/* 192/*
215 * ICMP control array. This specifies what to do with each ICMP. 193 * ICMP control array. This specifies what to do with each ICMP.
216 */ 194 */
@@ -229,14 +207,16 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
229 * 207 *
230 * On SMP we have one ICMP socket per-cpu. 208 * On SMP we have one ICMP socket per-cpu.
231 */ 209 */
232static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL; 210static struct sock *icmp_sk(struct net *net)
233#define icmp_socket __get_cpu_var(__icmp_socket) 211{
212 return net->ipv4.icmp_sk[smp_processor_id()];
213}
234 214
235static inline int icmp_xmit_lock(void) 215static inline int icmp_xmit_lock(struct sock *sk)
236{ 216{
237 local_bh_disable(); 217 local_bh_disable();
238 218
239 if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) { 219 if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
240 /* This can happen if the output path signals a 220 /* This can happen if the output path signals a
241 * dst_link_failure() for an outgoing ICMP packet. 221 * dst_link_failure() for an outgoing ICMP packet.
242 */ 222 */
@@ -246,9 +226,9 @@ static inline int icmp_xmit_lock(void)
246 return 0; 226 return 0;
247} 227}
248 228
249static inline void icmp_xmit_unlock(void) 229static inline void icmp_xmit_unlock(struct sock *sk)
250{ 230{
251 spin_unlock_bh(&icmp_socket->sk->sk_lock.slock); 231 spin_unlock_bh(&sk->sk_lock.slock);
252} 232}
253 233
254/* 234/*
@@ -291,7 +271,8 @@ int xrlim_allow(struct dst_entry *dst, int timeout)
291 return rc; 271 return rc;
292} 272}
293 273
294static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code) 274static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
275 int type, int code)
295{ 276{
296 struct dst_entry *dst = &rt->u.dst; 277 struct dst_entry *dst = &rt->u.dst;
297 int rc = 1; 278 int rc = 1;
@@ -308,8 +289,8 @@ static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code)
308 goto out; 289 goto out;
309 290
310 /* Limit if icmp type is enabled in ratemask. */ 291 /* Limit if icmp type is enabled in ratemask. */
311 if ((1 << type) & sysctl_icmp_ratemask) 292 if ((1 << type) & net->ipv4.sysctl_icmp_ratemask)
312 rc = xrlim_allow(dst, sysctl_icmp_ratelimit); 293 rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit);
313out: 294out:
314 return rc; 295 return rc;
315} 296}
@@ -346,19 +327,21 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
346static void icmp_push_reply(struct icmp_bxm *icmp_param, 327static void icmp_push_reply(struct icmp_bxm *icmp_param,
347 struct ipcm_cookie *ipc, struct rtable *rt) 328 struct ipcm_cookie *ipc, struct rtable *rt)
348{ 329{
330 struct sock *sk;
349 struct sk_buff *skb; 331 struct sk_buff *skb;
350 332
351 if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, 333 sk = icmp_sk(dev_net(rt->u.dst.dev));
334 if (ip_append_data(sk, icmp_glue_bits, icmp_param,
352 icmp_param->data_len+icmp_param->head_len, 335 icmp_param->data_len+icmp_param->head_len,
353 icmp_param->head_len, 336 icmp_param->head_len,
354 ipc, rt, MSG_DONTWAIT) < 0) 337 ipc, rt, MSG_DONTWAIT) < 0)
355 ip_flush_pending_frames(icmp_socket->sk); 338 ip_flush_pending_frames(sk);
356 else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { 339 else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
357 struct icmphdr *icmph = icmp_hdr(skb); 340 struct icmphdr *icmph = icmp_hdr(skb);
358 __wsum csum = 0; 341 __wsum csum = 0;
359 struct sk_buff *skb1; 342 struct sk_buff *skb1;
360 343
361 skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) { 344 skb_queue_walk(&sk->sk_write_queue, skb1) {
362 csum = csum_add(csum, skb1->csum); 345 csum = csum_add(csum, skb1->csum);
363 } 346 }
364 csum = csum_partial_copy_nocheck((void *)&icmp_param->data, 347 csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
@@ -366,7 +349,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
366 icmp_param->head_len, csum); 349 icmp_param->head_len, csum);
367 icmph->checksum = csum_fold(csum); 350 icmph->checksum = csum_fold(csum);
368 skb->ip_summed = CHECKSUM_NONE; 351 skb->ip_summed = CHECKSUM_NONE;
369 ip_push_pending_frames(icmp_socket->sk); 352 ip_push_pending_frames(sk);
370 } 353 }
371} 354}
372 355
@@ -376,16 +359,17 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
376 359
377static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) 360static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
378{ 361{
379 struct sock *sk = icmp_socket->sk;
380 struct inet_sock *inet = inet_sk(sk);
381 struct ipcm_cookie ipc; 362 struct ipcm_cookie ipc;
382 struct rtable *rt = (struct rtable *)skb->dst; 363 struct rtable *rt = skb->rtable;
364 struct net *net = dev_net(rt->u.dst.dev);
365 struct sock *sk = icmp_sk(net);
366 struct inet_sock *inet = inet_sk(sk);
383 __be32 daddr; 367 __be32 daddr;
384 368
385 if (ip_options_echo(&icmp_param->replyopts, skb)) 369 if (ip_options_echo(&icmp_param->replyopts, skb))
386 return; 370 return;
387 371
388 if (icmp_xmit_lock()) 372 if (icmp_xmit_lock(sk))
389 return; 373 return;
390 374
391 icmp_param->data.icmph.checksum = 0; 375 icmp_param->data.icmph.checksum = 0;
@@ -405,15 +389,15 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
405 .tos = RT_TOS(ip_hdr(skb)->tos) } }, 389 .tos = RT_TOS(ip_hdr(skb)->tos) } },
406 .proto = IPPROTO_ICMP }; 390 .proto = IPPROTO_ICMP };
407 security_skb_classify_flow(skb, &fl); 391 security_skb_classify_flow(skb, &fl);
408 if (ip_route_output_key(rt->u.dst.dev->nd_net, &rt, &fl)) 392 if (ip_route_output_key(net, &rt, &fl))
409 goto out_unlock; 393 goto out_unlock;
410 } 394 }
411 if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, 395 if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type,
412 icmp_param->data.icmph.code)) 396 icmp_param->data.icmph.code))
413 icmp_push_reply(icmp_param, &ipc, rt); 397 icmp_push_reply(icmp_param, &ipc, rt);
414 ip_rt_put(rt); 398 ip_rt_put(rt);
415out_unlock: 399out_unlock:
416 icmp_xmit_unlock(); 400 icmp_xmit_unlock(sk);
417} 401}
418 402
419 403
@@ -433,15 +417,17 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
433 struct iphdr *iph; 417 struct iphdr *iph;
434 int room; 418 int room;
435 struct icmp_bxm icmp_param; 419 struct icmp_bxm icmp_param;
436 struct rtable *rt = (struct rtable *)skb_in->dst; 420 struct rtable *rt = skb_in->rtable;
437 struct ipcm_cookie ipc; 421 struct ipcm_cookie ipc;
438 __be32 saddr; 422 __be32 saddr;
439 u8 tos; 423 u8 tos;
440 struct net *net; 424 struct net *net;
425 struct sock *sk;
441 426
442 if (!rt) 427 if (!rt)
443 goto out; 428 goto out;
444 net = rt->u.dst.dev->nd_net; 429 net = dev_net(rt->u.dst.dev);
430 sk = icmp_sk(net);
445 431
446 /* 432 /*
447 * Find the original header. It is expected to be valid, of course. 433 * Find the original header. It is expected to be valid, of course.
@@ -505,7 +491,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
505 } 491 }
506 } 492 }
507 493
508 if (icmp_xmit_lock()) 494 if (icmp_xmit_lock(sk))
509 return; 495 return;
510 496
511 /* 497 /*
@@ -516,7 +502,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
516 if (!(rt->rt_flags & RTCF_LOCAL)) { 502 if (!(rt->rt_flags & RTCF_LOCAL)) {
517 struct net_device *dev = NULL; 503 struct net_device *dev = NULL;
518 504
519 if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr) 505 if (rt->fl.iif &&
506 net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
520 dev = dev_get_by_index(net, rt->fl.iif); 507 dev = dev_get_by_index(net, rt->fl.iif);
521 508
522 if (dev) { 509 if (dev) {
@@ -544,7 +531,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
544 icmp_param.data.icmph.checksum = 0; 531 icmp_param.data.icmph.checksum = 0;
545 icmp_param.skb = skb_in; 532 icmp_param.skb = skb_in;
546 icmp_param.offset = skb_network_offset(skb_in); 533 icmp_param.offset = skb_network_offset(skb_in);
547 inet_sk(icmp_socket->sk)->tos = tos; 534 inet_sk(sk)->tos = tos;
548 ipc.addr = iph->saddr; 535 ipc.addr = iph->saddr;
549 ipc.opt = &icmp_param.replyopts; 536 ipc.opt = &icmp_param.replyopts;
550 537
@@ -609,7 +596,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
609 RT_TOS(tos), rt2->u.dst.dev); 596 RT_TOS(tos), rt2->u.dst.dev);
610 597
611 dst_release(&rt2->u.dst); 598 dst_release(&rt2->u.dst);
612 rt2 = (struct rtable *)skb_in->dst; 599 rt2 = skb_in->rtable;
613 skb_in->dst = odst; 600 skb_in->dst = odst;
614 } 601 }
615 602
@@ -634,7 +621,7 @@ relookup_failed:
634 } 621 }
635 622
636route_done: 623route_done:
637 if (!icmpv4_xrlim_allow(rt, type, code)) 624 if (!icmpv4_xrlim_allow(net, rt, type, code))
638 goto ende; 625 goto ende;
639 626
640 /* RFC says return as much as we can without exceeding 576 bytes. */ 627 /* RFC says return as much as we can without exceeding 576 bytes. */
@@ -654,7 +641,7 @@ route_done:
654ende: 641ende:
655 ip_rt_put(rt); 642 ip_rt_put(rt);
656out_unlock: 643out_unlock:
657 icmp_xmit_unlock(); 644 icmp_xmit_unlock(sk);
658out:; 645out:;
659} 646}
660 647
@@ -672,7 +659,7 @@ static void icmp_unreach(struct sk_buff *skb)
672 u32 info = 0; 659 u32 info = 0;
673 struct net *net; 660 struct net *net;
674 661
675 net = skb->dst->dev->nd_net; 662 net = dev_net(skb->dst->dev);
676 663
677 /* 664 /*
678 * Incomplete header ? 665 * Incomplete header ?
@@ -698,7 +685,7 @@ static void icmp_unreach(struct sk_buff *skb)
698 break; 685 break;
699 case ICMP_FRAG_NEEDED: 686 case ICMP_FRAG_NEEDED:
700 if (ipv4_config.no_pmtu_disc) { 687 if (ipv4_config.no_pmtu_disc) {
701 LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: " 688 LIMIT_NETDEBUG(KERN_INFO "ICMP: " NIPQUAD_FMT ": "
702 "fragmentation needed " 689 "fragmentation needed "
703 "and DF set.\n", 690 "and DF set.\n",
704 NIPQUAD(iph->daddr)); 691 NIPQUAD(iph->daddr));
@@ -710,7 +697,7 @@ static void icmp_unreach(struct sk_buff *skb)
710 } 697 }
711 break; 698 break;
712 case ICMP_SR_FAILED: 699 case ICMP_SR_FAILED:
713 LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: Source " 700 LIMIT_NETDEBUG(KERN_INFO "ICMP: " NIPQUAD_FMT ": Source "
714 "Route Failed.\n", 701 "Route Failed.\n",
715 NIPQUAD(iph->daddr)); 702 NIPQUAD(iph->daddr));
716 break; 703 break;
@@ -740,12 +727,12 @@ static void icmp_unreach(struct sk_buff *skb)
740 * get the other vendor to fix their kit. 727 * get the other vendor to fix their kit.
741 */ 728 */
742 729
743 if (!sysctl_icmp_ignore_bogus_error_responses && 730 if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
744 inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { 731 inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
745 if (net_ratelimit()) 732 if (net_ratelimit())
746 printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP " 733 printk(KERN_WARNING NIPQUAD_FMT " sent an invalid ICMP "
747 "type %u, code %u " 734 "type %u, code %u "
748 "error to a broadcast: %u.%u.%u.%u on %s\n", 735 "error to a broadcast: " NIPQUAD_FMT " on %s\n",
749 NIPQUAD(ip_hdr(skb)->saddr), 736 NIPQUAD(ip_hdr(skb)->saddr),
750 icmph->type, icmph->code, 737 icmph->type, icmph->code,
751 NIPQUAD(iph->daddr), 738 NIPQUAD(iph->daddr),
@@ -835,7 +822,10 @@ out_err:
835 822
836static void icmp_echo(struct sk_buff *skb) 823static void icmp_echo(struct sk_buff *skb)
837{ 824{
838 if (!sysctl_icmp_echo_ignore_all) { 825 struct net *net;
826
827 net = dev_net(skb->dst->dev);
828 if (!net->ipv4.sysctl_icmp_echo_ignore_all) {
839 struct icmp_bxm icmp_param; 829 struct icmp_bxm icmp_param;
840 830
841 icmp_param.data.icmph = *icmp_hdr(skb); 831 icmp_param.data.icmph = *icmp_hdr(skb);
@@ -938,7 +928,7 @@ static void icmp_address(struct sk_buff *skb)
938 928
939static void icmp_address_reply(struct sk_buff *skb) 929static void icmp_address_reply(struct sk_buff *skb)
940{ 930{
941 struct rtable *rt = (struct rtable *)skb->dst; 931 struct rtable *rt = skb->rtable;
942 struct net_device *dev = skb->dev; 932 struct net_device *dev = skb->dev;
943 struct in_device *in_dev; 933 struct in_device *in_dev;
944 struct in_ifaddr *ifa; 934 struct in_ifaddr *ifa;
@@ -963,8 +953,8 @@ static void icmp_address_reply(struct sk_buff *skb)
963 break; 953 break;
964 } 954 }
965 if (!ifa && net_ratelimit()) { 955 if (!ifa && net_ratelimit()) {
966 printk(KERN_INFO "Wrong address mask %u.%u.%u.%u from " 956 printk(KERN_INFO "Wrong address mask " NIPQUAD_FMT " from "
967 "%s/%u.%u.%u.%u\n", 957 "%s/" NIPQUAD_FMT "\n",
968 NIPQUAD(*mp), dev->name, NIPQUAD(rt->rt_src)); 958 NIPQUAD(*mp), dev->name, NIPQUAD(rt->rt_src));
969 } 959 }
970 } 960 }
@@ -983,7 +973,7 @@ static void icmp_discard(struct sk_buff *skb)
983int icmp_rcv(struct sk_buff *skb) 973int icmp_rcv(struct sk_buff *skb)
984{ 974{
985 struct icmphdr *icmph; 975 struct icmphdr *icmph;
986 struct rtable *rt = (struct rtable *)skb->dst; 976 struct rtable *rt = skb->rtable;
987 977
988 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 978 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
989 int nh; 979 int nh;
@@ -1038,6 +1028,9 @@ int icmp_rcv(struct sk_buff *skb)
1038 */ 1028 */
1039 1029
1040 if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { 1030 if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
1031 struct net *net;
1032
1033 net = dev_net(rt->u.dst.dev);
1041 /* 1034 /*
1042 * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be 1035 * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be
1043 * silently ignored (we let user decide with a sysctl). 1036 * silently ignored (we let user decide with a sysctl).
@@ -1046,7 +1039,7 @@ int icmp_rcv(struct sk_buff *skb)
1046 */ 1039 */
1047 if ((icmph->type == ICMP_ECHO || 1040 if ((icmph->type == ICMP_ECHO ||
1048 icmph->type == ICMP_TIMESTAMP) && 1041 icmph->type == ICMP_TIMESTAMP) &&
1049 sysctl_icmp_echo_ignore_broadcasts) { 1042 net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {
1050 goto error; 1043 goto error;
1051 } 1044 }
1052 if (icmph->type != ICMP_ECHO && 1045 if (icmph->type != ICMP_ECHO &&
@@ -1141,38 +1134,84 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
1141 }, 1134 },
1142}; 1135};
1143 1136
1144void __init icmp_init(struct net_proto_family *ops) 1137static void __net_exit icmp_sk_exit(struct net *net)
1145{ 1138{
1146 struct inet_sock *inet;
1147 int i; 1139 int i;
1148 1140
1149 for_each_possible_cpu(i) { 1141 for_each_possible_cpu(i)
1150 int err; 1142 inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]);
1143 kfree(net->ipv4.icmp_sk);
1144 net->ipv4.icmp_sk = NULL;
1145}
1146
1147int __net_init icmp_sk_init(struct net *net)
1148{
1149 int i, err;
1151 1150
1152 err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, 1151 net->ipv4.icmp_sk =
1153 &per_cpu(__icmp_socket, i)); 1152 kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
1153 if (net->ipv4.icmp_sk == NULL)
1154 return -ENOMEM;
1154 1155
1156 for_each_possible_cpu(i) {
1157 struct sock *sk;
1158
1159 err = inet_ctl_sock_create(&sk, PF_INET,
1160 SOCK_RAW, IPPROTO_ICMP, net);
1155 if (err < 0) 1161 if (err < 0)
1156 panic("Failed to create the ICMP control socket.\n"); 1162 goto fail;
1157 1163
1158 per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC; 1164 net->ipv4.icmp_sk[i] = sk;
1159 1165
1160 /* Enough space for 2 64K ICMP packets, including 1166 /* Enough space for 2 64K ICMP packets, including
1161 * sk_buff struct overhead. 1167 * sk_buff struct overhead.
1162 */ 1168 */
1163 per_cpu(__icmp_socket, i)->sk->sk_sndbuf = 1169 sk->sk_sndbuf =
1164 (2 * ((64 * 1024) + sizeof(struct sk_buff))); 1170 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
1165 1171
1166 inet = inet_sk(per_cpu(__icmp_socket, i)->sk); 1172 inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
1167 inet->uc_ttl = -1;
1168 inet->pmtudisc = IP_PMTUDISC_DONT;
1169
1170 /* Unhash it so that IP input processing does not even
1171 * see it, we do not wish this socket to see incoming
1172 * packets.
1173 */
1174 per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk);
1175 } 1173 }
1174
1175 /* Control parameters for ECHO replies. */
1176 net->ipv4.sysctl_icmp_echo_ignore_all = 0;
1177 net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1;
1178
1179 /* Control parameter - ignore bogus broadcast responses? */
1180 net->ipv4.sysctl_icmp_ignore_bogus_error_responses = 1;
1181
1182 /*
1183 * Configurable global rate limit.
1184 *
1185 * ratelimit defines tokens/packet consumed for dst->rate_token
1186 * bucket ratemask defines which icmp types are ratelimited by
1187 * setting it's bit position.
1188 *
1189 * default:
1190 * dest unreachable (3), source quench (4),
1191 * time exceeded (11), parameter problem (12)
1192 */
1193
1194 net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
1195 net->ipv4.sysctl_icmp_ratemask = 0x1818;
1196 net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
1197
1198 return 0;
1199
1200fail:
1201 for_each_possible_cpu(i)
1202 inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]);
1203 kfree(net->ipv4.icmp_sk);
1204 return err;
1205}
1206
1207static struct pernet_operations __net_initdata icmp_sk_ops = {
1208 .init = icmp_sk_init,
1209 .exit = icmp_sk_exit,
1210};
1211
1212int __init icmp_init(void)
1213{
1214 return register_pernet_device(&icmp_sk_ops);
1176} 1215}
1177 1216
1178EXPORT_SYMBOL(icmp_err_convert); 1217EXPORT_SYMBOL(icmp_err_convert);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 732cd07e6071..6250f4239b61 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -130,12 +130,12 @@
130 */ 130 */
131 131
132#define IGMP_V1_SEEN(in_dev) \ 132#define IGMP_V1_SEEN(in_dev) \
133 (IPV4_DEVCONF_ALL(in_dev->dev->nd_net, FORCE_IGMP_VERSION) == 1 || \ 133 (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \
134 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ 134 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
135 ((in_dev)->mr_v1_seen && \ 135 ((in_dev)->mr_v1_seen && \
136 time_before(jiffies, (in_dev)->mr_v1_seen))) 136 time_before(jiffies, (in_dev)->mr_v1_seen)))
137#define IGMP_V2_SEEN(in_dev) \ 137#define IGMP_V2_SEEN(in_dev) \
138 (IPV4_DEVCONF_ALL(in_dev->dev->nd_net, FORCE_IGMP_VERSION) == 2 || \ 138 (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \
139 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ 139 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
140 ((in_dev)->mr_v2_seen && \ 140 ((in_dev)->mr_v2_seen && \
141 time_before(jiffies, (in_dev)->mr_v2_seen))) 141 time_before(jiffies, (in_dev)->mr_v2_seen)))
@@ -948,7 +948,7 @@ int igmp_rcv(struct sk_buff *skb)
948 case IGMPV2_HOST_MEMBERSHIP_REPORT: 948 case IGMPV2_HOST_MEMBERSHIP_REPORT:
949 case IGMPV3_HOST_MEMBERSHIP_REPORT: 949 case IGMPV3_HOST_MEMBERSHIP_REPORT:
950 /* Is it our report looped back? */ 950 /* Is it our report looped back? */
951 if (((struct rtable*)skb->dst)->fl.iif == 0) 951 if (skb->rtable->fl.iif == 0)
952 break; 952 break;
953 /* don't rely on MC router hearing unicast reports */ 953 /* don't rely on MC router hearing unicast reports */
954 if (skb->pkt_type == PACKET_MULTICAST || 954 if (skb->pkt_type == PACKET_MULTICAST ||
@@ -1198,6 +1198,9 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1198 1198
1199 ASSERT_RTNL(); 1199 ASSERT_RTNL();
1200 1200
1201 if (dev_net(in_dev->dev) != &init_net)
1202 return;
1203
1201 for (im=in_dev->mc_list; im; im=im->next) { 1204 for (im=in_dev->mc_list; im; im=im->next) {
1202 if (im->multiaddr == addr) { 1205 if (im->multiaddr == addr) {
1203 im->users++; 1206 im->users++;
@@ -1277,6 +1280,9 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
1277 1280
1278 ASSERT_RTNL(); 1281 ASSERT_RTNL();
1279 1282
1283 if (dev_net(in_dev->dev) != &init_net)
1284 return;
1285
1280 for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { 1286 for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
1281 if (i->multiaddr==addr) { 1287 if (i->multiaddr==addr) {
1282 if (--i->users == 0) { 1288 if (--i->users == 0) {
@@ -1304,6 +1310,9 @@ void ip_mc_down(struct in_device *in_dev)
1304 1310
1305 ASSERT_RTNL(); 1311 ASSERT_RTNL();
1306 1312
1313 if (dev_net(in_dev->dev) != &init_net)
1314 return;
1315
1307 for (i=in_dev->mc_list; i; i=i->next) 1316 for (i=in_dev->mc_list; i; i=i->next)
1308 igmp_group_dropped(i); 1317 igmp_group_dropped(i);
1309 1318
@@ -1324,6 +1333,9 @@ void ip_mc_init_dev(struct in_device *in_dev)
1324{ 1333{
1325 ASSERT_RTNL(); 1334 ASSERT_RTNL();
1326 1335
1336 if (dev_net(in_dev->dev) != &init_net)
1337 return;
1338
1327 in_dev->mc_tomb = NULL; 1339 in_dev->mc_tomb = NULL;
1328#ifdef CONFIG_IP_MULTICAST 1340#ifdef CONFIG_IP_MULTICAST
1329 in_dev->mr_gq_running = 0; 1341 in_dev->mr_gq_running = 0;
@@ -1347,6 +1359,9 @@ void ip_mc_up(struct in_device *in_dev)
1347 1359
1348 ASSERT_RTNL(); 1360 ASSERT_RTNL();
1349 1361
1362 if (dev_net(in_dev->dev) != &init_net)
1363 return;
1364
1350 ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); 1365 ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
1351 1366
1352 for (i=in_dev->mc_list; i; i=i->next) 1367 for (i=in_dev->mc_list; i; i=i->next)
@@ -1363,6 +1378,9 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
1363 1378
1364 ASSERT_RTNL(); 1379 ASSERT_RTNL();
1365 1380
1381 if (dev_net(in_dev->dev) != &init_net)
1382 return;
1383
1366 /* Deactivate timers */ 1384 /* Deactivate timers */
1367 ip_mc_down(in_dev); 1385 ip_mc_down(in_dev);
1368 1386
@@ -1744,6 +1762,9 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
1744 if (!ipv4_is_multicast(addr)) 1762 if (!ipv4_is_multicast(addr))
1745 return -EINVAL; 1763 return -EINVAL;
1746 1764
1765 if (sock_net(sk) != &init_net)
1766 return -EPROTONOSUPPORT;
1767
1747 rtnl_lock(); 1768 rtnl_lock();
1748 1769
1749 in_dev = ip_mc_find_dev(imr); 1770 in_dev = ip_mc_find_dev(imr);
@@ -1812,6 +1833,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
1812 u32 ifindex; 1833 u32 ifindex;
1813 int ret = -EADDRNOTAVAIL; 1834 int ret = -EADDRNOTAVAIL;
1814 1835
1836 if (sock_net(sk) != &init_net)
1837 return -EPROTONOSUPPORT;
1838
1815 rtnl_lock(); 1839 rtnl_lock();
1816 in_dev = ip_mc_find_dev(imr); 1840 in_dev = ip_mc_find_dev(imr);
1817 ifindex = imr->imr_ifindex; 1841 ifindex = imr->imr_ifindex;
@@ -1857,6 +1881,9 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
1857 if (!ipv4_is_multicast(addr)) 1881 if (!ipv4_is_multicast(addr))
1858 return -EINVAL; 1882 return -EINVAL;
1859 1883
1884 if (sock_net(sk) != &init_net)
1885 return -EPROTONOSUPPORT;
1886
1860 rtnl_lock(); 1887 rtnl_lock();
1861 1888
1862 imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; 1889 imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr;
@@ -1990,6 +2017,9 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
1990 msf->imsf_fmode != MCAST_EXCLUDE) 2017 msf->imsf_fmode != MCAST_EXCLUDE)
1991 return -EINVAL; 2018 return -EINVAL;
1992 2019
2020 if (sock_net(sk) != &init_net)
2021 return -EPROTONOSUPPORT;
2022
1993 rtnl_lock(); 2023 rtnl_lock();
1994 2024
1995 imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; 2025 imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
@@ -2070,6 +2100,9 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
2070 if (!ipv4_is_multicast(addr)) 2100 if (!ipv4_is_multicast(addr))
2071 return -EINVAL; 2101 return -EINVAL;
2072 2102
2103 if (sock_net(sk) != &init_net)
2104 return -EPROTONOSUPPORT;
2105
2073 rtnl_lock(); 2106 rtnl_lock();
2074 2107
2075 imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; 2108 imr.imr_multiaddr.s_addr = msf->imsf_multiaddr;
@@ -2132,6 +2165,9 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
2132 if (!ipv4_is_multicast(addr)) 2165 if (!ipv4_is_multicast(addr))
2133 return -EINVAL; 2166 return -EINVAL;
2134 2167
2168 if (sock_net(sk) != &init_net)
2169 return -EPROTONOSUPPORT;
2170
2135 rtnl_lock(); 2171 rtnl_lock();
2136 2172
2137 err = -EADDRNOTAVAIL; 2173 err = -EADDRNOTAVAIL;
@@ -2216,6 +2252,9 @@ void ip_mc_drop_socket(struct sock *sk)
2216 if (inet->mc_list == NULL) 2252 if (inet->mc_list == NULL)
2217 return; 2253 return;
2218 2254
2255 if (sock_net(sk) != &init_net)
2256 return;
2257
2219 rtnl_lock(); 2258 rtnl_lock();
2220 while ((iml = inet->mc_list) != NULL) { 2259 while ((iml = inet->mc_list) != NULL) {
2221 struct in_device *in_dev; 2260 struct in_device *in_dev;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b189278c7bc1..828ea211ff21 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -55,6 +55,13 @@ int inet_csk_bind_conflict(const struct sock *sk,
55 struct hlist_node *node; 55 struct hlist_node *node;
56 int reuse = sk->sk_reuse; 56 int reuse = sk->sk_reuse;
57 57
58 /*
59 * Unlike other sk lookup places we do not check
60 * for sk_net here, since _all_ the socks listed
61 * in tb->owners list belong to the same net - the
62 * one this bucket belongs to.
63 */
64
58 sk_for_each_bound(sk2, node, &tb->owners) { 65 sk_for_each_bound(sk2, node, &tb->owners) {
59 if (sk != sk2 && 66 if (sk != sk2 &&
60 !inet_v6_ipv6only(sk2) && 67 !inet_v6_ipv6only(sk2) &&
@@ -80,12 +87,12 @@ EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
80 */ 87 */
81int inet_csk_get_port(struct sock *sk, unsigned short snum) 88int inet_csk_get_port(struct sock *sk, unsigned short snum)
82{ 89{
83 struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; 90 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
84 struct inet_bind_hashbucket *head; 91 struct inet_bind_hashbucket *head;
85 struct hlist_node *node; 92 struct hlist_node *node;
86 struct inet_bind_bucket *tb; 93 struct inet_bind_bucket *tb;
87 int ret; 94 int ret;
88 struct net *net = sk->sk_net; 95 struct net *net = sock_net(sk);
89 96
90 local_bh_disable(); 97 local_bh_disable();
91 if (!snum) { 98 if (!snum) {
@@ -133,8 +140,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
133 goto tb_not_found; 140 goto tb_not_found;
134tb_found: 141tb_found:
135 if (!hlist_empty(&tb->owners)) { 142 if (!hlist_empty(&tb->owners)) {
136 if (sk->sk_reuse > 1)
137 goto success;
138 if (tb->fastreuse > 0 && 143 if (tb->fastreuse > 0 &&
139 sk->sk_reuse && sk->sk_state != TCP_LISTEN) { 144 sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
140 goto success; 145 goto success;
@@ -333,7 +338,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
333 .dport = ireq->rmt_port } } }; 338 .dport = ireq->rmt_port } } };
334 339
335 security_req_classify_flow(req, &fl); 340 security_req_classify_flow(req, &fl);
336 if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) { 341 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) {
337 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 342 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
338 return NULL; 343 return NULL;
339 } 344 }
@@ -414,8 +419,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
414 struct inet_connection_sock *icsk = inet_csk(parent); 419 struct inet_connection_sock *icsk = inet_csk(parent);
415 struct request_sock_queue *queue = &icsk->icsk_accept_queue; 420 struct request_sock_queue *queue = &icsk->icsk_accept_queue;
416 struct listen_sock *lopt = queue->listen_opt; 421 struct listen_sock *lopt = queue->listen_opt;
417 int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 422 int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
418 int thresh = max_retries;
419 unsigned long now = jiffies; 423 unsigned long now = jiffies;
420 struct request_sock **reqp, *req; 424 struct request_sock **reqp, *req;
421 int i, budget; 425 int i, budget;
@@ -451,9 +455,6 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
451 } 455 }
452 } 456 }
453 457
454 if (queue->rskq_defer_accept)
455 max_retries = queue->rskq_defer_accept;
456
457 budget = 2 * (lopt->nr_table_entries / (timeout / interval)); 458 budget = 2 * (lopt->nr_table_entries / (timeout / interval));
458 i = lopt->clock_hand; 459 i = lopt->clock_hand;
459 460
@@ -461,9 +462,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
461 reqp=&lopt->syn_table[i]; 462 reqp=&lopt->syn_table[i];
462 while ((req = *reqp) != NULL) { 463 while ((req = *reqp) != NULL) {
463 if (time_after_eq(now, req->expires)) { 464 if (time_after_eq(now, req->expires)) {
464 if ((req->retrans < thresh || 465 if (req->retrans < thresh &&
465 (inet_rsk(req)->acked && req->retrans < max_retries)) 466 !req->rsk_ops->rtx_syn_ack(parent, req)) {
466 && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
467 unsigned long timeo; 467 unsigned long timeo;
468 468
469 if (req->retrans++ == 0) 469 if (req->retrans++ == 0)
@@ -656,25 +656,6 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
656 656
657EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); 657EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
658 658
659int inet_csk_ctl_sock_create(struct socket **sock, unsigned short family,
660 unsigned short type, unsigned char protocol)
661{
662 int rc = sock_create_kern(family, type, protocol, sock);
663
664 if (rc == 0) {
665 (*sock)->sk->sk_allocation = GFP_ATOMIC;
666 inet_sk((*sock)->sk)->uc_ttl = -1;
667 /*
668 * Unhash it so that IP input processing does not even see it,
669 * we do not wish this socket to see incoming packets.
670 */
671 (*sock)->sk->sk_prot->unhash((*sock)->sk);
672 }
673 return rc;
674}
675
676EXPORT_SYMBOL_GPL(inet_csk_ctl_sock_create);
677
678#ifdef CONFIG_COMPAT 659#ifdef CONFIG_COMPAT
679int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, 660int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
680 char __user *optval, int __user *optlen) 661 char __user *optval, int __user *optlen)
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index a0a3c78cb5e0..4ed429bd5951 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -107,10 +107,10 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
107 if (del_timer(&fq->timer)) 107 if (del_timer(&fq->timer))
108 atomic_dec(&fq->refcnt); 108 atomic_dec(&fq->refcnt);
109 109
110 if (!(fq->last_in & COMPLETE)) { 110 if (!(fq->last_in & INET_FRAG_COMPLETE)) {
111 fq_unlink(fq, f); 111 fq_unlink(fq, f);
112 atomic_dec(&fq->refcnt); 112 atomic_dec(&fq->refcnt);
113 fq->last_in |= COMPLETE; 113 fq->last_in |= INET_FRAG_COMPLETE;
114 } 114 }
115} 115}
116 116
@@ -134,7 +134,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
134 struct sk_buff *fp; 134 struct sk_buff *fp;
135 struct netns_frags *nf; 135 struct netns_frags *nf;
136 136
137 BUG_TRAP(q->last_in & COMPLETE); 137 BUG_TRAP(q->last_in & INET_FRAG_COMPLETE);
138 BUG_TRAP(del_timer(&q->timer) == 0); 138 BUG_TRAP(del_timer(&q->timer) == 0);
139 139
140 /* Release all fragment data. */ 140 /* Release all fragment data. */
@@ -177,7 +177,7 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f)
177 read_unlock(&f->lock); 177 read_unlock(&f->lock);
178 178
179 spin_lock(&q->lock); 179 spin_lock(&q->lock);
180 if (!(q->last_in & COMPLETE)) 180 if (!(q->last_in & INET_FRAG_COMPLETE))
181 inet_frag_kill(q, f); 181 inet_frag_kill(q, f);
182 spin_unlock(&q->lock); 182 spin_unlock(&q->lock);
183 183
@@ -209,7 +209,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
209 if (qp->net == nf && f->match(qp, arg)) { 209 if (qp->net == nf && f->match(qp, arg)) {
210 atomic_inc(&qp->refcnt); 210 atomic_inc(&qp->refcnt);
211 write_unlock(&f->lock); 211 write_unlock(&f->lock);
212 qp_in->last_in |= COMPLETE; 212 qp_in->last_in |= INET_FRAG_COMPLETE;
213 inet_frag_put(qp_in, f); 213 inet_frag_put(qp_in, f);
214 return qp; 214 return qp;
215 } 215 }
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 1aba606f6bbb..2023d37b2708 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -35,7 +35,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
35 struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); 35 struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
36 36
37 if (tb != NULL) { 37 if (tb != NULL) {
38 tb->ib_net = net; 38 tb->ib_net = hold_net(net);
39 tb->port = snum; 39 tb->port = snum;
40 tb->fastreuse = 0; 40 tb->fastreuse = 0;
41 INIT_HLIST_HEAD(&tb->owners); 41 INIT_HLIST_HEAD(&tb->owners);
@@ -51,6 +51,7 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
51{ 51{
52 if (hlist_empty(&tb->owners)) { 52 if (hlist_empty(&tb->owners)) {
53 __hlist_del(&tb->node); 53 __hlist_del(&tb->node);
54 release_net(tb->ib_net);
54 kmem_cache_free(cachep, tb); 55 kmem_cache_free(cachep, tb);
55 } 56 }
56} 57}
@@ -68,7 +69,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
68 */ 69 */
69static void __inet_put_port(struct sock *sk) 70static void __inet_put_port(struct sock *sk)
70{ 71{
71 struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; 72 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
72 const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); 73 const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
73 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; 74 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
74 struct inet_bind_bucket *tb; 75 struct inet_bind_bucket *tb;
@@ -91,6 +92,22 @@ void inet_put_port(struct sock *sk)
91 92
92EXPORT_SYMBOL(inet_put_port); 93EXPORT_SYMBOL(inet_put_port);
93 94
95void __inet_inherit_port(struct sock *sk, struct sock *child)
96{
97 struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
98 const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
99 struct inet_bind_hashbucket *head = &table->bhash[bhash];
100 struct inet_bind_bucket *tb;
101
102 spin_lock(&head->lock);
103 tb = inet_csk(sk)->icsk_bind_hash;
104 sk_add_bind_node(child, &tb->owners);
105 inet_csk(child)->icsk_bind_hash = tb;
106 spin_unlock(&head->lock);
107}
108
109EXPORT_SYMBOL_GPL(__inet_inherit_port);
110
94/* 111/*
95 * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. 112 * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
96 * Look, when several writers sleep and reader wakes them up, all but one 113 * Look, when several writers sleep and reader wakes them up, all but one
@@ -139,7 +156,7 @@ static struct sock *inet_lookup_listener_slow(struct net *net,
139 sk_for_each(sk, node, head) { 156 sk_for_each(sk, node, head) {
140 const struct inet_sock *inet = inet_sk(sk); 157 const struct inet_sock *inet = inet_sk(sk);
141 158
142 if (sk->sk_net == net && inet->num == hnum && 159 if (net_eq(sock_net(sk), net) && inet->num == hnum &&
143 !ipv6_only_sock(sk)) { 160 !ipv6_only_sock(sk)) {
144 const __be32 rcv_saddr = inet->rcv_saddr; 161 const __be32 rcv_saddr = inet->rcv_saddr;
145 int score = sk->sk_family == PF_INET ? 1 : 0; 162 int score = sk->sk_family == PF_INET ? 1 : 0;
@@ -182,7 +199,7 @@ struct sock *__inet_lookup_listener(struct net *net,
182 if (inet->num == hnum && !sk->sk_node.next && 199 if (inet->num == hnum && !sk->sk_node.next &&
183 (!inet->rcv_saddr || inet->rcv_saddr == daddr) && 200 (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
184 (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && 201 (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
185 !sk->sk_bound_dev_if && sk->sk_net == net) 202 !sk->sk_bound_dev_if && net_eq(sock_net(sk), net))
186 goto sherry_cache; 203 goto sherry_cache;
187 sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); 204 sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif);
188 } 205 }
@@ -254,7 +271,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
254 struct sock *sk2; 271 struct sock *sk2;
255 const struct hlist_node *node; 272 const struct hlist_node *node;
256 struct inet_timewait_sock *tw; 273 struct inet_timewait_sock *tw;
257 struct net *net = sk->sk_net; 274 struct net *net = sock_net(sk);
258 275
259 prefetch(head->chain.first); 276 prefetch(head->chain.first);
260 write_lock(lock); 277 write_lock(lock);
@@ -288,7 +305,7 @@ unique:
288 sk->sk_hash = hash; 305 sk->sk_hash = hash;
289 BUG_TRAP(sk_unhashed(sk)); 306 BUG_TRAP(sk_unhashed(sk));
290 __sk_add_node(sk, &head->chain); 307 __sk_add_node(sk, &head->chain);
291 sock_prot_inuse_add(sk->sk_prot, 1); 308 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
292 write_unlock(lock); 309 write_unlock(lock);
293 310
294 if (twp) { 311 if (twp) {
@@ -318,7 +335,7 @@ static inline u32 inet_sk_port_offset(const struct sock *sk)
318 335
319void __inet_hash_nolisten(struct sock *sk) 336void __inet_hash_nolisten(struct sock *sk)
320{ 337{
321 struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; 338 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
322 struct hlist_head *list; 339 struct hlist_head *list;
323 rwlock_t *lock; 340 rwlock_t *lock;
324 struct inet_ehash_bucket *head; 341 struct inet_ehash_bucket *head;
@@ -332,14 +349,14 @@ void __inet_hash_nolisten(struct sock *sk)
332 349
333 write_lock(lock); 350 write_lock(lock);
334 __sk_add_node(sk, list); 351 __sk_add_node(sk, list);
335 sock_prot_inuse_add(sk->sk_prot, 1); 352 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
336 write_unlock(lock); 353 write_unlock(lock);
337} 354}
338EXPORT_SYMBOL_GPL(__inet_hash_nolisten); 355EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
339 356
340static void __inet_hash(struct sock *sk) 357static void __inet_hash(struct sock *sk)
341{ 358{
342 struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; 359 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
343 struct hlist_head *list; 360 struct hlist_head *list;
344 rwlock_t *lock; 361 rwlock_t *lock;
345 362
@@ -354,7 +371,7 @@ static void __inet_hash(struct sock *sk)
354 371
355 inet_listen_wlock(hashinfo); 372 inet_listen_wlock(hashinfo);
356 __sk_add_node(sk, list); 373 __sk_add_node(sk, list);
357 sock_prot_inuse_add(sk->sk_prot, 1); 374 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
358 write_unlock(lock); 375 write_unlock(lock);
359 wake_up(&hashinfo->lhash_wait); 376 wake_up(&hashinfo->lhash_wait);
360} 377}
@@ -372,7 +389,7 @@ EXPORT_SYMBOL_GPL(inet_hash);
372void inet_unhash(struct sock *sk) 389void inet_unhash(struct sock *sk)
373{ 390{
374 rwlock_t *lock; 391 rwlock_t *lock;
375 struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; 392 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
376 393
377 if (sk_unhashed(sk)) 394 if (sk_unhashed(sk))
378 goto out; 395 goto out;
@@ -387,7 +404,7 @@ void inet_unhash(struct sock *sk)
387 } 404 }
388 405
389 if (__sk_del_node_init(sk)) 406 if (__sk_del_node_init(sk))
390 sock_prot_inuse_add(sk->sk_prot, -1); 407 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
391 write_unlock_bh(lock); 408 write_unlock_bh(lock);
392out: 409out:
393 if (sk->sk_state == TCP_LISTEN) 410 if (sk->sk_state == TCP_LISTEN)
@@ -406,7 +423,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
406 struct inet_bind_hashbucket *head; 423 struct inet_bind_hashbucket *head;
407 struct inet_bind_bucket *tb; 424 struct inet_bind_bucket *tb;
408 int ret; 425 int ret;
409 struct net *net = sk->sk_net; 426 struct net *net = sock_net(sk);
410 427
411 if (!snum) { 428 if (!snum) {
412 int i, remaining, low, high, port; 429 int i, remaining, low, high, port;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 717c411a5c6b..ce16e9ac24c1 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -57,6 +57,7 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
57 printk(KERN_DEBUG "%s timewait_sock %p released\n", 57 printk(KERN_DEBUG "%s timewait_sock %p released\n",
58 tw->tw_prot->name, tw); 58 tw->tw_prot->name, tw);
59#endif 59#endif
60 release_net(twsk_net(tw));
60 kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); 61 kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw);
61 module_put(owner); 62 module_put(owner);
62 } 63 }
@@ -91,7 +92,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
91 92
92 /* Step 2: Remove SK from established hash. */ 93 /* Step 2: Remove SK from established hash. */
93 if (__sk_del_node_init(sk)) 94 if (__sk_del_node_init(sk))
94 sock_prot_inuse_add(sk->sk_prot, -1); 95 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
95 96
96 /* Step 3: Hash TW into TIMEWAIT chain. */ 97 /* Step 3: Hash TW into TIMEWAIT chain. */
97 inet_twsk_add_node(tw, &ehead->twchain); 98 inet_twsk_add_node(tw, &ehead->twchain);
@@ -124,7 +125,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
124 tw->tw_hash = sk->sk_hash; 125 tw->tw_hash = sk->sk_hash;
125 tw->tw_ipv6only = 0; 126 tw->tw_ipv6only = 0;
126 tw->tw_prot = sk->sk_prot_creator; 127 tw->tw_prot = sk->sk_prot_creator;
127 tw->tw_net = sk->sk_net; 128 twsk_net_set(tw, hold_net(sock_net(sk)));
128 atomic_set(&tw->tw_refcnt, 1); 129 atomic_set(&tw->tw_refcnt, 1);
129 inet_twsk_dead_node_init(tw); 130 inet_twsk_dead_node_init(tw);
130 __module_get(tw->tw_prot->owner); 131 __module_get(tw->tw_prot->owner);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index a4506c8cfef0..4813c39b438b 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -80,7 +80,7 @@ int ip_forward(struct sk_buff *skb)
80 if (!xfrm4_route_forward(skb)) 80 if (!xfrm4_route_forward(skb))
81 goto drop; 81 goto drop;
82 82
83 rt = (struct rtable*)skb->dst; 83 rt = skb->rtable;
84 84
85 if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 85 if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
86 goto sr_failed; 86 goto sr_failed;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 3b2e5adca838..cd6ce6ac6358 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -194,7 +194,7 @@ static void ip_expire(unsigned long arg)
194 194
195 spin_lock(&qp->q.lock); 195 spin_lock(&qp->q.lock);
196 196
197 if (qp->q.last_in & COMPLETE) 197 if (qp->q.last_in & INET_FRAG_COMPLETE)
198 goto out; 198 goto out;
199 199
200 ipq_kill(qp); 200 ipq_kill(qp);
@@ -202,10 +202,13 @@ static void ip_expire(unsigned long arg)
202 IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT); 202 IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
203 IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); 203 IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
204 204
205 if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) { 205 if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
206 struct sk_buff *head = qp->q.fragments; 206 struct sk_buff *head = qp->q.fragments;
207 struct net *net;
208
209 net = container_of(qp->q.net, struct net, ipv4.frags);
207 /* Send an ICMP "Fragment Reassembly Timeout" message. */ 210 /* Send an ICMP "Fragment Reassembly Timeout" message. */
208 if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) { 211 if ((head->dev = dev_get_by_index(net, qp->iif)) != NULL) {
209 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); 212 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
210 dev_put(head->dev); 213 dev_put(head->dev);
211 } 214 }
@@ -298,7 +301,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
298 int ihl, end; 301 int ihl, end;
299 int err = -ENOENT; 302 int err = -ENOENT;
300 303
301 if (qp->q.last_in & COMPLETE) 304 if (qp->q.last_in & INET_FRAG_COMPLETE)
302 goto err; 305 goto err;
303 306
304 if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && 307 if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
@@ -324,9 +327,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
324 * or have different end, the segment is corrrupted. 327 * or have different end, the segment is corrrupted.
325 */ 328 */
326 if (end < qp->q.len || 329 if (end < qp->q.len ||
327 ((qp->q.last_in & LAST_IN) && end != qp->q.len)) 330 ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len))
328 goto err; 331 goto err;
329 qp->q.last_in |= LAST_IN; 332 qp->q.last_in |= INET_FRAG_LAST_IN;
330 qp->q.len = end; 333 qp->q.len = end;
331 } else { 334 } else {
332 if (end&7) { 335 if (end&7) {
@@ -336,7 +339,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
336 } 339 }
337 if (end > qp->q.len) { 340 if (end > qp->q.len) {
338 /* Some bits beyond end -> corruption. */ 341 /* Some bits beyond end -> corruption. */
339 if (qp->q.last_in & LAST_IN) 342 if (qp->q.last_in & INET_FRAG_LAST_IN)
340 goto err; 343 goto err;
341 qp->q.len = end; 344 qp->q.len = end;
342 } 345 }
@@ -435,9 +438,10 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
435 qp->q.meat += skb->len; 438 qp->q.meat += skb->len;
436 atomic_add(skb->truesize, &qp->q.net->mem); 439 atomic_add(skb->truesize, &qp->q.net->mem);
437 if (offset == 0) 440 if (offset == 0)
438 qp->q.last_in |= FIRST_IN; 441 qp->q.last_in |= INET_FRAG_FIRST_IN;
439 442
440 if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len) 443 if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
444 qp->q.meat == qp->q.len)
441 return ip_frag_reasm(qp, prev, dev); 445 return ip_frag_reasm(qp, prev, dev);
442 446
443 write_lock(&ip4_frags.lock); 447 write_lock(&ip4_frags.lock);
@@ -553,7 +557,7 @@ out_nomem:
553out_oversize: 557out_oversize:
554 if (net_ratelimit()) 558 if (net_ratelimit())
555 printk(KERN_INFO 559 printk(KERN_INFO
556 "Oversized IP packet from %d.%d.%d.%d.\n", 560 "Oversized IP packet from " NIPQUAD_FMT ".\n",
557 NIPQUAD(qp->saddr)); 561 NIPQUAD(qp->saddr));
558out_fail: 562out_fail:
559 IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); 563 IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
@@ -568,7 +572,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
568 572
569 IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); 573 IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
570 574
571 net = skb->dev ? skb->dev->nd_net : skb->dst->dev->nd_net; 575 net = skb->dev ? dev_net(skb->dev) : dev_net(skb->dst->dev);
572 /* Start by cleaning up the memory. */ 576 /* Start by cleaning up the memory. */
573 if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh) 577 if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
574 ip_evictor(net); 578 ip_evictor(net);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index e7821ba7a9a0..2ada033406de 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -39,6 +39,8 @@
39#include <net/dsfield.h> 39#include <net/dsfield.h>
40#include <net/inet_ecn.h> 40#include <net/inet_ecn.h>
41#include <net/xfrm.h> 41#include <net/xfrm.h>
42#include <net/net_namespace.h>
43#include <net/netns/generic.h>
42 44
43#ifdef CONFIG_IPV6 45#ifdef CONFIG_IPV6
44#include <net/ipv6.h> 46#include <net/ipv6.h>
@@ -122,7 +124,14 @@ static void ipgre_tunnel_setup(struct net_device *dev);
122 124
123static int ipgre_fb_tunnel_init(struct net_device *dev); 125static int ipgre_fb_tunnel_init(struct net_device *dev);
124 126
125static struct net_device *ipgre_fb_tunnel_dev; 127#define HASH_SIZE 16
128
129static int ipgre_net_id;
130struct ipgre_net {
131 struct ip_tunnel *tunnels[4][HASH_SIZE];
132
133 struct net_device *fb_tunnel_dev;
134};
126 135
127/* Tunnel hash table */ 136/* Tunnel hash table */
128 137
@@ -142,39 +151,38 @@ static struct net_device *ipgre_fb_tunnel_dev;
142 will match fallback tunnel. 151 will match fallback tunnel.
143 */ 152 */
144 153
145#define HASH_SIZE 16
146#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) 154#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
147 155
148static struct ip_tunnel *tunnels[4][HASH_SIZE]; 156#define tunnels_r_l tunnels[3]
149 157#define tunnels_r tunnels[2]
150#define tunnels_r_l (tunnels[3]) 158#define tunnels_l tunnels[1]
151#define tunnels_r (tunnels[2]) 159#define tunnels_wc tunnels[0]
152#define tunnels_l (tunnels[1])
153#define tunnels_wc (tunnels[0])
154 160
155static DEFINE_RWLOCK(ipgre_lock); 161static DEFINE_RWLOCK(ipgre_lock);
156 162
157/* Given src, dst and key, find appropriate for input tunnel. */ 163/* Given src, dst and key, find appropriate for input tunnel. */
158 164
159static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be32 key) 165static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
166 __be32 remote, __be32 local, __be32 key)
160{ 167{
161 unsigned h0 = HASH(remote); 168 unsigned h0 = HASH(remote);
162 unsigned h1 = HASH(key); 169 unsigned h1 = HASH(key);
163 struct ip_tunnel *t; 170 struct ip_tunnel *t;
171 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
164 172
165 for (t = tunnels_r_l[h0^h1]; t; t = t->next) { 173 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
166 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { 174 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
167 if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) 175 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
168 return t; 176 return t;
169 } 177 }
170 } 178 }
171 for (t = tunnels_r[h0^h1]; t; t = t->next) { 179 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
172 if (remote == t->parms.iph.daddr) { 180 if (remote == t->parms.iph.daddr) {
173 if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) 181 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
174 return t; 182 return t;
175 } 183 }
176 } 184 }
177 for (t = tunnels_l[h1]; t; t = t->next) { 185 for (t = ign->tunnels_l[h1]; t; t = t->next) {
178 if (local == t->parms.iph.saddr || 186 if (local == t->parms.iph.saddr ||
179 (local == t->parms.iph.daddr && 187 (local == t->parms.iph.daddr &&
180 ipv4_is_multicast(local))) { 188 ipv4_is_multicast(local))) {
@@ -182,17 +190,18 @@ static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be3
182 return t; 190 return t;
183 } 191 }
184 } 192 }
185 for (t = tunnels_wc[h1]; t; t = t->next) { 193 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
186 if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) 194 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
187 return t; 195 return t;
188 } 196 }
189 197
190 if (ipgre_fb_tunnel_dev->flags&IFF_UP) 198 if (ign->fb_tunnel_dev->flags&IFF_UP)
191 return netdev_priv(ipgre_fb_tunnel_dev); 199 return netdev_priv(ign->fb_tunnel_dev);
192 return NULL; 200 return NULL;
193} 201}
194 202
195static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms) 203static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
204 struct ip_tunnel_parm *parms)
196{ 205{
197 __be32 remote = parms->iph.daddr; 206 __be32 remote = parms->iph.daddr;
198 __be32 local = parms->iph.saddr; 207 __be32 local = parms->iph.saddr;
@@ -207,17 +216,18 @@ static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
207 h ^= HASH(remote); 216 h ^= HASH(remote);
208 } 217 }
209 218
210 return &tunnels[prio][h]; 219 return &ign->tunnels[prio][h];
211} 220}
212 221
213static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t) 222static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
223 struct ip_tunnel *t)
214{ 224{
215 return __ipgre_bucket(&t->parms); 225 return __ipgre_bucket(ign, &t->parms);
216} 226}
217 227
218static void ipgre_tunnel_link(struct ip_tunnel *t) 228static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
219{ 229{
220 struct ip_tunnel **tp = ipgre_bucket(t); 230 struct ip_tunnel **tp = ipgre_bucket(ign, t);
221 231
222 t->next = *tp; 232 t->next = *tp;
223 write_lock_bh(&ipgre_lock); 233 write_lock_bh(&ipgre_lock);
@@ -225,11 +235,11 @@ static void ipgre_tunnel_link(struct ip_tunnel *t)
225 write_unlock_bh(&ipgre_lock); 235 write_unlock_bh(&ipgre_lock);
226} 236}
227 237
228static void ipgre_tunnel_unlink(struct ip_tunnel *t) 238static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
229{ 239{
230 struct ip_tunnel **tp; 240 struct ip_tunnel **tp;
231 241
232 for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) { 242 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
233 if (t == *tp) { 243 if (t == *tp) {
234 write_lock_bh(&ipgre_lock); 244 write_lock_bh(&ipgre_lock);
235 *tp = t->next; 245 *tp = t->next;
@@ -239,7 +249,8 @@ static void ipgre_tunnel_unlink(struct ip_tunnel *t)
239 } 249 }
240} 250}
241 251
242static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create) 252static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
253 struct ip_tunnel_parm *parms, int create)
243{ 254{
244 __be32 remote = parms->iph.daddr; 255 __be32 remote = parms->iph.daddr;
245 __be32 local = parms->iph.saddr; 256 __be32 local = parms->iph.saddr;
@@ -247,8 +258,9 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
247 struct ip_tunnel *t, **tp, *nt; 258 struct ip_tunnel *t, **tp, *nt;
248 struct net_device *dev; 259 struct net_device *dev;
249 char name[IFNAMSIZ]; 260 char name[IFNAMSIZ];
261 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
250 262
251 for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) { 263 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
252 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { 264 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
253 if (key == t->parms.i_key) 265 if (key == t->parms.i_key)
254 return t; 266 return t;
@@ -266,6 +278,8 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
266 if (!dev) 278 if (!dev)
267 return NULL; 279 return NULL;
268 280
281 dev_net_set(dev, net);
282
269 if (strchr(name, '%')) { 283 if (strchr(name, '%')) {
270 if (dev_alloc_name(dev, name) < 0) 284 if (dev_alloc_name(dev, name) < 0)
271 goto failed_free; 285 goto failed_free;
@@ -279,7 +293,7 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
279 goto failed_free; 293 goto failed_free;
280 294
281 dev_hold(dev); 295 dev_hold(dev);
282 ipgre_tunnel_link(nt); 296 ipgre_tunnel_link(ign, nt);
283 return nt; 297 return nt;
284 298
285failed_free: 299failed_free:
@@ -289,7 +303,10 @@ failed_free:
289 303
290static void ipgre_tunnel_uninit(struct net_device *dev) 304static void ipgre_tunnel_uninit(struct net_device *dev)
291{ 305{
292 ipgre_tunnel_unlink(netdev_priv(dev)); 306 struct net *net = dev_net(dev);
307 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
308
309 ipgre_tunnel_unlink(ign, netdev_priv(dev));
293 dev_put(dev); 310 dev_put(dev);
294} 311}
295 312
@@ -363,7 +380,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
363 } 380 }
364 381
365 read_lock(&ipgre_lock); 382 read_lock(&ipgre_lock);
366 t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0); 383 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
384 (flags&GRE_KEY) ?
385 *(((__be32*)p) + (grehlen>>2) - 1) : 0);
367 if (t == NULL || t->parms.iph.daddr == 0 || 386 if (t == NULL || t->parms.iph.daddr == 0 ||
368 ipv4_is_multicast(t->parms.iph.daddr)) 387 ipv4_is_multicast(t->parms.iph.daddr))
369 goto out; 388 goto out;
@@ -476,7 +495,7 @@ out:
476 fl.fl4_dst = eiph->saddr; 495 fl.fl4_dst = eiph->saddr;
477 fl.fl4_tos = RT_TOS(eiph->tos); 496 fl.fl4_tos = RT_TOS(eiph->tos);
478 fl.proto = IPPROTO_GRE; 497 fl.proto = IPPROTO_GRE;
479 if (ip_route_output_key(&init_net, &rt, &fl)) { 498 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) {
480 kfree_skb(skb2); 499 kfree_skb(skb2);
481 return; 500 return;
482 } 501 }
@@ -489,7 +508,7 @@ out:
489 fl.fl4_dst = eiph->daddr; 508 fl.fl4_dst = eiph->daddr;
490 fl.fl4_src = eiph->saddr; 509 fl.fl4_src = eiph->saddr;
491 fl.fl4_tos = eiph->tos; 510 fl.fl4_tos = eiph->tos;
492 if (ip_route_output_key(&init_net, &rt, &fl) || 511 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
493 rt->u.dst.dev->type != ARPHRD_IPGRE) { 512 rt->u.dst.dev->type != ARPHRD_IPGRE) {
494 ip_rt_put(rt); 513 ip_rt_put(rt);
495 kfree_skb(skb2); 514 kfree_skb(skb2);
@@ -596,7 +615,8 @@ static int ipgre_rcv(struct sk_buff *skb)
596 } 615 }
597 616
598 read_lock(&ipgre_lock); 617 read_lock(&ipgre_lock);
599 if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) { 618 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
619 iph->saddr, iph->daddr, key)) != NULL) {
600 secpath_reset(skb); 620 secpath_reset(skb);
601 621
602 skb->protocol = *(__be16*)(h + 2); 622 skb->protocol = *(__be16*)(h + 2);
@@ -619,7 +639,7 @@ static int ipgre_rcv(struct sk_buff *skb)
619#ifdef CONFIG_NET_IPGRE_BROADCAST 639#ifdef CONFIG_NET_IPGRE_BROADCAST
620 if (ipv4_is_multicast(iph->daddr)) { 640 if (ipv4_is_multicast(iph->daddr)) {
621 /* Looped back packet, drop it! */ 641 /* Looped back packet, drop it! */
622 if (((struct rtable*)skb->dst)->fl.iif == 0) 642 if (skb->rtable->fl.iif == 0)
623 goto drop; 643 goto drop;
624 tunnel->stat.multicast++; 644 tunnel->stat.multicast++;
625 skb->pkt_type = PACKET_BROADCAST; 645 skb->pkt_type = PACKET_BROADCAST;
@@ -699,7 +719,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
699 } 719 }
700 720
701 if (skb->protocol == htons(ETH_P_IP)) { 721 if (skb->protocol == htons(ETH_P_IP)) {
702 rt = (struct rtable*)skb->dst; 722 rt = skb->rtable;
703 if ((dst = rt->rt_gateway) == 0) 723 if ((dst = rt->rt_gateway) == 0)
704 goto tx_error_icmp; 724 goto tx_error_icmp;
705 } 725 }
@@ -744,7 +764,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
744 .saddr = tiph->saddr, 764 .saddr = tiph->saddr,
745 .tos = RT_TOS(tos) } }, 765 .tos = RT_TOS(tos) } },
746 .proto = IPPROTO_GRE }; 766 .proto = IPPROTO_GRE };
747 if (ip_route_output_key(&init_net, &rt, &fl)) { 767 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
748 tunnel->stat.tx_carrier_errors++; 768 tunnel->stat.tx_carrier_errors++;
749 goto tx_error; 769 goto tx_error;
750 } 770 }
@@ -917,7 +937,7 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev)
917 .tos = RT_TOS(iph->tos) } }, 937 .tos = RT_TOS(iph->tos) } },
918 .proto = IPPROTO_GRE }; 938 .proto = IPPROTO_GRE };
919 struct rtable *rt; 939 struct rtable *rt;
920 if (!ip_route_output_key(&init_net, &rt, &fl)) { 940 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
921 tdev = rt->u.dst.dev; 941 tdev = rt->u.dst.dev;
922 ip_rt_put(rt); 942 ip_rt_put(rt);
923 } 943 }
@@ -925,7 +945,7 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev)
925 } 945 }
926 946
927 if (!tdev && tunnel->parms.link) 947 if (!tdev && tunnel->parms.link)
928 tdev = __dev_get_by_index(&init_net, tunnel->parms.link); 948 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
929 949
930 if (tdev) { 950 if (tdev) {
931 hlen = tdev->hard_header_len; 951 hlen = tdev->hard_header_len;
@@ -954,16 +974,18 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
954 int err = 0; 974 int err = 0;
955 struct ip_tunnel_parm p; 975 struct ip_tunnel_parm p;
956 struct ip_tunnel *t; 976 struct ip_tunnel *t;
977 struct net *net = dev_net(dev);
978 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
957 979
958 switch (cmd) { 980 switch (cmd) {
959 case SIOCGETTUNNEL: 981 case SIOCGETTUNNEL:
960 t = NULL; 982 t = NULL;
961 if (dev == ipgre_fb_tunnel_dev) { 983 if (dev == ign->fb_tunnel_dev) {
962 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { 984 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
963 err = -EFAULT; 985 err = -EFAULT;
964 break; 986 break;
965 } 987 }
966 t = ipgre_tunnel_locate(&p, 0); 988 t = ipgre_tunnel_locate(net, &p, 0);
967 } 989 }
968 if (t == NULL) 990 if (t == NULL)
969 t = netdev_priv(dev); 991 t = netdev_priv(dev);
@@ -995,9 +1017,9 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
995 if (!(p.o_flags&GRE_KEY)) 1017 if (!(p.o_flags&GRE_KEY))
996 p.o_key = 0; 1018 p.o_key = 0;
997 1019
998 t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL); 1020 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
999 1021
1000 if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { 1022 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1001 if (t != NULL) { 1023 if (t != NULL) {
1002 if (t->dev != dev) { 1024 if (t->dev != dev) {
1003 err = -EEXIST; 1025 err = -EEXIST;
@@ -1017,14 +1039,14 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1017 err = -EINVAL; 1039 err = -EINVAL;
1018 break; 1040 break;
1019 } 1041 }
1020 ipgre_tunnel_unlink(t); 1042 ipgre_tunnel_unlink(ign, t);
1021 t->parms.iph.saddr = p.iph.saddr; 1043 t->parms.iph.saddr = p.iph.saddr;
1022 t->parms.iph.daddr = p.iph.daddr; 1044 t->parms.iph.daddr = p.iph.daddr;
1023 t->parms.i_key = p.i_key; 1045 t->parms.i_key = p.i_key;
1024 t->parms.o_key = p.o_key; 1046 t->parms.o_key = p.o_key;
1025 memcpy(dev->dev_addr, &p.iph.saddr, 4); 1047 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1026 memcpy(dev->broadcast, &p.iph.daddr, 4); 1048 memcpy(dev->broadcast, &p.iph.daddr, 4);
1027 ipgre_tunnel_link(t); 1049 ipgre_tunnel_link(ign, t);
1028 netdev_state_change(dev); 1050 netdev_state_change(dev);
1029 } 1051 }
1030 } 1052 }
@@ -1052,15 +1074,15 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1052 if (!capable(CAP_NET_ADMIN)) 1074 if (!capable(CAP_NET_ADMIN))
1053 goto done; 1075 goto done;
1054 1076
1055 if (dev == ipgre_fb_tunnel_dev) { 1077 if (dev == ign->fb_tunnel_dev) {
1056 err = -EFAULT; 1078 err = -EFAULT;
1057 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 1079 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1058 goto done; 1080 goto done;
1059 err = -ENOENT; 1081 err = -ENOENT;
1060 if ((t = ipgre_tunnel_locate(&p, 0)) == NULL) 1082 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1061 goto done; 1083 goto done;
1062 err = -EPERM; 1084 err = -EPERM;
1063 if (t == netdev_priv(ipgre_fb_tunnel_dev)) 1085 if (t == netdev_priv(ign->fb_tunnel_dev))
1064 goto done; 1086 goto done;
1065 dev = t->dev; 1087 dev = t->dev;
1066 } 1088 }
@@ -1173,7 +1195,7 @@ static int ipgre_open(struct net_device *dev)
1173 .tos = RT_TOS(t->parms.iph.tos) } }, 1195 .tos = RT_TOS(t->parms.iph.tos) } },
1174 .proto = IPPROTO_GRE }; 1196 .proto = IPPROTO_GRE };
1175 struct rtable *rt; 1197 struct rtable *rt;
1176 if (ip_route_output_key(&init_net, &rt, &fl)) 1198 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1177 return -EADDRNOTAVAIL; 1199 return -EADDRNOTAVAIL;
1178 dev = rt->u.dst.dev; 1200 dev = rt->u.dst.dev;
1179 ip_rt_put(rt); 1201 ip_rt_put(rt);
@@ -1190,7 +1212,7 @@ static int ipgre_close(struct net_device *dev)
1190 struct ip_tunnel *t = netdev_priv(dev); 1212 struct ip_tunnel *t = netdev_priv(dev);
1191 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { 1213 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1192 struct in_device *in_dev; 1214 struct in_device *in_dev;
1193 in_dev = inetdev_by_index(dev->nd_net, t->mlink); 1215 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1194 if (in_dev) { 1216 if (in_dev) {
1195 ip_mc_dec_group(in_dev, t->parms.iph.daddr); 1217 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1196 in_dev_put(in_dev); 1218 in_dev_put(in_dev);
@@ -1216,6 +1238,7 @@ static void ipgre_tunnel_setup(struct net_device *dev)
1216 dev->flags = IFF_NOARP; 1238 dev->flags = IFF_NOARP;
1217 dev->iflink = 0; 1239 dev->iflink = 0;
1218 dev->addr_len = 4; 1240 dev->addr_len = 4;
1241 dev->features |= NETIF_F_NETNS_LOCAL;
1219} 1242}
1220 1243
1221static int ipgre_tunnel_init(struct net_device *dev) 1244static int ipgre_tunnel_init(struct net_device *dev)
@@ -1251,10 +1274,11 @@ static int ipgre_tunnel_init(struct net_device *dev)
1251 return 0; 1274 return 0;
1252} 1275}
1253 1276
1254static int __init ipgre_fb_tunnel_init(struct net_device *dev) 1277static int ipgre_fb_tunnel_init(struct net_device *dev)
1255{ 1278{
1256 struct ip_tunnel *tunnel = netdev_priv(dev); 1279 struct ip_tunnel *tunnel = netdev_priv(dev);
1257 struct iphdr *iph = &tunnel->parms.iph; 1280 struct iphdr *iph = &tunnel->parms.iph;
1281 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1258 1282
1259 tunnel->dev = dev; 1283 tunnel->dev = dev;
1260 strcpy(tunnel->parms.name, dev->name); 1284 strcpy(tunnel->parms.name, dev->name);
@@ -1265,7 +1289,7 @@ static int __init ipgre_fb_tunnel_init(struct net_device *dev)
1265 tunnel->hlen = sizeof(struct iphdr) + 4; 1289 tunnel->hlen = sizeof(struct iphdr) + 4;
1266 1290
1267 dev_hold(dev); 1291 dev_hold(dev);
1268 tunnels_wc[0] = tunnel; 1292 ign->tunnels_wc[0] = tunnel;
1269 return 0; 1293 return 0;
1270} 1294}
1271 1295
@@ -1273,56 +1297,98 @@ static int __init ipgre_fb_tunnel_init(struct net_device *dev)
1273static struct net_protocol ipgre_protocol = { 1297static struct net_protocol ipgre_protocol = {
1274 .handler = ipgre_rcv, 1298 .handler = ipgre_rcv,
1275 .err_handler = ipgre_err, 1299 .err_handler = ipgre_err,
1300 .netns_ok = 1,
1276}; 1301};
1277 1302
1303static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1304{
1305 int prio;
1278 1306
1279/* 1307 for (prio = 0; prio < 4; prio++) {
1280 * And now the modules code and kernel interface. 1308 int h;
1281 */ 1309 for (h = 0; h < HASH_SIZE; h++) {
1310 struct ip_tunnel *t;
1311 while ((t = ign->tunnels[prio][h]) != NULL)
1312 unregister_netdevice(t->dev);
1313 }
1314 }
1315}
1282 1316
1283static int __init ipgre_init(void) 1317static int ipgre_init_net(struct net *net)
1284{ 1318{
1285 int err; 1319 int err;
1320 struct ipgre_net *ign;
1286 1321
1287 printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); 1322 err = -ENOMEM;
1323 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
1324 if (ign == NULL)
1325 goto err_alloc;
1288 1326
1289 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) { 1327 err = net_assign_generic(net, ipgre_net_id, ign);
1290 printk(KERN_INFO "ipgre init: can't add protocol\n"); 1328 if (err < 0)
1291 return -EAGAIN; 1329 goto err_assign;
1292 }
1293 1330
1294 ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", 1331 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1295 ipgre_tunnel_setup); 1332 ipgre_tunnel_setup);
1296 if (!ipgre_fb_tunnel_dev) { 1333 if (!ign->fb_tunnel_dev) {
1297 err = -ENOMEM; 1334 err = -ENOMEM;
1298 goto err1; 1335 goto err_alloc_dev;
1299 } 1336 }
1300 1337
1301 ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init; 1338 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1339 dev_net_set(ign->fb_tunnel_dev, net);
1302 1340
1303 if ((err = register_netdev(ipgre_fb_tunnel_dev))) 1341 if ((err = register_netdev(ign->fb_tunnel_dev)))
1304 goto err2; 1342 goto err_reg_dev;
1305out: 1343
1344 return 0;
1345
1346err_reg_dev:
1347 free_netdev(ign->fb_tunnel_dev);
1348err_alloc_dev:
1349 /* nothing */
1350err_assign:
1351 kfree(ign);
1352err_alloc:
1306 return err; 1353 return err;
1307err2:
1308 free_netdev(ipgre_fb_tunnel_dev);
1309err1:
1310 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1311 goto out;
1312} 1354}
1313 1355
1314static void __exit ipgre_destroy_tunnels(void) 1356static void ipgre_exit_net(struct net *net)
1315{ 1357{
1316 int prio; 1358 struct ipgre_net *ign;
1317 1359
1318 for (prio = 0; prio < 4; prio++) { 1360 ign = net_generic(net, ipgre_net_id);
1319 int h; 1361 rtnl_lock();
1320 for (h = 0; h < HASH_SIZE; h++) { 1362 ipgre_destroy_tunnels(ign);
1321 struct ip_tunnel *t; 1363 rtnl_unlock();
1322 while ((t = tunnels[prio][h]) != NULL) 1364 kfree(ign);
1323 unregister_netdevice(t->dev); 1365}
1324 } 1366
1367static struct pernet_operations ipgre_net_ops = {
1368 .init = ipgre_init_net,
1369 .exit = ipgre_exit_net,
1370};
1371
1372/*
1373 * And now the modules code and kernel interface.
1374 */
1375
1376static int __init ipgre_init(void)
1377{
1378 int err;
1379
1380 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1381
1382 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1383 printk(KERN_INFO "ipgre init: can't add protocol\n");
1384 return -EAGAIN;
1325 } 1385 }
1386
1387 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1388 if (err < 0)
1389 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1390
1391 return err;
1326} 1392}
1327 1393
1328static void __exit ipgre_fini(void) 1394static void __exit ipgre_fini(void)
@@ -1330,9 +1396,7 @@ static void __exit ipgre_fini(void)
1330 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) 1396 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1331 printk(KERN_INFO "ipgre close: can't remove protocol\n"); 1397 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1332 1398
1333 rtnl_lock(); 1399 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1334 ipgre_destroy_tunnels();
1335 rtnl_unlock();
1336} 1400}
1337 1401
1338module_init(ipgre_init); 1402module_init(ipgre_init);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 65631391d479..7b4bad6d572f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -160,6 +160,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
160 struct ip_ra_chain *ra; 160 struct ip_ra_chain *ra;
161 u8 protocol = ip_hdr(skb)->protocol; 161 u8 protocol = ip_hdr(skb)->protocol;
162 struct sock *last = NULL; 162 struct sock *last = NULL;
163 struct net_device *dev = skb->dev;
163 164
164 read_lock(&ip_ra_lock); 165 read_lock(&ip_ra_lock);
165 for (ra = ip_ra_chain; ra; ra = ra->next) { 166 for (ra = ip_ra_chain; ra; ra = ra->next) {
@@ -170,7 +171,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
170 */ 171 */
171 if (sk && inet_sk(sk)->num == protocol && 172 if (sk && inet_sk(sk)->num == protocol &&
172 (!sk->sk_bound_dev_if || 173 (!sk->sk_bound_dev_if ||
173 sk->sk_bound_dev_if == skb->dev->ifindex)) { 174 sk->sk_bound_dev_if == dev->ifindex) &&
175 sock_net(sk) == dev_net(dev)) {
174 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 176 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
175 if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { 177 if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) {
176 read_unlock(&ip_ra_lock); 178 read_unlock(&ip_ra_lock);
@@ -197,6 +199,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
197 199
198static int ip_local_deliver_finish(struct sk_buff *skb) 200static int ip_local_deliver_finish(struct sk_buff *skb)
199{ 201{
202 struct net *net = dev_net(skb->dev);
203
200 __skb_pull(skb, ip_hdrlen(skb)); 204 __skb_pull(skb, ip_hdrlen(skb));
201 205
202 /* Point into the IP datagram, just past the header. */ 206 /* Point into the IP datagram, just past the header. */
@@ -212,7 +216,8 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
212 raw = raw_local_deliver(skb, protocol); 216 raw = raw_local_deliver(skb, protocol);
213 217
214 hash = protocol & (MAX_INET_PROTOS - 1); 218 hash = protocol & (MAX_INET_PROTOS - 1);
215 if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { 219 ipprot = rcu_dereference(inet_protos[hash]);
220 if (ipprot != NULL && (net == &init_net || ipprot->netns_ok)) {
216 int ret; 221 int ret;
217 222
218 if (!ipprot->no_policy) { 223 if (!ipprot->no_policy) {
@@ -283,13 +288,14 @@ static inline int ip_rcv_options(struct sk_buff *skb)
283 } 288 }
284 289
285 iph = ip_hdr(skb); 290 iph = ip_hdr(skb);
291 opt = &(IPCB(skb)->opt);
292 opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
286 293
287 if (ip_options_compile(NULL, skb)) { 294 if (ip_options_compile(dev_net(dev), opt, skb)) {
288 IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); 295 IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
289 goto drop; 296 goto drop;
290 } 297 }
291 298
292 opt = &(IPCB(skb)->opt);
293 if (unlikely(opt->srr)) { 299 if (unlikely(opt->srr)) {
294 struct in_device *in_dev = in_dev_get(dev); 300 struct in_device *in_dev = in_dev_get(dev);
295 if (in_dev) { 301 if (in_dev) {
@@ -297,7 +303,7 @@ static inline int ip_rcv_options(struct sk_buff *skb)
297 if (IN_DEV_LOG_MARTIANS(in_dev) && 303 if (IN_DEV_LOG_MARTIANS(in_dev) &&
298 net_ratelimit()) 304 net_ratelimit())
299 printk(KERN_INFO "source route option " 305 printk(KERN_INFO "source route option "
300 "%u.%u.%u.%u -> %u.%u.%u.%u\n", 306 NIPQUAD_FMT " -> " NIPQUAD_FMT "\n",
301 NIPQUAD(iph->saddr), 307 NIPQUAD(iph->saddr),
302 NIPQUAD(iph->daddr)); 308 NIPQUAD(iph->daddr));
303 in_dev_put(in_dev); 309 in_dev_put(in_dev);
@@ -351,7 +357,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
351 if (iph->ihl > 5 && ip_rcv_options(skb)) 357 if (iph->ihl > 5 && ip_rcv_options(skb))
352 goto drop; 358 goto drop;
353 359
354 rt = (struct rtable*)skb->dst; 360 rt = skb->rtable;
355 if (rt->rt_type == RTN_MULTICAST) 361 if (rt->rt_type == RTN_MULTICAST)
356 IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS); 362 IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS);
357 else if (rt->rt_type == RTN_BROADCAST) 363 else if (rt->rt_type == RTN_BROADCAST)
@@ -372,9 +378,6 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
372 struct iphdr *iph; 378 struct iphdr *iph;
373 u32 len; 379 u32 len;
374 380
375 if (dev->nd_net != &init_net)
376 goto drop;
377
378 /* When the interface is in promisc. mode, drop all the crap 381 /* When the interface is in promisc. mode, drop all the crap
379 * that it receives, do not try to analyse it. 382 * that it receives, do not try to analyse it.
380 */ 383 */
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 4d315158fd3c..d107543d3f81 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -45,7 +45,6 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
45 memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options)); 45 memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options));
46 memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen); 46 memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
47 opt = &(IPCB(skb)->opt); 47 opt = &(IPCB(skb)->opt);
48 opt->is_data = 0;
49 48
50 if (opt->srr) 49 if (opt->srr)
51 memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4); 50 memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4);
@@ -95,8 +94,6 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
95 94
96 memset(dopt, 0, sizeof(struct ip_options)); 95 memset(dopt, 0, sizeof(struct ip_options));
97 96
98 dopt->is_data = 1;
99
100 sopt = &(IPCB(skb)->opt); 97 sopt = &(IPCB(skb)->opt);
101 98
102 if (sopt->optlen == 0) { 99 if (sopt->optlen == 0) {
@@ -107,10 +104,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
107 sptr = skb_network_header(skb); 104 sptr = skb_network_header(skb);
108 dptr = dopt->__data; 105 dptr = dopt->__data;
109 106
110 if (skb->dst) 107 daddr = skb->rtable->rt_spec_dst;
111 daddr = ((struct rtable*)skb->dst)->rt_spec_dst;
112 else
113 daddr = ip_hdr(skb)->daddr;
114 108
115 if (sopt->rr) { 109 if (sopt->rr) {
116 optlen = sptr[sopt->rr+1]; 110 optlen = sptr[sopt->rr+1];
@@ -151,7 +145,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
151 __be32 addr; 145 __be32 addr;
152 146
153 memcpy(&addr, sptr+soffset-1, 4); 147 memcpy(&addr, sptr+soffset-1, 4);
154 if (inet_addr_type(&init_net, addr) != RTN_LOCAL) { 148 if (inet_addr_type(dev_net(skb->dst->dev), addr) != RTN_LOCAL) {
155 dopt->ts_needtime = 1; 149 dopt->ts_needtime = 1;
156 soffset += 8; 150 soffset += 8;
157 } 151 }
@@ -254,26 +248,22 @@ void ip_options_fragment(struct sk_buff * skb)
254 * If opt == NULL, then skb->data should point to IP header. 248 * If opt == NULL, then skb->data should point to IP header.
255 */ 249 */
256 250
257int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) 251int ip_options_compile(struct net *net,
252 struct ip_options * opt, struct sk_buff * skb)
258{ 253{
259 int l; 254 int l;
260 unsigned char * iph; 255 unsigned char * iph;
261 unsigned char * optptr; 256 unsigned char * optptr;
262 int optlen; 257 int optlen;
263 unsigned char * pp_ptr = NULL; 258 unsigned char * pp_ptr = NULL;
264 struct rtable *rt = skb ? (struct rtable*)skb->dst : NULL; 259 struct rtable *rt = NULL;
265 260
266 if (!opt) { 261 if (skb != NULL) {
267 opt = &(IPCB(skb)->opt); 262 rt = skb->rtable;
268 iph = skb_network_header(skb); 263 optptr = (unsigned char *)&(ip_hdr(skb)[1]);
269 opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr); 264 } else
270 optptr = iph + sizeof(struct iphdr); 265 optptr = opt->__data;
271 opt->is_data = 0; 266 iph = optptr - sizeof(struct iphdr);
272 } else {
273 optptr = opt->is_data ? opt->__data :
274 (unsigned char *)&(ip_hdr(skb)[1]);
275 iph = optptr - sizeof(struct iphdr);
276 }
277 267
278 for (l = opt->optlen; l > 0; ) { 268 for (l = opt->optlen; l > 0; ) {
279 switch (*optptr) { 269 switch (*optptr) {
@@ -400,7 +390,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
400 { 390 {
401 __be32 addr; 391 __be32 addr;
402 memcpy(&addr, &optptr[optptr[2]-1], 4); 392 memcpy(&addr, &optptr[optptr[2]-1], 4);
403 if (inet_addr_type(&init_net, addr) == RTN_UNICAST) 393 if (inet_addr_type(net, addr) == RTN_UNICAST)
404 break; 394 break;
405 if (skb) 395 if (skb)
406 timeptr = (__be32*)&optptr[optptr[2]+3]; 396 timeptr = (__be32*)&optptr[optptr[2]+3];
@@ -517,14 +507,13 @@ static struct ip_options *ip_options_get_alloc(const int optlen)
517 GFP_KERNEL); 507 GFP_KERNEL);
518} 508}
519 509
520static int ip_options_get_finish(struct ip_options **optp, 510static int ip_options_get_finish(struct net *net, struct ip_options **optp,
521 struct ip_options *opt, int optlen) 511 struct ip_options *opt, int optlen)
522{ 512{
523 while (optlen & 3) 513 while (optlen & 3)
524 opt->__data[optlen++] = IPOPT_END; 514 opt->__data[optlen++] = IPOPT_END;
525 opt->optlen = optlen; 515 opt->optlen = optlen;
526 opt->is_data = 1; 516 if (optlen && ip_options_compile(net, opt, NULL)) {
527 if (optlen && ip_options_compile(opt, NULL)) {
528 kfree(opt); 517 kfree(opt);
529 return -EINVAL; 518 return -EINVAL;
530 } 519 }
@@ -533,7 +522,8 @@ static int ip_options_get_finish(struct ip_options **optp,
533 return 0; 522 return 0;
534} 523}
535 524
536int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *data, int optlen) 525int ip_options_get_from_user(struct net *net, struct ip_options **optp,
526 unsigned char __user *data, int optlen)
537{ 527{
538 struct ip_options *opt = ip_options_get_alloc(optlen); 528 struct ip_options *opt = ip_options_get_alloc(optlen);
539 529
@@ -543,10 +533,11 @@ int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *dat
543 kfree(opt); 533 kfree(opt);
544 return -EFAULT; 534 return -EFAULT;
545 } 535 }
546 return ip_options_get_finish(optp, opt, optlen); 536 return ip_options_get_finish(net, optp, opt, optlen);
547} 537}
548 538
549int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen) 539int ip_options_get(struct net *net, struct ip_options **optp,
540 unsigned char *data, int optlen)
550{ 541{
551 struct ip_options *opt = ip_options_get_alloc(optlen); 542 struct ip_options *opt = ip_options_get_alloc(optlen);
552 543
@@ -554,14 +545,14 @@ int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen)
554 return -ENOMEM; 545 return -ENOMEM;
555 if (optlen) 546 if (optlen)
556 memcpy(opt->__data, data, optlen); 547 memcpy(opt->__data, data, optlen);
557 return ip_options_get_finish(optp, opt, optlen); 548 return ip_options_get_finish(net, optp, opt, optlen);
558} 549}
559 550
560void ip_forward_options(struct sk_buff *skb) 551void ip_forward_options(struct sk_buff *skb)
561{ 552{
562 struct ip_options * opt = &(IPCB(skb)->opt); 553 struct ip_options * opt = &(IPCB(skb)->opt);
563 unsigned char * optptr; 554 unsigned char * optptr;
564 struct rtable *rt = (struct rtable*)skb->dst; 555 struct rtable *rt = skb->rtable;
565 unsigned char *raw = skb_network_header(skb); 556 unsigned char *raw = skb_network_header(skb);
566 557
567 if (opt->rr_needaddr) { 558 if (opt->rr_needaddr) {
@@ -609,7 +600,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
609 __be32 nexthop; 600 __be32 nexthop;
610 struct iphdr *iph = ip_hdr(skb); 601 struct iphdr *iph = ip_hdr(skb);
611 unsigned char *optptr = skb_network_header(skb) + opt->srr; 602 unsigned char *optptr = skb_network_header(skb) + opt->srr;
612 struct rtable *rt = (struct rtable*)skb->dst; 603 struct rtable *rt = skb->rtable;
613 struct rtable *rt2; 604 struct rtable *rt2;
614 int err; 605 int err;
615 606
@@ -634,13 +625,13 @@ int ip_options_rcv_srr(struct sk_buff *skb)
634 } 625 }
635 memcpy(&nexthop, &optptr[srrptr-1], 4); 626 memcpy(&nexthop, &optptr[srrptr-1], 4);
636 627
637 rt = (struct rtable*)skb->dst; 628 rt = skb->rtable;
638 skb->dst = NULL; 629 skb->rtable = NULL;
639 err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); 630 err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
640 rt2 = (struct rtable*)skb->dst; 631 rt2 = skb->rtable;
641 if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { 632 if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
642 ip_rt_put(rt2); 633 ip_rt_put(rt2);
643 skb->dst = &rt->u.dst; 634 skb->rtable = rt;
644 return -EINVAL; 635 return -EINVAL;
645 } 636 }
646 ip_rt_put(rt); 637 ip_rt_put(rt);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 341779e685d9..08349267ceb4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -142,7 +142,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
142 __be32 saddr, __be32 daddr, struct ip_options *opt) 142 __be32 saddr, __be32 daddr, struct ip_options *opt)
143{ 143{
144 struct inet_sock *inet = inet_sk(sk); 144 struct inet_sock *inet = inet_sk(sk);
145 struct rtable *rt = (struct rtable *)skb->dst; 145 struct rtable *rt = skb->rtable;
146 struct iphdr *iph; 146 struct iphdr *iph;
147 147
148 /* Build the IP header. */ 148 /* Build the IP header. */
@@ -240,7 +240,7 @@ static int ip_finish_output(struct sk_buff *skb)
240int ip_mc_output(struct sk_buff *skb) 240int ip_mc_output(struct sk_buff *skb)
241{ 241{
242 struct sock *sk = skb->sk; 242 struct sock *sk = skb->sk;
243 struct rtable *rt = (struct rtable*)skb->dst; 243 struct rtable *rt = skb->rtable;
244 struct net_device *dev = rt->u.dst.dev; 244 struct net_device *dev = rt->u.dst.dev;
245 245
246 /* 246 /*
@@ -321,7 +321,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
321 /* Skip all of this if the packet is already routed, 321 /* Skip all of this if the packet is already routed,
322 * f.e. by something like SCTP. 322 * f.e. by something like SCTP.
323 */ 323 */
324 rt = (struct rtable *) skb->dst; 324 rt = skb->rtable;
325 if (rt != NULL) 325 if (rt != NULL)
326 goto packet_routed; 326 goto packet_routed;
327 327
@@ -351,7 +351,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
351 * itself out. 351 * itself out.
352 */ 352 */
353 security_sk_classify_flow(sk, &fl); 353 security_sk_classify_flow(sk, &fl);
354 if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) 354 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
355 goto no_route; 355 goto no_route;
356 } 356 }
357 sk_setup_caps(sk, &rt->u.dst); 357 sk_setup_caps(sk, &rt->u.dst);
@@ -441,7 +441,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
441 unsigned int mtu, hlen, left, len, ll_rs, pad; 441 unsigned int mtu, hlen, left, len, ll_rs, pad;
442 int offset; 442 int offset;
443 __be16 not_last_frag; 443 __be16 not_last_frag;
444 struct rtable *rt = (struct rtable*)skb->dst; 444 struct rtable *rt = skb->rtable;
445 int err = 0; 445 int err = 0;
446 446
447 dev = rt->u.dst.dev; 447 dev = rt->u.dst.dev;
@@ -825,7 +825,7 @@ int ip_append_data(struct sock *sk,
825 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? 825 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
826 rt->u.dst.dev->mtu : 826 rt->u.dst.dev->mtu :
827 dst_mtu(rt->u.dst.path); 827 dst_mtu(rt->u.dst.path);
828 inet->cork.rt = rt; 828 inet->cork.dst = &rt->u.dst;
829 inet->cork.length = 0; 829 inet->cork.length = 0;
830 sk->sk_sndmsg_page = NULL; 830 sk->sk_sndmsg_page = NULL;
831 sk->sk_sndmsg_off = 0; 831 sk->sk_sndmsg_off = 0;
@@ -834,7 +834,7 @@ int ip_append_data(struct sock *sk,
834 transhdrlen += exthdrlen; 834 transhdrlen += exthdrlen;
835 } 835 }
836 } else { 836 } else {
837 rt = inet->cork.rt; 837 rt = (struct rtable *)inet->cork.dst;
838 if (inet->cork.flags & IPCORK_OPT) 838 if (inet->cork.flags & IPCORK_OPT)
839 opt = inet->cork.opt; 839 opt = inet->cork.opt;
840 840
@@ -1083,7 +1083,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1083 if (skb_queue_empty(&sk->sk_write_queue)) 1083 if (skb_queue_empty(&sk->sk_write_queue))
1084 return -EINVAL; 1084 return -EINVAL;
1085 1085
1086 rt = inet->cork.rt; 1086 rt = (struct rtable *)inet->cork.dst;
1087 if (inet->cork.flags & IPCORK_OPT) 1087 if (inet->cork.flags & IPCORK_OPT)
1088 opt = inet->cork.opt; 1088 opt = inet->cork.opt;
1089 1089
@@ -1208,10 +1208,8 @@ static void ip_cork_release(struct inet_sock *inet)
1208 inet->cork.flags &= ~IPCORK_OPT; 1208 inet->cork.flags &= ~IPCORK_OPT;
1209 kfree(inet->cork.opt); 1209 kfree(inet->cork.opt);
1210 inet->cork.opt = NULL; 1210 inet->cork.opt = NULL;
1211 if (inet->cork.rt) { 1211 dst_release(inet->cork.dst);
1212 ip_rt_put(inet->cork.rt); 1212 inet->cork.dst = NULL;
1213 inet->cork.rt = NULL;
1214 }
1215} 1213}
1216 1214
1217/* 1215/*
@@ -1224,7 +1222,7 @@ int ip_push_pending_frames(struct sock *sk)
1224 struct sk_buff **tail_skb; 1222 struct sk_buff **tail_skb;
1225 struct inet_sock *inet = inet_sk(sk); 1223 struct inet_sock *inet = inet_sk(sk);
1226 struct ip_options *opt = NULL; 1224 struct ip_options *opt = NULL;
1227 struct rtable *rt = inet->cork.rt; 1225 struct rtable *rt = (struct rtable *)inet->cork.dst;
1228 struct iphdr *iph; 1226 struct iphdr *iph;
1229 __be16 df = 0; 1227 __be16 df = 0;
1230 __u8 ttl; 1228 __u8 ttl;
@@ -1357,7 +1355,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1357 } replyopts; 1355 } replyopts;
1358 struct ipcm_cookie ipc; 1356 struct ipcm_cookie ipc;
1359 __be32 daddr; 1357 __be32 daddr;
1360 struct rtable *rt = (struct rtable*)skb->dst; 1358 struct rtable *rt = skb->rtable;
1361 1359
1362 if (ip_options_echo(&replyopts.opt, skb)) 1360 if (ip_options_echo(&replyopts.opt, skb))
1363 return; 1361 return;
@@ -1384,7 +1382,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1384 .dport = tcp_hdr(skb)->source } }, 1382 .dport = tcp_hdr(skb)->source } },
1385 .proto = sk->sk_protocol }; 1383 .proto = sk->sk_protocol };
1386 security_skb_classify_flow(skb, &fl); 1384 security_skb_classify_flow(skb, &fl);
1387 if (ip_route_output_key(sk->sk_net, &rt, &fl)) 1385 if (ip_route_output_key(sock_net(sk), &rt, &fl))
1388 return; 1386 return;
1389 } 1387 }
1390 1388
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index c2921d01e925..d8adfd4972e2 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -57,7 +57,7 @@
57static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) 57static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
58{ 58{
59 struct in_pktinfo info; 59 struct in_pktinfo info;
60 struct rtable *rt = (struct rtable *)skb->dst; 60 struct rtable *rt = skb->rtable;
61 61
62 info.ipi_addr.s_addr = ip_hdr(skb)->daddr; 62 info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
63 if (rt) { 63 if (rt) {
@@ -163,7 +163,7 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
163 ip_cmsg_recv_security(msg, skb); 163 ip_cmsg_recv_security(msg, skb);
164} 164}
165 165
166int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc) 166int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
167{ 167{
168 int err; 168 int err;
169 struct cmsghdr *cmsg; 169 struct cmsghdr *cmsg;
@@ -176,7 +176,7 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc)
176 switch (cmsg->cmsg_type) { 176 switch (cmsg->cmsg_type) {
177 case IP_RETOPTS: 177 case IP_RETOPTS:
178 err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); 178 err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
179 err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40); 179 err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40);
180 if (err) 180 if (err)
181 return err; 181 return err;
182 break; 182 break;
@@ -449,7 +449,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
449 struct ip_options * opt = NULL; 449 struct ip_options * opt = NULL;
450 if (optlen > 40 || optlen < 0) 450 if (optlen > 40 || optlen < 0)
451 goto e_inval; 451 goto e_inval;
452 err = ip_options_get_from_user(&opt, optval, optlen); 452 err = ip_options_get_from_user(sock_net(sk), &opt,
453 optval, optlen);
453 if (err) 454 if (err)
454 break; 455 break;
455 if (inet->is_icsk) { 456 if (inet->is_icsk) {
@@ -589,13 +590,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
589 err = 0; 590 err = 0;
590 break; 591 break;
591 } 592 }
592 dev = ip_dev_find(&init_net, mreq.imr_address.s_addr); 593 dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
593 if (dev) { 594 if (dev) {
594 mreq.imr_ifindex = dev->ifindex; 595 mreq.imr_ifindex = dev->ifindex;
595 dev_put(dev); 596 dev_put(dev);
596 } 597 }
597 } else 598 } else
598 dev = __dev_get_by_index(&init_net, mreq.imr_ifindex); 599 dev = __dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
599 600
600 601
601 err = -EADDRNOTAVAIL; 602 err = -EADDRNOTAVAIL;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 58b60b2fb011..fb53ddfea5b5 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -179,7 +179,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
179 spi, IPPROTO_COMP, AF_INET); 179 spi, IPPROTO_COMP, AF_INET);
180 if (!x) 180 if (!x)
181 return; 181 return;
182 NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n", 182 NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/" NIPQUAD_FMT "\n",
183 spi, NIPQUAD(iph->daddr)); 183 spi, NIPQUAD(iph->daddr));
184 xfrm_state_put(x); 184 xfrm_state_put(x);
185} 185}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 4824fe8996bf..0f42d1c1f690 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -292,7 +292,7 @@ static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)
292 292
293 mm_segment_t oldfs = get_fs(); 293 mm_segment_t oldfs = get_fs();
294 set_fs(get_ds()); 294 set_fs(get_ds());
295 res = devinet_ioctl(cmd, (struct ifreq __user *) arg); 295 res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
296 set_fs(oldfs); 296 set_fs(oldfs);
297 return res; 297 return res;
298} 298}
@@ -376,7 +376,7 @@ static int __init ic_defaults(void)
376 */ 376 */
377 377
378 if (!ic_host_name_set) 378 if (!ic_host_name_set)
379 sprintf(init_utsname()->nodename, "%u.%u.%u.%u", NIPQUAD(ic_myaddr)); 379 sprintf(init_utsname()->nodename, NIPQUAD_FMT, NIPQUAD(ic_myaddr));
380 380
381 if (root_server_addr == NONE) 381 if (root_server_addr == NONE)
382 root_server_addr = ic_servaddr; 382 root_server_addr = ic_servaddr;
@@ -389,11 +389,11 @@ static int __init ic_defaults(void)
389 else if (IN_CLASSC(ntohl(ic_myaddr))) 389 else if (IN_CLASSC(ntohl(ic_myaddr)))
390 ic_netmask = htonl(IN_CLASSC_NET); 390 ic_netmask = htonl(IN_CLASSC_NET);
391 else { 391 else {
392 printk(KERN_ERR "IP-Config: Unable to guess netmask for address %u.%u.%u.%u\n", 392 printk(KERN_ERR "IP-Config: Unable to guess netmask for address " NIPQUAD_FMT "\n",
393 NIPQUAD(ic_myaddr)); 393 NIPQUAD(ic_myaddr));
394 return -1; 394 return -1;
395 } 395 }
396 printk("IP-Config: Guessing netmask %u.%u.%u.%u\n", NIPQUAD(ic_netmask)); 396 printk("IP-Config: Guessing netmask " NIPQUAD_FMT "\n", NIPQUAD(ic_netmask));
397 } 397 }
398 398
399 return 0; 399 return 0;
@@ -434,7 +434,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
434 unsigned char *sha, *tha; /* s for "source", t for "target" */ 434 unsigned char *sha, *tha; /* s for "source", t for "target" */
435 struct ic_device *d; 435 struct ic_device *d;
436 436
437 if (dev->nd_net != &init_net) 437 if (dev_net(dev) != &init_net)
438 goto drop; 438 goto drop;
439 439
440 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) 440 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
@@ -460,10 +460,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
460 if (rarp->ar_pro != htons(ETH_P_IP)) 460 if (rarp->ar_pro != htons(ETH_P_IP))
461 goto drop; 461 goto drop;
462 462
463 if (!pskb_may_pull(skb, 463 if (!pskb_may_pull(skb, arp_hdr_len(dev)))
464 sizeof(struct arphdr) +
465 (2 * dev->addr_len) +
466 (2 * 4)))
467 goto drop; 464 goto drop;
468 465
469 /* OK, it is all there and looks valid, process... */ 466 /* OK, it is all there and looks valid, process... */
@@ -857,7 +854,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
857 struct ic_device *d; 854 struct ic_device *d;
858 int len, ext_len; 855 int len, ext_len;
859 856
860 if (dev->nd_net != &init_net) 857 if (dev_net(dev) != &init_net)
861 goto drop; 858 goto drop;
862 859
863 /* Perform verifications before taking the lock. */ 860 /* Perform verifications before taking the lock. */
@@ -984,9 +981,9 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
984 ic_myaddr = b->your_ip; 981 ic_myaddr = b->your_ip;
985 ic_servaddr = server_id; 982 ic_servaddr = server_id;
986#ifdef IPCONFIG_DEBUG 983#ifdef IPCONFIG_DEBUG
987 printk("DHCP: Offered address %u.%u.%u.%u", 984 printk("DHCP: Offered address " NIPQUAD_FMT,
988 NIPQUAD(ic_myaddr)); 985 NIPQUAD(ic_myaddr));
989 printk(" by server %u.%u.%u.%u\n", 986 printk(" by server " NIPQUAD_FMT "\n",
990 NIPQUAD(ic_servaddr)); 987 NIPQUAD(ic_servaddr));
991#endif 988#endif
992 /* The DHCP indicated server address takes 989 /* The DHCP indicated server address takes
@@ -1182,11 +1179,11 @@ static int __init ic_dynamic(void)
1182 return -1; 1179 return -1;
1183 } 1180 }
1184 1181
1185 printk("IP-Config: Got %s answer from %u.%u.%u.%u, ", 1182 printk("IP-Config: Got %s answer from " NIPQUAD_FMT ", ",
1186 ((ic_got_reply & IC_RARP) ? "RARP" 1183 ((ic_got_reply & IC_RARP) ? "RARP"
1187 : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), 1184 : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"),
1188 NIPQUAD(ic_servaddr)); 1185 NIPQUAD(ic_servaddr));
1189 printk("my address is %u.%u.%u.%u\n", NIPQUAD(ic_myaddr)); 1186 printk("my address is " NIPQUAD_FMT "\n", NIPQUAD(ic_myaddr));
1190 1187
1191 return 0; 1188 return 0;
1192} 1189}
@@ -1212,12 +1209,12 @@ static int pnp_seq_show(struct seq_file *seq, void *v)
1212 for (i = 0; i < CONF_NAMESERVERS_MAX; i++) { 1209 for (i = 0; i < CONF_NAMESERVERS_MAX; i++) {
1213 if (ic_nameservers[i] != NONE) 1210 if (ic_nameservers[i] != NONE)
1214 seq_printf(seq, 1211 seq_printf(seq,
1215 "nameserver %u.%u.%u.%u\n", 1212 "nameserver " NIPQUAD_FMT "\n",
1216 NIPQUAD(ic_nameservers[i])); 1213 NIPQUAD(ic_nameservers[i]));
1217 } 1214 }
1218 if (ic_servaddr != NONE) 1215 if (ic_servaddr != NONE)
1219 seq_printf(seq, 1216 seq_printf(seq,
1220 "bootserver %u.%u.%u.%u\n", 1217 "bootserver " NIPQUAD_FMT "\n",
1221 NIPQUAD(ic_servaddr)); 1218 NIPQUAD(ic_servaddr));
1222 return 0; 1219 return 0;
1223} 1220}
@@ -1392,13 +1389,13 @@ static int __init ip_auto_config(void)
1392 */ 1389 */
1393 printk("IP-Config: Complete:"); 1390 printk("IP-Config: Complete:");
1394 printk("\n device=%s", ic_dev->name); 1391 printk("\n device=%s", ic_dev->name);
1395 printk(", addr=%u.%u.%u.%u", NIPQUAD(ic_myaddr)); 1392 printk(", addr=" NIPQUAD_FMT, NIPQUAD(ic_myaddr));
1396 printk(", mask=%u.%u.%u.%u", NIPQUAD(ic_netmask)); 1393 printk(", mask=" NIPQUAD_FMT, NIPQUAD(ic_netmask));
1397 printk(", gw=%u.%u.%u.%u", NIPQUAD(ic_gateway)); 1394 printk(", gw=" NIPQUAD_FMT, NIPQUAD(ic_gateway));
1398 printk(",\n host=%s, domain=%s, nis-domain=%s", 1395 printk(",\n host=%s, domain=%s, nis-domain=%s",
1399 utsname()->nodename, ic_domain, utsname()->domainname); 1396 utsname()->nodename, ic_domain, utsname()->domainname);
1400 printk(",\n bootserver=%u.%u.%u.%u", NIPQUAD(ic_servaddr)); 1397 printk(",\n bootserver=" NIPQUAD_FMT, NIPQUAD(ic_servaddr));
1401 printk(", rootserver=%u.%u.%u.%u", NIPQUAD(root_server_addr)); 1398 printk(", rootserver=" NIPQUAD_FMT, NIPQUAD(root_server_addr));
1402 printk(", rootpath=%s", root_server_path); 1399 printk(", rootpath=%s", root_server_path);
1403 printk("\n"); 1400 printk("\n");
1404#endif /* !SILENT */ 1401#endif /* !SILENT */
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index dbaed69de06a..149111f08e8d 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -115,49 +115,57 @@
115#include <net/ipip.h> 115#include <net/ipip.h>
116#include <net/inet_ecn.h> 116#include <net/inet_ecn.h>
117#include <net/xfrm.h> 117#include <net/xfrm.h>
118#include <net/net_namespace.h>
119#include <net/netns/generic.h>
118 120
119#define HASH_SIZE 16 121#define HASH_SIZE 16
120#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) 122#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
121 123
124static int ipip_net_id;
125struct ipip_net {
126 struct ip_tunnel *tunnels_r_l[HASH_SIZE];
127 struct ip_tunnel *tunnels_r[HASH_SIZE];
128 struct ip_tunnel *tunnels_l[HASH_SIZE];
129 struct ip_tunnel *tunnels_wc[1];
130 struct ip_tunnel **tunnels[4];
131
132 struct net_device *fb_tunnel_dev;
133};
134
122static int ipip_fb_tunnel_init(struct net_device *dev); 135static int ipip_fb_tunnel_init(struct net_device *dev);
123static int ipip_tunnel_init(struct net_device *dev); 136static int ipip_tunnel_init(struct net_device *dev);
124static void ipip_tunnel_setup(struct net_device *dev); 137static void ipip_tunnel_setup(struct net_device *dev);
125 138
126static struct net_device *ipip_fb_tunnel_dev;
127
128static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
129static struct ip_tunnel *tunnels_r[HASH_SIZE];
130static struct ip_tunnel *tunnels_l[HASH_SIZE];
131static struct ip_tunnel *tunnels_wc[1];
132static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
133
134static DEFINE_RWLOCK(ipip_lock); 139static DEFINE_RWLOCK(ipip_lock);
135 140
136static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local) 141static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
142 __be32 remote, __be32 local)
137{ 143{
138 unsigned h0 = HASH(remote); 144 unsigned h0 = HASH(remote);
139 unsigned h1 = HASH(local); 145 unsigned h1 = HASH(local);
140 struct ip_tunnel *t; 146 struct ip_tunnel *t;
147 struct ipip_net *ipn = net_generic(net, ipip_net_id);
141 148
142 for (t = tunnels_r_l[h0^h1]; t; t = t->next) { 149 for (t = ipn->tunnels_r_l[h0^h1]; t; t = t->next) {
143 if (local == t->parms.iph.saddr && 150 if (local == t->parms.iph.saddr &&
144 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 151 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
145 return t; 152 return t;
146 } 153 }
147 for (t = tunnels_r[h0]; t; t = t->next) { 154 for (t = ipn->tunnels_r[h0]; t; t = t->next) {
148 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 155 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
149 return t; 156 return t;
150 } 157 }
151 for (t = tunnels_l[h1]; t; t = t->next) { 158 for (t = ipn->tunnels_l[h1]; t; t = t->next) {
152 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) 159 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
153 return t; 160 return t;
154 } 161 }
155 if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) 162 if ((t = ipn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
156 return t; 163 return t;
157 return NULL; 164 return NULL;
158} 165}
159 166
160static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms) 167static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
168 struct ip_tunnel_parm *parms)
161{ 169{
162 __be32 remote = parms->iph.daddr; 170 __be32 remote = parms->iph.daddr;
163 __be32 local = parms->iph.saddr; 171 __be32 local = parms->iph.saddr;
@@ -172,19 +180,20 @@ static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
172 prio |= 1; 180 prio |= 1;
173 h ^= HASH(local); 181 h ^= HASH(local);
174 } 182 }
175 return &tunnels[prio][h]; 183 return &ipn->tunnels[prio][h];
176} 184}
177 185
178static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t) 186static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
187 struct ip_tunnel *t)
179{ 188{
180 return __ipip_bucket(&t->parms); 189 return __ipip_bucket(ipn, &t->parms);
181} 190}
182 191
183static void ipip_tunnel_unlink(struct ip_tunnel *t) 192static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
184{ 193{
185 struct ip_tunnel **tp; 194 struct ip_tunnel **tp;
186 195
187 for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) { 196 for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
188 if (t == *tp) { 197 if (t == *tp) {
189 write_lock_bh(&ipip_lock); 198 write_lock_bh(&ipip_lock);
190 *tp = t->next; 199 *tp = t->next;
@@ -194,9 +203,9 @@ static void ipip_tunnel_unlink(struct ip_tunnel *t)
194 } 203 }
195} 204}
196 205
197static void ipip_tunnel_link(struct ip_tunnel *t) 206static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
198{ 207{
199 struct ip_tunnel **tp = ipip_bucket(t); 208 struct ip_tunnel **tp = ipip_bucket(ipn, t);
200 209
201 t->next = *tp; 210 t->next = *tp;
202 write_lock_bh(&ipip_lock); 211 write_lock_bh(&ipip_lock);
@@ -204,15 +213,17 @@ static void ipip_tunnel_link(struct ip_tunnel *t)
204 write_unlock_bh(&ipip_lock); 213 write_unlock_bh(&ipip_lock);
205} 214}
206 215
207static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create) 216static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
217 struct ip_tunnel_parm *parms, int create)
208{ 218{
209 __be32 remote = parms->iph.daddr; 219 __be32 remote = parms->iph.daddr;
210 __be32 local = parms->iph.saddr; 220 __be32 local = parms->iph.saddr;
211 struct ip_tunnel *t, **tp, *nt; 221 struct ip_tunnel *t, **tp, *nt;
212 struct net_device *dev; 222 struct net_device *dev;
213 char name[IFNAMSIZ]; 223 char name[IFNAMSIZ];
224 struct ipip_net *ipn = net_generic(net, ipip_net_id);
214 225
215 for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) { 226 for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) {
216 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) 227 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
217 return t; 228 return t;
218 } 229 }
@@ -228,6 +239,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
228 if (dev == NULL) 239 if (dev == NULL)
229 return NULL; 240 return NULL;
230 241
242 dev_net_set(dev, net);
243
231 if (strchr(name, '%')) { 244 if (strchr(name, '%')) {
232 if (dev_alloc_name(dev, name) < 0) 245 if (dev_alloc_name(dev, name) < 0)
233 goto failed_free; 246 goto failed_free;
@@ -241,7 +254,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
241 goto failed_free; 254 goto failed_free;
242 255
243 dev_hold(dev); 256 dev_hold(dev);
244 ipip_tunnel_link(nt); 257 ipip_tunnel_link(ipn, nt);
245 return nt; 258 return nt;
246 259
247failed_free: 260failed_free:
@@ -251,12 +264,15 @@ failed_free:
251 264
252static void ipip_tunnel_uninit(struct net_device *dev) 265static void ipip_tunnel_uninit(struct net_device *dev)
253{ 266{
254 if (dev == ipip_fb_tunnel_dev) { 267 struct net *net = dev_net(dev);
268 struct ipip_net *ipn = net_generic(net, ipip_net_id);
269
270 if (dev == ipn->fb_tunnel_dev) {
255 write_lock_bh(&ipip_lock); 271 write_lock_bh(&ipip_lock);
256 tunnels_wc[0] = NULL; 272 ipn->tunnels_wc[0] = NULL;
257 write_unlock_bh(&ipip_lock); 273 write_unlock_bh(&ipip_lock);
258 } else 274 } else
259 ipip_tunnel_unlink(netdev_priv(dev)); 275 ipip_tunnel_unlink(ipn, netdev_priv(dev));
260 dev_put(dev); 276 dev_put(dev);
261} 277}
262 278
@@ -305,7 +321,7 @@ static int ipip_err(struct sk_buff *skb, u32 info)
305 err = -ENOENT; 321 err = -ENOENT;
306 322
307 read_lock(&ipip_lock); 323 read_lock(&ipip_lock);
308 t = ipip_tunnel_lookup(iph->daddr, iph->saddr); 324 t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
309 if (t == NULL || t->parms.iph.daddr == 0) 325 if (t == NULL || t->parms.iph.daddr == 0)
310 goto out; 326 goto out;
311 327
@@ -401,7 +417,7 @@ out:
401 fl.fl4_daddr = eiph->saddr; 417 fl.fl4_daddr = eiph->saddr;
402 fl.fl4_tos = RT_TOS(eiph->tos); 418 fl.fl4_tos = RT_TOS(eiph->tos);
403 fl.proto = IPPROTO_IPIP; 419 fl.proto = IPPROTO_IPIP;
404 if (ip_route_output_key(&init_net, &rt, &key)) { 420 if (ip_route_output_key(dev_net(skb->dev), &rt, &key)) {
405 kfree_skb(skb2); 421 kfree_skb(skb2);
406 return 0; 422 return 0;
407 } 423 }
@@ -414,7 +430,7 @@ out:
414 fl.fl4_daddr = eiph->daddr; 430 fl.fl4_daddr = eiph->daddr;
415 fl.fl4_src = eiph->saddr; 431 fl.fl4_src = eiph->saddr;
416 fl.fl4_tos = eiph->tos; 432 fl.fl4_tos = eiph->tos;
417 if (ip_route_output_key(&init_net, &rt, &fl) || 433 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
418 rt->u.dst.dev->type != ARPHRD_TUNNEL) { 434 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
419 ip_rt_put(rt); 435 ip_rt_put(rt);
420 kfree_skb(skb2); 436 kfree_skb(skb2);
@@ -465,7 +481,8 @@ static int ipip_rcv(struct sk_buff *skb)
465 const struct iphdr *iph = ip_hdr(skb); 481 const struct iphdr *iph = ip_hdr(skb);
466 482
467 read_lock(&ipip_lock); 483 read_lock(&ipip_lock);
468 if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) { 484 if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
485 iph->saddr, iph->daddr)) != NULL) {
469 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 486 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
470 read_unlock(&ipip_lock); 487 read_unlock(&ipip_lock);
471 kfree_skb(skb); 488 kfree_skb(skb);
@@ -528,7 +545,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
528 545
529 if (!dst) { 546 if (!dst) {
530 /* NBMA tunnel */ 547 /* NBMA tunnel */
531 if ((rt = (struct rtable*)skb->dst) == NULL) { 548 if ((rt = skb->rtable) == NULL) {
532 tunnel->stat.tx_fifo_errors++; 549 tunnel->stat.tx_fifo_errors++;
533 goto tx_error; 550 goto tx_error;
534 } 551 }
@@ -543,7 +560,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
543 .saddr = tiph->saddr, 560 .saddr = tiph->saddr,
544 .tos = RT_TOS(tos) } }, 561 .tos = RT_TOS(tos) } },
545 .proto = IPPROTO_IPIP }; 562 .proto = IPPROTO_IPIP };
546 if (ip_route_output_key(&init_net, &rt, &fl)) { 563 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
547 tunnel->stat.tx_carrier_errors++; 564 tunnel->stat.tx_carrier_errors++;
548 goto tx_error_icmp; 565 goto tx_error_icmp;
549 } 566 }
@@ -664,7 +681,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
664 .tos = RT_TOS(iph->tos) } }, 681 .tos = RT_TOS(iph->tos) } },
665 .proto = IPPROTO_IPIP }; 682 .proto = IPPROTO_IPIP };
666 struct rtable *rt; 683 struct rtable *rt;
667 if (!ip_route_output_key(&init_net, &rt, &fl)) { 684 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
668 tdev = rt->u.dst.dev; 685 tdev = rt->u.dst.dev;
669 ip_rt_put(rt); 686 ip_rt_put(rt);
670 } 687 }
@@ -672,7 +689,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
672 } 689 }
673 690
674 if (!tdev && tunnel->parms.link) 691 if (!tdev && tunnel->parms.link)
675 tdev = __dev_get_by_index(&init_net, tunnel->parms.link); 692 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
676 693
677 if (tdev) { 694 if (tdev) {
678 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); 695 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
@@ -687,16 +704,18 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
687 int err = 0; 704 int err = 0;
688 struct ip_tunnel_parm p; 705 struct ip_tunnel_parm p;
689 struct ip_tunnel *t; 706 struct ip_tunnel *t;
707 struct net *net = dev_net(dev);
708 struct ipip_net *ipn = net_generic(net, ipip_net_id);
690 709
691 switch (cmd) { 710 switch (cmd) {
692 case SIOCGETTUNNEL: 711 case SIOCGETTUNNEL:
693 t = NULL; 712 t = NULL;
694 if (dev == ipip_fb_tunnel_dev) { 713 if (dev == ipn->fb_tunnel_dev) {
695 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { 714 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
696 err = -EFAULT; 715 err = -EFAULT;
697 break; 716 break;
698 } 717 }
699 t = ipip_tunnel_locate(&p, 0); 718 t = ipip_tunnel_locate(net, &p, 0);
700 } 719 }
701 if (t == NULL) 720 if (t == NULL)
702 t = netdev_priv(dev); 721 t = netdev_priv(dev);
@@ -722,9 +741,9 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
722 if (p.iph.ttl) 741 if (p.iph.ttl)
723 p.iph.frag_off |= htons(IP_DF); 742 p.iph.frag_off |= htons(IP_DF);
724 743
725 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL); 744 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
726 745
727 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { 746 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
728 if (t != NULL) { 747 if (t != NULL) {
729 if (t->dev != dev) { 748 if (t->dev != dev) {
730 err = -EEXIST; 749 err = -EEXIST;
@@ -737,12 +756,12 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
737 break; 756 break;
738 } 757 }
739 t = netdev_priv(dev); 758 t = netdev_priv(dev);
740 ipip_tunnel_unlink(t); 759 ipip_tunnel_unlink(ipn, t);
741 t->parms.iph.saddr = p.iph.saddr; 760 t->parms.iph.saddr = p.iph.saddr;
742 t->parms.iph.daddr = p.iph.daddr; 761 t->parms.iph.daddr = p.iph.daddr;
743 memcpy(dev->dev_addr, &p.iph.saddr, 4); 762 memcpy(dev->dev_addr, &p.iph.saddr, 4);
744 memcpy(dev->broadcast, &p.iph.daddr, 4); 763 memcpy(dev->broadcast, &p.iph.daddr, 4);
745 ipip_tunnel_link(t); 764 ipip_tunnel_link(ipn, t);
746 netdev_state_change(dev); 765 netdev_state_change(dev);
747 } 766 }
748 } 767 }
@@ -770,15 +789,15 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
770 if (!capable(CAP_NET_ADMIN)) 789 if (!capable(CAP_NET_ADMIN))
771 goto done; 790 goto done;
772 791
773 if (dev == ipip_fb_tunnel_dev) { 792 if (dev == ipn->fb_tunnel_dev) {
774 err = -EFAULT; 793 err = -EFAULT;
775 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 794 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
776 goto done; 795 goto done;
777 err = -ENOENT; 796 err = -ENOENT;
778 if ((t = ipip_tunnel_locate(&p, 0)) == NULL) 797 if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
779 goto done; 798 goto done;
780 err = -EPERM; 799 err = -EPERM;
781 if (t->dev == ipip_fb_tunnel_dev) 800 if (t->dev == ipn->fb_tunnel_dev)
782 goto done; 801 goto done;
783 dev = t->dev; 802 dev = t->dev;
784 } 803 }
@@ -822,6 +841,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
822 dev->flags = IFF_NOARP; 841 dev->flags = IFF_NOARP;
823 dev->iflink = 0; 842 dev->iflink = 0;
824 dev->addr_len = 4; 843 dev->addr_len = 4;
844 dev->features |= NETIF_F_NETNS_LOCAL;
825} 845}
826 846
827static int ipip_tunnel_init(struct net_device *dev) 847static int ipip_tunnel_init(struct net_device *dev)
@@ -841,10 +861,11 @@ static int ipip_tunnel_init(struct net_device *dev)
841 return 0; 861 return 0;
842} 862}
843 863
844static int __init ipip_fb_tunnel_init(struct net_device *dev) 864static int ipip_fb_tunnel_init(struct net_device *dev)
845{ 865{
846 struct ip_tunnel *tunnel = netdev_priv(dev); 866 struct ip_tunnel *tunnel = netdev_priv(dev);
847 struct iphdr *iph = &tunnel->parms.iph; 867 struct iphdr *iph = &tunnel->parms.iph;
868 struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
848 869
849 tunnel->dev = dev; 870 tunnel->dev = dev;
850 strcpy(tunnel->parms.name, dev->name); 871 strcpy(tunnel->parms.name, dev->name);
@@ -854,7 +875,7 @@ static int __init ipip_fb_tunnel_init(struct net_device *dev)
854 iph->ihl = 5; 875 iph->ihl = 5;
855 876
856 dev_hold(dev); 877 dev_hold(dev);
857 tunnels_wc[0] = tunnel; 878 ipn->tunnels_wc[0] = tunnel;
858 return 0; 879 return 0;
859} 880}
860 881
@@ -867,50 +888,98 @@ static struct xfrm_tunnel ipip_handler = {
867static char banner[] __initdata = 888static char banner[] __initdata =
868 KERN_INFO "IPv4 over IPv4 tunneling driver\n"; 889 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
869 890
870static int __init ipip_init(void) 891static void ipip_destroy_tunnels(struct ipip_net *ipn)
892{
893 int prio;
894
895 for (prio = 1; prio < 4; prio++) {
896 int h;
897 for (h = 0; h < HASH_SIZE; h++) {
898 struct ip_tunnel *t;
899 while ((t = ipn->tunnels[prio][h]) != NULL)
900 unregister_netdevice(t->dev);
901 }
902 }
903}
904
905static int ipip_init_net(struct net *net)
871{ 906{
872 int err; 907 int err;
908 struct ipip_net *ipn;
873 909
874 printk(banner); 910 err = -ENOMEM;
911 ipn = kzalloc(sizeof(struct ipip_net), GFP_KERNEL);
912 if (ipn == NULL)
913 goto err_alloc;
875 914
876 if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) { 915 err = net_assign_generic(net, ipip_net_id, ipn);
877 printk(KERN_INFO "ipip init: can't register tunnel\n"); 916 if (err < 0)
878 return -EAGAIN; 917 goto err_assign;
879 }
880 918
881 ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), 919 ipn->tunnels[0] = ipn->tunnels_wc;
920 ipn->tunnels[1] = ipn->tunnels_l;
921 ipn->tunnels[2] = ipn->tunnels_r;
922 ipn->tunnels[3] = ipn->tunnels_r_l;
923
924 ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
882 "tunl0", 925 "tunl0",
883 ipip_tunnel_setup); 926 ipip_tunnel_setup);
884 if (!ipip_fb_tunnel_dev) { 927 if (!ipn->fb_tunnel_dev) {
885 err = -ENOMEM; 928 err = -ENOMEM;
886 goto err1; 929 goto err_alloc_dev;
887 } 930 }
888 931
889 ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init; 932 ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init;
933 dev_net_set(ipn->fb_tunnel_dev, net);
934
935 if ((err = register_netdev(ipn->fb_tunnel_dev)))
936 goto err_reg_dev;
937
938 return 0;
890 939
891 if ((err = register_netdev(ipip_fb_tunnel_dev))) 940err_reg_dev:
892 goto err2; 941 free_netdev(ipn->fb_tunnel_dev);
893 out: 942err_alloc_dev:
943 /* nothing */
944err_assign:
945 kfree(ipn);
946err_alloc:
894 return err; 947 return err;
895 err2:
896 free_netdev(ipip_fb_tunnel_dev);
897 err1:
898 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
899 goto out;
900} 948}
901 949
902static void __exit ipip_destroy_tunnels(void) 950static void ipip_exit_net(struct net *net)
903{ 951{
904 int prio; 952 struct ipip_net *ipn;
905 953
906 for (prio = 1; prio < 4; prio++) { 954 ipn = net_generic(net, ipip_net_id);
907 int h; 955 rtnl_lock();
908 for (h = 0; h < HASH_SIZE; h++) { 956 ipip_destroy_tunnels(ipn);
909 struct ip_tunnel *t; 957 unregister_netdevice(ipn->fb_tunnel_dev);
910 while ((t = tunnels[prio][h]) != NULL) 958 rtnl_unlock();
911 unregister_netdevice(t->dev); 959 kfree(ipn);
912 } 960}
961
962static struct pernet_operations ipip_net_ops = {
963 .init = ipip_init_net,
964 .exit = ipip_exit_net,
965};
966
967static int __init ipip_init(void)
968{
969 int err;
970
971 printk(banner);
972
973 if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
974 printk(KERN_INFO "ipip init: can't register tunnel\n");
975 return -EAGAIN;
913 } 976 }
977
978 err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops);
979 if (err)
980 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
981
982 return err;
914} 983}
915 984
916static void __exit ipip_fini(void) 985static void __exit ipip_fini(void)
@@ -918,10 +987,7 @@ static void __exit ipip_fini(void)
918 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) 987 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
919 printk(KERN_INFO "ipip close: can't deregister tunnel\n"); 988 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
920 989
921 rtnl_lock(); 990 unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops);
922 ipip_destroy_tunnels();
923 unregister_netdevice(ipip_fb_tunnel_dev);
924 rtnl_unlock();
925} 991}
926 992
927module_init(ipip_init); 993module_init(ipip_init);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a94f52c207a7..11700a4dcd95 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -849,7 +849,7 @@ static void mrtsock_destruct(struct sock *sk)
849{ 849{
850 rtnl_lock(); 850 rtnl_lock();
851 if (sk == mroute_socket) { 851 if (sk == mroute_socket) {
852 IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)--; 852 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;
853 853
854 write_lock_bh(&mrt_lock); 854 write_lock_bh(&mrt_lock);
855 mroute_socket=NULL; 855 mroute_socket=NULL;
@@ -898,7 +898,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
898 mroute_socket=sk; 898 mroute_socket=sk;
899 write_unlock_bh(&mrt_lock); 899 write_unlock_bh(&mrt_lock);
900 900
901 IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)++; 901 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
902 } 902 }
903 rtnl_unlock(); 903 rtnl_unlock();
904 return ret; 904 return ret;
@@ -1089,7 +1089,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
1089 struct vif_device *v; 1089 struct vif_device *v;
1090 int ct; 1090 int ct;
1091 1091
1092 if (dev->nd_net != &init_net) 1092 if (dev_net(dev) != &init_net)
1093 return NOTIFY_DONE; 1093 return NOTIFY_DONE;
1094 1094
1095 if (event != NETDEV_UNREGISTER) 1095 if (event != NETDEV_UNREGISTER)
@@ -1283,7 +1283,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1283 if (vif_table[vif].dev != skb->dev) { 1283 if (vif_table[vif].dev != skb->dev) {
1284 int true_vifi; 1284 int true_vifi;
1285 1285
1286 if (((struct rtable*)skb->dst)->fl.iif == 0) { 1286 if (skb->rtable->fl.iif == 0) {
1287 /* It is our own packet, looped back. 1287 /* It is our own packet, looped back.
1288 Very complicated situation... 1288 Very complicated situation...
1289 1289
@@ -1357,7 +1357,7 @@ dont_forward:
1357int ip_mr_input(struct sk_buff *skb) 1357int ip_mr_input(struct sk_buff *skb)
1358{ 1358{
1359 struct mfc_cache *cache; 1359 struct mfc_cache *cache;
1360 int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL; 1360 int local = skb->rtable->rt_flags&RTCF_LOCAL;
1361 1361
1362 /* Packet is looped back after forward, it should not be 1362 /* Packet is looped back after forward, it should not be
1363 forwarded second time, but still can be delivered locally. 1363 forwarded second time, but still can be delivered locally.
@@ -1594,7 +1594,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1594{ 1594{
1595 int err; 1595 int err;
1596 struct mfc_cache *cache; 1596 struct mfc_cache *cache;
1597 struct rtable *rt = (struct rtable*)skb->dst; 1597 struct rtable *rt = skb->rtable;
1598 1598
1599 read_lock(&mrt_lock); 1599 read_lock(&mrt_lock);
1600 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); 1600 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 12dc0d640b6d..620e40ff79a9 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -550,7 +550,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
550 550
551 IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" 551 IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
552 "%u.%u.%u.%u:%u to app %s on port %u\n", 552 "%u.%u.%u.%u:%u to app %s on port %u\n",
553 __FUNCTION__, 553 __func__,
554 NIPQUAD(cp->caddr), ntohs(cp->cport), 554 NIPQUAD(cp->caddr), ntohs(cp->cport),
555 NIPQUAD(cp->vaddr), ntohs(cp->vport), 555 NIPQUAD(cp->vaddr), ntohs(cp->vport),
556 inc->name, ntohs(inc->port)); 556 inc->name, ntohs(inc->port));
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 1fa7b330b9ac..1caa2908373f 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -344,7 +344,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
344 344
345 IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" 345 IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
346 "%u.%u.%u.%u:%u to app %s on port %u\n", 346 "%u.%u.%u.%u:%u to app %s on port %u\n",
347 __FUNCTION__, 347 __func__,
348 NIPQUAD(cp->caddr), ntohs(cp->cport), 348 NIPQUAD(cp->caddr), ntohs(cp->cport),
349 NIPQUAD(cp->vaddr), ntohs(cp->vport), 349 NIPQUAD(cp->vaddr), ntohs(cp->vport),
350 inc->name, ntohs(inc->port)); 350 inc->name, ntohs(inc->port));
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 948378d0a755..69c56663cc9a 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -916,7 +916,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
916 if (!tinfo) 916 if (!tinfo)
917 return -ENOMEM; 917 return -ENOMEM;
918 918
919 IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current)); 919 IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
920 IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n", 920 IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
921 sizeof(struct ip_vs_sync_conn)); 921 sizeof(struct ip_vs_sync_conn));
922 922
@@ -956,7 +956,7 @@ int stop_sync_thread(int state)
956 (state == IP_VS_STATE_BACKUP && !sync_backup_pid)) 956 (state == IP_VS_STATE_BACKUP && !sync_backup_pid))
957 return -ESRCH; 957 return -ESRCH;
958 958
959 IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current)); 959 IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
960 IP_VS_INFO("stopping sync thread %d ...\n", 960 IP_VS_INFO("stopping sync thread %d ...\n",
961 (state == IP_VS_STATE_MASTER) ? 961 (state == IP_VS_STATE_MASTER) ?
962 sync_master_pid : sync_backup_pid); 962 sync_master_pid : sync_backup_pid);
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 9a904c6c0dc8..f8edacdf991d 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -182,21 +182,44 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
182 } 182 }
183 return csum; 183 return csum;
184} 184}
185
186EXPORT_SYMBOL(nf_ip_checksum); 185EXPORT_SYMBOL(nf_ip_checksum);
187 186
187static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
188 unsigned int dataoff, unsigned int len,
189 u_int8_t protocol)
190{
191 const struct iphdr *iph = ip_hdr(skb);
192 __sum16 csum = 0;
193
194 switch (skb->ip_summed) {
195 case CHECKSUM_COMPLETE:
196 if (len == skb->len - dataoff)
197 return nf_ip_checksum(skb, hook, dataoff, protocol);
198 /* fall through */
199 case CHECKSUM_NONE:
200 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
201 skb->len - dataoff, 0);
202 skb->ip_summed = CHECKSUM_NONE;
203 csum = __skb_checksum_complete_head(skb, dataoff + len);
204 if (!csum)
205 skb->ip_summed = CHECKSUM_UNNECESSARY;
206 }
207 return csum;
208}
209
188static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) 210static int nf_ip_route(struct dst_entry **dst, struct flowi *fl)
189{ 211{
190 return ip_route_output_key(&init_net, (struct rtable **)dst, fl); 212 return ip_route_output_key(&init_net, (struct rtable **)dst, fl);
191} 213}
192 214
193static const struct nf_afinfo nf_ip_afinfo = { 215static const struct nf_afinfo nf_ip_afinfo = {
194 .family = AF_INET, 216 .family = AF_INET,
195 .checksum = nf_ip_checksum, 217 .checksum = nf_ip_checksum,
196 .route = nf_ip_route, 218 .checksum_partial = nf_ip_checksum_partial,
197 .saveroute = nf_ip_saveroute, 219 .route = nf_ip_route,
198 .reroute = nf_ip_reroute, 220 .saveroute = nf_ip_saveroute,
199 .route_key_size = sizeof(struct ip_rt_info), 221 .reroute = nf_ip_reroute,
222 .route_key_size = sizeof(struct ip_rt_info),
200}; 223};
201 224
202static int ipv4_netfilter_init(void) 225static int ipv4_netfilter_init(void)
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 9a077cb24798..0c95cd5872f3 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -241,10 +241,25 @@ config NF_NAT_SNMP_BASIC
241# <expr> '&&' <expr> (6) 241# <expr> '&&' <expr> (6)
242# 242#
243# (6) Returns the result of min(/expr/, /expr/). 243# (6) Returns the result of min(/expr/, /expr/).
244config NF_NAT_PROTO_DCCP
245 tristate
246 depends on NF_NAT && NF_CT_PROTO_DCCP
247 default NF_NAT && NF_CT_PROTO_DCCP
248
244config NF_NAT_PROTO_GRE 249config NF_NAT_PROTO_GRE
245 tristate 250 tristate
246 depends on NF_NAT && NF_CT_PROTO_GRE 251 depends on NF_NAT && NF_CT_PROTO_GRE
247 252
253config NF_NAT_PROTO_UDPLITE
254 tristate
255 depends on NF_NAT && NF_CT_PROTO_UDPLITE
256 default NF_NAT && NF_CT_PROTO_UDPLITE
257
258config NF_NAT_PROTO_SCTP
259 tristate
260 default NF_NAT && NF_CT_PROTO_SCTP
261 depends on NF_NAT && NF_CT_PROTO_SCTP
262
248config NF_NAT_FTP 263config NF_NAT_FTP
249 tristate 264 tristate
250 depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT 265 depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 0c7dc78a62e9..d9b92fbf5579 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -10,7 +10,7 @@ nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o
10endif 10endif
11endif 11endif
12 12
13nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o 13nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
14iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o 14iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o
15 15
16# connection tracking 16# connection tracking
@@ -29,7 +29,10 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
29obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o 29obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
30 30
31# NAT protocols (nf_nat) 31# NAT protocols (nf_nat)
32obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
32obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o 33obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
34obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
35obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
33 36
34# generic IP tables 37# generic IP tables
35obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o 38obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index a7591ce344d2..03e83a65aec5 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -52,14 +52,14 @@ MODULE_DESCRIPTION("arptables core");
52do { \ 52do { \
53 if (!(x)) \ 53 if (!(x)) \
54 printk("ARP_NF_ASSERT: %s:%s:%u\n", \ 54 printk("ARP_NF_ASSERT: %s:%s:%u\n", \
55 __FUNCTION__, __FILE__, __LINE__); \ 55 __func__, __FILE__, __LINE__); \
56} while(0) 56} while(0)
57#else 57#else
58#define ARP_NF_ASSERT(x) 58#define ARP_NF_ASSERT(x)
59#endif 59#endif
60 60
61static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, 61static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
62 char *hdr_addr, int len) 62 const char *hdr_addr, int len)
63{ 63{
64 int i, ret; 64 int i, ret;
65 65
@@ -80,8 +80,8 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
80 const char *outdev, 80 const char *outdev,
81 const struct arpt_arp *arpinfo) 81 const struct arpt_arp *arpinfo)
82{ 82{
83 char *arpptr = (char *)(arphdr + 1); 83 const char *arpptr = (char *)(arphdr + 1);
84 char *src_devaddr, *tgt_devaddr; 84 const char *src_devaddr, *tgt_devaddr;
85 __be32 src_ipaddr, tgt_ipaddr; 85 __be32 src_ipaddr, tgt_ipaddr;
86 int i, ret; 86 int i, ret;
87 87
@@ -222,21 +222,18 @@ unsigned int arpt_do_table(struct sk_buff *skb,
222 unsigned int hook, 222 unsigned int hook,
223 const struct net_device *in, 223 const struct net_device *in,
224 const struct net_device *out, 224 const struct net_device *out,
225 struct arpt_table *table) 225 struct xt_table *table)
226{ 226{
227 static const char nulldevname[IFNAMSIZ]; 227 static const char nulldevname[IFNAMSIZ];
228 unsigned int verdict = NF_DROP; 228 unsigned int verdict = NF_DROP;
229 struct arphdr *arp; 229 const struct arphdr *arp;
230 bool hotdrop = false; 230 bool hotdrop = false;
231 struct arpt_entry *e, *back; 231 struct arpt_entry *e, *back;
232 const char *indev, *outdev; 232 const char *indev, *outdev;
233 void *table_base; 233 void *table_base;
234 struct xt_table_info *private; 234 const struct xt_table_info *private;
235 235
236 /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ 236 if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
237 if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
238 (2 * skb->dev->addr_len) +
239 (2 * sizeof(u32)))))
240 return NF_DROP; 237 return NF_DROP;
241 238
242 indev = in ? in->name : nulldevname; 239 indev = in ? in->name : nulldevname;
@@ -355,7 +352,7 @@ static int mark_source_chains(struct xt_table_info *newinfo,
355 e->counters.pcnt = pos; 352 e->counters.pcnt = pos;
356 353
357 for (;;) { 354 for (;;) {
358 struct arpt_standard_target *t 355 const struct arpt_standard_target *t
359 = (void *)arpt_get_target(e); 356 = (void *)arpt_get_target(e);
360 int visited = e->comefrom & (1 << hook); 357 int visited = e->comefrom & (1 << hook);
361 358
@@ -440,7 +437,7 @@ static int mark_source_chains(struct xt_table_info *newinfo,
440 437
441static inline int check_entry(struct arpt_entry *e, const char *name) 438static inline int check_entry(struct arpt_entry *e, const char *name)
442{ 439{
443 struct arpt_entry_target *t; 440 const struct arpt_entry_target *t;
444 441
445 if (!arp_checkentry(&e->arp)) { 442 if (!arp_checkentry(&e->arp)) {
446 duprintf("arp_tables: arp check failed %p %s.\n", e, name); 443 duprintf("arp_tables: arp check failed %p %s.\n", e, name);
@@ -460,7 +457,7 @@ static inline int check_entry(struct arpt_entry *e, const char *name)
460static inline int check_target(struct arpt_entry *e, const char *name) 457static inline int check_target(struct arpt_entry *e, const char *name)
461{ 458{
462 struct arpt_entry_target *t; 459 struct arpt_entry_target *t;
463 struct arpt_target *target; 460 struct xt_target *target;
464 int ret; 461 int ret;
465 462
466 t = arpt_get_target(e); 463 t = arpt_get_target(e);
@@ -483,7 +480,7 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
483 unsigned int *i) 480 unsigned int *i)
484{ 481{
485 struct arpt_entry_target *t; 482 struct arpt_entry_target *t;
486 struct arpt_target *target; 483 struct xt_target *target;
487 int ret; 484 int ret;
488 485
489 ret = check_entry(e, name); 486 ret = check_entry(e, name);
@@ -709,11 +706,11 @@ static void get_counters(const struct xt_table_info *t,
709 } 706 }
710} 707}
711 708
712static inline struct xt_counters *alloc_counters(struct arpt_table *table) 709static inline struct xt_counters *alloc_counters(struct xt_table *table)
713{ 710{
714 unsigned int countersize; 711 unsigned int countersize;
715 struct xt_counters *counters; 712 struct xt_counters *counters;
716 struct xt_table_info *private = table->private; 713 const struct xt_table_info *private = table->private;
717 714
718 /* We need atomic snapshot of counters: rest doesn't change 715 /* We need atomic snapshot of counters: rest doesn't change
719 * (other than comefrom, which userspace doesn't care 716 * (other than comefrom, which userspace doesn't care
@@ -734,7 +731,7 @@ static inline struct xt_counters *alloc_counters(struct arpt_table *table)
734} 731}
735 732
736static int copy_entries_to_user(unsigned int total_size, 733static int copy_entries_to_user(unsigned int total_size,
737 struct arpt_table *table, 734 struct xt_table *table,
738 void __user *userptr) 735 void __user *userptr)
739{ 736{
740 unsigned int off, num; 737 unsigned int off, num;
@@ -854,7 +851,7 @@ static int compat_table_info(const struct xt_table_info *info,
854static int get_info(struct net *net, void __user *user, int *len, int compat) 851static int get_info(struct net *net, void __user *user, int *len, int compat)
855{ 852{
856 char name[ARPT_TABLE_MAXNAMELEN]; 853 char name[ARPT_TABLE_MAXNAMELEN];
857 struct arpt_table *t; 854 struct xt_table *t;
858 int ret; 855 int ret;
859 856
860 if (*len != sizeof(struct arpt_getinfo)) { 857 if (*len != sizeof(struct arpt_getinfo)) {
@@ -875,7 +872,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
875 "arptable_%s", name); 872 "arptable_%s", name);
876 if (t && !IS_ERR(t)) { 873 if (t && !IS_ERR(t)) {
877 struct arpt_getinfo info; 874 struct arpt_getinfo info;
878 struct xt_table_info *private = t->private; 875 const struct xt_table_info *private = t->private;
879 876
880#ifdef CONFIG_COMPAT 877#ifdef CONFIG_COMPAT
881 if (compat) { 878 if (compat) {
@@ -914,7 +911,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
914{ 911{
915 int ret; 912 int ret;
916 struct arpt_get_entries get; 913 struct arpt_get_entries get;
917 struct arpt_table *t; 914 struct xt_table *t;
918 915
919 if (*len < sizeof(get)) { 916 if (*len < sizeof(get)) {
920 duprintf("get_entries: %u < %Zu\n", *len, sizeof(get)); 917 duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
@@ -930,7 +927,8 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
930 927
931 t = xt_find_table_lock(net, NF_ARP, get.name); 928 t = xt_find_table_lock(net, NF_ARP, get.name);
932 if (t && !IS_ERR(t)) { 929 if (t && !IS_ERR(t)) {
933 struct xt_table_info *private = t->private; 930 const struct xt_table_info *private = t->private;
931
934 duprintf("t->private->number = %u\n", 932 duprintf("t->private->number = %u\n",
935 private->number); 933 private->number);
936 if (get.size == private->size) 934 if (get.size == private->size)
@@ -939,7 +937,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
939 else { 937 else {
940 duprintf("get_entries: I've got %u not %u!\n", 938 duprintf("get_entries: I've got %u not %u!\n",
941 private->size, get.size); 939 private->size, get.size);
942 ret = -EINVAL; 940 ret = -EAGAIN;
943 } 941 }
944 module_put(t->me); 942 module_put(t->me);
945 xt_table_unlock(t); 943 xt_table_unlock(t);
@@ -956,7 +954,7 @@ static int __do_replace(struct net *net, const char *name,
956 void __user *counters_ptr) 954 void __user *counters_ptr)
957{ 955{
958 int ret; 956 int ret;
959 struct arpt_table *t; 957 struct xt_table *t;
960 struct xt_table_info *oldinfo; 958 struct xt_table_info *oldinfo;
961 struct xt_counters *counters; 959 struct xt_counters *counters;
962 void *loc_cpu_old_entry; 960 void *loc_cpu_old_entry;
@@ -1090,11 +1088,11 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
1090 struct xt_counters_info tmp; 1088 struct xt_counters_info tmp;
1091 struct xt_counters *paddc; 1089 struct xt_counters *paddc;
1092 unsigned int num_counters; 1090 unsigned int num_counters;
1093 char *name; 1091 const char *name;
1094 int size; 1092 int size;
1095 void *ptmp; 1093 void *ptmp;
1096 struct arpt_table *t; 1094 struct xt_table *t;
1097 struct xt_table_info *private; 1095 const struct xt_table_info *private;
1098 int ret = 0; 1096 int ret = 0;
1099 void *loc_cpu_entry; 1097 void *loc_cpu_entry;
1100#ifdef CONFIG_COMPAT 1098#ifdef CONFIG_COMPAT
@@ -1499,11 +1497,11 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
1499 1497
1500 switch (cmd) { 1498 switch (cmd) {
1501 case ARPT_SO_SET_REPLACE: 1499 case ARPT_SO_SET_REPLACE:
1502 ret = compat_do_replace(sk->sk_net, user, len); 1500 ret = compat_do_replace(sock_net(sk), user, len);
1503 break; 1501 break;
1504 1502
1505 case ARPT_SO_SET_ADD_COUNTERS: 1503 case ARPT_SO_SET_ADD_COUNTERS:
1506 ret = do_add_counters(sk->sk_net, user, len, 1); 1504 ret = do_add_counters(sock_net(sk), user, len, 1);
1507 break; 1505 break;
1508 1506
1509 default: 1507 default:
@@ -1557,11 +1555,11 @@ out:
1557} 1555}
1558 1556
1559static int compat_copy_entries_to_user(unsigned int total_size, 1557static int compat_copy_entries_to_user(unsigned int total_size,
1560 struct arpt_table *table, 1558 struct xt_table *table,
1561 void __user *userptr) 1559 void __user *userptr)
1562{ 1560{
1563 struct xt_counters *counters; 1561 struct xt_counters *counters;
1564 struct xt_table_info *private = table->private; 1562 const struct xt_table_info *private = table->private;
1565 void __user *pos; 1563 void __user *pos;
1566 unsigned int size; 1564 unsigned int size;
1567 int ret = 0; 1565 int ret = 0;
@@ -1595,7 +1593,7 @@ static int compat_get_entries(struct net *net,
1595{ 1593{
1596 int ret; 1594 int ret;
1597 struct compat_arpt_get_entries get; 1595 struct compat_arpt_get_entries get;
1598 struct arpt_table *t; 1596 struct xt_table *t;
1599 1597
1600 if (*len < sizeof(get)) { 1598 if (*len < sizeof(get)) {
1601 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get)); 1599 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
@@ -1612,7 +1610,7 @@ static int compat_get_entries(struct net *net,
1612 xt_compat_lock(NF_ARP); 1610 xt_compat_lock(NF_ARP);
1613 t = xt_find_table_lock(net, NF_ARP, get.name); 1611 t = xt_find_table_lock(net, NF_ARP, get.name);
1614 if (t && !IS_ERR(t)) { 1612 if (t && !IS_ERR(t)) {
1615 struct xt_table_info *private = t->private; 1613 const struct xt_table_info *private = t->private;
1616 struct xt_table_info info; 1614 struct xt_table_info info;
1617 1615
1618 duprintf("t->private->number = %u\n", private->number); 1616 duprintf("t->private->number = %u\n", private->number);
@@ -1623,7 +1621,7 @@ static int compat_get_entries(struct net *net,
1623 } else if (!ret) { 1621 } else if (!ret) {
1624 duprintf("compat_get_entries: I've got %u not %u!\n", 1622 duprintf("compat_get_entries: I've got %u not %u!\n",
1625 private->size, get.size); 1623 private->size, get.size);
1626 ret = -EINVAL; 1624 ret = -EAGAIN;
1627 } 1625 }
1628 xt_compat_flush_offsets(NF_ARP); 1626 xt_compat_flush_offsets(NF_ARP);
1629 module_put(t->me); 1627 module_put(t->me);
@@ -1647,10 +1645,10 @@ static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user,
1647 1645
1648 switch (cmd) { 1646 switch (cmd) {
1649 case ARPT_SO_GET_INFO: 1647 case ARPT_SO_GET_INFO:
1650 ret = get_info(sk->sk_net, user, len, 1); 1648 ret = get_info(sock_net(sk), user, len, 1);
1651 break; 1649 break;
1652 case ARPT_SO_GET_ENTRIES: 1650 case ARPT_SO_GET_ENTRIES:
1653 ret = compat_get_entries(sk->sk_net, user, len); 1651 ret = compat_get_entries(sock_net(sk), user, len);
1654 break; 1652 break;
1655 default: 1653 default:
1656 ret = do_arpt_get_ctl(sk, cmd, user, len); 1654 ret = do_arpt_get_ctl(sk, cmd, user, len);
@@ -1668,11 +1666,11 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
1668 1666
1669 switch (cmd) { 1667 switch (cmd) {
1670 case ARPT_SO_SET_REPLACE: 1668 case ARPT_SO_SET_REPLACE:
1671 ret = do_replace(sk->sk_net, user, len); 1669 ret = do_replace(sock_net(sk), user, len);
1672 break; 1670 break;
1673 1671
1674 case ARPT_SO_SET_ADD_COUNTERS: 1672 case ARPT_SO_SET_ADD_COUNTERS:
1675 ret = do_add_counters(sk->sk_net, user, len, 0); 1673 ret = do_add_counters(sock_net(sk), user, len, 0);
1676 break; 1674 break;
1677 1675
1678 default: 1676 default:
@@ -1692,11 +1690,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1692 1690
1693 switch (cmd) { 1691 switch (cmd) {
1694 case ARPT_SO_GET_INFO: 1692 case ARPT_SO_GET_INFO:
1695 ret = get_info(sk->sk_net, user, len, 0); 1693 ret = get_info(sock_net(sk), user, len, 0);
1696 break; 1694 break;
1697 1695
1698 case ARPT_SO_GET_ENTRIES: 1696 case ARPT_SO_GET_ENTRIES:
1699 ret = get_entries(sk->sk_net, user, len); 1697 ret = get_entries(sock_net(sk), user, len);
1700 break; 1698 break;
1701 1699
1702 case ARPT_SO_GET_REVISION_TARGET: { 1700 case ARPT_SO_GET_REVISION_TARGET: {
@@ -1725,9 +1723,8 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1725 return ret; 1723 return ret;
1726} 1724}
1727 1725
1728struct arpt_table *arpt_register_table(struct net *net, 1726struct xt_table *arpt_register_table(struct net *net, struct xt_table *table,
1729 struct arpt_table *table, 1727 const struct arpt_replace *repl)
1730 const struct arpt_replace *repl)
1731{ 1728{
1732 int ret; 1729 int ret;
1733 struct xt_table_info *newinfo; 1730 struct xt_table_info *newinfo;
@@ -1769,7 +1766,7 @@ out:
1769 return ERR_PTR(ret); 1766 return ERR_PTR(ret);
1770} 1767}
1771 1768
1772void arpt_unregister_table(struct arpt_table *table) 1769void arpt_unregister_table(struct xt_table *table)
1773{ 1770{
1774 struct xt_table_info *private; 1771 struct xt_table_info *private;
1775 void *loc_cpu_entry; 1772 void *loc_cpu_entry;
@@ -1787,7 +1784,7 @@ void arpt_unregister_table(struct arpt_table *table)
1787} 1784}
1788 1785
1789/* The built-in targets: standard (NULL) and error. */ 1786/* The built-in targets: standard (NULL) and error. */
1790static struct arpt_target arpt_standard_target __read_mostly = { 1787static struct xt_target arpt_standard_target __read_mostly = {
1791 .name = ARPT_STANDARD_TARGET, 1788 .name = ARPT_STANDARD_TARGET,
1792 .targetsize = sizeof(int), 1789 .targetsize = sizeof(int),
1793 .family = NF_ARP, 1790 .family = NF_ARP,
@@ -1798,7 +1795,7 @@ static struct arpt_target arpt_standard_target __read_mostly = {
1798#endif 1795#endif
1799}; 1796};
1800 1797
1801static struct arpt_target arpt_error_target __read_mostly = { 1798static struct xt_target arpt_error_target __read_mostly = {
1802 .name = ARPT_ERROR_TARGET, 1799 .name = ARPT_ERROR_TARGET,
1803 .target = arpt_error, 1800 .target = arpt_error,
1804 .targetsize = ARPT_FUNCTION_MAXNAMELEN, 1801 .targetsize = ARPT_FUNCTION_MAXNAMELEN,
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 3f4222b0a803..a385959d2655 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -15,7 +15,7 @@ target(struct sk_buff *skb,
15 const void *targinfo) 15 const void *targinfo)
16{ 16{
17 const struct arpt_mangle *mangle = targinfo; 17 const struct arpt_mangle *mangle = targinfo;
18 struct arphdr *arp; 18 const struct arphdr *arp;
19 unsigned char *arpptr; 19 unsigned char *arpptr;
20 int pln, hln; 20 int pln, hln;
21 21
@@ -73,8 +73,9 @@ checkentry(const char *tablename, const void *e, const struct xt_target *target,
73 return true; 73 return true;
74} 74}
75 75
76static struct arpt_target arpt_mangle_reg __read_mostly = { 76static struct xt_target arpt_mangle_reg __read_mostly = {
77 .name = "mangle", 77 .name = "mangle",
78 .family = NF_ARP,
78 .target = target, 79 .target = target,
79 .targetsize = sizeof(struct arpt_mangle), 80 .targetsize = sizeof(struct arpt_mangle),
80 .checkentry = checkentry, 81 .checkentry = checkentry,
@@ -83,15 +84,12 @@ static struct arpt_target arpt_mangle_reg __read_mostly = {
83 84
84static int __init arpt_mangle_init(void) 85static int __init arpt_mangle_init(void)
85{ 86{
86 if (arpt_register_target(&arpt_mangle_reg)) 87 return xt_register_target(&arpt_mangle_reg);
87 return -EINVAL;
88
89 return 0;
90} 88}
91 89
92static void __exit arpt_mangle_fini(void) 90static void __exit arpt_mangle_fini(void)
93{ 91{
94 arpt_unregister_target(&arpt_mangle_reg); 92 xt_unregister_target(&arpt_mangle_reg);
95} 93}
96 94
97module_init(arpt_mangle_init); 95module_init(arpt_mangle_init);
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 4e9c496a30c2..3be4d07e7ed9 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -45,10 +45,10 @@ static struct
45 .term = ARPT_ERROR_INIT, 45 .term = ARPT_ERROR_INIT,
46}; 46};
47 47
48static struct arpt_table packet_filter = { 48static struct xt_table packet_filter = {
49 .name = "filter", 49 .name = "filter",
50 .valid_hooks = FILTER_VALID_HOOKS, 50 .valid_hooks = FILTER_VALID_HOOKS,
51 .lock = RW_LOCK_UNLOCKED, 51 .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
52 .private = NULL, 52 .private = NULL,
53 .me = THIS_MODULE, 53 .me = THIS_MODULE,
54 .af = NF_ARP, 54 .af = NF_ARP,
@@ -70,18 +70,21 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = {
70 .owner = THIS_MODULE, 70 .owner = THIS_MODULE,
71 .pf = NF_ARP, 71 .pf = NF_ARP,
72 .hooknum = NF_ARP_IN, 72 .hooknum = NF_ARP_IN,
73 .priority = NF_IP_PRI_FILTER,
73 }, 74 },
74 { 75 {
75 .hook = arpt_hook, 76 .hook = arpt_hook,
76 .owner = THIS_MODULE, 77 .owner = THIS_MODULE,
77 .pf = NF_ARP, 78 .pf = NF_ARP,
78 .hooknum = NF_ARP_OUT, 79 .hooknum = NF_ARP_OUT,
80 .priority = NF_IP_PRI_FILTER,
79 }, 81 },
80 { 82 {
81 .hook = arpt_hook, 83 .hook = arpt_hook,
82 .owner = THIS_MODULE, 84 .owner = THIS_MODULE,
83 .pf = NF_ARP, 85 .pf = NF_ARP,
84 .hooknum = NF_ARP_FORWARD, 86 .hooknum = NF_ARP_FORWARD,
87 .priority = NF_IP_PRI_FILTER,
85 }, 88 },
86}; 89};
87 90
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 4dc162894cb2..719be29f7506 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -481,7 +481,7 @@ ipq_rcv_dev_event(struct notifier_block *this,
481{ 481{
482 struct net_device *dev = ptr; 482 struct net_device *dev = ptr;
483 483
484 if (dev->nd_net != &init_net) 484 if (dev_net(dev) != &init_net)
485 return NOTIFY_DONE; 485 return NOTIFY_DONE;
486 486
487 /* Drop any packets associated with the downed device */ 487 /* Drop any packets associated with the downed device */
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 600737f122d2..4e7c719445c2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -53,7 +53,7 @@ MODULE_DESCRIPTION("IPv4 packet filter");
53do { \ 53do { \
54 if (!(x)) \ 54 if (!(x)) \
55 printk("IP_NF_ASSERT: %s:%s:%u\n", \ 55 printk("IP_NF_ASSERT: %s:%s:%u\n", \
56 __FUNCTION__, __FILE__, __LINE__); \ 56 __func__, __FILE__, __LINE__); \
57} while(0) 57} while(0)
58#else 58#else
59#define IP_NF_ASSERT(x) 59#define IP_NF_ASSERT(x)
@@ -296,7 +296,7 @@ static void trace_packet(struct sk_buff *skb,
296 struct ipt_entry *e) 296 struct ipt_entry *e)
297{ 297{
298 void *table_base; 298 void *table_base;
299 struct ipt_entry *root; 299 const struct ipt_entry *root;
300 char *hookname, *chainname, *comment; 300 char *hookname, *chainname, *comment;
301 unsigned int rulenum = 0; 301 unsigned int rulenum = 0;
302 302
@@ -327,7 +327,7 @@ ipt_do_table(struct sk_buff *skb,
327{ 327{
328 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); 328 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
329 u_int16_t offset; 329 u_int16_t offset;
330 struct iphdr *ip; 330 const struct iphdr *ip;
331 u_int16_t datalen; 331 u_int16_t datalen;
332 bool hotdrop = false; 332 bool hotdrop = false;
333 /* Initializing verdict to NF_DROP keeps gcc happy. */ 333 /* Initializing verdict to NF_DROP keeps gcc happy. */
@@ -926,7 +926,7 @@ static struct xt_counters * alloc_counters(struct xt_table *table)
926{ 926{
927 unsigned int countersize; 927 unsigned int countersize;
928 struct xt_counters *counters; 928 struct xt_counters *counters;
929 struct xt_table_info *private = table->private; 929 const struct xt_table_info *private = table->private;
930 930
931 /* We need atomic snapshot of counters: rest doesn't change 931 /* We need atomic snapshot of counters: rest doesn't change
932 (other than comefrom, which userspace doesn't care 932 (other than comefrom, which userspace doesn't care
@@ -953,9 +953,9 @@ copy_entries_to_user(unsigned int total_size,
953 unsigned int off, num; 953 unsigned int off, num;
954 struct ipt_entry *e; 954 struct ipt_entry *e;
955 struct xt_counters *counters; 955 struct xt_counters *counters;
956 struct xt_table_info *private = table->private; 956 const struct xt_table_info *private = table->private;
957 int ret = 0; 957 int ret = 0;
958 void *loc_cpu_entry; 958 const void *loc_cpu_entry;
959 959
960 counters = alloc_counters(table); 960 counters = alloc_counters(table);
961 if (IS_ERR(counters)) 961 if (IS_ERR(counters))
@@ -975,8 +975,8 @@ copy_entries_to_user(unsigned int total_size,
975 /* ... then go back and fix counters and names */ 975 /* ... then go back and fix counters and names */
976 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ 976 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
977 unsigned int i; 977 unsigned int i;
978 struct ipt_entry_match *m; 978 const struct ipt_entry_match *m;
979 struct ipt_entry_target *t; 979 const struct ipt_entry_target *t;
980 980
981 e = (struct ipt_entry *)(loc_cpu_entry + off); 981 e = (struct ipt_entry *)(loc_cpu_entry + off);
982 if (copy_to_user(userptr + off 982 if (copy_to_user(userptr + off
@@ -1116,7 +1116,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
1116 "iptable_%s", name); 1116 "iptable_%s", name);
1117 if (t && !IS_ERR(t)) { 1117 if (t && !IS_ERR(t)) {
1118 struct ipt_getinfo info; 1118 struct ipt_getinfo info;
1119 struct xt_table_info *private = t->private; 1119 const struct xt_table_info *private = t->private;
1120 1120
1121#ifdef CONFIG_COMPAT 1121#ifdef CONFIG_COMPAT
1122 if (compat) { 1122 if (compat) {
@@ -1172,7 +1172,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
1172 1172
1173 t = xt_find_table_lock(net, AF_INET, get.name); 1173 t = xt_find_table_lock(net, AF_INET, get.name);
1174 if (t && !IS_ERR(t)) { 1174 if (t && !IS_ERR(t)) {
1175 struct xt_table_info *private = t->private; 1175 const struct xt_table_info *private = t->private;
1176 duprintf("t->private->number = %u\n", private->number); 1176 duprintf("t->private->number = %u\n", private->number);
1177 if (get.size == private->size) 1177 if (get.size == private->size)
1178 ret = copy_entries_to_user(private->size, 1178 ret = copy_entries_to_user(private->size,
@@ -1180,7 +1180,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
1180 else { 1180 else {
1181 duprintf("get_entries: I've got %u not %u!\n", 1181 duprintf("get_entries: I've got %u not %u!\n",
1182 private->size, get.size); 1182 private->size, get.size);
1183 ret = -EINVAL; 1183 ret = -EAGAIN;
1184 } 1184 }
1185 module_put(t->me); 1185 module_put(t->me);
1186 xt_table_unlock(t); 1186 xt_table_unlock(t);
@@ -1337,11 +1337,11 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
1337 struct xt_counters_info tmp; 1337 struct xt_counters_info tmp;
1338 struct xt_counters *paddc; 1338 struct xt_counters *paddc;
1339 unsigned int num_counters; 1339 unsigned int num_counters;
1340 char *name; 1340 const char *name;
1341 int size; 1341 int size;
1342 void *ptmp; 1342 void *ptmp;
1343 struct xt_table *t; 1343 struct xt_table *t;
1344 struct xt_table_info *private; 1344 const struct xt_table_info *private;
1345 int ret = 0; 1345 int ret = 0;
1346 void *loc_cpu_entry; 1346 void *loc_cpu_entry;
1347#ifdef CONFIG_COMPAT 1347#ifdef CONFIG_COMPAT
@@ -1852,11 +1852,11 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
1852 1852
1853 switch (cmd) { 1853 switch (cmd) {
1854 case IPT_SO_SET_REPLACE: 1854 case IPT_SO_SET_REPLACE:
1855 ret = compat_do_replace(sk->sk_net, user, len); 1855 ret = compat_do_replace(sock_net(sk), user, len);
1856 break; 1856 break;
1857 1857
1858 case IPT_SO_SET_ADD_COUNTERS: 1858 case IPT_SO_SET_ADD_COUNTERS:
1859 ret = do_add_counters(sk->sk_net, user, len, 1); 1859 ret = do_add_counters(sock_net(sk), user, len, 1);
1860 break; 1860 break;
1861 1861
1862 default: 1862 default:
@@ -1878,11 +1878,11 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1878 void __user *userptr) 1878 void __user *userptr)
1879{ 1879{
1880 struct xt_counters *counters; 1880 struct xt_counters *counters;
1881 struct xt_table_info *private = table->private; 1881 const struct xt_table_info *private = table->private;
1882 void __user *pos; 1882 void __user *pos;
1883 unsigned int size; 1883 unsigned int size;
1884 int ret = 0; 1884 int ret = 0;
1885 void *loc_cpu_entry; 1885 const void *loc_cpu_entry;
1886 unsigned int i = 0; 1886 unsigned int i = 0;
1887 1887
1888 counters = alloc_counters(table); 1888 counters = alloc_counters(table);
@@ -1929,7 +1929,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1929 xt_compat_lock(AF_INET); 1929 xt_compat_lock(AF_INET);
1930 t = xt_find_table_lock(net, AF_INET, get.name); 1930 t = xt_find_table_lock(net, AF_INET, get.name);
1931 if (t && !IS_ERR(t)) { 1931 if (t && !IS_ERR(t)) {
1932 struct xt_table_info *private = t->private; 1932 const struct xt_table_info *private = t->private;
1933 struct xt_table_info info; 1933 struct xt_table_info info;
1934 duprintf("t->private->number = %u\n", private->number); 1934 duprintf("t->private->number = %u\n", private->number);
1935 ret = compat_table_info(private, &info); 1935 ret = compat_table_info(private, &info);
@@ -1939,7 +1939,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1939 } else if (!ret) { 1939 } else if (!ret) {
1940 duprintf("compat_get_entries: I've got %u not %u!\n", 1940 duprintf("compat_get_entries: I've got %u not %u!\n",
1941 private->size, get.size); 1941 private->size, get.size);
1942 ret = -EINVAL; 1942 ret = -EAGAIN;
1943 } 1943 }
1944 xt_compat_flush_offsets(AF_INET); 1944 xt_compat_flush_offsets(AF_INET);
1945 module_put(t->me); 1945 module_put(t->me);
@@ -1963,10 +1963,10 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1963 1963
1964 switch (cmd) { 1964 switch (cmd) {
1965 case IPT_SO_GET_INFO: 1965 case IPT_SO_GET_INFO:
1966 ret = get_info(sk->sk_net, user, len, 1); 1966 ret = get_info(sock_net(sk), user, len, 1);
1967 break; 1967 break;
1968 case IPT_SO_GET_ENTRIES: 1968 case IPT_SO_GET_ENTRIES:
1969 ret = compat_get_entries(sk->sk_net, user, len); 1969 ret = compat_get_entries(sock_net(sk), user, len);
1970 break; 1970 break;
1971 default: 1971 default:
1972 ret = do_ipt_get_ctl(sk, cmd, user, len); 1972 ret = do_ipt_get_ctl(sk, cmd, user, len);
@@ -1985,11 +1985,11 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1985 1985
1986 switch (cmd) { 1986 switch (cmd) {
1987 case IPT_SO_SET_REPLACE: 1987 case IPT_SO_SET_REPLACE:
1988 ret = do_replace(sk->sk_net, user, len); 1988 ret = do_replace(sock_net(sk), user, len);
1989 break; 1989 break;
1990 1990
1991 case IPT_SO_SET_ADD_COUNTERS: 1991 case IPT_SO_SET_ADD_COUNTERS:
1992 ret = do_add_counters(sk->sk_net, user, len, 0); 1992 ret = do_add_counters(sock_net(sk), user, len, 0);
1993 break; 1993 break;
1994 1994
1995 default: 1995 default:
@@ -2010,11 +2010,11 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2010 2010
2011 switch (cmd) { 2011 switch (cmd) {
2012 case IPT_SO_GET_INFO: 2012 case IPT_SO_GET_INFO:
2013 ret = get_info(sk->sk_net, user, len, 0); 2013 ret = get_info(sock_net(sk), user, len, 0);
2014 break; 2014 break;
2015 2015
2016 case IPT_SO_GET_ENTRIES: 2016 case IPT_SO_GET_ENTRIES:
2017 ret = get_entries(sk->sk_net, user, len); 2017 ret = get_entries(sock_net(sk), user, len);
2018 break; 2018 break;
2019 2019
2020 case IPT_SO_GET_REVISION_MATCH: 2020 case IPT_SO_GET_REVISION_MATCH:
@@ -2130,7 +2130,8 @@ icmp_match(const struct sk_buff *skb,
2130 unsigned int protoff, 2130 unsigned int protoff,
2131 bool *hotdrop) 2131 bool *hotdrop)
2132{ 2132{
2133 struct icmphdr _icmph, *ic; 2133 const struct icmphdr *ic;
2134 struct icmphdr _icmph;
2134 const struct ipt_icmp *icmpinfo = matchinfo; 2135 const struct ipt_icmp *icmpinfo = matchinfo;
2135 2136
2136 /* Must not be a fragment. */ 2137 /* Must not be a fragment. */
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index a12dd329e208..22d8e7cd9197 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -144,7 +144,7 @@ clusterip_config_init_nodelist(struct clusterip_config *c,
144} 144}
145 145
146static struct clusterip_config * 146static struct clusterip_config *
147clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip, 147clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
148 struct net_device *dev) 148 struct net_device *dev)
149{ 149{
150 struct clusterip_config *c; 150 struct clusterip_config *c;
@@ -333,7 +333,7 @@ clusterip_tg(struct sk_buff *skb, const struct net_device *in,
333 } 333 }
334 334
335#ifdef DEBUG 335#ifdef DEBUG
336 DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 336 nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
337#endif 337#endif
338 pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); 338 pr_debug("hash=%u ct_hash=%u ", hash, ct->mark);
339 if (!clusterip_responsible(cipinfo->config, hash)) { 339 if (!clusterip_responsible(cipinfo->config, hash)) {
@@ -418,7 +418,7 @@ clusterip_tg_check(const char *tablename, const void *e_void,
418/* drop reference count of cluster config when rule is deleted */ 418/* drop reference count of cluster config when rule is deleted */
419static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo) 419static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo)
420{ 420{
421 struct ipt_clusterip_tgt_info *cipinfo = targinfo; 421 const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
422 422
423 /* if no more entries are referencing the config, remove it 423 /* if no more entries are referencing the config, remove it
424 * from the list and destroy the proc entry */ 424 * from the list and destroy the proc entry */
@@ -567,7 +567,7 @@ struct clusterip_seq_position {
567 567
568static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) 568static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
569{ 569{
570 struct proc_dir_entry *pde = s->private; 570 const struct proc_dir_entry *pde = s->private;
571 struct clusterip_config *c = pde->data; 571 struct clusterip_config *c = pde->data;
572 unsigned int weight; 572 unsigned int weight;
573 u_int32_t local_nodes; 573 u_int32_t local_nodes;
@@ -594,7 +594,7 @@ static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
594 594
595static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) 595static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
596{ 596{
597 struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; 597 struct clusterip_seq_position *idx = v;
598 598
599 *pos = ++idx->pos; 599 *pos = ++idx->pos;
600 if (*pos >= idx->weight) { 600 if (*pos >= idx->weight) {
@@ -613,7 +613,7 @@ static void clusterip_seq_stop(struct seq_file *s, void *v)
613 613
614static int clusterip_seq_show(struct seq_file *s, void *v) 614static int clusterip_seq_show(struct seq_file *s, void *v)
615{ 615{
616 struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; 616 struct clusterip_seq_position *idx = v;
617 617
618 if (idx->pos != 0) 618 if (idx->pos != 0)
619 seq_putc(s, ','); 619 seq_putc(s, ',');
@@ -669,7 +669,7 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
669{ 669{
670#define PROC_WRITELEN 10 670#define PROC_WRITELEN 10
671 char buffer[PROC_WRITELEN+1]; 671 char buffer[PROC_WRITELEN+1];
672 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 672 const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
673 struct clusterip_config *c = pde->data; 673 struct clusterip_config *c = pde->data;
674 unsigned long nodenum; 674 unsigned long nodenum;
675 675
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 21395bc2b27f..d60139c134ca 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -100,7 +100,7 @@ ecn_tg_check(const char *tablename, const void *e_void,
100 const struct xt_target *target, void *targinfo, 100 const struct xt_target *target, void *targinfo,
101 unsigned int hook_mask) 101 unsigned int hook_mask)
102{ 102{
103 const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo; 103 const struct ipt_ECN_info *einfo = targinfo;
104 const struct ipt_entry *e = e_void; 104 const struct ipt_entry *e = e_void;
105 105
106 if (einfo->operation & IPT_ECN_OP_MASK) { 106 if (einfo->operation & IPT_ECN_OP_MASK) {
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index b38d7850f506..0af14137137b 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -76,7 +76,8 @@ static void dump_packet(const struct nf_loginfo *info,
76 76
77 if ((logflags & IPT_LOG_IPOPT) 77 if ((logflags & IPT_LOG_IPOPT)
78 && ih->ihl * 4 > sizeof(struct iphdr)) { 78 && ih->ihl * 4 > sizeof(struct iphdr)) {
79 unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op; 79 const unsigned char *op;
80 unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
80 unsigned int i, optsize; 81 unsigned int i, optsize;
81 82
82 optsize = ih->ihl * 4 - sizeof(struct iphdr); 83 optsize = ih->ihl * 4 - sizeof(struct iphdr);
@@ -338,12 +339,16 @@ static void dump_packet(const struct nf_loginfo *info,
338 if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) { 339 if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
339 read_lock_bh(&skb->sk->sk_callback_lock); 340 read_lock_bh(&skb->sk->sk_callback_lock);
340 if (skb->sk->sk_socket && skb->sk->sk_socket->file) 341 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
341 printk("UID=%u GID=%u", 342 printk("UID=%u GID=%u ",
342 skb->sk->sk_socket->file->f_uid, 343 skb->sk->sk_socket->file->f_uid,
343 skb->sk->sk_socket->file->f_gid); 344 skb->sk->sk_socket->file->f_gid);
344 read_unlock_bh(&skb->sk->sk_callback_lock); 345 read_unlock_bh(&skb->sk->sk_callback_lock);
345 } 346 }
346 347
348 /* Max length: 16 "MARK=0xFFFFFFFF " */
349 if (!iphoff && skb->mark)
350 printk("MARK=0x%x ", skb->mark);
351
347 /* Proto Max log string length */ 352 /* Proto Max log string length */
348 /* IP: 40+46+6+11+127 = 230 */ 353 /* IP: 40+46+6+11+127 = 230 */
349 /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ 354 /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index d80fee8327e4..84c26dd27d81 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -77,7 +77,7 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in,
77 return NF_ACCEPT; 77 return NF_ACCEPT;
78 78
79 mr = targinfo; 79 mr = targinfo;
80 rt = (struct rtable *)skb->dst; 80 rt = skb->rtable;
81 newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); 81 newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
82 if (!newsrc) { 82 if (!newsrc) {
83 printk("MASQUERADE: %s ate my IP address\n", out->name); 83 printk("MASQUERADE: %s ate my IP address\n", out->name);
@@ -120,7 +120,7 @@ static int masq_device_event(struct notifier_block *this,
120{ 120{
121 const struct net_device *dev = ptr; 121 const struct net_device *dev = ptr;
122 122
123 if (dev->nd_net != &init_net) 123 if (dev_net(dev) != &init_net)
124 return NOTIFY_DONE; 124 return NOTIFY_DONE;
125 125
126 if (event == NETDEV_DOWN) { 126 if (event == NETDEV_DOWN) {
@@ -139,18 +139,8 @@ static int masq_inet_event(struct notifier_block *this,
139 unsigned long event, 139 unsigned long event,
140 void *ptr) 140 void *ptr)
141{ 141{
142 const struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; 142 struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
143 143 return masq_device_event(this, event, dev);
144 if (event == NETDEV_DOWN) {
145 /* IP address was deleted. Search entire table for
146 conntracks which were associated with that device,
147 and forget them. */
148 NF_CT_ASSERT(dev->ifindex != 0);
149
150 nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
151 }
152
153 return NOTIFY_DONE;
154} 144}
155 145
156static struct notifier_block masq_dev_notifier = { 146static struct notifier_block masq_dev_notifier = {
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 22606e2baa16..2639872849da 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -35,8 +35,10 @@ MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4");
35static void send_reset(struct sk_buff *oldskb, int hook) 35static void send_reset(struct sk_buff *oldskb, int hook)
36{ 36{
37 struct sk_buff *nskb; 37 struct sk_buff *nskb;
38 struct iphdr *oiph, *niph; 38 const struct iphdr *oiph;
39 struct tcphdr _otcph, *oth, *tcph; 39 struct iphdr *niph;
40 const struct tcphdr *oth;
41 struct tcphdr _otcph, *tcph;
40 unsigned int addr_type; 42 unsigned int addr_type;
41 43
42 /* IP header checks: fragment. */ 44 /* IP header checks: fragment. */
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 50e06690eb5b..21cb053f5d7d 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -340,7 +340,7 @@ static void *recent_seq_start(struct seq_file *seq, loff_t *pos)
340static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos) 340static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
341{ 341{
342 struct recent_iter_state *st = seq->private; 342 struct recent_iter_state *st = seq->private;
343 struct recent_table *t = st->table; 343 const struct recent_table *t = st->table;
344 struct recent_entry *e = v; 344 struct recent_entry *e = v;
345 struct list_head *head = e->list.next; 345 struct list_head *head = e->list.next;
346 346
@@ -361,7 +361,7 @@ static void recent_seq_stop(struct seq_file *s, void *v)
361 361
362static int recent_seq_show(struct seq_file *seq, void *v) 362static int recent_seq_show(struct seq_file *seq, void *v)
363{ 363{
364 struct recent_entry *e = v; 364 const struct recent_entry *e = v;
365 unsigned int i; 365 unsigned int i;
366 366
367 i = (e->index - 1) % ip_pkt_list_tot; 367 i = (e->index - 1) % ip_pkt_list_tot;
@@ -396,7 +396,7 @@ static int recent_seq_open(struct inode *inode, struct file *file)
396static ssize_t recent_proc_write(struct file *file, const char __user *input, 396static ssize_t recent_proc_write(struct file *file, const char __user *input,
397 size_t size, loff_t *loff) 397 size_t size, loff_t *loff)
398{ 398{
399 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 399 const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
400 struct recent_table *t = pde->data; 400 struct recent_table *t = pde->data;
401 struct recent_entry *e; 401 struct recent_entry *e;
402 char buf[sizeof("+255.255.255.255")], *c = buf; 402 char buf[sizeof("+255.255.255.255")], *c = buf;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 69f3d7e6e96f..1ea677dcf845 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -56,20 +56,32 @@ static struct
56static struct xt_table packet_filter = { 56static struct xt_table packet_filter = {
57 .name = "filter", 57 .name = "filter",
58 .valid_hooks = FILTER_VALID_HOOKS, 58 .valid_hooks = FILTER_VALID_HOOKS,
59 .lock = RW_LOCK_UNLOCKED, 59 .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
60 .me = THIS_MODULE, 60 .me = THIS_MODULE,
61 .af = AF_INET, 61 .af = AF_INET,
62}; 62};
63 63
64/* The work comes in here from netfilter.c. */ 64/* The work comes in here from netfilter.c. */
65static unsigned int 65static unsigned int
66ipt_local_in_hook(unsigned int hook,
67 struct sk_buff *skb,
68 const struct net_device *in,
69 const struct net_device *out,
70 int (*okfn)(struct sk_buff *))
71{
72 return ipt_do_table(skb, hook, in, out,
73 nf_local_in_net(in, out)->ipv4.iptable_filter);
74}
75
76static unsigned int
66ipt_hook(unsigned int hook, 77ipt_hook(unsigned int hook,
67 struct sk_buff *skb, 78 struct sk_buff *skb,
68 const struct net_device *in, 79 const struct net_device *in,
69 const struct net_device *out, 80 const struct net_device *out,
70 int (*okfn)(struct sk_buff *)) 81 int (*okfn)(struct sk_buff *))
71{ 82{
72 return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter); 83 return ipt_do_table(skb, hook, in, out,
84 nf_forward_net(in, out)->ipv4.iptable_filter);
73} 85}
74 86
75static unsigned int 87static unsigned int
@@ -88,12 +100,13 @@ ipt_local_out_hook(unsigned int hook,
88 return NF_ACCEPT; 100 return NF_ACCEPT;
89 } 101 }
90 102
91 return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter); 103 return ipt_do_table(skb, hook, in, out,
104 nf_local_out_net(in, out)->ipv4.iptable_filter);
92} 105}
93 106
94static struct nf_hook_ops ipt_ops[] __read_mostly = { 107static struct nf_hook_ops ipt_ops[] __read_mostly = {
95 { 108 {
96 .hook = ipt_hook, 109 .hook = ipt_local_in_hook,
97 .owner = THIS_MODULE, 110 .owner = THIS_MODULE,
98 .pf = PF_INET, 111 .pf = PF_INET,
99 .hooknum = NF_INET_LOCAL_IN, 112 .hooknum = NF_INET_LOCAL_IN,
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index c55a210853a7..da59182f2226 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -67,20 +67,54 @@ static struct
67static struct xt_table packet_mangler = { 67static struct xt_table packet_mangler = {
68 .name = "mangle", 68 .name = "mangle",
69 .valid_hooks = MANGLE_VALID_HOOKS, 69 .valid_hooks = MANGLE_VALID_HOOKS,
70 .lock = RW_LOCK_UNLOCKED, 70 .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock),
71 .me = THIS_MODULE, 71 .me = THIS_MODULE,
72 .af = AF_INET, 72 .af = AF_INET,
73}; 73};
74 74
75/* The work comes in here from netfilter.c. */ 75/* The work comes in here from netfilter.c. */
76static unsigned int 76static unsigned int
77ipt_route_hook(unsigned int hook, 77ipt_pre_routing_hook(unsigned int hook,
78 struct sk_buff *skb,
79 const struct net_device *in,
80 const struct net_device *out,
81 int (*okfn)(struct sk_buff *))
82{
83 return ipt_do_table(skb, hook, in, out,
84 nf_pre_routing_net(in, out)->ipv4.iptable_mangle);
85}
86
87static unsigned int
88ipt_post_routing_hook(unsigned int hook,
89 struct sk_buff *skb,
90 const struct net_device *in,
91 const struct net_device *out,
92 int (*okfn)(struct sk_buff *))
93{
94 return ipt_do_table(skb, hook, in, out,
95 nf_post_routing_net(in, out)->ipv4.iptable_mangle);
96}
97
98static unsigned int
99ipt_local_in_hook(unsigned int hook,
100 struct sk_buff *skb,
101 const struct net_device *in,
102 const struct net_device *out,
103 int (*okfn)(struct sk_buff *))
104{
105 return ipt_do_table(skb, hook, in, out,
106 nf_local_in_net(in, out)->ipv4.iptable_mangle);
107}
108
109static unsigned int
110ipt_forward_hook(unsigned int hook,
78 struct sk_buff *skb, 111 struct sk_buff *skb,
79 const struct net_device *in, 112 const struct net_device *in,
80 const struct net_device *out, 113 const struct net_device *out,
81 int (*okfn)(struct sk_buff *)) 114 int (*okfn)(struct sk_buff *))
82{ 115{
83 return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle); 116 return ipt_do_table(skb, hook, in, out,
117 nf_forward_net(in, out)->ipv4.iptable_mangle);
84} 118}
85 119
86static unsigned int 120static unsigned int
@@ -112,7 +146,8 @@ ipt_local_hook(unsigned int hook,
112 daddr = iph->daddr; 146 daddr = iph->daddr;
113 tos = iph->tos; 147 tos = iph->tos;
114 148
115 ret = ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle); 149 ret = ipt_do_table(skb, hook, in, out,
150 nf_local_out_net(in, out)->ipv4.iptable_mangle);
116 /* Reroute for ANY change. */ 151 /* Reroute for ANY change. */
117 if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { 152 if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
118 iph = ip_hdr(skb); 153 iph = ip_hdr(skb);
@@ -130,21 +165,21 @@ ipt_local_hook(unsigned int hook,
130 165
131static struct nf_hook_ops ipt_ops[] __read_mostly = { 166static struct nf_hook_ops ipt_ops[] __read_mostly = {
132 { 167 {
133 .hook = ipt_route_hook, 168 .hook = ipt_pre_routing_hook,
134 .owner = THIS_MODULE, 169 .owner = THIS_MODULE,
135 .pf = PF_INET, 170 .pf = PF_INET,
136 .hooknum = NF_INET_PRE_ROUTING, 171 .hooknum = NF_INET_PRE_ROUTING,
137 .priority = NF_IP_PRI_MANGLE, 172 .priority = NF_IP_PRI_MANGLE,
138 }, 173 },
139 { 174 {
140 .hook = ipt_route_hook, 175 .hook = ipt_local_in_hook,
141 .owner = THIS_MODULE, 176 .owner = THIS_MODULE,
142 .pf = PF_INET, 177 .pf = PF_INET,
143 .hooknum = NF_INET_LOCAL_IN, 178 .hooknum = NF_INET_LOCAL_IN,
144 .priority = NF_IP_PRI_MANGLE, 179 .priority = NF_IP_PRI_MANGLE,
145 }, 180 },
146 { 181 {
147 .hook = ipt_route_hook, 182 .hook = ipt_forward_hook,
148 .owner = THIS_MODULE, 183 .owner = THIS_MODULE,
149 .pf = PF_INET, 184 .pf = PF_INET,
150 .hooknum = NF_INET_FORWARD, 185 .hooknum = NF_INET_FORWARD,
@@ -158,7 +193,7 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = {
158 .priority = NF_IP_PRI_MANGLE, 193 .priority = NF_IP_PRI_MANGLE,
159 }, 194 },
160 { 195 {
161 .hook = ipt_route_hook, 196 .hook = ipt_post_routing_hook,
162 .owner = THIS_MODULE, 197 .owner = THIS_MODULE,
163 .pf = PF_INET, 198 .pf = PF_INET,
164 .hooknum = NF_INET_POST_ROUTING, 199 .hooknum = NF_INET_POST_ROUTING,
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index e41fe8ca4e1c..fddce7754b72 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -39,7 +39,7 @@ static struct
39static struct xt_table packet_raw = { 39static struct xt_table packet_raw = {
40 .name = "raw", 40 .name = "raw",
41 .valid_hooks = RAW_VALID_HOOKS, 41 .valid_hooks = RAW_VALID_HOOKS,
42 .lock = RW_LOCK_UNLOCKED, 42 .lock = __RW_LOCK_UNLOCKED(packet_raw.lock),
43 .me = THIS_MODULE, 43 .me = THIS_MODULE,
44 .af = AF_INET, 44 .af = AF_INET,
45}; 45};
@@ -52,7 +52,8 @@ ipt_hook(unsigned int hook,
52 const struct net_device *out, 52 const struct net_device *out,
53 int (*okfn)(struct sk_buff *)) 53 int (*okfn)(struct sk_buff *))
54{ 54{
55 return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw); 55 return ipt_do_table(skb, hook, in, out,
56 nf_pre_routing_net(in, out)->ipv4.iptable_raw);
56} 57}
57 58
58static unsigned int 59static unsigned int
@@ -70,7 +71,8 @@ ipt_local_hook(unsigned int hook,
70 "packet.\n"); 71 "packet.\n");
71 return NF_ACCEPT; 72 return NF_ACCEPT;
72 } 73 }
73 return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw); 74 return ipt_do_table(skb, hook, in, out,
75 nf_local_out_net(in, out)->ipv4.iptable_raw);
74} 76}
75 77
76/* 'raw' is the very first table. */ 78/* 'raw' is the very first table. */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index a65b845c5f15..cacb9cb27dab 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -23,30 +23,36 @@
23#include <net/netfilter/nf_conntrack_l3proto.h> 23#include <net/netfilter/nf_conntrack_l3proto.h>
24#include <net/netfilter/nf_conntrack_core.h> 24#include <net/netfilter/nf_conntrack_core.h>
25#include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 25#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
26#include <net/netfilter/nf_nat_helper.h>
26 27
27static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 28int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
28 struct nf_conntrack_tuple *tuple) 29 struct nf_conn *ct,
30 enum ip_conntrack_info ctinfo);
31EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook);
32
33static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
34 struct nf_conntrack_tuple *tuple)
29{ 35{
30 const __be32 *ap; 36 const __be32 *ap;
31 __be32 _addrs[2]; 37 __be32 _addrs[2];
32 ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), 38 ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
33 sizeof(u_int32_t) * 2, _addrs); 39 sizeof(u_int32_t) * 2, _addrs);
34 if (ap == NULL) 40 if (ap == NULL)
35 return 0; 41 return false;
36 42
37 tuple->src.u3.ip = ap[0]; 43 tuple->src.u3.ip = ap[0];
38 tuple->dst.u3.ip = ap[1]; 44 tuple->dst.u3.ip = ap[1];
39 45
40 return 1; 46 return true;
41} 47}
42 48
43static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, 49static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
44 const struct nf_conntrack_tuple *orig) 50 const struct nf_conntrack_tuple *orig)
45{ 51{
46 tuple->src.u3.ip = orig->dst.u3.ip; 52 tuple->src.u3.ip = orig->dst.u3.ip;
47 tuple->dst.u3.ip = orig->src.u3.ip; 53 tuple->dst.u3.ip = orig->src.u3.ip;
48 54
49 return 1; 55 return true;
50} 56}
51 57
52static int ipv4_print_tuple(struct seq_file *s, 58static int ipv4_print_tuple(struct seq_file *s,
@@ -101,35 +107,41 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
101 const struct net_device *out, 107 const struct net_device *out,
102 int (*okfn)(struct sk_buff *)) 108 int (*okfn)(struct sk_buff *))
103{ 109{
104 /* We've seen it coming out the other side: confirm it */
105 return nf_conntrack_confirm(skb);
106}
107
108static unsigned int ipv4_conntrack_help(unsigned int hooknum,
109 struct sk_buff *skb,
110 const struct net_device *in,
111 const struct net_device *out,
112 int (*okfn)(struct sk_buff *))
113{
114 struct nf_conn *ct; 110 struct nf_conn *ct;
115 enum ip_conntrack_info ctinfo; 111 enum ip_conntrack_info ctinfo;
116 const struct nf_conn_help *help; 112 const struct nf_conn_help *help;
117 const struct nf_conntrack_helper *helper; 113 const struct nf_conntrack_helper *helper;
114 unsigned int ret;
118 115
119 /* This is where we call the helper: as the packet goes out. */ 116 /* This is where we call the helper: as the packet goes out. */
120 ct = nf_ct_get(skb, &ctinfo); 117 ct = nf_ct_get(skb, &ctinfo);
121 if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) 118 if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
122 return NF_ACCEPT; 119 goto out;
123 120
124 help = nfct_help(ct); 121 help = nfct_help(ct);
125 if (!help) 122 if (!help)
126 return NF_ACCEPT; 123 goto out;
124
127 /* rcu_read_lock()ed by nf_hook_slow */ 125 /* rcu_read_lock()ed by nf_hook_slow */
128 helper = rcu_dereference(help->helper); 126 helper = rcu_dereference(help->helper);
129 if (!helper) 127 if (!helper)
130 return NF_ACCEPT; 128 goto out;
131 return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), 129
132 ct, ctinfo); 130 ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
131 ct, ctinfo);
132 if (ret != NF_ACCEPT)
133 return ret;
134
135 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
136 typeof(nf_nat_seq_adjust_hook) seq_adjust;
137
138 seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
139 if (!seq_adjust || !seq_adjust(skb, ct, ctinfo))
140 return NF_DROP;
141 }
142out:
143 /* We've seen it coming out the other side: confirm it */
144 return nf_conntrack_confirm(skb);
133} 145}
134 146
135static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, 147static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
@@ -211,20 +223,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
211 .priority = NF_IP_PRI_CONNTRACK, 223 .priority = NF_IP_PRI_CONNTRACK,
212 }, 224 },
213 { 225 {
214 .hook = ipv4_conntrack_help,
215 .owner = THIS_MODULE,
216 .pf = PF_INET,
217 .hooknum = NF_INET_POST_ROUTING,
218 .priority = NF_IP_PRI_CONNTRACK_HELPER,
219 },
220 {
221 .hook = ipv4_conntrack_help,
222 .owner = THIS_MODULE,
223 .pf = PF_INET,
224 .hooknum = NF_INET_LOCAL_IN,
225 .priority = NF_IP_PRI_CONNTRACK_HELPER,
226 },
227 {
228 .hook = ipv4_confirm, 226 .hook = ipv4_confirm,
229 .owner = THIS_MODULE, 227 .owner = THIS_MODULE,
230 .pf = PF_INET, 228 .pf = PF_INET,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index f500b0fdaef4..40a46d482490 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -106,21 +106,16 @@ static int ct_seq_show(struct seq_file *s, void *v)
106 /* we only want to print DIR_ORIGINAL */ 106 /* we only want to print DIR_ORIGINAL */
107 if (NF_CT_DIRECTION(hash)) 107 if (NF_CT_DIRECTION(hash))
108 return 0; 108 return 0;
109 if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num != AF_INET) 109 if (nf_ct_l3num(ct) != AF_INET)
110 return 0; 110 return 0;
111 111
112 l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_ORIGINAL] 112 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
113 .tuple.src.l3num);
114 NF_CT_ASSERT(l3proto); 113 NF_CT_ASSERT(l3proto);
115 l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_ORIGINAL] 114 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
116 .tuple.src.l3num,
117 ct->tuplehash[IP_CT_DIR_ORIGINAL]
118 .tuple.dst.protonum);
119 NF_CT_ASSERT(l4proto); 115 NF_CT_ASSERT(l4proto);
120 116
121 if (seq_printf(s, "%-8s %u %ld ", 117 if (seq_printf(s, "%-8s %u %ld ",
122 l4proto->name, 118 l4proto->name, nf_ct_protonum(ct),
123 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
124 timer_pending(&ct->timeout) 119 timer_pending(&ct->timeout)
125 ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) 120 ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
126 return -ENOSPC; 121 return -ENOSPC;
@@ -379,7 +374,7 @@ static const struct file_operations ct_cpu_seq_fops = {
379 .open = ct_cpu_seq_open, 374 .open = ct_cpu_seq_open,
380 .read = seq_read, 375 .read = seq_read,
381 .llseek = seq_lseek, 376 .llseek = seq_lseek,
382 .release = seq_release_private, 377 .release = seq_release,
383}; 378};
384 379
385int __init nf_conntrack_ipv4_compat_init(void) 380int __init nf_conntrack_ipv4_compat_init(void)
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 6873fddb3529..78ab19accace 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -22,22 +22,21 @@
22 22
23static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; 23static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ;
24 24
25static int icmp_pkt_to_tuple(const struct sk_buff *skb, 25static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
26 unsigned int dataoff, 26 struct nf_conntrack_tuple *tuple)
27 struct nf_conntrack_tuple *tuple)
28{ 27{
29 const struct icmphdr *hp; 28 const struct icmphdr *hp;
30 struct icmphdr _hdr; 29 struct icmphdr _hdr;
31 30
32 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 31 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
33 if (hp == NULL) 32 if (hp == NULL)
34 return 0; 33 return false;
35 34
36 tuple->dst.u.icmp.type = hp->type; 35 tuple->dst.u.icmp.type = hp->type;
37 tuple->src.u.icmp.id = hp->un.echo.id; 36 tuple->src.u.icmp.id = hp->un.echo.id;
38 tuple->dst.u.icmp.code = hp->code; 37 tuple->dst.u.icmp.code = hp->code;
39 38
40 return 1; 39 return true;
41} 40}
42 41
43/* Add 1; spaces filled with 0. */ 42/* Add 1; spaces filled with 0. */
@@ -52,17 +51,17 @@ static const u_int8_t invmap[] = {
52 [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1 51 [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
53}; 52};
54 53
55static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple, 54static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
56 const struct nf_conntrack_tuple *orig) 55 const struct nf_conntrack_tuple *orig)
57{ 56{
58 if (orig->dst.u.icmp.type >= sizeof(invmap) 57 if (orig->dst.u.icmp.type >= sizeof(invmap)
59 || !invmap[orig->dst.u.icmp.type]) 58 || !invmap[orig->dst.u.icmp.type])
60 return 0; 59 return false;
61 60
62 tuple->src.u.icmp.id = orig->src.u.icmp.id; 61 tuple->src.u.icmp.id = orig->src.u.icmp.id;
63 tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1; 62 tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
64 tuple->dst.u.icmp.code = orig->dst.u.icmp.code; 63 tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
65 return 1; 64 return true;
66} 65}
67 66
68/* Print out the per-protocol part of the tuple. */ 67/* Print out the per-protocol part of the tuple. */
@@ -101,8 +100,8 @@ static int icmp_packet(struct nf_conn *ct,
101} 100}
102 101
103/* Called when a new connection for this protocol found. */ 102/* Called when a new connection for this protocol found. */
104static int icmp_new(struct nf_conn *ct, 103static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
105 const struct sk_buff *skb, unsigned int dataoff) 104 unsigned int dataoff)
106{ 105{
107 static const u_int8_t valid_new[] = { 106 static const u_int8_t valid_new[] = {
108 [ICMP_ECHO] = 1, 107 [ICMP_ECHO] = 1,
@@ -116,11 +115,11 @@ static int icmp_new(struct nf_conn *ct,
116 /* Can't create a new ICMP `conn' with this. */ 115 /* Can't create a new ICMP `conn' with this. */
117 pr_debug("icmp: can't create new conn with type %u\n", 116 pr_debug("icmp: can't create new conn with type %u\n",
118 ct->tuplehash[0].tuple.dst.u.icmp.type); 117 ct->tuplehash[0].tuple.dst.u.icmp.type);
119 NF_CT_DUMP_TUPLE(&ct->tuplehash[0].tuple); 118 nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
120 return 0; 119 return false;
121 } 120 }
122 atomic_set(&ct->proto.icmp.count, 0); 121 atomic_set(&ct->proto.icmp.count, 0);
123 return 1; 122 return true;
124} 123}
125 124
126/* Returns conntrack if it dealt with ICMP, and filled in skb fields */ 125/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 36b4e3bb056f..04578593e100 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -150,9 +150,9 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple,
150 const struct nf_nat_range *range) 150 const struct nf_nat_range *range)
151{ 151{
152 unsigned int h = hash_by_src(tuple); 152 unsigned int h = hash_by_src(tuple);
153 struct nf_conn_nat *nat; 153 const struct nf_conn_nat *nat;
154 struct nf_conn *ct; 154 const struct nf_conn *ct;
155 struct hlist_node *n; 155 const struct hlist_node *n;
156 156
157 rcu_read_lock(); 157 rcu_read_lock();
158 hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) { 158 hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) {
@@ -349,7 +349,7 @@ nf_nat_setup_info(struct nf_conn *ct,
349EXPORT_SYMBOL(nf_nat_setup_info); 349EXPORT_SYMBOL(nf_nat_setup_info);
350 350
351/* Returns true if succeeded. */ 351/* Returns true if succeeded. */
352static int 352static bool
353manip_pkt(u_int16_t proto, 353manip_pkt(u_int16_t proto,
354 struct sk_buff *skb, 354 struct sk_buff *skb,
355 unsigned int iphdroff, 355 unsigned int iphdroff,
@@ -360,7 +360,7 @@ manip_pkt(u_int16_t proto,
360 const struct nf_nat_protocol *p; 360 const struct nf_nat_protocol *p;
361 361
362 if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) 362 if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
363 return 0; 363 return false;
364 364
365 iph = (void *)skb->data + iphdroff; 365 iph = (void *)skb->data + iphdroff;
366 366
@@ -369,7 +369,7 @@ manip_pkt(u_int16_t proto,
369 /* rcu_read_lock()ed by nf_hook_slow */ 369 /* rcu_read_lock()ed by nf_hook_slow */
370 p = __nf_nat_proto_find(proto); 370 p = __nf_nat_proto_find(proto);
371 if (!p->manip_pkt(skb, iphdroff, target, maniptype)) 371 if (!p->manip_pkt(skb, iphdroff, target, maniptype))
372 return 0; 372 return false;
373 373
374 iph = (void *)skb->data + iphdroff; 374 iph = (void *)skb->data + iphdroff;
375 375
@@ -380,7 +380,7 @@ manip_pkt(u_int16_t proto,
380 csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); 380 csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
381 iph->daddr = target->dst.u3.ip; 381 iph->daddr = target->dst.u3.ip;
382 } 382 }
383 return 1; 383 return true;
384} 384}
385 385
386/* Do packet manipulations according to nf_nat_setup_info. */ 386/* Do packet manipulations according to nf_nat_setup_info. */
@@ -426,7 +426,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
426 struct icmphdr icmp; 426 struct icmphdr icmp;
427 struct iphdr ip; 427 struct iphdr ip;
428 } *inside; 428 } *inside;
429 struct nf_conntrack_l4proto *l4proto; 429 const struct nf_conntrack_l4proto *l4proto;
430 struct nf_conntrack_tuple inner, target; 430 struct nf_conntrack_tuple inner, target;
431 int hdrlen = ip_hdrlen(skb); 431 int hdrlen = ip_hdrlen(skb);
432 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 432 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
@@ -544,46 +544,6 @@ void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
544} 544}
545EXPORT_SYMBOL(nf_nat_protocol_unregister); 545EXPORT_SYMBOL(nf_nat_protocol_unregister);
546 546
547#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
548int
549nf_nat_port_range_to_nlattr(struct sk_buff *skb,
550 const struct nf_nat_range *range)
551{
552 NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.tcp.port);
553 NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.tcp.port);
554
555 return 0;
556
557nla_put_failure:
558 return -1;
559}
560EXPORT_SYMBOL_GPL(nf_nat_port_nlattr_to_range);
561
562int
563nf_nat_port_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range)
564{
565 int ret = 0;
566
567 /* we have to return whether we actually parsed something or not */
568
569 if (tb[CTA_PROTONAT_PORT_MIN]) {
570 ret = 1;
571 range->min.tcp.port = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
572 }
573
574 if (!tb[CTA_PROTONAT_PORT_MAX]) {
575 if (ret)
576 range->max.tcp.port = range->min.tcp.port;
577 } else {
578 ret = 1;
579 range->max.tcp.port = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
580 }
581
582 return ret;
583}
584EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nlattr);
585#endif
586
587/* Noone using conntrack by the time this called. */ 547/* Noone using conntrack by the time this called. */
588static void nf_nat_cleanup_conntrack(struct nf_conn *ct) 548static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
589{ 549{
@@ -660,6 +620,9 @@ static int __init nf_nat_init(void)
660 nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; 620 nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
661 621
662 l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); 622 l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
623
624 BUG_ON(nf_nat_seq_adjust_hook != NULL);
625 rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
663 return 0; 626 return 0;
664 627
665 cleanup_extend: 628 cleanup_extend:
@@ -686,6 +649,8 @@ static void __exit nf_nat_cleanup(void)
686 nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size); 649 nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size);
687 nf_ct_l3proto_put(l3proto); 650 nf_ct_l3proto_put(l3proto);
688 nf_ct_extend_unregister(&nat_extend); 651 nf_ct_extend_unregister(&nat_extend);
652 rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL);
653 synchronize_net();
689} 654}
690 655
691MODULE_LICENSE("GPL"); 656MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index ca57f47bbd25..11976ea29884 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -139,7 +139,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
139 const char *rep_buffer, 139 const char *rep_buffer,
140 unsigned int rep_len) 140 unsigned int rep_len)
141{ 141{
142 struct rtable *rt = (struct rtable *)skb->dst; 142 struct rtable *rt = skb->rtable;
143 struct iphdr *iph; 143 struct iphdr *iph;
144 struct tcphdr *tcph; 144 struct tcphdr *tcph;
145 int oldlen, datalen; 145 int oldlen, datalen;
@@ -217,7 +217,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
217 const char *rep_buffer, 217 const char *rep_buffer,
218 unsigned int rep_len) 218 unsigned int rep_len)
219{ 219{
220 struct rtable *rt = (struct rtable *)skb->dst; 220 struct rtable *rt = skb->rtable;
221 struct iphdr *iph; 221 struct iphdr *iph;
222 struct udphdr *udph; 222 struct udphdr *udph;
223 int datalen, oldlen; 223 int datalen, oldlen;
@@ -416,7 +416,6 @@ nf_nat_seq_adjust(struct sk_buff *skb,
416 416
417 return 1; 417 return 1;
418} 418}
419EXPORT_SYMBOL(nf_nat_seq_adjust);
420 419
421/* Setup NAT on this expected conntrack so it follows master. */ 420/* Setup NAT on this expected conntrack so it follows master. */
422/* If we fail to get a free NAT slot, we'll get dropped on confirm */ 421/* If we fail to get a free NAT slot, we'll get dropped on confirm */
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 3a1e6d6afc0a..da3d91a5ef5c 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -72,7 +72,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
72 } 72 }
73 73
74 pr_debug("trying to unexpect other dir: "); 74 pr_debug("trying to unexpect other dir: ");
75 NF_CT_DUMP_TUPLE(&t); 75 nf_ct_dump_tuple_ip(&t);
76 other_exp = nf_ct_expect_find_get(&t); 76 other_exp = nf_ct_expect_find_get(&t);
77 if (other_exp) { 77 if (other_exp) {
78 nf_ct_unexpect_related(other_exp); 78 nf_ct_unexpect_related(other_exp);
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
new file mode 100644
index 000000000000..91537f11273f
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -0,0 +1,120 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2008 Patrick McHardy <kaber@trash.net>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/types.h>
11#include <linux/random.h>
12#include <linux/ip.h>
13
14#include <linux/netfilter.h>
15#include <net/netfilter/nf_nat.h>
16#include <net/netfilter/nf_nat_core.h>
17#include <net/netfilter/nf_nat_rule.h>
18#include <net/netfilter/nf_nat_protocol.h>
19
20bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
21 enum nf_nat_manip_type maniptype,
22 const union nf_conntrack_man_proto *min,
23 const union nf_conntrack_man_proto *max)
24{
25 __be16 port;
26
27 if (maniptype == IP_NAT_MANIP_SRC)
28 port = tuple->src.u.all;
29 else
30 port = tuple->dst.u.all;
31
32 return ntohs(port) >= ntohs(min->all) &&
33 ntohs(port) <= ntohs(max->all);
34}
35EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
36
37bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
38 const struct nf_nat_range *range,
39 enum nf_nat_manip_type maniptype,
40 const struct nf_conn *ct,
41 u_int16_t *rover)
42{
43 unsigned int range_size, min, i;
44 __be16 *portptr;
45 u_int16_t off;
46
47 if (maniptype == IP_NAT_MANIP_SRC)
48 portptr = &tuple->src.u.all;
49 else
50 portptr = &tuple->dst.u.all;
51
52 /* If no range specified... */
53 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
54 /* If it's dst rewrite, can't change port */
55 if (maniptype == IP_NAT_MANIP_DST)
56 return false;
57
58 if (ntohs(*portptr) < 1024) {
59 /* Loose convention: >> 512 is credential passing */
60 if (ntohs(*portptr) < 512) {
61 min = 1;
62 range_size = 511 - min + 1;
63 } else {
64 min = 600;
65 range_size = 1023 - min + 1;
66 }
67 } else {
68 min = 1024;
69 range_size = 65535 - 1024 + 1;
70 }
71 } else {
72 min = ntohs(range->min.all);
73 range_size = ntohs(range->max.all) - min + 1;
74 }
75
76 off = *rover;
77 if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
78 off = net_random();
79
80 for (i = 0; i < range_size; i++, off++) {
81 *portptr = htons(min + off % range_size);
82 if (nf_nat_used_tuple(tuple, ct))
83 continue;
84 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
85 *rover = off;
86 return true;
87 }
88 return false;
89}
90EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
91
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
/*
 * Emit the protocol part of a NAT range as netlink attributes.
 *
 * Adds CTA_PROTONAT_PORT_MIN and CTA_PROTONAT_PORT_MAX (both big-endian
 * 16-bit values taken from range->min.all / range->max.all) to @skb.
 *
 * Returns 0 on success and -1 when control reaches the nla_put_failure
 * label; the NLA_PUT_BE16() macro is what branches there (presumably when
 * the attribute does not fit into @skb).
 */
int nf_nat_proto_range_to_nlattr(struct sk_buff *skb,
				 const struct nf_nat_range *range)
{
	NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.all);
	NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.all);
	return 0;

nla_put_failure:
	return -1;
}
EXPORT_SYMBOL_GPL(nf_nat_proto_range_to_nlattr);

/*
 * Fill the protocol part of a NAT range from parsed netlink attributes.
 *
 * If CTA_PROTONAT_PORT_MIN is present it sets both the minimum and, as a
 * default, the maximum; a CTA_PROTONAT_PORT_MAX attribute then overrides
 * the maximum.  Either attribute marks the range as
 * IP_NAT_RANGE_PROTO_SPECIFIED.  @range is left untouched when neither
 * attribute is present.
 *
 * Always returns 0.
 */
int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
				 struct nf_nat_range *range)
{
	if (tb[CTA_PROTONAT_PORT_MIN]) {
		range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
		/* Single-port default; use the generic accessor throughout. */
		range->max.all = range->min.all;
		range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
	}
	if (tb[CTA_PROTONAT_PORT_MAX]) {
		range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
		range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range);
#endif
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
new file mode 100644
index 000000000000..22485ce306d4
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c
@@ -0,0 +1,108 @@
1/*
2 * DCCP NAT protocol helper
3 *
4 * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 */
11
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/skbuff.h>
16#include <linux/ip.h>
17#include <linux/dccp.h>
18
19#include <net/netfilter/nf_conntrack.h>
20#include <net/netfilter/nf_nat.h>
21#include <net/netfilter/nf_nat_protocol.h>
22
23static u_int16_t dccp_port_rover;
24
25static bool
26dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
27 const struct nf_nat_range *range,
28 enum nf_nat_manip_type maniptype,
29 const struct nf_conn *ct)
30{
31 return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
32 &dccp_port_rover);
33}
34
35static bool
36dccp_manip_pkt(struct sk_buff *skb,
37 unsigned int iphdroff,
38 const struct nf_conntrack_tuple *tuple,
39 enum nf_nat_manip_type maniptype)
40{
41 const struct iphdr *iph = (const void *)(skb->data + iphdroff);
42 struct dccp_hdr *hdr;
43 unsigned int hdroff = iphdroff + iph->ihl * 4;
44 __be32 oldip, newip;
45 __be16 *portptr, oldport, newport;
46 int hdrsize = 8; /* DCCP connection tracking guarantees this much */
47
48 if (skb->len >= hdroff + sizeof(struct dccp_hdr))
49 hdrsize = sizeof(struct dccp_hdr);
50
51 if (!skb_make_writable(skb, hdroff + hdrsize))
52 return false;
53
54 iph = (struct iphdr *)(skb->data + iphdroff);
55 hdr = (struct dccp_hdr *)(skb->data + hdroff);
56
57 if (maniptype == IP_NAT_MANIP_SRC) {
58 oldip = iph->saddr;
59 newip = tuple->src.u3.ip;
60 newport = tuple->src.u.dccp.port;
61 portptr = &hdr->dccph_sport;
62 } else {
63 oldip = iph->daddr;
64 newip = tuple->dst.u3.ip;
65 newport = tuple->dst.u.dccp.port;
66 portptr = &hdr->dccph_dport;
67 }
68
69 oldport = *portptr;
70 *portptr = newport;
71
72 if (hdrsize < sizeof(*hdr))
73 return true;
74
75 inet_proto_csum_replace4(&hdr->dccph_checksum, skb, oldip, newip, 1);
76 inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
77 0);
78 return true;
79}
80
81static const struct nf_nat_protocol nf_nat_protocol_dccp = {
82 .protonum = IPPROTO_DCCP,
83 .me = THIS_MODULE,
84 .manip_pkt = dccp_manip_pkt,
85 .in_range = nf_nat_proto_in_range,
86 .unique_tuple = dccp_unique_tuple,
87#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
88 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
89 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
90#endif
91};
92
93static int __init nf_nat_proto_dccp_init(void)
94{
95 return nf_nat_protocol_register(&nf_nat_protocol_dccp);
96}
97
98static void __exit nf_nat_proto_dccp_fini(void)
99{
100 nf_nat_protocol_unregister(&nf_nat_protocol_dccp);
101}
102
103module_init(nf_nat_proto_dccp_init);
104module_exit(nf_nat_proto_dccp_fini);
105
106MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
107MODULE_DESCRIPTION("DCCP NAT protocol helper");
108MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index a1e4da16da2e..d7e89201351e 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -36,26 +36,8 @@ MODULE_LICENSE("GPL");
36MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); 36MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
37MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); 37MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
38 38
39/* is key in given range between min and max */
40static int
41gre_in_range(const struct nf_conntrack_tuple *tuple,
42 enum nf_nat_manip_type maniptype,
43 const union nf_conntrack_man_proto *min,
44 const union nf_conntrack_man_proto *max)
45{
46 __be16 key;
47
48 if (maniptype == IP_NAT_MANIP_SRC)
49 key = tuple->src.u.gre.key;
50 else
51 key = tuple->dst.u.gre.key;
52
53 return ntohs(key) >= ntohs(min->gre.key) &&
54 ntohs(key) <= ntohs(max->gre.key);
55}
56
57/* generate unique tuple ... */ 39/* generate unique tuple ... */
58static int 40static bool
59gre_unique_tuple(struct nf_conntrack_tuple *tuple, 41gre_unique_tuple(struct nf_conntrack_tuple *tuple,
60 const struct nf_nat_range *range, 42 const struct nf_nat_range *range,
61 enum nf_nat_manip_type maniptype, 43 enum nf_nat_manip_type maniptype,
@@ -68,7 +50,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
68 /* If there is no master conntrack we are not PPTP, 50 /* If there is no master conntrack we are not PPTP,
69 do not change tuples */ 51 do not change tuples */
70 if (!ct->master) 52 if (!ct->master)
71 return 0; 53 return false;
72 54
73 if (maniptype == IP_NAT_MANIP_SRC) 55 if (maniptype == IP_NAT_MANIP_SRC)
74 keyptr = &tuple->src.u.gre.key; 56 keyptr = &tuple->src.u.gre.key;
@@ -89,20 +71,20 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
89 for (i = 0; i < range_size; i++, key++) { 71 for (i = 0; i < range_size; i++, key++) {
90 *keyptr = htons(min + key % range_size); 72 *keyptr = htons(min + key % range_size);
91 if (!nf_nat_used_tuple(tuple, ct)) 73 if (!nf_nat_used_tuple(tuple, ct))
92 return 1; 74 return true;
93 } 75 }
94 76
95 pr_debug("%p: no NAT mapping\n", ct); 77 pr_debug("%p: no NAT mapping\n", ct);
96 return 0; 78 return false;
97} 79}
98 80
99/* manipulate a GRE packet according to maniptype */ 81/* manipulate a GRE packet according to maniptype */
100static int 82static bool
101gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, 83gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
102 const struct nf_conntrack_tuple *tuple, 84 const struct nf_conntrack_tuple *tuple,
103 enum nf_nat_manip_type maniptype) 85 enum nf_nat_manip_type maniptype)
104{ 86{
105 struct gre_hdr *greh; 87 const struct gre_hdr *greh;
106 struct gre_hdr_pptp *pgreh; 88 struct gre_hdr_pptp *pgreh;
107 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); 89 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
108 unsigned int hdroff = iphdroff + iph->ihl * 4; 90 unsigned int hdroff = iphdroff + iph->ihl * 4;
@@ -110,7 +92,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
110 /* pgreh includes two optional 32bit fields which are not required 92 /* pgreh includes two optional 32bit fields which are not required
111 * to be there. That's where the magic '8' comes from */ 93 * to be there. That's where the magic '8' comes from */
112 if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8)) 94 if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
113 return 0; 95 return false;
114 96
115 greh = (void *)skb->data + hdroff; 97 greh = (void *)skb->data + hdroff;
116 pgreh = (struct gre_hdr_pptp *)greh; 98 pgreh = (struct gre_hdr_pptp *)greh;
@@ -118,7 +100,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
118 /* we only have destination manip of a packet, since 'source key' 100 /* we only have destination manip of a packet, since 'source key'
119 * is not present in the packet itself */ 101 * is not present in the packet itself */
120 if (maniptype != IP_NAT_MANIP_DST) 102 if (maniptype != IP_NAT_MANIP_DST)
121 return 1; 103 return true;
122 switch (greh->version) { 104 switch (greh->version) {
123 case GRE_VERSION_1701: 105 case GRE_VERSION_1701:
124 /* We do not currently NAT any GREv0 packets. 106 /* We do not currently NAT any GREv0 packets.
@@ -130,21 +112,20 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
130 break; 112 break;
131 default: 113 default:
132 pr_debug("can't nat unknown GRE version\n"); 114 pr_debug("can't nat unknown GRE version\n");
133 return 0; 115 return false;
134 } 116 }
135 return 1; 117 return true;
136} 118}
137 119
138static const struct nf_nat_protocol gre = { 120static const struct nf_nat_protocol gre = {
139 .name = "GRE",
140 .protonum = IPPROTO_GRE, 121 .protonum = IPPROTO_GRE,
141 .me = THIS_MODULE, 122 .me = THIS_MODULE,
142 .manip_pkt = gre_manip_pkt, 123 .manip_pkt = gre_manip_pkt,
143 .in_range = gre_in_range, 124 .in_range = nf_nat_proto_in_range,
144 .unique_tuple = gre_unique_tuple, 125 .unique_tuple = gre_unique_tuple,
145#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 126#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
146 .range_to_nlattr = nf_nat_port_range_to_nlattr, 127 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
147 .nlattr_to_range = nf_nat_port_nlattr_to_range, 128 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
148#endif 129#endif
149}; 130};
150 131
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 03a02969aa57..19a8b0b07d8e 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -17,7 +17,7 @@
17#include <net/netfilter/nf_nat_rule.h> 17#include <net/netfilter/nf_nat_rule.h>
18#include <net/netfilter/nf_nat_protocol.h> 18#include <net/netfilter/nf_nat_protocol.h>
19 19
20static int 20static bool
21icmp_in_range(const struct nf_conntrack_tuple *tuple, 21icmp_in_range(const struct nf_conntrack_tuple *tuple,
22 enum nf_nat_manip_type maniptype, 22 enum nf_nat_manip_type maniptype,
23 const union nf_conntrack_man_proto *min, 23 const union nf_conntrack_man_proto *min,
@@ -27,7 +27,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
27 ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); 27 ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
28} 28}
29 29
30static int 30static bool
31icmp_unique_tuple(struct nf_conntrack_tuple *tuple, 31icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
32 const struct nf_nat_range *range, 32 const struct nf_nat_range *range,
33 enum nf_nat_manip_type maniptype, 33 enum nf_nat_manip_type maniptype,
@@ -46,12 +46,12 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
46 tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + 46 tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
47 (id % range_size)); 47 (id % range_size));
48 if (!nf_nat_used_tuple(tuple, ct)) 48 if (!nf_nat_used_tuple(tuple, ct))
49 return 1; 49 return true;
50 } 50 }
51 return 0; 51 return false;
52} 52}
53 53
54static int 54static bool
55icmp_manip_pkt(struct sk_buff *skb, 55icmp_manip_pkt(struct sk_buff *skb,
56 unsigned int iphdroff, 56 unsigned int iphdroff,
57 const struct nf_conntrack_tuple *tuple, 57 const struct nf_conntrack_tuple *tuple,
@@ -62,24 +62,23 @@ icmp_manip_pkt(struct sk_buff *skb,
62 unsigned int hdroff = iphdroff + iph->ihl*4; 62 unsigned int hdroff = iphdroff + iph->ihl*4;
63 63
64 if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) 64 if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
65 return 0; 65 return false;
66 66
67 hdr = (struct icmphdr *)(skb->data + hdroff); 67 hdr = (struct icmphdr *)(skb->data + hdroff);
68 inet_proto_csum_replace2(&hdr->checksum, skb, 68 inet_proto_csum_replace2(&hdr->checksum, skb,
69 hdr->un.echo.id, tuple->src.u.icmp.id, 0); 69 hdr->un.echo.id, tuple->src.u.icmp.id, 0);
70 hdr->un.echo.id = tuple->src.u.icmp.id; 70 hdr->un.echo.id = tuple->src.u.icmp.id;
71 return 1; 71 return true;
72} 72}
73 73
74const struct nf_nat_protocol nf_nat_protocol_icmp = { 74const struct nf_nat_protocol nf_nat_protocol_icmp = {
75 .name = "ICMP",
76 .protonum = IPPROTO_ICMP, 75 .protonum = IPPROTO_ICMP,
77 .me = THIS_MODULE, 76 .me = THIS_MODULE,
78 .manip_pkt = icmp_manip_pkt, 77 .manip_pkt = icmp_manip_pkt,
79 .in_range = icmp_in_range, 78 .in_range = icmp_in_range,
80 .unique_tuple = icmp_unique_tuple, 79 .unique_tuple = icmp_unique_tuple,
81#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 80#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
82 .range_to_nlattr = nf_nat_port_range_to_nlattr, 81 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
83 .nlattr_to_range = nf_nat_port_nlattr_to_range, 82 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
84#endif 83#endif
85}; 84};
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
new file mode 100644
index 000000000000..82e4c0e286b8
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -0,0 +1,96 @@
1/*
2 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/init.h>
11#include <linux/ip.h>
12#include <linux/sctp.h>
13#include <net/sctp/checksum.h>
14
15#include <net/netfilter/nf_nat_protocol.h>
16
17static u_int16_t nf_sctp_port_rover;
18
19static bool
20sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
21 const struct nf_nat_range *range,
22 enum nf_nat_manip_type maniptype,
23 const struct nf_conn *ct)
24{
25 return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
26 &nf_sctp_port_rover);
27}
28
29static bool
30sctp_manip_pkt(struct sk_buff *skb,
31 unsigned int iphdroff,
32 const struct nf_conntrack_tuple *tuple,
33 enum nf_nat_manip_type maniptype)
34{
35 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
36 sctp_sctphdr_t *hdr;
37 unsigned int hdroff = iphdroff + iph->ihl*4;
38 __be32 oldip, newip;
39 u32 crc32;
40
41 if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
42 return false;
43
44 iph = (struct iphdr *)(skb->data + iphdroff);
45 hdr = (struct sctphdr *)(skb->data + hdroff);
46
47 if (maniptype == IP_NAT_MANIP_SRC) {
48 /* Get rid of src ip and src pt */
49 oldip = iph->saddr;
50 newip = tuple->src.u3.ip;
51 hdr->source = tuple->src.u.sctp.port;
52 } else {
53 /* Get rid of dst ip and dst pt */
54 oldip = iph->daddr;
55 newip = tuple->dst.u3.ip;
56 hdr->dest = tuple->dst.u.sctp.port;
57 }
58
59 crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff);
60 for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next)
61 crc32 = sctp_update_cksum((u8 *)skb->data, skb_headlen(skb),
62 crc32);
63 crc32 = sctp_end_cksum(crc32);
64 hdr->checksum = htonl(crc32);
65
66 return true;
67}
68
69static const struct nf_nat_protocol nf_nat_protocol_sctp = {
70 .protonum = IPPROTO_SCTP,
71 .me = THIS_MODULE,
72 .manip_pkt = sctp_manip_pkt,
73 .in_range = nf_nat_proto_in_range,
74 .unique_tuple = sctp_unique_tuple,
75#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
76 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
77 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
78#endif
79};
80
81static int __init nf_nat_proto_sctp_init(void)
82{
83 return nf_nat_protocol_register(&nf_nat_protocol_sctp);
84}
85
86static void __exit nf_nat_proto_sctp_exit(void)
87{
88 nf_nat_protocol_unregister(&nf_nat_protocol_sctp);
89}
90
91module_init(nf_nat_proto_sctp_init);
92module_exit(nf_nat_proto_sctp_exit);
93
94MODULE_LICENSE("GPL");
95MODULE_DESCRIPTION("SCTP NAT protocol helper");
96MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index ffd5d1589eca..399e2cfa263b 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -8,7 +8,6 @@
8 8
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/init.h> 10#include <linux/init.h>
11#include <linux/random.h>
12#include <linux/ip.h> 11#include <linux/ip.h>
13#include <linux/tcp.h> 12#include <linux/tcp.h>
14 13
@@ -19,75 +18,19 @@
19#include <net/netfilter/nf_nat_protocol.h> 18#include <net/netfilter/nf_nat_protocol.h>
20#include <net/netfilter/nf_nat_core.h> 19#include <net/netfilter/nf_nat_core.h>
21 20
22static int 21static u_int16_t tcp_port_rover;
23tcp_in_range(const struct nf_conntrack_tuple *tuple,
24 enum nf_nat_manip_type maniptype,
25 const union nf_conntrack_man_proto *min,
26 const union nf_conntrack_man_proto *max)
27{
28 __be16 port;
29
30 if (maniptype == IP_NAT_MANIP_SRC)
31 port = tuple->src.u.tcp.port;
32 else
33 port = tuple->dst.u.tcp.port;
34
35 return ntohs(port) >= ntohs(min->tcp.port) &&
36 ntohs(port) <= ntohs(max->tcp.port);
37}
38 22
39static int 23static bool
40tcp_unique_tuple(struct nf_conntrack_tuple *tuple, 24tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
41 const struct nf_nat_range *range, 25 const struct nf_nat_range *range,
42 enum nf_nat_manip_type maniptype, 26 enum nf_nat_manip_type maniptype,
43 const struct nf_conn *ct) 27 const struct nf_conn *ct)
44{ 28{
45 static u_int16_t port; 29 return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
46 __be16 *portptr; 30 &tcp_port_rover);
47 unsigned int range_size, min, i;
48
49 if (maniptype == IP_NAT_MANIP_SRC)
50 portptr = &tuple->src.u.tcp.port;
51 else
52 portptr = &tuple->dst.u.tcp.port;
53
54 /* If no range specified... */
55 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
56 /* If it's dst rewrite, can't change port */
57 if (maniptype == IP_NAT_MANIP_DST)
58 return 0;
59
60 /* Map privileged onto privileged. */
61 if (ntohs(*portptr) < 1024) {
62 /* Loose convention: >> 512 is credential passing */
63 if (ntohs(*portptr)<512) {
64 min = 1;
65 range_size = 511 - min + 1;
66 } else {
67 min = 600;
68 range_size = 1023 - min + 1;
69 }
70 } else {
71 min = 1024;
72 range_size = 65535 - 1024 + 1;
73 }
74 } else {
75 min = ntohs(range->min.tcp.port);
76 range_size = ntohs(range->max.tcp.port) - min + 1;
77 }
78
79 if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
80 port = net_random();
81
82 for (i = 0; i < range_size; i++, port++) {
83 *portptr = htons(min + port % range_size);
84 if (!nf_nat_used_tuple(tuple, ct))
85 return 1;
86 }
87 return 0;
88} 31}
89 32
90static int 33static bool
91tcp_manip_pkt(struct sk_buff *skb, 34tcp_manip_pkt(struct sk_buff *skb,
92 unsigned int iphdroff, 35 unsigned int iphdroff,
93 const struct nf_conntrack_tuple *tuple, 36 const struct nf_conntrack_tuple *tuple,
@@ -107,7 +50,7 @@ tcp_manip_pkt(struct sk_buff *skb,
107 hdrsize = sizeof(struct tcphdr); 50 hdrsize = sizeof(struct tcphdr);
108 51
109 if (!skb_make_writable(skb, hdroff + hdrsize)) 52 if (!skb_make_writable(skb, hdroff + hdrsize))
110 return 0; 53 return false;
111 54
112 iph = (struct iphdr *)(skb->data + iphdroff); 55 iph = (struct iphdr *)(skb->data + iphdroff);
113 hdr = (struct tcphdr *)(skb->data + hdroff); 56 hdr = (struct tcphdr *)(skb->data + hdroff);
@@ -130,22 +73,21 @@ tcp_manip_pkt(struct sk_buff *skb,
130 *portptr = newport; 73 *portptr = newport;
131 74
132 if (hdrsize < sizeof(*hdr)) 75 if (hdrsize < sizeof(*hdr))
133 return 1; 76 return true;
134 77
135 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); 78 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
136 inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); 79 inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
137 return 1; 80 return true;
138} 81}
139 82
140const struct nf_nat_protocol nf_nat_protocol_tcp = { 83const struct nf_nat_protocol nf_nat_protocol_tcp = {
141 .name = "TCP",
142 .protonum = IPPROTO_TCP, 84 .protonum = IPPROTO_TCP,
143 .me = THIS_MODULE, 85 .me = THIS_MODULE,
144 .manip_pkt = tcp_manip_pkt, 86 .manip_pkt = tcp_manip_pkt,
145 .in_range = tcp_in_range, 87 .in_range = nf_nat_proto_in_range,
146 .unique_tuple = tcp_unique_tuple, 88 .unique_tuple = tcp_unique_tuple,
147#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 89#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
148 .range_to_nlattr = nf_nat_port_range_to_nlattr, 90 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
149 .nlattr_to_range = nf_nat_port_nlattr_to_range, 91 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
150#endif 92#endif
151}; 93};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index 4b8f49910ff2..9e61c79492e4 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -8,7 +8,6 @@
8 8
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/init.h> 10#include <linux/init.h>
11#include <linux/random.h>
12#include <linux/ip.h> 11#include <linux/ip.h>
13#include <linux/udp.h> 12#include <linux/udp.h>
14 13
@@ -18,74 +17,19 @@
18#include <net/netfilter/nf_nat_rule.h> 17#include <net/netfilter/nf_nat_rule.h>
19#include <net/netfilter/nf_nat_protocol.h> 18#include <net/netfilter/nf_nat_protocol.h>
20 19
21static int 20static u_int16_t udp_port_rover;
22udp_in_range(const struct nf_conntrack_tuple *tuple,
23 enum nf_nat_manip_type maniptype,
24 const union nf_conntrack_man_proto *min,
25 const union nf_conntrack_man_proto *max)
26{
27 __be16 port;
28
29 if (maniptype == IP_NAT_MANIP_SRC)
30 port = tuple->src.u.udp.port;
31 else
32 port = tuple->dst.u.udp.port;
33
34 return ntohs(port) >= ntohs(min->udp.port) &&
35 ntohs(port) <= ntohs(max->udp.port);
36}
37 21
38static int 22static bool
39udp_unique_tuple(struct nf_conntrack_tuple *tuple, 23udp_unique_tuple(struct nf_conntrack_tuple *tuple,
40 const struct nf_nat_range *range, 24 const struct nf_nat_range *range,
41 enum nf_nat_manip_type maniptype, 25 enum nf_nat_manip_type maniptype,
42 const struct nf_conn *ct) 26 const struct nf_conn *ct)
43{ 27{
44 static u_int16_t port; 28 return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
45 __be16 *portptr; 29 &udp_port_rover);
46 unsigned int range_size, min, i;
47
48 if (maniptype == IP_NAT_MANIP_SRC)
49 portptr = &tuple->src.u.udp.port;
50 else
51 portptr = &tuple->dst.u.udp.port;
52
53 /* If no range specified... */
54 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
55 /* If it's dst rewrite, can't change port */
56 if (maniptype == IP_NAT_MANIP_DST)
57 return 0;
58
59 if (ntohs(*portptr) < 1024) {
60 /* Loose convention: >> 512 is credential passing */
61 if (ntohs(*portptr)<512) {
62 min = 1;
63 range_size = 511 - min + 1;
64 } else {
65 min = 600;
66 range_size = 1023 - min + 1;
67 }
68 } else {
69 min = 1024;
70 range_size = 65535 - 1024 + 1;
71 }
72 } else {
73 min = ntohs(range->min.udp.port);
74 range_size = ntohs(range->max.udp.port) - min + 1;
75 }
76
77 if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
78 port = net_random();
79
80 for (i = 0; i < range_size; i++, port++) {
81 *portptr = htons(min + port % range_size);
82 if (!nf_nat_used_tuple(tuple, ct))
83 return 1;
84 }
85 return 0;
86} 30}
87 31
88static int 32static bool
89udp_manip_pkt(struct sk_buff *skb, 33udp_manip_pkt(struct sk_buff *skb,
90 unsigned int iphdroff, 34 unsigned int iphdroff,
91 const struct nf_conntrack_tuple *tuple, 35 const struct nf_conntrack_tuple *tuple,
@@ -98,7 +42,7 @@ udp_manip_pkt(struct sk_buff *skb,
98 __be16 *portptr, newport; 42 __be16 *portptr, newport;
99 43
100 if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) 44 if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
101 return 0; 45 return false;
102 46
103 iph = (struct iphdr *)(skb->data + iphdroff); 47 iph = (struct iphdr *)(skb->data + iphdroff);
104 hdr = (struct udphdr *)(skb->data + hdroff); 48 hdr = (struct udphdr *)(skb->data + hdroff);
@@ -124,18 +68,17 @@ udp_manip_pkt(struct sk_buff *skb,
124 hdr->check = CSUM_MANGLED_0; 68 hdr->check = CSUM_MANGLED_0;
125 } 69 }
126 *portptr = newport; 70 *portptr = newport;
127 return 1; 71 return true;
128} 72}
129 73
130const struct nf_nat_protocol nf_nat_protocol_udp = { 74const struct nf_nat_protocol nf_nat_protocol_udp = {
131 .name = "UDP",
132 .protonum = IPPROTO_UDP, 75 .protonum = IPPROTO_UDP,
133 .me = THIS_MODULE, 76 .me = THIS_MODULE,
134 .manip_pkt = udp_manip_pkt, 77 .manip_pkt = udp_manip_pkt,
135 .in_range = udp_in_range, 78 .in_range = nf_nat_proto_in_range,
136 .unique_tuple = udp_unique_tuple, 79 .unique_tuple = udp_unique_tuple,
137#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 80#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
138 .range_to_nlattr = nf_nat_port_range_to_nlattr, 81 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
139 .nlattr_to_range = nf_nat_port_nlattr_to_range, 82 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
140#endif 83#endif
141}; 84};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
new file mode 100644
index 000000000000..440a229bbd87
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c
@@ -0,0 +1,99 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2008 Patrick McHardy <kaber@trash.net>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/types.h>
11#include <linux/init.h>
12#include <linux/ip.h>
13#include <linux/udp.h>
14
15#include <linux/netfilter.h>
16#include <net/netfilter/nf_nat.h>
17#include <net/netfilter/nf_nat_protocol.h>
18
19static u_int16_t udplite_port_rover;
20
21static bool
22udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
23 const struct nf_nat_range *range,
24 enum nf_nat_manip_type maniptype,
25 const struct nf_conn *ct)
26{
27 return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
28 &udplite_port_rover);
29}
30
31static bool
32udplite_manip_pkt(struct sk_buff *skb,
33 unsigned int iphdroff,
34 const struct nf_conntrack_tuple *tuple,
35 enum nf_nat_manip_type maniptype)
36{
37 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
38 struct udphdr *hdr;
39 unsigned int hdroff = iphdroff + iph->ihl*4;
40 __be32 oldip, newip;
41 __be16 *portptr, newport;
42
43 if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
44 return false;
45
46 iph = (struct iphdr *)(skb->data + iphdroff);
47 hdr = (struct udphdr *)(skb->data + hdroff);
48
49 if (maniptype == IP_NAT_MANIP_SRC) {
50 /* Get rid of src ip and src pt */
51 oldip = iph->saddr;
52 newip = tuple->src.u3.ip;
53 newport = tuple->src.u.udp.port;
54 portptr = &hdr->source;
55 } else {
56 /* Get rid of dst ip and dst pt */
57 oldip = iph->daddr;
58 newip = tuple->dst.u3.ip;
59 newport = tuple->dst.u.udp.port;
60 portptr = &hdr->dest;
61 }
62
63 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
64 inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0);
65 if (!hdr->check)
66 hdr->check = CSUM_MANGLED_0;
67
68 *portptr = newport;
69 return true;
70}
71
72static const struct nf_nat_protocol nf_nat_protocol_udplite = {
73 .protonum = IPPROTO_UDPLITE,
74 .me = THIS_MODULE,
75 .manip_pkt = udplite_manip_pkt,
76 .in_range = nf_nat_proto_in_range,
77 .unique_tuple = udplite_unique_tuple,
78#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
79 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
80 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
81#endif
82};
83
84static int __init nf_nat_proto_udplite_init(void)
85{
86 return nf_nat_protocol_register(&nf_nat_protocol_udplite);
87}
88
89static void __exit nf_nat_proto_udplite_fini(void)
90{
91 nf_nat_protocol_unregister(&nf_nat_protocol_udplite);
92}
93
94module_init(nf_nat_proto_udplite_init);
95module_exit(nf_nat_proto_udplite_fini);
96
97MODULE_LICENSE("GPL");
98MODULE_DESCRIPTION("UDP-Lite NAT protocol helper");
99MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index a26efeb073cb..14381c62acea 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -18,35 +18,34 @@
18#include <net/netfilter/nf_nat_rule.h> 18#include <net/netfilter/nf_nat_rule.h>
19#include <net/netfilter/nf_nat_protocol.h> 19#include <net/netfilter/nf_nat_protocol.h>
20 20
21static int unknown_in_range(const struct nf_conntrack_tuple *tuple, 21static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
22 enum nf_nat_manip_type manip_type, 22 enum nf_nat_manip_type manip_type,
23 const union nf_conntrack_man_proto *min, 23 const union nf_conntrack_man_proto *min,
24 const union nf_conntrack_man_proto *max) 24 const union nf_conntrack_man_proto *max)
25{ 25{
26 return 1; 26 return true;
27} 27}
28 28
29static int unknown_unique_tuple(struct nf_conntrack_tuple *tuple, 29static bool unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
30 const struct nf_nat_range *range, 30 const struct nf_nat_range *range,
31 enum nf_nat_manip_type maniptype, 31 enum nf_nat_manip_type maniptype,
32 const struct nf_conn *ct) 32 const struct nf_conn *ct)
33{ 33{
34 /* Sorry: we can't help you; if it's not unique, we can't frob 34 /* Sorry: we can't help you; if it's not unique, we can't frob
35 anything. */ 35 anything. */
36 return 0; 36 return false;
37} 37}
38 38
39static int 39static bool
40unknown_manip_pkt(struct sk_buff *skb, 40unknown_manip_pkt(struct sk_buff *skb,
41 unsigned int iphdroff, 41 unsigned int iphdroff,
42 const struct nf_conntrack_tuple *tuple, 42 const struct nf_conntrack_tuple *tuple,
43 enum nf_nat_manip_type maniptype) 43 enum nf_nat_manip_type maniptype)
44{ 44{
45 return 1; 45 return true;
46} 46}
47 47
48const struct nf_nat_protocol nf_nat_unknown_protocol = { 48const struct nf_nat_protocol nf_nat_unknown_protocol = {
49 .name = "unknown",
50 /* .me isn't set: getting a ref to this cannot fail. */ 49 /* .me isn't set: getting a ref to this cannot fail. */
51 .manip_pkt = unknown_manip_pkt, 50 .manip_pkt = unknown_manip_pkt,
52 .in_range = unknown_in_range, 51 .in_range = unknown_in_range,
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index f8fda57ba20b..e8b4d0d4439e 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -61,7 +61,7 @@ static struct
61static struct xt_table __nat_table = { 61static struct xt_table __nat_table = {
62 .name = "nat", 62 .name = "nat",
63 .valid_hooks = NAT_VALID_HOOKS, 63 .valid_hooks = NAT_VALID_HOOKS,
64 .lock = RW_LOCK_UNLOCKED, 64 .lock = __RW_LOCK_UNLOCKED(__nat_table.lock),
65 .me = THIS_MODULE, 65 .me = THIS_MODULE,
66 .af = AF_INET, 66 .af = AF_INET,
67}; 67};
@@ -143,7 +143,7 @@ static bool ipt_snat_checkentry(const char *tablename,
143 void *targinfo, 143 void *targinfo,
144 unsigned int hook_mask) 144 unsigned int hook_mask)
145{ 145{
146 struct nf_nat_multi_range_compat *mr = targinfo; 146 const struct nf_nat_multi_range_compat *mr = targinfo;
147 147
148 /* Must be a valid range */ 148 /* Must be a valid range */
149 if (mr->rangesize != 1) { 149 if (mr->rangesize != 1) {
@@ -159,7 +159,7 @@ static bool ipt_dnat_checkentry(const char *tablename,
159 void *targinfo, 159 void *targinfo,
160 unsigned int hook_mask) 160 unsigned int hook_mask)
161{ 161{
162 struct nf_nat_multi_range_compat *mr = targinfo; 162 const struct nf_nat_multi_range_compat *mr = targinfo;
163 163
164 /* Must be a valid range */ 164 /* Must be a valid range */
165 if (mr->rangesize != 1) { 165 if (mr->rangesize != 1) {
@@ -188,25 +188,6 @@ alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
188 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); 188 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
189} 189}
190 190
191unsigned int
192alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum)
193{
194 __be32 ip
195 = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
196 ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip
197 : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
198 __be16 all
199 = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
200 ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all
201 : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all);
202 struct nf_nat_range range
203 = { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } };
204
205 pr_debug("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n",
206 ct, NIPQUAD(ip));
207 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
208}
209
210int nf_nat_rule_find(struct sk_buff *skb, 191int nf_nat_rule_find(struct sk_buff *skb,
211 unsigned int hooknum, 192 unsigned int hooknum,
212 const struct net_device *in, 193 const struct net_device *in,
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index b4c8d4968bb2..4334d5cabc5b 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -2,6 +2,8 @@
2 * 2 *
3 * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> 3 * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
4 * based on RR's ip_nat_ftp.c and other modules. 4 * based on RR's ip_nat_ftp.c and other modules.
5 * (C) 2007 United Security Providers
6 * (C) 2007, 2008 Patrick McHardy <kaber@trash.net>
5 * 7 *
6 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
@@ -26,275 +28,461 @@ MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
26MODULE_DESCRIPTION("SIP NAT helper"); 28MODULE_DESCRIPTION("SIP NAT helper");
27MODULE_ALIAS("ip_nat_sip"); 29MODULE_ALIAS("ip_nat_sip");
28 30
29struct addr_map {
30 struct {
31 char src[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
32 char dst[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
33 unsigned int srclen, srciplen;
34 unsigned int dstlen, dstiplen;
35 } addr[IP_CT_DIR_MAX];
36};
37 31
38static void addr_map_init(const struct nf_conn *ct, struct addr_map *map) 32static unsigned int mangle_packet(struct sk_buff *skb,
33 const char **dptr, unsigned int *datalen,
34 unsigned int matchoff, unsigned int matchlen,
35 const char *buffer, unsigned int buflen)
39{ 36{
40 const struct nf_conntrack_tuple *t; 37 enum ip_conntrack_info ctinfo;
41 enum ip_conntrack_dir dir; 38 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
42 unsigned int n; 39
43 40 if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen,
44 for (dir = 0; dir < IP_CT_DIR_MAX; dir++) { 41 buffer, buflen))
45 t = &ct->tuplehash[dir].tuple; 42 return 0;
46 43
47 n = sprintf(map->addr[dir].src, "%u.%u.%u.%u", 44 /* Reload data pointer and adjust datalen value */
48 NIPQUAD(t->src.u3.ip)); 45 *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
49 map->addr[dir].srciplen = n; 46 *datalen += buflen - matchlen;
50 n += sprintf(map->addr[dir].src + n, ":%u", 47 return 1;
51 ntohs(t->src.u.udp.port));
52 map->addr[dir].srclen = n;
53
54 n = sprintf(map->addr[dir].dst, "%u.%u.%u.%u",
55 NIPQUAD(t->dst.u3.ip));
56 map->addr[dir].dstiplen = n;
57 n += sprintf(map->addr[dir].dst + n, ":%u",
58 ntohs(t->dst.u.udp.port));
59 map->addr[dir].dstlen = n;
60 }
61} 48}
62 49
63static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo, 50static int map_addr(struct sk_buff *skb,
64 struct nf_conn *ct, const char **dptr, size_t dlen, 51 const char **dptr, unsigned int *datalen,
65 enum sip_header_pos pos, struct addr_map *map) 52 unsigned int matchoff, unsigned int matchlen,
53 union nf_inet_addr *addr, __be16 port)
66{ 54{
55 enum ip_conntrack_info ctinfo;
56 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
67 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 57 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
68 unsigned int matchlen, matchoff, addrlen; 58 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
69 char *addr; 59 unsigned int buflen;
70 60 __be32 newaddr;
71 if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0) 61 __be16 newport;
62
63 if (ct->tuplehash[dir].tuple.src.u3.ip == addr->ip &&
64 ct->tuplehash[dir].tuple.src.u.udp.port == port) {
65 newaddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
66 newport = ct->tuplehash[!dir].tuple.dst.u.udp.port;
67 } else if (ct->tuplehash[dir].tuple.dst.u3.ip == addr->ip &&
68 ct->tuplehash[dir].tuple.dst.u.udp.port == port) {
69 newaddr = ct->tuplehash[!dir].tuple.src.u3.ip;
70 newport = ct->tuplehash[!dir].tuple.src.u.udp.port;
71 } else
72 return 1; 72 return 1;
73 73
74 if ((matchlen == map->addr[dir].srciplen || 74 if (newaddr == addr->ip && newport == port)
75 matchlen == map->addr[dir].srclen) &&
76 memcmp(*dptr + matchoff, map->addr[dir].src, matchlen) == 0) {
77 addr = map->addr[!dir].dst;
78 addrlen = map->addr[!dir].dstlen;
79 } else if ((matchlen == map->addr[dir].dstiplen ||
80 matchlen == map->addr[dir].dstlen) &&
81 memcmp(*dptr + matchoff, map->addr[dir].dst, matchlen) == 0) {
82 addr = map->addr[!dir].src;
83 addrlen = map->addr[!dir].srclen;
84 } else
85 return 1; 75 return 1;
86 76
87 if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, 77 buflen = sprintf(buffer, "%u.%u.%u.%u:%u",
88 matchoff, matchlen, addr, addrlen)) 78 NIPQUAD(newaddr), ntohs(newport));
89 return 0;
90 *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
91 return 1;
92 79
80 return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
81 buffer, buflen);
93} 82}
94 83
95static unsigned int ip_nat_sip(struct sk_buff *skb, 84static int map_sip_addr(struct sk_buff *skb,
96 enum ip_conntrack_info ctinfo, 85 const char **dptr, unsigned int *datalen,
97 struct nf_conn *ct, 86 enum sip_header_types type)
98 const char **dptr)
99{ 87{
100 enum sip_header_pos pos; 88 enum ip_conntrack_info ctinfo;
101 struct addr_map map; 89 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
102 int dataoff, datalen; 90 unsigned int matchlen, matchoff;
91 union nf_inet_addr addr;
92 __be16 port;
103 93
104 dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); 94 if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL,
105 datalen = skb->len - dataoff; 95 &matchoff, &matchlen, &addr, &port) <= 0)
106 if (datalen < sizeof("SIP/2.0") - 1) 96 return 1;
107 return NF_ACCEPT; 97 return map_addr(skb, dptr, datalen, matchoff, matchlen, &addr, port);
98}
108 99
109 addr_map_init(ct, &map); 100static unsigned int ip_nat_sip(struct sk_buff *skb,
101 const char **dptr, unsigned int *datalen)
102{
103 enum ip_conntrack_info ctinfo;
104 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
105 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
106 unsigned int dataoff, matchoff, matchlen;
107 union nf_inet_addr addr;
108 __be16 port;
109 int request, in_header;
110 110
111 /* Basic rules: requests and responses. */ 111 /* Basic rules: requests and responses. */
112 if (strncmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) != 0) { 112 if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) {
113 /* 10.2: Constructing the REGISTER Request: 113 if (ct_sip_parse_request(ct, *dptr, *datalen,
114 * 114 &matchoff, &matchlen,
115 * The "userinfo" and "@" components of the SIP URI MUST NOT 115 &addr, &port) > 0 &&
116 * be present. 116 !map_addr(skb, dptr, datalen, matchoff, matchlen,
117 */ 117 &addr, port))
118 if (datalen >= sizeof("REGISTER") - 1 && 118 return NF_DROP;
119 strncmp(*dptr, "REGISTER", sizeof("REGISTER") - 1) == 0) 119 request = 1;
120 pos = POS_REG_REQ_URI; 120 } else
121 else 121 request = 0;
122 pos = POS_REQ_URI; 122
123 123 /* Translate topmost Via header and parameters */
124 if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, pos, &map)) 124 if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
125 SIP_HDR_VIA, NULL, &matchoff, &matchlen,
126 &addr, &port) > 0) {
127 unsigned int matchend, poff, plen, buflen, n;
128 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
129
130 /* We're only interested in headers related to this
131 * connection */
132 if (request) {
133 if (addr.ip != ct->tuplehash[dir].tuple.src.u3.ip ||
134 port != ct->tuplehash[dir].tuple.src.u.udp.port)
135 goto next;
136 } else {
137 if (addr.ip != ct->tuplehash[dir].tuple.dst.u3.ip ||
138 port != ct->tuplehash[dir].tuple.dst.u.udp.port)
139 goto next;
140 }
141
142 if (!map_addr(skb, dptr, datalen, matchoff, matchlen,
143 &addr, port))
125 return NF_DROP; 144 return NF_DROP;
145
146 matchend = matchoff + matchlen;
147
148 /* The maddr= parameter (RFC 2361) specifies where to send
149 * the reply. */
150 if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
151 "maddr=", &poff, &plen,
152 &addr) > 0 &&
153 addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
154 addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
155 __be32 ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
156 buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip));
157 if (!mangle_packet(skb, dptr, datalen, poff, plen,
158 buffer, buflen))
159 return NF_DROP;
160 }
161
162 /* The received= parameter (RFC 2361) contains the address
163 * from which the server received the request. */
164 if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
165 "received=", &poff, &plen,
166 &addr) > 0 &&
167 addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
168 addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
169 __be32 ip = ct->tuplehash[!dir].tuple.src.u3.ip;
170 buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip));
171 if (!mangle_packet(skb, dptr, datalen, poff, plen,
172 buffer, buflen))
173 return NF_DROP;
174 }
175
176 /* The rport= parameter (RFC 3581) contains the port number
177 * from which the server received the request. */
178 if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen,
179 "rport=", &poff, &plen,
180 &n) > 0 &&
181 htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port &&
182 htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) {
183 __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port;
184 buflen = sprintf(buffer, "%u", ntohs(p));
185 if (!mangle_packet(skb, dptr, datalen, poff, plen,
186 buffer, buflen))
187 return NF_DROP;
188 }
126 } 189 }
127 190
128 if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_FROM, &map) || 191next:
129 !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_TO, &map) || 192 /* Translate Contact headers */
130 !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_VIA, &map) || 193 dataoff = 0;
131 !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map)) 194 in_header = 0;
195 while (ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen,
196 SIP_HDR_CONTACT, &in_header,
197 &matchoff, &matchlen,
198 &addr, &port) > 0) {
199 if (!map_addr(skb, dptr, datalen, matchoff, matchlen,
200 &addr, port))
201 return NF_DROP;
202 }
203
204 if (!map_sip_addr(skb, dptr, datalen, SIP_HDR_FROM) ||
205 !map_sip_addr(skb, dptr, datalen, SIP_HDR_TO))
132 return NF_DROP; 206 return NF_DROP;
133 return NF_ACCEPT; 207 return NF_ACCEPT;
134} 208}
135 209
136static unsigned int mangle_sip_packet(struct sk_buff *skb, 210/* Handles expected signalling connections and media streams */
137 enum ip_conntrack_info ctinfo, 211static void ip_nat_sip_expected(struct nf_conn *ct,
138 struct nf_conn *ct, 212 struct nf_conntrack_expect *exp)
139 const char **dptr, size_t dlen,
140 char *buffer, int bufflen,
141 enum sip_header_pos pos)
142{ 213{
143 unsigned int matchlen, matchoff; 214 struct nf_nat_range range;
144 215
145 if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0) 216 /* This must be a fresh one. */
146 return 0; 217 BUG_ON(ct->status & IPS_NAT_DONE_MASK);
147 218
148 if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, 219 /* For DST manip, map port here to where it's expected. */
149 matchoff, matchlen, buffer, bufflen)) 220 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
150 return 0; 221 range.min = range.max = exp->saved_proto;
222 range.min_ip = range.max_ip = exp->saved_ip;
223 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
151 224
152 /* We need to reload this. Thanks Patrick. */ 225 /* Change src to where master sends to, but only if the connection
153 *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); 226 * actually came from the same source. */
154 return 1; 227 if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip ==
228 ct->master->tuplehash[exp->dir].tuple.src.u3.ip) {
229 range.flags = IP_NAT_RANGE_MAP_IPS;
230 range.min_ip = range.max_ip
231 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
232 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
233 }
155} 234}
156 235
157static int mangle_content_len(struct sk_buff *skb, 236static unsigned int ip_nat_sip_expect(struct sk_buff *skb,
158 enum ip_conntrack_info ctinfo, 237 const char **dptr, unsigned int *datalen,
159 struct nf_conn *ct, 238 struct nf_conntrack_expect *exp,
160 const char *dptr) 239 unsigned int matchoff,
240 unsigned int matchlen)
161{ 241{
162 unsigned int dataoff, matchoff, matchlen; 242 enum ip_conntrack_info ctinfo;
163 char buffer[sizeof("65536")]; 243 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
164 int bufflen; 244 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
245 __be32 newip;
246 u_int16_t port;
247 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
248 unsigned buflen;
165 249
166 dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); 250 /* Connection will come from reply */
251 if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip)
252 newip = exp->tuple.dst.u3.ip;
253 else
254 newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
167 255
168 /* Get actual SDP length */ 256 /* If the signalling port matches the connection's source port in the
169 if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, 257 * original direction, try to use the destination port in the opposite
170 &matchlen, POS_SDP_HEADER) > 0) { 258 * direction. */
259 if (exp->tuple.dst.u.udp.port ==
260 ct->tuplehash[dir].tuple.src.u.udp.port)
261 port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port);
262 else
263 port = ntohs(exp->tuple.dst.u.udp.port);
264
265 exp->saved_ip = exp->tuple.dst.u3.ip;
266 exp->tuple.dst.u3.ip = newip;
267 exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
268 exp->dir = !dir;
269 exp->expectfn = ip_nat_sip_expected;
171 270
172 /* since ct_sip_get_info() give us a pointer passing 'v=' 271 for (; port != 0; port++) {
173 we need to add 2 bytes in this count. */ 272 exp->tuple.dst.u.udp.port = htons(port);
174 int c_len = skb->len - dataoff - matchoff + 2; 273 if (nf_ct_expect_related(exp) == 0)
274 break;
275 }
175 276
176 /* Now, update SDP length */ 277 if (port == 0)
177 if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, 278 return NF_DROP;
178 &matchlen, POS_CONTENT) > 0) {
179 279
180 bufflen = sprintf(buffer, "%u", c_len); 280 if (exp->tuple.dst.u3.ip != exp->saved_ip ||
181 return nf_nat_mangle_udp_packet(skb, ct, ctinfo, 281 exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) {
182 matchoff, matchlen, 282 buflen = sprintf(buffer, "%u.%u.%u.%u:%u",
183 buffer, bufflen); 283 NIPQUAD(newip), port);
184 } 284 if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen,
285 buffer, buflen))
286 goto err;
185 } 287 }
186 return 0; 288 return NF_ACCEPT;
289
290err:
291 nf_ct_unexpect_related(exp);
292 return NF_DROP;
187} 293}
188 294
189static unsigned int mangle_sdp(struct sk_buff *skb, 295static int mangle_content_len(struct sk_buff *skb,
190 enum ip_conntrack_info ctinfo, 296 const char **dptr, unsigned int *datalen)
191 struct nf_conn *ct,
192 __be32 newip, u_int16_t port,
193 const char *dptr)
194{ 297{
195 char buffer[sizeof("nnn.nnn.nnn.nnn")]; 298 enum ip_conntrack_info ctinfo;
196 unsigned int dataoff, bufflen; 299 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
300 unsigned int matchoff, matchlen;
301 char buffer[sizeof("65536")];
302 int buflen, c_len;
197 303
198 dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); 304 /* Get actual SDP length */
305 if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
306 SDP_HDR_VERSION, SDP_HDR_UNSPEC,
307 &matchoff, &matchlen) <= 0)
308 return 0;
309 c_len = *datalen - matchoff + strlen("v=");
199 310
200 /* Mangle owner and contact info. */ 311 /* Now, update SDP length */
201 bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); 312 if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CONTENT_LENGTH,
202 if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, 313 &matchoff, &matchlen) <= 0)
203 buffer, bufflen, POS_OWNER_IP4))
204 return 0; 314 return 0;
205 315
206 if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, 316 buflen = sprintf(buffer, "%u", c_len);
207 buffer, bufflen, POS_CONNECTION_IP4)) 317 return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
318 buffer, buflen);
319}
320
321static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr,
322 unsigned int dataoff, unsigned int *datalen,
323 enum sdp_header_types type,
324 enum sdp_header_types term,
325 char *buffer, int buflen)
326{
327 enum ip_conntrack_info ctinfo;
328 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
329 unsigned int matchlen, matchoff;
330
331 if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term,
332 &matchoff, &matchlen) <= 0)
208 return 0; 333 return 0;
334 return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
335 buffer, buflen);
336}
209 337
210 /* Mangle media port. */ 338static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr,
211 bufflen = sprintf(buffer, "%u", port); 339 unsigned int dataoff,
212 if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, 340 unsigned int *datalen,
213 buffer, bufflen, POS_MEDIA)) 341 enum sdp_header_types type,
342 enum sdp_header_types term,
343 const union nf_inet_addr *addr)
344{
345 char buffer[sizeof("nnn.nnn.nnn.nnn")];
346 unsigned int buflen;
347
348 buflen = sprintf(buffer, NIPQUAD_FMT, NIPQUAD(addr->ip));
349 if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term,
350 buffer, buflen))
214 return 0; 351 return 0;
215 352
216 return mangle_content_len(skb, ctinfo, ct, dptr); 353 return mangle_content_len(skb, dptr, datalen);
217} 354}
218 355
219static void ip_nat_sdp_expect(struct nf_conn *ct, 356static unsigned int ip_nat_sdp_port(struct sk_buff *skb,
220 struct nf_conntrack_expect *exp) 357 const char **dptr,
358 unsigned int *datalen,
359 unsigned int matchoff,
360 unsigned int matchlen,
361 u_int16_t port)
221{ 362{
222 struct nf_nat_range range; 363 char buffer[sizeof("nnnnn")];
364 unsigned int buflen;
223 365
224 /* This must be a fresh one. */ 366 buflen = sprintf(buffer, "%u", port);
225 BUG_ON(ct->status & IPS_NAT_DONE_MASK); 367 if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen,
368 buffer, buflen))
369 return 0;
226 370
227 /* Change src to where master sends to */ 371 return mangle_content_len(skb, dptr, datalen);
228 range.flags = IP_NAT_RANGE_MAP_IPS; 372}
229 range.min_ip = range.max_ip
230 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
231 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
232 373
233 /* For DST manip, map port here to where it's expected. */ 374static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr,
234 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); 375 unsigned int dataoff,
235 range.min = range.max = exp->saved_proto; 376 unsigned int *datalen,
236 range.min_ip = range.max_ip = exp->saved_ip; 377 const union nf_inet_addr *addr)
237 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); 378{
379 char buffer[sizeof("nnn.nnn.nnn.nnn")];
380 unsigned int buflen;
381
382 /* Mangle session description owner and contact addresses */
383 buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(addr->ip));
384 if (!mangle_sdp_packet(skb, dptr, dataoff, datalen,
385 SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA,
386 buffer, buflen))
387 return 0;
388
389 if (!mangle_sdp_packet(skb, dptr, dataoff, datalen,
390 SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA,
391 buffer, buflen))
392 return 0;
393
394 return mangle_content_len(skb, dptr, datalen);
238} 395}
239 396
240/* So, this packet has hit the connection tracking matching code. 397/* So, this packet has hit the connection tracking matching code.
241 Mangle it, and change the expectation to match the new version. */ 398 Mangle it, and change the expectation to match the new version. */
242static unsigned int ip_nat_sdp(struct sk_buff *skb, 399static unsigned int ip_nat_sdp_media(struct sk_buff *skb,
243 enum ip_conntrack_info ctinfo, 400 const char **dptr,
244 struct nf_conntrack_expect *exp, 401 unsigned int *datalen,
245 const char *dptr) 402 struct nf_conntrack_expect *rtp_exp,
403 struct nf_conntrack_expect *rtcp_exp,
404 unsigned int mediaoff,
405 unsigned int medialen,
406 union nf_inet_addr *rtp_addr)
246{ 407{
247 struct nf_conn *ct = exp->master; 408 enum ip_conntrack_info ctinfo;
409 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
248 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 410 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
249 __be32 newip;
250 u_int16_t port; 411 u_int16_t port;
251 412
252 /* Connection will come from reply */ 413 /* Connection will come from reply */
253 if (ct->tuplehash[dir].tuple.src.u3.ip == 414 if (ct->tuplehash[dir].tuple.src.u3.ip ==
254 ct->tuplehash[!dir].tuple.dst.u3.ip) 415 ct->tuplehash[!dir].tuple.dst.u3.ip)
255 newip = exp->tuple.dst.u3.ip; 416 rtp_addr->ip = rtp_exp->tuple.dst.u3.ip;
256 else 417 else
257 newip = ct->tuplehash[!dir].tuple.dst.u3.ip; 418 rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
258 419
259 exp->saved_ip = exp->tuple.dst.u3.ip; 420 rtp_exp->saved_ip = rtp_exp->tuple.dst.u3.ip;
260 exp->tuple.dst.u3.ip = newip; 421 rtp_exp->tuple.dst.u3.ip = rtp_addr->ip;
261 exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; 422 rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
262 exp->dir = !dir; 423 rtp_exp->dir = !dir;
263 424 rtp_exp->expectfn = ip_nat_sip_expected;
264 /* When you see the packet, we need to NAT it the same as the 425
265 this one. */ 426 rtcp_exp->saved_ip = rtcp_exp->tuple.dst.u3.ip;
266 exp->expectfn = ip_nat_sdp_expect; 427 rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip;
267 428 rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
268 /* Try to get same port: if not, try to change it. */ 429 rtcp_exp->dir = !dir;
269 for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) { 430 rtcp_exp->expectfn = ip_nat_sip_expected;
270 exp->tuple.dst.u.udp.port = htons(port); 431
271 if (nf_ct_expect_related(exp) == 0) 432 /* Try to get same pair of ports: if not, try to change them. */
433 for (port = ntohs(rtp_exp->tuple.dst.u.udp.port);
434 port != 0; port += 2) {
435 rtp_exp->tuple.dst.u.udp.port = htons(port);
436 if (nf_ct_expect_related(rtp_exp) != 0)
437 continue;
438 rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
439 if (nf_ct_expect_related(rtcp_exp) == 0)
272 break; 440 break;
441 nf_ct_unexpect_related(rtp_exp);
273 } 442 }
274 443
275 if (port == 0) 444 if (port == 0)
276 return NF_DROP; 445 goto err1;
446
447 /* Update media port. */
448 if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port &&
449 !ip_nat_sdp_port(skb, dptr, datalen, mediaoff, medialen, port))
450 goto err2;
277 451
278 if (!mangle_sdp(skb, ctinfo, ct, newip, port, dptr)) {
279 nf_ct_unexpect_related(exp);
280 return NF_DROP;
281 }
282 return NF_ACCEPT; 452 return NF_ACCEPT;
453
454err2:
455 nf_ct_unexpect_related(rtp_exp);
456 nf_ct_unexpect_related(rtcp_exp);
457err1:
458 return NF_DROP;
283} 459}
284 460
285static void __exit nf_nat_sip_fini(void) 461static void __exit nf_nat_sip_fini(void)
286{ 462{
287 rcu_assign_pointer(nf_nat_sip_hook, NULL); 463 rcu_assign_pointer(nf_nat_sip_hook, NULL);
288 rcu_assign_pointer(nf_nat_sdp_hook, NULL); 464 rcu_assign_pointer(nf_nat_sip_expect_hook, NULL);
465 rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL);
466 rcu_assign_pointer(nf_nat_sdp_port_hook, NULL);
467 rcu_assign_pointer(nf_nat_sdp_session_hook, NULL);
468 rcu_assign_pointer(nf_nat_sdp_media_hook, NULL);
289 synchronize_rcu(); 469 synchronize_rcu();
290} 470}
291 471
292static int __init nf_nat_sip_init(void) 472static int __init nf_nat_sip_init(void)
293{ 473{
294 BUG_ON(nf_nat_sip_hook != NULL); 474 BUG_ON(nf_nat_sip_hook != NULL);
295 BUG_ON(nf_nat_sdp_hook != NULL); 475 BUG_ON(nf_nat_sip_expect_hook != NULL);
476 BUG_ON(nf_nat_sdp_addr_hook != NULL);
477 BUG_ON(nf_nat_sdp_port_hook != NULL);
478 BUG_ON(nf_nat_sdp_session_hook != NULL);
479 BUG_ON(nf_nat_sdp_media_hook != NULL);
296 rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); 480 rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip);
297 rcu_assign_pointer(nf_nat_sdp_hook, ip_nat_sdp); 481 rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect);
482 rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr);
483 rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port);
484 rcu_assign_pointer(nf_nat_sdp_session_hook, ip_nat_sdp_session);
485 rcu_assign_pointer(nf_nat_sdp_media_hook, ip_nat_sdp_media);
298 return 0; 486 return 0;
299} 487}
300 488
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 540ce6ae887c..5daefad3d193 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -50,6 +50,7 @@
50#include <net/udp.h> 50#include <net/udp.h>
51 51
52#include <net/netfilter/nf_nat.h> 52#include <net/netfilter/nf_nat.h>
53#include <net/netfilter/nf_conntrack_expect.h>
53#include <net/netfilter/nf_conntrack_helper.h> 54#include <net/netfilter/nf_conntrack_helper.h>
54#include <net/netfilter/nf_nat_helper.h> 55#include <net/netfilter/nf_nat_helper.h>
55 56
@@ -219,7 +220,7 @@ static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
219 if (ch < 0x80) 220 if (ch < 0x80)
220 *len = ch; 221 *len = ch;
221 else { 222 else {
222 cnt = (unsigned char) (ch & 0x7F); 223 cnt = ch & 0x7F;
223 *len = 0; 224 *len = 0;
224 225
225 while (cnt > 0) { 226 while (cnt > 0) {
@@ -617,8 +618,7 @@ struct snmp_cnv
617 int syntax; 618 int syntax;
618}; 619};
619 620
620static struct snmp_cnv snmp_conv [] = 621static const struct snmp_cnv snmp_conv[] = {
621{
622 {ASN1_UNI, ASN1_NUL, SNMP_NULL}, 622 {ASN1_UNI, ASN1_NUL, SNMP_NULL},
623 {ASN1_UNI, ASN1_INT, SNMP_INTEGER}, 623 {ASN1_UNI, ASN1_INT, SNMP_INTEGER},
624 {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR}, 624 {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
@@ -643,7 +643,7 @@ static unsigned char snmp_tag_cls2syntax(unsigned int tag,
643 unsigned int cls, 643 unsigned int cls,
644 unsigned short *syntax) 644 unsigned short *syntax)
645{ 645{
646 struct snmp_cnv *cnv; 646 const struct snmp_cnv *cnv;
647 647
648 cnv = snmp_conv; 648 cnv = snmp_conv;
649 649
@@ -903,7 +903,7 @@ static inline void mangle_address(unsigned char *begin,
903 u_int32_t old; 903 u_int32_t old;
904 904
905 if (debug) 905 if (debug)
906 memcpy(&old, (unsigned char *)addr, sizeof(old)); 906 memcpy(&old, addr, sizeof(old));
907 907
908 *addr = map->to; 908 *addr = map->to;
909 909
@@ -998,7 +998,7 @@ err_id_free:
998 * 998 *
999 *****************************************************************************/ 999 *****************************************************************************/
1000 1000
1001static void hex_dump(unsigned char *buf, size_t len) 1001static void hex_dump(const unsigned char *buf, size_t len)
1002{ 1002{
1003 size_t i; 1003 size_t i;
1004 1004
@@ -1079,7 +1079,7 @@ static int snmp_parse_mangle(unsigned char *msg,
1079 if (cls != ASN1_CTX || con != ASN1_CON) 1079 if (cls != ASN1_CTX || con != ASN1_CON)
1080 return 0; 1080 return 0;
1081 if (debug > 1) { 1081 if (debug > 1) {
1082 unsigned char *pdus[] = { 1082 static const unsigned char *const pdus[] = {
1083 [SNMP_PDU_GET] = "get", 1083 [SNMP_PDU_GET] = "get",
1084 [SNMP_PDU_NEXT] = "get-next", 1084 [SNMP_PDU_NEXT] = "get-next",
1085 [SNMP_PDU_RESPONSE] = "response", 1085 [SNMP_PDU_RESPONSE] = "response",
@@ -1231,8 +1231,8 @@ static int help(struct sk_buff *skb, unsigned int protoff,
1231{ 1231{
1232 int dir = CTINFO2DIR(ctinfo); 1232 int dir = CTINFO2DIR(ctinfo);
1233 unsigned int ret; 1233 unsigned int ret;
1234 struct iphdr *iph = ip_hdr(skb); 1234 const struct iphdr *iph = ip_hdr(skb);
1235 struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); 1235 const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
1236 1236
1237 /* SNMP replies and originating SNMP traps get mangled */ 1237 /* SNMP replies and originating SNMP traps get mangled */
1238 if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY) 1238 if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
@@ -1267,11 +1267,15 @@ static int help(struct sk_buff *skb, unsigned int protoff,
1267 return ret; 1267 return ret;
1268} 1268}
1269 1269
1270static const struct nf_conntrack_expect_policy snmp_exp_policy = {
1271 .max_expected = 0,
1272 .timeout = 180,
1273};
1274
1270static struct nf_conntrack_helper snmp_helper __read_mostly = { 1275static struct nf_conntrack_helper snmp_helper __read_mostly = {
1271 .max_expected = 0,
1272 .timeout = 180,
1273 .me = THIS_MODULE, 1276 .me = THIS_MODULE,
1274 .help = help, 1277 .help = help,
1278 .expect_policy = &snmp_exp_policy,
1275 .name = "snmp", 1279 .name = "snmp",
1276 .tuple.src.l3num = AF_INET, 1280 .tuple.src.l3num = AF_INET,
1277 .tuple.src.u.udp.port = __constant_htons(SNMP_PORT), 1281 .tuple.src.u.udp.port = __constant_htons(SNMP_PORT),
@@ -1279,10 +1283,9 @@ static struct nf_conntrack_helper snmp_helper __read_mostly = {
1279}; 1283};
1280 1284
1281static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { 1285static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
1282 .max_expected = 0,
1283 .timeout = 180,
1284 .me = THIS_MODULE, 1286 .me = THIS_MODULE,
1285 .help = help, 1287 .help = help,
1288 .expect_policy = &snmp_exp_policy,
1286 .name = "snmp_trap", 1289 .name = "snmp_trap",
1287 .tuple.src.l3num = AF_INET, 1290 .tuple.src.l3num = AF_INET,
1288 .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT), 1291 .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT),
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 99b2c788d5a8..b7dd695691a0 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -30,8 +30,8 @@
30#ifdef CONFIG_XFRM 30#ifdef CONFIG_XFRM
31static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) 31static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
32{ 32{
33 struct nf_conn *ct; 33 const struct nf_conn *ct;
34 struct nf_conntrack_tuple *t; 34 const struct nf_conntrack_tuple *t;
35 enum ip_conntrack_info ctinfo; 35 enum ip_conntrack_info ctinfo;
36 enum ip_conntrack_dir dir; 36 enum ip_conntrack_dir dir;
37 unsigned long statusbit; 37 unsigned long statusbit;
@@ -50,7 +50,10 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
50 if (ct->status & statusbit) { 50 if (ct->status & statusbit) {
51 fl->fl4_dst = t->dst.u3.ip; 51 fl->fl4_dst = t->dst.u3.ip;
52 if (t->dst.protonum == IPPROTO_TCP || 52 if (t->dst.protonum == IPPROTO_TCP ||
53 t->dst.protonum == IPPROTO_UDP) 53 t->dst.protonum == IPPROTO_UDP ||
54 t->dst.protonum == IPPROTO_UDPLITE ||
55 t->dst.protonum == IPPROTO_DCCP ||
56 t->dst.protonum == IPPROTO_SCTP)
54 fl->fl_ip_dport = t->dst.u.tcp.port; 57 fl->fl_ip_dport = t->dst.u.tcp.port;
55 } 58 }
56 59
@@ -59,7 +62,10 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
59 if (ct->status & statusbit) { 62 if (ct->status & statusbit) {
60 fl->fl4_src = t->src.u3.ip; 63 fl->fl4_src = t->src.u3.ip;
61 if (t->dst.protonum == IPPROTO_TCP || 64 if (t->dst.protonum == IPPROTO_TCP ||
62 t->dst.protonum == IPPROTO_UDP) 65 t->dst.protonum == IPPROTO_UDP ||
66 t->dst.protonum == IPPROTO_UDPLITE ||
67 t->dst.protonum == IPPROTO_DCCP ||
68 t->dst.protonum == IPPROTO_SCTP)
63 fl->fl_ip_sport = t->src.u.tcp.port; 69 fl->fl_ip_sport = t->src.u.tcp.port;
64 } 70 }
65} 71}
@@ -87,21 +93,8 @@ nf_nat_fn(unsigned int hooknum,
87 have dropped it. Hence it's the user's responsibilty to 93 have dropped it. Hence it's the user's responsibilty to
88 packet filter it out, or implement conntrack/NAT for that 94 packet filter it out, or implement conntrack/NAT for that
89 protocol. 8) --RR */ 95 protocol. 8) --RR */
90 if (!ct) { 96 if (!ct)
91 /* Exception: ICMP redirect to new connection (not in
92 hash table yet). We must not let this through, in
93 case we're doing NAT to the same network. */
94 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
95 struct icmphdr _hdr, *hp;
96
97 hp = skb_header_pointer(skb, ip_hdrlen(skb),
98 sizeof(_hdr), &_hdr);
99 if (hp != NULL &&
100 hp->type == ICMP_REDIRECT)
101 return NF_DROP;
102 }
103 return NF_ACCEPT; 97 return NF_ACCEPT;
104 }
105 98
106 /* Don't try to NAT if this packet is not conntracked */ 99 /* Don't try to NAT if this packet is not conntracked */
107 if (ct == &nf_conntrack_untracked) 100 if (ct == &nf_conntrack_untracked)
@@ -109,6 +102,9 @@ nf_nat_fn(unsigned int hooknum,
109 102
110 nat = nfct_nat(ct); 103 nat = nfct_nat(ct);
111 if (!nat) { 104 if (!nat) {
105 /* NAT module was loaded late. */
106 if (nf_ct_is_confirmed(ct))
107 return NF_ACCEPT;
112 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); 108 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
113 if (nat == NULL) { 109 if (nat == NULL) {
114 pr_debug("failed to add NAT extension\n"); 110 pr_debug("failed to add NAT extension\n");
@@ -134,10 +130,7 @@ nf_nat_fn(unsigned int hooknum,
134 if (!nf_nat_initialized(ct, maniptype)) { 130 if (!nf_nat_initialized(ct, maniptype)) {
135 unsigned int ret; 131 unsigned int ret;
136 132
137 if (unlikely(nf_ct_is_confirmed(ct))) 133 if (hooknum == NF_INET_LOCAL_IN)
138 /* NAT module was loaded late */
139 ret = alloc_null_binding_confirmed(ct, hooknum);
140 else if (hooknum == NF_INET_LOCAL_IN)
141 /* LOCAL_IN hook doesn't have a chain! */ 134 /* LOCAL_IN hook doesn't have a chain! */
142 ret = alloc_null_binding(ct, hooknum); 135 ret = alloc_null_binding(ct, hooknum);
143 else 136 else
@@ -189,7 +182,7 @@ nf_nat_out(unsigned int hooknum,
189 int (*okfn)(struct sk_buff *)) 182 int (*okfn)(struct sk_buff *))
190{ 183{
191#ifdef CONFIG_XFRM 184#ifdef CONFIG_XFRM
192 struct nf_conn *ct; 185 const struct nf_conn *ct;
193 enum ip_conntrack_info ctinfo; 186 enum ip_conntrack_info ctinfo;
194#endif 187#endif
195 unsigned int ret; 188 unsigned int ret;
@@ -223,7 +216,7 @@ nf_nat_local_fn(unsigned int hooknum,
223 const struct net_device *out, 216 const struct net_device *out,
224 int (*okfn)(struct sk_buff *)) 217 int (*okfn)(struct sk_buff *))
225{ 218{
226 struct nf_conn *ct; 219 const struct nf_conn *ct;
227 enum ip_conntrack_info ctinfo; 220 enum ip_conntrack_info ctinfo;
228 unsigned int ret; 221 unsigned int ret;
229 222
@@ -252,25 +245,6 @@ nf_nat_local_fn(unsigned int hooknum,
252 return ret; 245 return ret;
253} 246}
254 247
255static unsigned int
256nf_nat_adjust(unsigned int hooknum,
257 struct sk_buff *skb,
258 const struct net_device *in,
259 const struct net_device *out,
260 int (*okfn)(struct sk_buff *))
261{
262 struct nf_conn *ct;
263 enum ip_conntrack_info ctinfo;
264
265 ct = nf_ct_get(skb, &ctinfo);
266 if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
267 pr_debug("nf_nat_standalone: adjusting sequence number\n");
268 if (!nf_nat_seq_adjust(skb, ct, ctinfo))
269 return NF_DROP;
270 }
271 return NF_ACCEPT;
272}
273
274/* We must be after connection tracking and before packet filtering. */ 248/* We must be after connection tracking and before packet filtering. */
275 249
276static struct nf_hook_ops nf_nat_ops[] __read_mostly = { 250static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
@@ -290,14 +264,6 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
290 .hooknum = NF_INET_POST_ROUTING, 264 .hooknum = NF_INET_POST_ROUTING,
291 .priority = NF_IP_PRI_NAT_SRC, 265 .priority = NF_IP_PRI_NAT_SRC,
292 }, 266 },
293 /* After conntrack, adjust sequence number */
294 {
295 .hook = nf_nat_adjust,
296 .owner = THIS_MODULE,
297 .pf = PF_INET,
298 .hooknum = NF_INET_POST_ROUTING,
299 .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
300 },
301 /* Before packet filtering, change destination */ 267 /* Before packet filtering, change destination */
302 { 268 {
303 .hook = nf_nat_local_fn, 269 .hook = nf_nat_local_fn,
@@ -314,14 +280,6 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
314 .hooknum = NF_INET_LOCAL_IN, 280 .hooknum = NF_INET_LOCAL_IN,
315 .priority = NF_IP_PRI_NAT_SRC, 281 .priority = NF_IP_PRI_NAT_SRC,
316 }, 282 },
317 /* After conntrack, adjust sequence number */
318 {
319 .hook = nf_nat_adjust,
320 .owner = THIS_MODULE,
321 .pf = PF_INET,
322 .hooknum = NF_INET_LOCAL_IN,
323 .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
324 },
325}; 283};
326 284
327static int __init nf_nat_standalone_init(void) 285static int __init nf_nat_standalone_init(void)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index d63474c6b400..552169b41b16 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -51,24 +51,54 @@
51 */ 51 */
52static int sockstat_seq_show(struct seq_file *seq, void *v) 52static int sockstat_seq_show(struct seq_file *seq, void *v)
53{ 53{
54 struct net *net = seq->private;
55
54 socket_seq_show(seq); 56 socket_seq_show(seq);
55 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", 57 seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
56 sock_prot_inuse_get(&tcp_prot), 58 sock_prot_inuse_get(net, &tcp_prot),
57 atomic_read(&tcp_orphan_count), 59 atomic_read(&tcp_orphan_count),
58 tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), 60 tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated),
59 atomic_read(&tcp_memory_allocated)); 61 atomic_read(&tcp_memory_allocated));
60 seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot), 62 seq_printf(seq, "UDP: inuse %d mem %d\n",
63 sock_prot_inuse_get(net, &udp_prot),
61 atomic_read(&udp_memory_allocated)); 64 atomic_read(&udp_memory_allocated));
62 seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot)); 65 seq_printf(seq, "UDPLITE: inuse %d\n",
63 seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot)); 66 sock_prot_inuse_get(net, &udplite_prot));
67 seq_printf(seq, "RAW: inuse %d\n",
68 sock_prot_inuse_get(net, &raw_prot));
64 seq_printf(seq, "FRAG: inuse %d memory %d\n", 69 seq_printf(seq, "FRAG: inuse %d memory %d\n",
65 ip_frag_nqueues(&init_net), ip_frag_mem(&init_net)); 70 ip_frag_nqueues(net), ip_frag_mem(net));
66 return 0; 71 return 0;
67} 72}
68 73
69static int sockstat_seq_open(struct inode *inode, struct file *file) 74static int sockstat_seq_open(struct inode *inode, struct file *file)
70{ 75{
71 return single_open(file, sockstat_seq_show, NULL); 76 int err;
77 struct net *net;
78
79 err = -ENXIO;
80 net = get_proc_net(inode);
81 if (net == NULL)
82 goto err_net;
83
84 err = single_open(file, sockstat_seq_show, net);
85 if (err < 0)
86 goto err_open;
87
88 return 0;
89
90err_open:
91 put_net(net);
92err_net:
93 return err;
94}
95
96static int sockstat_seq_release(struct inode *inode, struct file *file)
97{
98 struct net *net = ((struct seq_file *)file->private_data)->private;
99
100 put_net(net);
101 return single_release(inode, file);
72} 102}
73 103
74static const struct file_operations sockstat_seq_fops = { 104static const struct file_operations sockstat_seq_fops = {
@@ -76,7 +106,7 @@ static const struct file_operations sockstat_seq_fops = {
76 .open = sockstat_seq_open, 106 .open = sockstat_seq_open,
77 .read = seq_read, 107 .read = seq_read,
78 .llseek = seq_lseek, 108 .llseek = seq_lseek,
79 .release = single_release, 109 .release = sockstat_seq_release,
80}; 110};
81 111
82/* snmp items */ 112/* snmp items */
@@ -423,25 +453,42 @@ static const struct file_operations netstat_seq_fops = {
423 .release = single_release, 453 .release = single_release,
424}; 454};
425 455
456static __net_init int ip_proc_init_net(struct net *net)
457{
458 if (!proc_net_fops_create(net, "sockstat", S_IRUGO, &sockstat_seq_fops))
459 return -ENOMEM;
460 return 0;
461}
462
463static __net_exit void ip_proc_exit_net(struct net *net)
464{
465 proc_net_remove(net, "sockstat");
466}
467
468static __net_initdata struct pernet_operations ip_proc_ops = {
469 .init = ip_proc_init_net,
470 .exit = ip_proc_exit_net,
471};
472
426int __init ip_misc_proc_init(void) 473int __init ip_misc_proc_init(void)
427{ 474{
428 int rc = 0; 475 int rc = 0;
429 476
477 if (register_pernet_subsys(&ip_proc_ops))
478 goto out_pernet;
479
430 if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops)) 480 if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops))
431 goto out_netstat; 481 goto out_netstat;
432 482
433 if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops)) 483 if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops))
434 goto out_snmp; 484 goto out_snmp;
435
436 if (!proc_net_fops_create(&init_net, "sockstat", S_IRUGO, &sockstat_seq_fops))
437 goto out_sockstat;
438out: 485out:
439 return rc; 486 return rc;
440out_sockstat:
441 proc_net_remove(&init_net, "snmp");
442out_snmp: 487out_snmp:
443 proc_net_remove(&init_net, "netstat"); 488 proc_net_remove(&init_net, "netstat");
444out_netstat: 489out_netstat:
490 unregister_pernet_subsys(&ip_proc_ops);
491out_pernet:
445 rc = -ENOMEM; 492 rc = -ENOMEM;
446 goto out; 493 goto out;
447} 494}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a3002fe65b7f..11d7f753a820 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -81,41 +81,34 @@
81#include <linux/netfilter_ipv4.h> 81#include <linux/netfilter_ipv4.h>
82 82
83static struct raw_hashinfo raw_v4_hashinfo = { 83static struct raw_hashinfo raw_v4_hashinfo = {
84 .lock = __RW_LOCK_UNLOCKED(), 84 .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
85}; 85};
86 86
87void raw_hash_sk(struct sock *sk, struct raw_hashinfo *h) 87void raw_hash_sk(struct sock *sk)
88{ 88{
89 struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
89 struct hlist_head *head; 90 struct hlist_head *head;
90 91
91 head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)]; 92 head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)];
92 93
93 write_lock_bh(&h->lock); 94 write_lock_bh(&h->lock);
94 sk_add_node(sk, head); 95 sk_add_node(sk, head);
95 sock_prot_inuse_add(sk->sk_prot, 1); 96 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
96 write_unlock_bh(&h->lock); 97 write_unlock_bh(&h->lock);
97} 98}
98EXPORT_SYMBOL_GPL(raw_hash_sk); 99EXPORT_SYMBOL_GPL(raw_hash_sk);
99 100
100void raw_unhash_sk(struct sock *sk, struct raw_hashinfo *h) 101void raw_unhash_sk(struct sock *sk)
101{ 102{
103 struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
104
102 write_lock_bh(&h->lock); 105 write_lock_bh(&h->lock);
103 if (sk_del_node_init(sk)) 106 if (sk_del_node_init(sk))
104 sock_prot_inuse_add(sk->sk_prot, -1); 107 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
105 write_unlock_bh(&h->lock); 108 write_unlock_bh(&h->lock);
106} 109}
107EXPORT_SYMBOL_GPL(raw_unhash_sk); 110EXPORT_SYMBOL_GPL(raw_unhash_sk);
108 111
109static void raw_v4_hash(struct sock *sk)
110{
111 raw_hash_sk(sk, &raw_v4_hashinfo);
112}
113
114static void raw_v4_unhash(struct sock *sk)
115{
116 raw_unhash_sk(sk, &raw_v4_hashinfo);
117}
118
119static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, 112static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
120 unsigned short num, __be32 raddr, __be32 laddr, int dif) 113 unsigned short num, __be32 raddr, __be32 laddr, int dif)
121{ 114{
@@ -124,7 +117,7 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
124 sk_for_each_from(sk, node) { 117 sk_for_each_from(sk, node) {
125 struct inet_sock *inet = inet_sk(sk); 118 struct inet_sock *inet = inet_sk(sk);
126 119
127 if (sk->sk_net == net && inet->num == num && 120 if (net_eq(sock_net(sk), net) && inet->num == num &&
128 !(inet->daddr && inet->daddr != raddr) && 121 !(inet->daddr && inet->daddr != raddr) &&
129 !(inet->rcv_saddr && inet->rcv_saddr != laddr) && 122 !(inet->rcv_saddr && inet->rcv_saddr != laddr) &&
130 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) 123 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
@@ -175,7 +168,7 @@ static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
175 if (hlist_empty(head)) 168 if (hlist_empty(head))
176 goto out; 169 goto out;
177 170
178 net = skb->dev->nd_net; 171 net = dev_net(skb->dev);
179 sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, 172 sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
180 iph->saddr, iph->daddr, 173 iph->saddr, iph->daddr,
181 skb->dev->ifindex); 174 skb->dev->ifindex);
@@ -283,7 +276,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
283 raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); 276 raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
284 if (raw_sk != NULL) { 277 if (raw_sk != NULL) {
285 iph = (struct iphdr *)skb->data; 278 iph = (struct iphdr *)skb->data;
286 net = skb->dev->nd_net; 279 net = dev_net(skb->dev);
287 280
288 while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, 281 while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
289 iph->daddr, iph->saddr, 282 iph->daddr, iph->saddr,
@@ -506,7 +499,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
506 ipc.oif = sk->sk_bound_dev_if; 499 ipc.oif = sk->sk_bound_dev_if;
507 500
508 if (msg->msg_controllen) { 501 if (msg->msg_controllen) {
509 err = ip_cmsg_send(msg, &ipc); 502 err = ip_cmsg_send(sock_net(sk), msg, &ipc);
510 if (err) 503 if (err)
511 goto out; 504 goto out;
512 if (ipc.opt) 505 if (ipc.opt)
@@ -560,7 +553,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
560 } 553 }
561 554
562 security_sk_classify_flow(sk, &fl); 555 security_sk_classify_flow(sk, &fl);
563 err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); 556 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
564 } 557 }
565 if (err) 558 if (err)
566 goto done; 559 goto done;
@@ -627,7 +620,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
627 620
628 if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) 621 if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
629 goto out; 622 goto out;
630 chk_addr_ret = inet_addr_type(sk->sk_net, addr->sin_addr.s_addr); 623 chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
631 ret = -EADDRNOTAVAIL; 624 ret = -EADDRNOTAVAIL;
632 if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && 625 if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
633 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) 626 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
@@ -825,8 +818,6 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
825 } 818 }
826} 819}
827 820
828DEFINE_PROTO_INUSE(raw)
829
830struct proto raw_prot = { 821struct proto raw_prot = {
831 .name = "RAW", 822 .name = "RAW",
832 .owner = THIS_MODULE, 823 .owner = THIS_MODULE,
@@ -841,14 +832,14 @@ struct proto raw_prot = {
841 .recvmsg = raw_recvmsg, 832 .recvmsg = raw_recvmsg,
842 .bind = raw_bind, 833 .bind = raw_bind,
843 .backlog_rcv = raw_rcv_skb, 834 .backlog_rcv = raw_rcv_skb,
844 .hash = raw_v4_hash, 835 .hash = raw_hash_sk,
845 .unhash = raw_v4_unhash, 836 .unhash = raw_unhash_sk,
846 .obj_size = sizeof(struct raw_sock), 837 .obj_size = sizeof(struct raw_sock),
838 .h.raw_hash = &raw_v4_hashinfo,
847#ifdef CONFIG_COMPAT 839#ifdef CONFIG_COMPAT
848 .compat_setsockopt = compat_raw_setsockopt, 840 .compat_setsockopt = compat_raw_setsockopt,
849 .compat_getsockopt = compat_raw_getsockopt, 841 .compat_getsockopt = compat_raw_getsockopt,
850#endif 842#endif
851 REF_PROTO_INUSE(raw)
852}; 843};
853 844
854#ifdef CONFIG_PROC_FS 845#ifdef CONFIG_PROC_FS
@@ -862,7 +853,7 @@ static struct sock *raw_get_first(struct seq_file *seq)
862 struct hlist_node *node; 853 struct hlist_node *node;
863 854
864 sk_for_each(sk, node, &state->h->ht[state->bucket]) 855 sk_for_each(sk, node, &state->h->ht[state->bucket])
865 if (sk->sk_net == state->p.net) 856 if (sock_net(sk) == seq_file_net(seq))
866 goto found; 857 goto found;
867 } 858 }
868 sk = NULL; 859 sk = NULL;
@@ -878,7 +869,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
878 sk = sk_next(sk); 869 sk = sk_next(sk);
879try_again: 870try_again:
880 ; 871 ;
881 } while (sk && sk->sk_net != state->p.net); 872 } while (sk && sock_net(sk) != seq_file_net(seq));
882 873
883 if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { 874 if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
884 sk = sk_head(&state->h->ht[state->bucket]); 875 sk = sk_head(&state->h->ht[state->bucket]);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 7b5e8e1d94be..780e9484c825 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -118,21 +118,19 @@
118#define RT_GC_TIMEOUT (300*HZ) 118#define RT_GC_TIMEOUT (300*HZ)
119 119
120static int ip_rt_max_size; 120static int ip_rt_max_size;
121static int ip_rt_gc_timeout = RT_GC_TIMEOUT; 121static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
122static int ip_rt_gc_interval = 60 * HZ; 122static int ip_rt_gc_interval __read_mostly = 60 * HZ;
123static int ip_rt_gc_min_interval = HZ / 2; 123static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
124static int ip_rt_redirect_number = 9; 124static int ip_rt_redirect_number __read_mostly = 9;
125static int ip_rt_redirect_load = HZ / 50; 125static int ip_rt_redirect_load __read_mostly = HZ / 50;
126static int ip_rt_redirect_silence = ((HZ / 50) << (9 + 1)); 126static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
127static int ip_rt_error_cost = HZ; 127static int ip_rt_error_cost __read_mostly = HZ;
128static int ip_rt_error_burst = 5 * HZ; 128static int ip_rt_error_burst __read_mostly = 5 * HZ;
129static int ip_rt_gc_elasticity = 8; 129static int ip_rt_gc_elasticity __read_mostly = 8;
130static int ip_rt_mtu_expires = 10 * 60 * HZ; 130static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
131static int ip_rt_min_pmtu = 512 + 20 + 20; 131static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
132static int ip_rt_min_advmss = 256; 132static int ip_rt_min_advmss __read_mostly = 256;
133static int ip_rt_secret_interval = 10 * 60 * HZ; 133static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
134
135#define RTprint(a...) printk(KERN_DEBUG a)
136 134
137static void rt_worker_func(struct work_struct *work); 135static void rt_worker_func(struct work_struct *work);
138static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); 136static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
@@ -252,40 +250,41 @@ static inline void rt_hash_lock_init(void)
252} 250}
253#endif 251#endif
254 252
255static struct rt_hash_bucket *rt_hash_table; 253static struct rt_hash_bucket *rt_hash_table __read_mostly;
256static unsigned rt_hash_mask; 254static unsigned rt_hash_mask __read_mostly;
257static unsigned int rt_hash_log; 255static unsigned int rt_hash_log __read_mostly;
258static atomic_t rt_genid; 256static atomic_t rt_genid __read_mostly;
259 257
260static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); 258static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
261#define RT_CACHE_STAT_INC(field) \ 259#define RT_CACHE_STAT_INC(field) \
262 (__raw_get_cpu_var(rt_cache_stat).field++) 260 (__raw_get_cpu_var(rt_cache_stat).field++)
263 261
264static unsigned int rt_hash_code(u32 daddr, u32 saddr) 262static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx)
265{ 263{
266 return jhash_2words(daddr, saddr, atomic_read(&rt_genid)) 264 return jhash_3words((__force u32)(__be32)(daddr),
265 (__force u32)(__be32)(saddr),
266 idx, atomic_read(&rt_genid))
267 & rt_hash_mask; 267 & rt_hash_mask;
268} 268}
269 269
270#define rt_hash(daddr, saddr, idx) \
271 rt_hash_code((__force u32)(__be32)(daddr),\
272 (__force u32)(__be32)(saddr) ^ ((idx) << 5))
273
274#ifdef CONFIG_PROC_FS 270#ifdef CONFIG_PROC_FS
275struct rt_cache_iter_state { 271struct rt_cache_iter_state {
272 struct seq_net_private p;
276 int bucket; 273 int bucket;
277 int genid; 274 int genid;
278}; 275};
279 276
280static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) 277static struct rtable *rt_cache_get_first(struct seq_file *seq)
281{ 278{
279 struct rt_cache_iter_state *st = seq->private;
282 struct rtable *r = NULL; 280 struct rtable *r = NULL;
283 281
284 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { 282 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
285 rcu_read_lock_bh(); 283 rcu_read_lock_bh();
286 r = rcu_dereference(rt_hash_table[st->bucket].chain); 284 r = rcu_dereference(rt_hash_table[st->bucket].chain);
287 while (r) { 285 while (r) {
288 if (r->rt_genid == st->genid) 286 if (dev_net(r->u.dst.dev) == seq_file_net(seq) &&
287 r->rt_genid == st->genid)
289 return r; 288 return r;
290 r = rcu_dereference(r->u.dst.rt_next); 289 r = rcu_dereference(r->u.dst.rt_next);
291 } 290 }
@@ -294,8 +293,10 @@ static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st)
294 return r; 293 return r;
295} 294}
296 295
297static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r) 296static struct rtable *__rt_cache_get_next(struct seq_file *seq,
297 struct rtable *r)
298{ 298{
299 struct rt_cache_iter_state *st = seq->private;
299 r = r->u.dst.rt_next; 300 r = r->u.dst.rt_next;
300 while (!r) { 301 while (!r) {
301 rcu_read_unlock_bh(); 302 rcu_read_unlock_bh();
@@ -307,25 +308,34 @@ static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct r
307 return rcu_dereference(r); 308 return rcu_dereference(r);
308} 309}
309 310
310static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos) 311static struct rtable *rt_cache_get_next(struct seq_file *seq,
312 struct rtable *r)
313{
314 struct rt_cache_iter_state *st = seq->private;
315 while ((r = __rt_cache_get_next(seq, r)) != NULL) {
316 if (dev_net(r->u.dst.dev) != seq_file_net(seq))
317 continue;
318 if (r->rt_genid == st->genid)
319 break;
320 }
321 return r;
322}
323
324static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos)
311{ 325{
312 struct rtable *r = rt_cache_get_first(st); 326 struct rtable *r = rt_cache_get_first(seq);
313 327
314 if (r) 328 if (r)
315 while (pos && (r = rt_cache_get_next(st, r))) { 329 while (pos && (r = rt_cache_get_next(seq, r)))
316 if (r->rt_genid != st->genid)
317 continue;
318 --pos; 330 --pos;
319 }
320 return pos ? NULL : r; 331 return pos ? NULL : r;
321} 332}
322 333
323static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) 334static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
324{ 335{
325 struct rt_cache_iter_state *st = seq->private; 336 struct rt_cache_iter_state *st = seq->private;
326
327 if (*pos) 337 if (*pos)
328 return rt_cache_get_idx(st, *pos - 1); 338 return rt_cache_get_idx(seq, *pos - 1);
329 st->genid = atomic_read(&rt_genid); 339 st->genid = atomic_read(&rt_genid);
330 return SEQ_START_TOKEN; 340 return SEQ_START_TOKEN;
331} 341}
@@ -333,12 +343,11 @@ static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
333static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) 343static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
334{ 344{
335 struct rtable *r; 345 struct rtable *r;
336 struct rt_cache_iter_state *st = seq->private;
337 346
338 if (v == SEQ_START_TOKEN) 347 if (v == SEQ_START_TOKEN)
339 r = rt_cache_get_first(st); 348 r = rt_cache_get_first(seq);
340 else 349 else
341 r = rt_cache_get_next(st, v); 350 r = rt_cache_get_next(seq, v);
342 ++*pos; 351 ++*pos;
343 return r; 352 return r;
344} 353}
@@ -390,7 +399,7 @@ static const struct seq_operations rt_cache_seq_ops = {
390 399
391static int rt_cache_seq_open(struct inode *inode, struct file *file) 400static int rt_cache_seq_open(struct inode *inode, struct file *file)
392{ 401{
393 return seq_open_private(file, &rt_cache_seq_ops, 402 return seq_open_net(inode, file, &rt_cache_seq_ops,
394 sizeof(struct rt_cache_iter_state)); 403 sizeof(struct rt_cache_iter_state));
395} 404}
396 405
@@ -399,7 +408,7 @@ static const struct file_operations rt_cache_seq_fops = {
399 .open = rt_cache_seq_open, 408 .open = rt_cache_seq_open,
400 .read = seq_read, 409 .read = seq_read,
401 .llseek = seq_lseek, 410 .llseek = seq_lseek,
402 .release = seq_release_private, 411 .release = seq_release_net,
403}; 412};
404 413
405 414
@@ -533,7 +542,7 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
533} 542}
534#endif 543#endif
535 544
536static __init int ip_rt_proc_init(struct net *net) 545static int __net_init ip_rt_do_proc_init(struct net *net)
537{ 546{
538 struct proc_dir_entry *pde; 547 struct proc_dir_entry *pde;
539 548
@@ -564,25 +573,43 @@ err2:
564err1: 573err1:
565 return -ENOMEM; 574 return -ENOMEM;
566} 575}
576
577static void __net_exit ip_rt_do_proc_exit(struct net *net)
578{
579 remove_proc_entry("rt_cache", net->proc_net_stat);
580 remove_proc_entry("rt_cache", net->proc_net);
581 remove_proc_entry("rt_acct", net->proc_net);
582}
583
584static struct pernet_operations ip_rt_proc_ops __net_initdata = {
585 .init = ip_rt_do_proc_init,
586 .exit = ip_rt_do_proc_exit,
587};
588
589static int __init ip_rt_proc_init(void)
590{
591 return register_pernet_subsys(&ip_rt_proc_ops);
592}
593
567#else 594#else
568static inline int ip_rt_proc_init(struct net *net) 595static inline int ip_rt_proc_init(void)
569{ 596{
570 return 0; 597 return 0;
571} 598}
572#endif /* CONFIG_PROC_FS */ 599#endif /* CONFIG_PROC_FS */
573 600
574static __inline__ void rt_free(struct rtable *rt) 601static inline void rt_free(struct rtable *rt)
575{ 602{
576 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 603 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
577} 604}
578 605
579static __inline__ void rt_drop(struct rtable *rt) 606static inline void rt_drop(struct rtable *rt)
580{ 607{
581 ip_rt_put(rt); 608 ip_rt_put(rt);
582 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 609 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
583} 610}
584 611
585static __inline__ int rt_fast_clean(struct rtable *rth) 612static inline int rt_fast_clean(struct rtable *rth)
586{ 613{
587 /* Kill broadcast/multicast entries very aggresively, if they 614 /* Kill broadcast/multicast entries very aggresively, if they
588 collide in hash table with more useful entries */ 615 collide in hash table with more useful entries */
@@ -590,7 +617,7 @@ static __inline__ int rt_fast_clean(struct rtable *rth)
590 rth->fl.iif && rth->u.dst.rt_next; 617 rth->fl.iif && rth->u.dst.rt_next;
591} 618}
592 619
593static __inline__ int rt_valuable(struct rtable *rth) 620static inline int rt_valuable(struct rtable *rth)
594{ 621{
595 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || 622 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
596 rth->u.dst.expires; 623 rth->u.dst.expires;
@@ -652,7 +679,7 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
652 679
653static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) 680static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
654{ 681{
655 return rt1->u.dst.dev->nd_net == rt2->u.dst.dev->nd_net; 682 return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev);
656} 683}
657 684
658/* 685/*
@@ -1032,10 +1059,10 @@ restart:
1032#if RT_CACHE_DEBUG >= 2 1059#if RT_CACHE_DEBUG >= 2
1033 if (rt->u.dst.rt_next) { 1060 if (rt->u.dst.rt_next) {
1034 struct rtable *trt; 1061 struct rtable *trt;
1035 printk(KERN_DEBUG "rt_cache @%02x: %u.%u.%u.%u", hash, 1062 printk(KERN_DEBUG "rt_cache @%02x: " NIPQUAD_FMT, hash,
1036 NIPQUAD(rt->rt_dst)); 1063 NIPQUAD(rt->rt_dst));
1037 for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) 1064 for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next)
1038 printk(" . %u.%u.%u.%u", NIPQUAD(trt->rt_dst)); 1065 printk(" . " NIPQUAD_FMT, NIPQUAD(trt->rt_dst));
1039 printk("\n"); 1066 printk("\n");
1040 } 1067 }
1041#endif 1068#endif
@@ -1131,10 +1158,12 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1131 __be32 skeys[2] = { saddr, 0 }; 1158 __be32 skeys[2] = { saddr, 0 };
1132 int ikeys[2] = { dev->ifindex, 0 }; 1159 int ikeys[2] = { dev->ifindex, 0 };
1133 struct netevent_redirect netevent; 1160 struct netevent_redirect netevent;
1161 struct net *net;
1134 1162
1135 if (!in_dev) 1163 if (!in_dev)
1136 return; 1164 return;
1137 1165
1166 net = dev_net(dev);
1138 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) 1167 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
1139 || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) 1168 || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw)
1140 || ipv4_is_zeronet(new_gw)) 1169 || ipv4_is_zeronet(new_gw))
@@ -1146,7 +1175,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1146 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) 1175 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
1147 goto reject_redirect; 1176 goto reject_redirect;
1148 } else { 1177 } else {
1149 if (inet_addr_type(&init_net, new_gw) != RTN_UNICAST) 1178 if (inet_addr_type(net, new_gw) != RTN_UNICAST)
1150 goto reject_redirect; 1179 goto reject_redirect;
1151 } 1180 }
1152 1181
@@ -1164,7 +1193,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1164 rth->fl.fl4_src != skeys[i] || 1193 rth->fl.fl4_src != skeys[i] ||
1165 rth->fl.oif != ikeys[k] || 1194 rth->fl.oif != ikeys[k] ||
1166 rth->fl.iif != 0 || 1195 rth->fl.iif != 0 ||
1167 rth->rt_genid != atomic_read(&rt_genid)) { 1196 rth->rt_genid != atomic_read(&rt_genid) ||
1197 !net_eq(dev_net(rth->u.dst.dev), net)) {
1168 rthp = &rth->u.dst.rt_next; 1198 rthp = &rth->u.dst.rt_next;
1169 continue; 1199 continue;
1170 } 1200 }
@@ -1245,9 +1275,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1245reject_redirect: 1275reject_redirect:
1246#ifdef CONFIG_IP_ROUTE_VERBOSE 1276#ifdef CONFIG_IP_ROUTE_VERBOSE
1247 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) 1277 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
1248 printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about " 1278 printk(KERN_INFO "Redirect from " NIPQUAD_FMT " on %s about "
1249 "%u.%u.%u.%u ignored.\n" 1279 NIPQUAD_FMT " ignored.\n"
1250 " Advised path = %u.%u.%u.%u -> %u.%u.%u.%u\n", 1280 " Advised path = " NIPQUAD_FMT " -> " NIPQUAD_FMT "\n",
1251 NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw), 1281 NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw),
1252 NIPQUAD(saddr), NIPQUAD(daddr)); 1282 NIPQUAD(saddr), NIPQUAD(daddr));
1253#endif 1283#endif
@@ -1256,7 +1286,7 @@ reject_redirect:
1256 1286
1257static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) 1287static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1258{ 1288{
1259 struct rtable *rt = (struct rtable*)dst; 1289 struct rtable *rt = (struct rtable *)dst;
1260 struct dst_entry *ret = dst; 1290 struct dst_entry *ret = dst;
1261 1291
1262 if (rt) { 1292 if (rt) {
@@ -1269,7 +1299,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1269 rt->fl.oif); 1299 rt->fl.oif);
1270#if RT_CACHE_DEBUG >= 1 1300#if RT_CACHE_DEBUG >= 1
1271 printk(KERN_DEBUG "ipv4_negative_advice: redirect to " 1301 printk(KERN_DEBUG "ipv4_negative_advice: redirect to "
1272 "%u.%u.%u.%u/%02x dropped\n", 1302 NIPQUAD_FMT "/%02x dropped\n",
1273 NIPQUAD(rt->rt_dst), rt->fl.fl4_tos); 1303 NIPQUAD(rt->rt_dst), rt->fl.fl4_tos);
1274#endif 1304#endif
1275 rt_del(hash, rt); 1305 rt_del(hash, rt);
@@ -1297,7 +1327,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1297 1327
1298void ip_rt_send_redirect(struct sk_buff *skb) 1328void ip_rt_send_redirect(struct sk_buff *skb)
1299{ 1329{
1300 struct rtable *rt = (struct rtable*)skb->dst; 1330 struct rtable *rt = skb->rtable;
1301 struct in_device *in_dev = in_dev_get(rt->u.dst.dev); 1331 struct in_device *in_dev = in_dev_get(rt->u.dst.dev);
1302 1332
1303 if (!in_dev) 1333 if (!in_dev)
@@ -1334,8 +1364,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1334 if (IN_DEV_LOG_MARTIANS(in_dev) && 1364 if (IN_DEV_LOG_MARTIANS(in_dev) &&
1335 rt->u.dst.rate_tokens == ip_rt_redirect_number && 1365 rt->u.dst.rate_tokens == ip_rt_redirect_number &&
1336 net_ratelimit()) 1366 net_ratelimit())
1337 printk(KERN_WARNING "host %u.%u.%u.%u/if%d ignores " 1367 printk(KERN_WARNING "host " NIPQUAD_FMT "/if%d ignores "
1338 "redirects for %u.%u.%u.%u to %u.%u.%u.%u.\n", 1368 "redirects for " NIPQUAD_FMT " to " NIPQUAD_FMT ".\n",
1339 NIPQUAD(rt->rt_src), rt->rt_iif, 1369 NIPQUAD(rt->rt_src), rt->rt_iif,
1340 NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway)); 1370 NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway));
1341#endif 1371#endif
@@ -1346,7 +1376,7 @@ out:
1346 1376
1347static int ip_error(struct sk_buff *skb) 1377static int ip_error(struct sk_buff *skb)
1348{ 1378{
1349 struct rtable *rt = (struct rtable*)skb->dst; 1379 struct rtable *rt = skb->rtable;
1350 unsigned long now; 1380 unsigned long now;
1351 int code; 1381 int code;
1352 1382
@@ -1388,7 +1418,7 @@ out: kfree_skb(skb);
1388static const unsigned short mtu_plateau[] = 1418static const unsigned short mtu_plateau[] =
1389{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 }; 1419{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };
1390 1420
1391static __inline__ unsigned short guess_mtu(unsigned short old_mtu) 1421static inline unsigned short guess_mtu(unsigned short old_mtu)
1392{ 1422{
1393 int i; 1423 int i;
1394 1424
@@ -1423,7 +1453,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1423 rth->rt_src == iph->saddr && 1453 rth->rt_src == iph->saddr &&
1424 rth->fl.iif == 0 && 1454 rth->fl.iif == 0 &&
1425 !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) && 1455 !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) &&
1426 rth->u.dst.dev->nd_net == net && 1456 net_eq(dev_net(rth->u.dst.dev), net) &&
1427 rth->rt_genid == atomic_read(&rt_genid)) { 1457 rth->rt_genid == atomic_read(&rt_genid)) {
1428 unsigned short mtu = new_mtu; 1458 unsigned short mtu = new_mtu;
1429 1459
@@ -1499,9 +1529,9 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
1499{ 1529{
1500 struct rtable *rt = (struct rtable *) dst; 1530 struct rtable *rt = (struct rtable *) dst;
1501 struct in_device *idev = rt->idev; 1531 struct in_device *idev = rt->idev;
1502 if (dev != dev->nd_net->loopback_dev && idev && idev->dev == dev) { 1532 if (dev != dev_net(dev)->loopback_dev && idev && idev->dev == dev) {
1503 struct in_device *loopback_idev = 1533 struct in_device *loopback_idev =
1504 in_dev_get(dev->nd_net->loopback_dev); 1534 in_dev_get(dev_net(dev)->loopback_dev);
1505 if (loopback_idev) { 1535 if (loopback_idev) {
1506 rt->idev = loopback_idev; 1536 rt->idev = loopback_idev;
1507 in_dev_put(idev); 1537 in_dev_put(idev);
@@ -1515,14 +1545,14 @@ static void ipv4_link_failure(struct sk_buff *skb)
1515 1545
1516 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); 1546 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1517 1547
1518 rt = (struct rtable *) skb->dst; 1548 rt = skb->rtable;
1519 if (rt) 1549 if (rt)
1520 dst_set_expires(&rt->u.dst, 0); 1550 dst_set_expires(&rt->u.dst, 0);
1521} 1551}
1522 1552
1523static int ip_rt_bug(struct sk_buff *skb) 1553static int ip_rt_bug(struct sk_buff *skb)
1524{ 1554{
1525 printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n", 1555 printk(KERN_DEBUG "ip_rt_bug: " NIPQUAD_FMT " -> " NIPQUAD_FMT ", %s\n",
1526 NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr), 1556 NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr),
1527 skb->dev ? skb->dev->name : "?"); 1557 skb->dev ? skb->dev->name : "?");
1528 kfree_skb(skb); 1558 kfree_skb(skb);
@@ -1545,7 +1575,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1545 1575
1546 if (rt->fl.iif == 0) 1576 if (rt->fl.iif == 0)
1547 src = rt->rt_src; 1577 src = rt->rt_src;
1548 else if (fib_lookup(rt->u.dst.dev->nd_net, &rt->fl, &res) == 0) { 1578 else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) {
1549 src = FIB_RES_PREFSRC(res); 1579 src = FIB_RES_PREFSRC(res);
1550 fib_res_put(&res); 1580 fib_res_put(&res);
1551 } else 1581 } else
@@ -1675,7 +1705,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1675 1705
1676 in_dev_put(in_dev); 1706 in_dev_put(in_dev);
1677 hash = rt_hash(daddr, saddr, dev->ifindex); 1707 hash = rt_hash(daddr, saddr, dev->ifindex);
1678 return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst); 1708 return rt_intern_hash(hash, rth, &skb->rtable);
1679 1709
1680e_nobufs: 1710e_nobufs:
1681 in_dev_put(in_dev); 1711 in_dev_put(in_dev);
@@ -1700,8 +1730,8 @@ static void ip_handle_martian_source(struct net_device *dev,
1700 * RFC1812 recommendation, if source is martian, 1730 * RFC1812 recommendation, if source is martian,
1701 * the only hint is MAC header. 1731 * the only hint is MAC header.
1702 */ 1732 */
1703 printk(KERN_WARNING "martian source %u.%u.%u.%u from " 1733 printk(KERN_WARNING "martian source " NIPQUAD_FMT " from "
1704 "%u.%u.%u.%u, on dev %s\n", 1734 NIPQUAD_FMT", on dev %s\n",
1705 NIPQUAD(daddr), NIPQUAD(saddr), dev->name); 1735 NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
1706 if (dev->hard_header_len && skb_mac_header_was_set(skb)) { 1736 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
1707 int i; 1737 int i;
@@ -1718,11 +1748,11 @@ static void ip_handle_martian_source(struct net_device *dev,
1718#endif 1748#endif
1719} 1749}
1720 1750
1721static inline int __mkroute_input(struct sk_buff *skb, 1751static int __mkroute_input(struct sk_buff *skb,
1722 struct fib_result* res, 1752 struct fib_result *res,
1723 struct in_device *in_dev, 1753 struct in_device *in_dev,
1724 __be32 daddr, __be32 saddr, u32 tos, 1754 __be32 daddr, __be32 saddr, u32 tos,
1725 struct rtable **result) 1755 struct rtable **result)
1726{ 1756{
1727 1757
1728 struct rtable *rth; 1758 struct rtable *rth;
@@ -1814,11 +1844,11 @@ static inline int __mkroute_input(struct sk_buff *skb,
1814 return err; 1844 return err;
1815} 1845}
1816 1846
1817static inline int ip_mkroute_input(struct sk_buff *skb, 1847static int ip_mkroute_input(struct sk_buff *skb,
1818 struct fib_result* res, 1848 struct fib_result *res,
1819 const struct flowi *fl, 1849 const struct flowi *fl,
1820 struct in_device *in_dev, 1850 struct in_device *in_dev,
1821 __be32 daddr, __be32 saddr, u32 tos) 1851 __be32 daddr, __be32 saddr, u32 tos)
1822{ 1852{
1823 struct rtable* rth = NULL; 1853 struct rtable* rth = NULL;
1824 int err; 1854 int err;
@@ -1836,7 +1866,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
1836 1866
1837 /* put it into the cache */ 1867 /* put it into the cache */
1838 hash = rt_hash(daddr, saddr, fl->iif); 1868 hash = rt_hash(daddr, saddr, fl->iif);
1839 return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); 1869 return rt_intern_hash(hash, rth, &skb->rtable);
1840} 1870}
1841 1871
1842/* 1872/*
@@ -1869,7 +1899,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1869 __be32 spec_dst; 1899 __be32 spec_dst;
1870 int err = -EINVAL; 1900 int err = -EINVAL;
1871 int free_res = 0; 1901 int free_res = 0;
1872 struct net * net = dev->nd_net; 1902 struct net * net = dev_net(dev);
1873 1903
1874 /* IP on this device is disabled. */ 1904 /* IP on this device is disabled. */
1875 1905
@@ -1992,7 +2022,7 @@ local_input:
1992 } 2022 }
1993 rth->rt_type = res.type; 2023 rth->rt_type = res.type;
1994 hash = rt_hash(daddr, saddr, fl.iif); 2024 hash = rt_hash(daddr, saddr, fl.iif);
1995 err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); 2025 err = rt_intern_hash(hash, rth, &skb->rtable);
1996 goto done; 2026 goto done;
1997 2027
1998no_route: 2028no_route:
@@ -2010,8 +2040,8 @@ martian_destination:
2010 RT_CACHE_STAT_INC(in_martian_dst); 2040 RT_CACHE_STAT_INC(in_martian_dst);
2011#ifdef CONFIG_IP_ROUTE_VERBOSE 2041#ifdef CONFIG_IP_ROUTE_VERBOSE
2012 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) 2042 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
2013 printk(KERN_WARNING "martian destination %u.%u.%u.%u from " 2043 printk(KERN_WARNING "martian destination " NIPQUAD_FMT " from "
2014 "%u.%u.%u.%u, dev %s\n", 2044 NIPQUAD_FMT ", dev %s\n",
2015 NIPQUAD(daddr), NIPQUAD(saddr), dev->name); 2045 NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
2016#endif 2046#endif
2017 2047
@@ -2040,25 +2070,25 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2040 int iif = dev->ifindex; 2070 int iif = dev->ifindex;
2041 struct net *net; 2071 struct net *net;
2042 2072
2043 net = dev->nd_net; 2073 net = dev_net(dev);
2044 tos &= IPTOS_RT_MASK; 2074 tos &= IPTOS_RT_MASK;
2045 hash = rt_hash(daddr, saddr, iif); 2075 hash = rt_hash(daddr, saddr, iif);
2046 2076
2047 rcu_read_lock(); 2077 rcu_read_lock();
2048 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2078 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2049 rth = rcu_dereference(rth->u.dst.rt_next)) { 2079 rth = rcu_dereference(rth->u.dst.rt_next)) {
2050 if (rth->fl.fl4_dst == daddr && 2080 if (((rth->fl.fl4_dst ^ daddr) |
2051 rth->fl.fl4_src == saddr && 2081 (rth->fl.fl4_src ^ saddr) |
2052 rth->fl.iif == iif && 2082 (rth->fl.iif ^ iif) |
2053 rth->fl.oif == 0 && 2083 rth->fl.oif |
2084 (rth->fl.fl4_tos ^ tos)) == 0 &&
2054 rth->fl.mark == skb->mark && 2085 rth->fl.mark == skb->mark &&
2055 rth->fl.fl4_tos == tos && 2086 net_eq(dev_net(rth->u.dst.dev), net) &&
2056 rth->u.dst.dev->nd_net == net &&
2057 rth->rt_genid == atomic_read(&rt_genid)) { 2087 rth->rt_genid == atomic_read(&rt_genid)) {
2058 dst_use(&rth->u.dst, jiffies); 2088 dst_use(&rth->u.dst, jiffies);
2059 RT_CACHE_STAT_INC(in_hit); 2089 RT_CACHE_STAT_INC(in_hit);
2060 rcu_read_unlock(); 2090 rcu_read_unlock();
2061 skb->dst = (struct dst_entry*)rth; 2091 skb->rtable = rth;
2062 return 0; 2092 return 0;
2063 } 2093 }
2064 RT_CACHE_STAT_INC(in_hlist_search); 2094 RT_CACHE_STAT_INC(in_hlist_search);
@@ -2100,12 +2130,12 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2100 return ip_route_input_slow(skb, daddr, saddr, tos, dev); 2130 return ip_route_input_slow(skb, daddr, saddr, tos, dev);
2101} 2131}
2102 2132
2103static inline int __mkroute_output(struct rtable **result, 2133static int __mkroute_output(struct rtable **result,
2104 struct fib_result* res, 2134 struct fib_result *res,
2105 const struct flowi *fl, 2135 const struct flowi *fl,
2106 const struct flowi *oldflp, 2136 const struct flowi *oldflp,
2107 struct net_device *dev_out, 2137 struct net_device *dev_out,
2108 unsigned flags) 2138 unsigned flags)
2109{ 2139{
2110 struct rtable *rth; 2140 struct rtable *rth;
2111 struct in_device *in_dev; 2141 struct in_device *in_dev;
@@ -2220,12 +2250,12 @@ static inline int __mkroute_output(struct rtable **result,
2220 return err; 2250 return err;
2221} 2251}
2222 2252
2223static inline int ip_mkroute_output(struct rtable **rp, 2253static int ip_mkroute_output(struct rtable **rp,
2224 struct fib_result* res, 2254 struct fib_result *res,
2225 const struct flowi *fl, 2255 const struct flowi *fl,
2226 const struct flowi *oldflp, 2256 const struct flowi *oldflp,
2227 struct net_device *dev_out, 2257 struct net_device *dev_out,
2228 unsigned flags) 2258 unsigned flags)
2229{ 2259{
2230 struct rtable *rth = NULL; 2260 struct rtable *rth = NULL;
2231 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); 2261 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
@@ -2455,7 +2485,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2455 rth->fl.mark == flp->mark && 2485 rth->fl.mark == flp->mark &&
2456 !((rth->fl.fl4_tos ^ flp->fl4_tos) & 2486 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
2457 (IPTOS_RT_MASK | RTO_ONLINK)) && 2487 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2458 rth->u.dst.dev->nd_net == net && 2488 net_eq(dev_net(rth->u.dst.dev), net) &&
2459 rth->rt_genid == atomic_read(&rt_genid)) { 2489 rth->rt_genid == atomic_read(&rt_genid)) {
2460 dst_use(&rth->u.dst, jiffies); 2490 dst_use(&rth->u.dst, jiffies);
2461 RT_CACHE_STAT_INC(out_hit); 2491 RT_CACHE_STAT_INC(out_hit);
@@ -2487,7 +2517,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2487}; 2517};
2488 2518
2489 2519
2490static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk) 2520static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
2491{ 2521{
2492 struct rtable *ort = *rp; 2522 struct rtable *ort = *rp;
2493 struct rtable *rt = (struct rtable *) 2523 struct rtable *rt = (struct rtable *)
@@ -2547,7 +2577,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
2547 err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, 2577 err = __xfrm_lookup((struct dst_entry **)rp, flp, sk,
2548 flags ? XFRM_LOOKUP_WAIT : 0); 2578 flags ? XFRM_LOOKUP_WAIT : 0);
2549 if (err == -EREMOTE) 2579 if (err == -EREMOTE)
2550 err = ipv4_dst_blackhole(rp, flp, sk); 2580 err = ipv4_dst_blackhole(rp, flp);
2551 2581
2552 return err; 2582 return err;
2553 } 2583 }
@@ -2565,7 +2595,7 @@ int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
2565static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 2595static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2566 int nowait, unsigned int flags) 2596 int nowait, unsigned int flags)
2567{ 2597{
2568 struct rtable *rt = (struct rtable*)skb->dst; 2598 struct rtable *rt = skb->rtable;
2569 struct rtmsg *r; 2599 struct rtmsg *r;
2570 struct nlmsghdr *nlh; 2600 struct nlmsghdr *nlh;
2571 long expires; 2601 long expires;
@@ -2658,7 +2688,7 @@ nla_put_failure:
2658 2688
2659static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2689static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2660{ 2690{
2661 struct net *net = in_skb->sk->sk_net; 2691 struct net *net = sock_net(in_skb->sk);
2662 struct rtmsg *rtm; 2692 struct rtmsg *rtm;
2663 struct nlattr *tb[RTA_MAX+1]; 2693 struct nlattr *tb[RTA_MAX+1];
2664 struct rtable *rt = NULL; 2694 struct rtable *rt = NULL;
@@ -2668,9 +2698,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2668 int err; 2698 int err;
2669 struct sk_buff *skb; 2699 struct sk_buff *skb;
2670 2700
2671 if (net != &init_net)
2672 return -EINVAL;
2673
2674 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); 2701 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2675 if (err < 0) 2702 if (err < 0)
2676 goto errout; 2703 goto errout;
@@ -2700,7 +2727,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2700 if (iif) { 2727 if (iif) {
2701 struct net_device *dev; 2728 struct net_device *dev;
2702 2729
2703 dev = __dev_get_by_index(&init_net, iif); 2730 dev = __dev_get_by_index(net, iif);
2704 if (dev == NULL) { 2731 if (dev == NULL) {
2705 err = -ENODEV; 2732 err = -ENODEV;
2706 goto errout_free; 2733 goto errout_free;
@@ -2712,7 +2739,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2712 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); 2739 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2713 local_bh_enable(); 2740 local_bh_enable();
2714 2741
2715 rt = (struct rtable*) skb->dst; 2742 rt = skb->rtable;
2716 if (err == 0 && rt->u.dst.error) 2743 if (err == 0 && rt->u.dst.error)
2717 err = -rt->u.dst.error; 2744 err = -rt->u.dst.error;
2718 } else { 2745 } else {
@@ -2726,22 +2753,22 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2726 }, 2753 },
2727 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, 2754 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
2728 }; 2755 };
2729 err = ip_route_output_key(&init_net, &rt, &fl); 2756 err = ip_route_output_key(net, &rt, &fl);
2730 } 2757 }
2731 2758
2732 if (err) 2759 if (err)
2733 goto errout_free; 2760 goto errout_free;
2734 2761
2735 skb->dst = &rt->u.dst; 2762 skb->rtable = rt;
2736 if (rtm->rtm_flags & RTM_F_NOTIFY) 2763 if (rtm->rtm_flags & RTM_F_NOTIFY)
2737 rt->rt_flags |= RTCF_NOTIFY; 2764 rt->rt_flags |= RTCF_NOTIFY;
2738 2765
2739 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 2766 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
2740 RTM_NEWROUTE, 0, 0); 2767 RTM_NEWROUTE, 0, 0);
2741 if (err <= 0) 2768 if (err <= 0)
2742 goto errout_free; 2769 goto errout_free;
2743 2770
2744 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); 2771 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2745errout: 2772errout:
2746 return err; 2773 return err;
2747 2774
@@ -2755,6 +2782,9 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2755 struct rtable *rt; 2782 struct rtable *rt;
2756 int h, s_h; 2783 int h, s_h;
2757 int idx, s_idx; 2784 int idx, s_idx;
2785 struct net *net;
2786
2787 net = sock_net(skb->sk);
2758 2788
2759 s_h = cb->args[0]; 2789 s_h = cb->args[0];
2760 if (s_h < 0) 2790 if (s_h < 0)
@@ -2764,7 +2794,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2764 rcu_read_lock_bh(); 2794 rcu_read_lock_bh();
2765 for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; 2795 for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
2766 rt = rcu_dereference(rt->u.dst.rt_next), idx++) { 2796 rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
2767 if (idx < s_idx) 2797 if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
2768 continue; 2798 continue;
2769 if (rt->rt_genid != atomic_read(&rt_genid)) 2799 if (rt->rt_genid != atomic_read(&rt_genid))
2770 continue; 2800 continue;
@@ -3028,7 +3058,9 @@ int __init ip_rt_init(void)
3028 devinet_init(); 3058 devinet_init();
3029 ip_fib_init(); 3059 ip_fib_init();
3030 3060
3031 setup_timer(&rt_secret_timer, rt_secret_rebuild, 0); 3061 rt_secret_timer.function = rt_secret_rebuild;
3062 rt_secret_timer.data = 0;
3063 init_timer_deferrable(&rt_secret_timer);
3032 3064
3033 /* All the timers, started at system startup tend 3065 /* All the timers, started at system startup tend
3034 to synchronize. Perturb it a bit. 3066 to synchronize. Perturb it a bit.
@@ -3040,7 +3072,7 @@ int __init ip_rt_init(void)
3040 ip_rt_secret_interval; 3072 ip_rt_secret_interval;
3041 add_timer(&rt_secret_timer); 3073 add_timer(&rt_secret_timer);
3042 3074
3043 if (ip_rt_proc_init(&init_net)) 3075 if (ip_rt_proc_init())
3044 printk(KERN_ERR "Unable to create route proc files\n"); 3076 printk(KERN_ERR "Unable to create route proc files\n");
3045#ifdef CONFIG_XFRM 3077#ifdef CONFIG_XFRM
3046 xfrm_init(); 3078 xfrm_init();
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f470fe4511db..73ba98921d64 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -10,8 +10,6 @@
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 * 11 *
12 * $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $ 12 * $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $
13 *
14 * Missing: IPv6 support.
15 */ 13 */
16 14
17#include <linux/tcp.h> 15#include <linux/tcp.h>
@@ -21,26 +19,33 @@
21#include <linux/kernel.h> 19#include <linux/kernel.h>
22#include <net/tcp.h> 20#include <net/tcp.h>
23 21
22/* Timestamps: lowest 9 bits store TCP options */
23#define TSBITS 9
24#define TSMASK (((__u32)1 << TSBITS) - 1)
25
24extern int sysctl_tcp_syncookies; 26extern int sysctl_tcp_syncookies;
25 27
26static __u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS]; 28__u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
29EXPORT_SYMBOL(syncookie_secret);
27 30
28static __init int init_syncookies(void) 31static __init int init_syncookies(void)
29{ 32{
30 get_random_bytes(syncookie_secret, sizeof(syncookie_secret)); 33 get_random_bytes(syncookie_secret, sizeof(syncookie_secret));
31 return 0; 34 return 0;
32} 35}
33module_init(init_syncookies); 36__initcall(init_syncookies);
34 37
35#define COOKIEBITS 24 /* Upper bits store count */ 38#define COOKIEBITS 24 /* Upper bits store count */
36#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) 39#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
37 40
41static DEFINE_PER_CPU(__u32, cookie_scratch)[16 + 5 + SHA_WORKSPACE_WORDS];
42
38static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, 43static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
39 u32 count, int c) 44 u32 count, int c)
40{ 45{
41 __u32 tmp[16 + 5 + SHA_WORKSPACE_WORDS]; 46 __u32 *tmp = __get_cpu_var(cookie_scratch);
42 47
43 memcpy(tmp + 3, syncookie_secret[c], sizeof(syncookie_secret[c])); 48 memcpy(tmp + 4, syncookie_secret[c], sizeof(syncookie_secret[c]));
44 tmp[0] = (__force u32)saddr; 49 tmp[0] = (__force u32)saddr;
45 tmp[1] = (__force u32)daddr; 50 tmp[1] = (__force u32)daddr;
46 tmp[2] = ((__force u32)sport << 16) + (__force u32)dport; 51 tmp[2] = ((__force u32)sport << 16) + (__force u32)dport;
@@ -50,6 +55,39 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
50 return tmp[17]; 55 return tmp[17];
51} 56}
52 57
58
59/*
60 * when syncookies are in effect and tcp timestamps are enabled we encode
61 * tcp options in the lowest 9 bits of the timestamp value that will be
62 * sent in the syn-ack.
63 * Since subsequent timestamps use the normal tcp_time_stamp value, we
64 * must make sure that the resulting initial timestamp is <= tcp_time_stamp.
65 */
66__u32 cookie_init_timestamp(struct request_sock *req)
67{
68 struct inet_request_sock *ireq;
69 u32 ts, ts_now = tcp_time_stamp;
70 u32 options = 0;
71
72 ireq = inet_rsk(req);
73 if (ireq->wscale_ok) {
74 options = ireq->snd_wscale;
75 options |= ireq->rcv_wscale << 4;
76 }
77 options |= ireq->sack_ok << 8;
78
79 ts = ts_now & ~TSMASK;
80 ts |= options;
81 if (ts > ts_now) {
82 ts >>= TSBITS;
83 ts--;
84 ts <<= TSBITS;
85 ts |= options;
86 }
87 return ts;
88}
89
90
53static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport, 91static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport,
54 __be16 dport, __u32 sseq, __u32 count, 92 __be16 dport, __u32 sseq, __u32 count,
55 __u32 data) 93 __u32 data)
@@ -184,6 +222,35 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
184 return child; 222 return child;
185} 223}
186 224
225
226/*
227 * when syncookies are in effect and tcp timestamps are enabled we stored
228 * additional tcp options in the timestamp.
229 * This extracts these options from the timestamp echo.
230 *
231 * The lowest 4 bits are for snd_wscale
232 * The next 4 lsb are for rcv_wscale
233 * The next lsb is for sack_ok
234 */
235void cookie_check_timestamp(struct tcp_options_received *tcp_opt)
236{
237 /* echoed timestamp, 9 lowest bits contain options */
238 u32 options = tcp_opt->rcv_tsecr & TSMASK;
239
240 tcp_opt->snd_wscale = options & 0xf;
241 options >>= 4;
242 tcp_opt->rcv_wscale = options & 0xf;
243
244 tcp_opt->sack_ok = (options >> 4) & 0x1;
245
246 if (tcp_opt->sack_ok)
247 tcp_sack_reset(tcp_opt);
248
249 if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale)
250 tcp_opt->wscale_ok = 1;
251}
252EXPORT_SYMBOL(cookie_check_timestamp);
253
187struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, 254struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
188 struct ip_options *opt) 255 struct ip_options *opt)
189{ 256{
@@ -197,6 +264,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
197 int mss; 264 int mss;
198 struct rtable *rt; 265 struct rtable *rt;
199 __u8 rcv_wscale; 266 __u8 rcv_wscale;
267 struct tcp_options_received tcp_opt;
200 268
201 if (!sysctl_tcp_syncookies || !th->ack) 269 if (!sysctl_tcp_syncookies || !th->ack)
202 goto out; 270 goto out;
@@ -209,6 +277,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
209 277
210 NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV); 278 NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
211 279
280 /* check for timestamp cookie support */
281 memset(&tcp_opt, 0, sizeof(tcp_opt));
282 tcp_parse_options(skb, &tcp_opt, 0);
283
284 if (tcp_opt.saw_tstamp)
285 cookie_check_timestamp(&tcp_opt);
286
212 ret = NULL; 287 ret = NULL;
213 req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */ 288 req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */
214 if (!req) 289 if (!req)
@@ -227,6 +302,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
227 ireq->loc_addr = ip_hdr(skb)->daddr; 302 ireq->loc_addr = ip_hdr(skb)->daddr;
228 ireq->rmt_addr = ip_hdr(skb)->saddr; 303 ireq->rmt_addr = ip_hdr(skb)->saddr;
229 ireq->opt = NULL; 304 ireq->opt = NULL;
305 ireq->snd_wscale = tcp_opt.snd_wscale;
306 ireq->rcv_wscale = tcp_opt.rcv_wscale;
307 ireq->sack_ok = tcp_opt.sack_ok;
308 ireq->wscale_ok = tcp_opt.wscale_ok;
309 ireq->tstamp_ok = tcp_opt.saw_tstamp;
310 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
230 311
231 /* We throwed the options of the initial SYN away, so we hope 312 /* We throwed the options of the initial SYN away, so we hope
232 * the ACK carries the same options again (see RFC1122 4.2.3.8) 313 * the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -241,8 +322,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
241 } 322 }
242 } 323 }
243 324
244 ireq->snd_wscale = ireq->rcv_wscale = ireq->tstamp_ok = 0;
245 ireq->wscale_ok = ireq->sack_ok = 0;
246 req->expires = 0UL; 325 req->expires = 0UL;
247 req->retrans = 0; 326 req->retrans = 0;
248 327
@@ -271,11 +350,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
271 } 350 }
272 351
273 /* Try to redo what tcp_v4_send_synack did. */ 352 /* Try to redo what tcp_v4_send_synack did. */
274 req->window_clamp = dst_metric(&rt->u.dst, RTAX_WINDOW); 353 req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW);
354
275 tcp_select_initial_window(tcp_full_space(sk), req->mss, 355 tcp_select_initial_window(tcp_full_space(sk), req->mss,
276 &req->rcv_wnd, &req->window_clamp, 356 &req->rcv_wnd, &req->window_clamp,
277 0, &rcv_wscale); 357 ireq->wscale_ok, &rcv_wscale);
278 /* BTW win scale with syncookies is 0 by definition */ 358
279 ireq->rcv_wscale = rcv_wscale; 359 ireq->rcv_wscale = rcv_wscale;
280 360
281 ret = get_cookie_sock(sk, skb, req, &rt->u.dst); 361 ret = get_cookie_sock(sk, skb, req, &rt->u.dst);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 88286f35d1e2..c437f804ee38 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -404,38 +404,6 @@ static struct ctl_table ipv4_table[] = {
404 .strategy = &ipv4_sysctl_local_port_range, 404 .strategy = &ipv4_sysctl_local_port_range,
405 }, 405 },
406 { 406 {
407 .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL,
408 .procname = "icmp_echo_ignore_all",
409 .data = &sysctl_icmp_echo_ignore_all,
410 .maxlen = sizeof(int),
411 .mode = 0644,
412 .proc_handler = &proc_dointvec
413 },
414 {
415 .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS,
416 .procname = "icmp_echo_ignore_broadcasts",
417 .data = &sysctl_icmp_echo_ignore_broadcasts,
418 .maxlen = sizeof(int),
419 .mode = 0644,
420 .proc_handler = &proc_dointvec
421 },
422 {
423 .ctl_name = NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES,
424 .procname = "icmp_ignore_bogus_error_responses",
425 .data = &sysctl_icmp_ignore_bogus_error_responses,
426 .maxlen = sizeof(int),
427 .mode = 0644,
428 .proc_handler = &proc_dointvec
429 },
430 {
431 .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR,
432 .procname = "icmp_errors_use_inbound_ifaddr",
433 .data = &sysctl_icmp_errors_use_inbound_ifaddr,
434 .maxlen = sizeof(int),
435 .mode = 0644,
436 .proc_handler = &proc_dointvec
437 },
438 {
439 .ctl_name = NET_IPV4_ROUTE, 407 .ctl_name = NET_IPV4_ROUTE,
440 .procname = "route", 408 .procname = "route",
441 .maxlen = 0, 409 .maxlen = 0,
@@ -586,22 +554,6 @@ static struct ctl_table ipv4_table[] = {
586 .proc_handler = &proc_dointvec 554 .proc_handler = &proc_dointvec
587 }, 555 },
588 { 556 {
589 .ctl_name = NET_IPV4_ICMP_RATELIMIT,
590 .procname = "icmp_ratelimit",
591 .data = &sysctl_icmp_ratelimit,
592 .maxlen = sizeof(int),
593 .mode = 0644,
594 .proc_handler = &proc_dointvec
595 },
596 {
597 .ctl_name = NET_IPV4_ICMP_RATEMASK,
598 .procname = "icmp_ratemask",
599 .data = &sysctl_icmp_ratemask,
600 .maxlen = sizeof(int),
601 .mode = 0644,
602 .proc_handler = &proc_dointvec
603 },
604 {
605 .ctl_name = NET_TCP_TW_REUSE, 557 .ctl_name = NET_TCP_TW_REUSE,
606 .procname = "tcp_tw_reuse", 558 .procname = "tcp_tw_reuse",
607 .data = &sysctl_tcp_tw_reuse, 559 .data = &sysctl_tcp_tw_reuse,
@@ -804,6 +756,58 @@ static struct ctl_table ipv4_table[] = {
804 { .ctl_name = 0 } 756 { .ctl_name = 0 }
805}; 757};
806 758
759static struct ctl_table ipv4_net_table[] = {
760 {
761 .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL,
762 .procname = "icmp_echo_ignore_all",
763 .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
764 .maxlen = sizeof(int),
765 .mode = 0644,
766 .proc_handler = &proc_dointvec
767 },
768 {
769 .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS,
770 .procname = "icmp_echo_ignore_broadcasts",
771 .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
772 .maxlen = sizeof(int),
773 .mode = 0644,
774 .proc_handler = &proc_dointvec
775 },
776 {
777 .ctl_name = NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES,
778 .procname = "icmp_ignore_bogus_error_responses",
779 .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
780 .maxlen = sizeof(int),
781 .mode = 0644,
782 .proc_handler = &proc_dointvec
783 },
784 {
785 .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR,
786 .procname = "icmp_errors_use_inbound_ifaddr",
787 .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
788 .maxlen = sizeof(int),
789 .mode = 0644,
790 .proc_handler = &proc_dointvec
791 },
792 {
793 .ctl_name = NET_IPV4_ICMP_RATELIMIT,
794 .procname = "icmp_ratelimit",
795 .data = &init_net.ipv4.sysctl_icmp_ratelimit,
796 .maxlen = sizeof(int),
797 .mode = 0644,
798 .proc_handler = &proc_dointvec
799 },
800 {
801 .ctl_name = NET_IPV4_ICMP_RATEMASK,
802 .procname = "icmp_ratemask",
803 .data = &init_net.ipv4.sysctl_icmp_ratemask,
804 .maxlen = sizeof(int),
805 .mode = 0644,
806 .proc_handler = &proc_dointvec
807 },
808 { }
809};
810
807struct ctl_path net_ipv4_ctl_path[] = { 811struct ctl_path net_ipv4_ctl_path[] = {
808 { .procname = "net", .ctl_name = CTL_NET, }, 812 { .procname = "net", .ctl_name = CTL_NET, },
809 { .procname = "ipv4", .ctl_name = NET_IPV4, }, 813 { .procname = "ipv4", .ctl_name = NET_IPV4, },
@@ -811,12 +815,72 @@ struct ctl_path net_ipv4_ctl_path[] = {
811}; 815};
812EXPORT_SYMBOL_GPL(net_ipv4_ctl_path); 816EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
813 817
818static __net_init int ipv4_sysctl_init_net(struct net *net)
819{
820 struct ctl_table *table;
821
822 table = ipv4_net_table;
823 if (net != &init_net) {
824 table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
825 if (table == NULL)
826 goto err_alloc;
827
828 table[0].data =
829 &net->ipv4.sysctl_icmp_echo_ignore_all;
830 table[1].data =
831 &net->ipv4.sysctl_icmp_echo_ignore_broadcasts;
832 table[2].data =
833 &net->ipv4.sysctl_icmp_ignore_bogus_error_responses;
834 table[3].data =
835 &net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr;
836 table[4].data =
837 &net->ipv4.sysctl_icmp_ratelimit;
838 table[5].data =
839 &net->ipv4.sysctl_icmp_ratemask;
840 }
841
842 net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
843 net_ipv4_ctl_path, table);
844 if (net->ipv4.ipv4_hdr == NULL)
845 goto err_reg;
846
847 return 0;
848
849err_reg:
850 if (net != &init_net)
851 kfree(table);
852err_alloc:
853 return -ENOMEM;
854}
855
856static __net_exit void ipv4_sysctl_exit_net(struct net *net)
857{
858 struct ctl_table *table;
859
860 table = net->ipv4.ipv4_hdr->ctl_table_arg;
861 unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
862 kfree(table);
863}
864
865static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
866 .init = ipv4_sysctl_init_net,
867 .exit = ipv4_sysctl_exit_net,
868};
869
814static __init int sysctl_ipv4_init(void) 870static __init int sysctl_ipv4_init(void)
815{ 871{
816 struct ctl_table_header *hdr; 872 struct ctl_table_header *hdr;
817 873
818 hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table); 874 hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table);
819 return hdr == NULL ? -ENOMEM : 0; 875 if (hdr == NULL)
876 return -ENOMEM;
877
878 if (register_pernet_subsys(&ipv4_sysctl_ops)) {
879 unregister_sysctl_table(hdr);
880 return -ENOMEM;
881 }
882
883 return 0;
820} 884}
821 885
822__initcall(sysctl_ipv4_init); 886__initcall(sysctl_ipv4_init);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 39b629ac2404..58ac838bf460 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2105,15 +2105,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2105 break; 2105 break;
2106 2106
2107 case TCP_DEFER_ACCEPT: 2107 case TCP_DEFER_ACCEPT:
2108 icsk->icsk_accept_queue.rskq_defer_accept = 0; 2108 if (val < 0) {
2109 if (val > 0) { 2109 err = -EINVAL;
2110 /* Translate value in seconds to number of 2110 } else {
2111 * retransmits */ 2111 if (val > MAX_TCP_ACCEPT_DEFERRED)
2112 while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && 2112 val = MAX_TCP_ACCEPT_DEFERRED;
2113 val > ((TCP_TIMEOUT_INIT / HZ) << 2113 icsk->icsk_accept_queue.rskq_defer_accept = val;
2114 icsk->icsk_accept_queue.rskq_defer_accept))
2115 icsk->icsk_accept_queue.rskq_defer_accept++;
2116 icsk->icsk_accept_queue.rskq_defer_accept++;
2117 } 2114 }
2118 break; 2115 break;
2119 2116
@@ -2295,8 +2292,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2295 val = (val ? : sysctl_tcp_fin_timeout) / HZ; 2292 val = (val ? : sysctl_tcp_fin_timeout) / HZ;
2296 break; 2293 break;
2297 case TCP_DEFER_ACCEPT: 2294 case TCP_DEFER_ACCEPT:
2298 val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 : 2295 val = icsk->icsk_accept_queue.rskq_defer_accept;
2299 ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
2300 break; 2296 break;
2301 case TCP_WINDOW_CLAMP: 2297 case TCP_WINDOW_CLAMP:
2302 val = tp->window_clamp; 2298 val = tp->window_clamp;
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 3aa0b23c1ea0..eb5b9854c8c7 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -1,12 +1,13 @@
1/* 1/*
2 * TCP CUBIC: Binary Increase Congestion control for TCP v2.1 2 * TCP CUBIC: Binary Increase Congestion control for TCP v2.2
3 * 3 * Home page:
4 * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC
4 * This is from the implementation of CUBIC TCP in 5 * This is from the implementation of CUBIC TCP in
5 * Injong Rhee, Lisong Xu. 6 * Injong Rhee, Lisong Xu.
6 * "CUBIC: A New TCP-Friendly High-Speed TCP Variant 7 * "CUBIC: A New TCP-Friendly High-Speed TCP Variant
7 * in PFLDnet 2005 8 * in PFLDnet 2005
8 * Available from: 9 * Available from:
9 * http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf 10 * http://netsrv.csc.ncsu.edu/export/cubic-paper.pdf
10 * 11 *
11 * Unless CUBIC is enabled and congestion window is large 12 * Unless CUBIC is enabled and congestion window is large
12 * this behaves the same as the original Reno. 13 * this behaves the same as the original Reno.
@@ -20,15 +21,10 @@
20#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation 21#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation
21 * max_cwnd = snd_cwnd * beta 22 * max_cwnd = snd_cwnd * beta
22 */ 23 */
23#define BICTCP_B 4 /*
24 * In binary search,
25 * go to point (max+min)/N
26 */
27#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ 24#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */
28 25
29static int fast_convergence __read_mostly = 1; 26static int fast_convergence __read_mostly = 1;
30static int max_increment __read_mostly = 16; 27static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */
31static int beta __read_mostly = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
32static int initial_ssthresh __read_mostly; 28static int initial_ssthresh __read_mostly;
33static int bic_scale __read_mostly = 41; 29static int bic_scale __read_mostly = 41;
34static int tcp_friendliness __read_mostly = 1; 30static int tcp_friendliness __read_mostly = 1;
@@ -40,9 +36,7 @@ static u64 cube_factor __read_mostly;
40/* Note parameters that are used for precomputing scale factors are read-only */ 36/* Note parameters that are used for precomputing scale factors are read-only */
41module_param(fast_convergence, int, 0644); 37module_param(fast_convergence, int, 0644);
42MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence"); 38MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
43module_param(max_increment, int, 0644); 39module_param(beta, int, 0644);
44MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search");
45module_param(beta, int, 0444);
46MODULE_PARM_DESC(beta, "beta for multiplicative increase"); 40MODULE_PARM_DESC(beta, "beta for multiplicative increase");
47module_param(initial_ssthresh, int, 0644); 41module_param(initial_ssthresh, int, 0644);
48MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); 42MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
@@ -145,7 +139,7 @@ static u32 cubic_root(u64 a)
145static inline void bictcp_update(struct bictcp *ca, u32 cwnd) 139static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
146{ 140{
147 u64 offs; 141 u64 offs;
148 u32 delta, t, bic_target, min_cnt, max_cnt; 142 u32 delta, t, bic_target, max_cnt;
149 143
150 ca->ack_cnt++; /* count the number of ACKs */ 144 ca->ack_cnt++; /* count the number of ACKs */
151 145
@@ -211,19 +205,6 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
211 ca->cnt = 100 * cwnd; /* very small increment*/ 205 ca->cnt = 100 * cwnd; /* very small increment*/
212 } 206 }
213 207
214 if (ca->delay_min > 0) {
215 /* max increment = Smax * rtt / 0.1 */
216 min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min);
217
218 /* use concave growth when the target is above the origin */
219 if (ca->cnt < min_cnt && t >= ca->bic_K)
220 ca->cnt = min_cnt;
221 }
222
223 /* slow start and low utilization */
224 if (ca->loss_cwnd == 0) /* could be aggressive in slow start */
225 ca->cnt = 50;
226
227 /* TCP Friendly */ 208 /* TCP Friendly */
228 if (tcp_friendliness) { 209 if (tcp_friendliness) {
229 u32 scale = beta_scale; 210 u32 scale = beta_scale;
@@ -391,4 +372,4 @@ module_exit(cubictcp_unregister);
391MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); 372MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
392MODULE_LICENSE("GPL"); 373MODULE_LICENSE("GPL");
393MODULE_DESCRIPTION("CUBIC TCP"); 374MODULE_DESCRIPTION("CUBIC TCP");
394MODULE_VERSION("2.1"); 375MODULE_VERSION("2.2");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bbb7d88a16b4..cdc051bfdb4d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2309,12 +2309,25 @@ static void DBGUNDO(struct sock *sk, const char *msg)
2309 struct tcp_sock *tp = tcp_sk(sk); 2309 struct tcp_sock *tp = tcp_sk(sk);
2310 struct inet_sock *inet = inet_sk(sk); 2310 struct inet_sock *inet = inet_sk(sk);
2311 2311
2312 printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n", 2312 if (sk->sk_family == AF_INET) {
2313 msg, 2313 printk(KERN_DEBUG "Undo %s " NIPQUAD_FMT "/%u c%u l%u ss%u/%u p%u\n",
2314 NIPQUAD(inet->daddr), ntohs(inet->dport), 2314 msg,
2315 tp->snd_cwnd, tcp_left_out(tp), 2315 NIPQUAD(inet->daddr), ntohs(inet->dport),
2316 tp->snd_ssthresh, tp->prior_ssthresh, 2316 tp->snd_cwnd, tcp_left_out(tp),
2317 tp->packets_out); 2317 tp->snd_ssthresh, tp->prior_ssthresh,
2318 tp->packets_out);
2319 }
2320#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
2321 else if (sk->sk_family == AF_INET6) {
2322 struct ipv6_pinfo *np = inet6_sk(sk);
2323 printk(KERN_DEBUG "Undo %s " NIP6_FMT "/%u c%u l%u ss%u/%u p%u\n",
2324 msg,
2325 NIP6(np->daddr), ntohs(inet->dport),
2326 tp->snd_cwnd, tcp_left_out(tp),
2327 tp->snd_ssthresh, tp->prior_ssthresh,
2328 tp->packets_out);
2329 }
2330#endif
2318} 2331}
2319#else 2332#else
2320#define DBGUNDO(x...) do { } while (0) 2333#define DBGUNDO(x...) do { } while (0)
@@ -3592,7 +3605,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
3592 * cases we should never reach this piece of code. 3605 * cases we should never reach this piece of code.
3593 */ 3606 */
3594 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n", 3607 printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
3595 __FUNCTION__, sk->sk_state); 3608 __func__, sk->sk_state);
3596 break; 3609 break;
3597 } 3610 }
3598 3611
@@ -4012,7 +4025,7 @@ drop:
4012 u32 end_seq = TCP_SKB_CB(skb)->end_seq; 4025 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4013 4026
4014 if (seq == TCP_SKB_CB(skb1)->end_seq) { 4027 if (seq == TCP_SKB_CB(skb1)->end_seq) {
4015 __skb_append(skb1, skb, &tp->out_of_order_queue); 4028 __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
4016 4029
4017 if (!tp->rx_opt.num_sacks || 4030 if (!tp->rx_opt.num_sacks ||
4018 tp->selective_acks[0].end_seq != seq) 4031 tp->selective_acks[0].end_seq != seq)
@@ -4508,6 +4521,49 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
4508 } 4521 }
4509} 4522}
4510 4523
4524static int tcp_defer_accept_check(struct sock *sk)
4525{
4526 struct tcp_sock *tp = tcp_sk(sk);
4527
4528 if (tp->defer_tcp_accept.request) {
4529 int queued_data = tp->rcv_nxt - tp->copied_seq;
4530 int hasfin = !skb_queue_empty(&sk->sk_receive_queue) ?
4531 tcp_hdr((struct sk_buff *)
4532 sk->sk_receive_queue.prev)->fin : 0;
4533
4534 if (queued_data && hasfin)
4535 queued_data--;
4536
4537 if (queued_data &&
4538 tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) {
4539 if (sock_flag(sk, SOCK_KEEPOPEN)) {
4540 inet_csk_reset_keepalive_timer(sk,
4541 keepalive_time_when(tp));
4542 } else {
4543 inet_csk_delete_keepalive_timer(sk);
4544 }
4545
4546 inet_csk_reqsk_queue_add(
4547 tp->defer_tcp_accept.listen_sk,
4548 tp->defer_tcp_accept.request,
4549 sk);
4550
4551 tp->defer_tcp_accept.listen_sk->sk_data_ready(
4552 tp->defer_tcp_accept.listen_sk, 0);
4553
4554 sock_put(tp->defer_tcp_accept.listen_sk);
4555 sock_put(sk);
4556 tp->defer_tcp_accept.listen_sk = NULL;
4557 tp->defer_tcp_accept.request = NULL;
4558 } else if (hasfin ||
4559 tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) {
4560 tcp_reset(sk);
4561 return -1;
4562 }
4563 }
4564 return 0;
4565}
4566
4511static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) 4567static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
4512{ 4568{
4513 struct tcp_sock *tp = tcp_sk(sk); 4569 struct tcp_sock *tp = tcp_sk(sk);
@@ -4868,6 +4924,9 @@ step5:
4868 4924
4869 tcp_data_snd_check(sk); 4925 tcp_data_snd_check(sk);
4870 tcp_ack_snd_check(sk); 4926 tcp_ack_snd_check(sk);
4927
4928 if (tcp_defer_accept_check(sk))
4929 return -1;
4871 return 0; 4930 return 0;
4872 4931
4873csum_error: 4932csum_error:
@@ -5387,6 +5446,7 @@ discard:
5387 5446
5388EXPORT_SYMBOL(sysctl_tcp_ecn); 5447EXPORT_SYMBOL(sysctl_tcp_ecn);
5389EXPORT_SYMBOL(sysctl_tcp_reordering); 5448EXPORT_SYMBOL(sysctl_tcp_reordering);
5449EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
5390EXPORT_SYMBOL(tcp_parse_options); 5450EXPORT_SYMBOL(tcp_parse_options);
5391EXPORT_SYMBOL(tcp_rcv_established); 5451EXPORT_SYMBOL(tcp_rcv_established);
5392EXPORT_SYMBOL(tcp_rcv_state_process); 5452EXPORT_SYMBOL(tcp_rcv_state_process);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 00156bf421ca..776615180b93 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -88,9 +88,6 @@ int sysctl_tcp_low_latency __read_mostly;
88/* Check TCP sequence numbers in ICMP packets. */ 88/* Check TCP sequence numbers in ICMP packets. */
89#define ICMP_MIN_LENGTH 8 89#define ICMP_MIN_LENGTH 8
90 90
91/* Socket used for sending RSTs */
92static struct socket *tcp_socket __read_mostly;
93
94void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); 91void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
95 92
96#ifdef CONFIG_TCP_MD5SIG 93#ifdef CONFIG_TCP_MD5SIG
@@ -353,7 +350,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
353 return; 350 return;
354 } 351 }
355 352
356 sk = inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->daddr, th->dest, 353 sk = inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->daddr, th->dest,
357 iph->saddr, th->source, inet_iif(skb)); 354 iph->saddr, th->source, inet_iif(skb));
358 if (!sk) { 355 if (!sk) {
359 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 356 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
@@ -552,7 +549,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
552 if (th->rst) 549 if (th->rst)
553 return; 550 return;
554 551
555 if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL) 552 if (skb->rtable->rt_type != RTN_LOCAL)
556 return; 553 return;
557 554
558 /* Swap the send and the receive. */ 555 /* Swap the send and the receive. */
@@ -598,7 +595,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
598 sizeof(struct tcphdr), IPPROTO_TCP, 0); 595 sizeof(struct tcphdr), IPPROTO_TCP, 0);
599 arg.csumoffset = offsetof(struct tcphdr, check) / 2; 596 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
600 597
601 ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); 598 ip_send_reply(dev_net(skb->dst->dev)->ipv4.tcp_sock, skb,
599 &arg, arg.iov[0].iov_len);
602 600
603 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); 601 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
604 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); 602 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
@@ -693,7 +691,8 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
693 if (twsk) 691 if (twsk)
694 arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if; 692 arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if;
695 693
696 ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); 694 ip_send_reply(dev_net(skb->dev)->ipv4.tcp_sock, skb,
695 &arg, arg.iov[0].iov_len);
697 696
698 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); 697 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
699} 698}
@@ -723,8 +722,8 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
723 * This still operates on a request_sock only, not on a big 722 * This still operates on a request_sock only, not on a big
724 * socket. 723 * socket.
725 */ 724 */
726static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, 725static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
727 struct dst_entry *dst) 726 struct dst_entry *dst)
728{ 727{
729 const struct inet_request_sock *ireq = inet_rsk(req); 728 const struct inet_request_sock *ireq = inet_rsk(req);
730 int err = -1; 729 int err = -1;
@@ -732,7 +731,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
732 731
733 /* First, grab a route. */ 732 /* First, grab a route. */
734 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) 733 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
735 goto out; 734 return -1;
736 735
737 skb = tcp_make_synack(sk, dst, req); 736 skb = tcp_make_synack(sk, dst, req);
738 737
@@ -751,11 +750,15 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
751 err = net_xmit_eval(err); 750 err = net_xmit_eval(err);
752 } 751 }
753 752
754out:
755 dst_release(dst); 753 dst_release(dst);
756 return err; 754 return err;
757} 755}
758 756
757static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
758{
759 return __tcp_v4_send_synack(sk, req, NULL);
760}
761
759/* 762/*
760 * IPv4 request_sock destructor. 763 * IPv4 request_sock destructor.
761 */ 764 */
@@ -1258,8 +1261,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1258#endif 1261#endif
1259 1262
1260 /* Never answer to SYNs send to broadcast or multicast */ 1263 /* Never answer to SYNs send to broadcast or multicast */
1261 if (((struct rtable *)skb->dst)->rt_flags & 1264 if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1262 (RTCF_BROADCAST | RTCF_MULTICAST))
1263 goto drop; 1265 goto drop;
1264 1266
1265 /* TW buckets are converted to open requests without 1267 /* TW buckets are converted to open requests without
@@ -1297,10 +1299,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1297 1299
1298 tcp_parse_options(skb, &tmp_opt, 0); 1300 tcp_parse_options(skb, &tmp_opt, 0);
1299 1301
1300 if (want_cookie) { 1302 if (want_cookie && !tmp_opt.saw_tstamp)
1301 tcp_clear_options(&tmp_opt); 1303 tcp_clear_options(&tmp_opt);
1302 tmp_opt.saw_tstamp = 0;
1303 }
1304 1304
1305 if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) { 1305 if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
1306 /* Some OSes (unknown ones, but I see them on web server, which 1306 /* Some OSes (unknown ones, but I see them on web server, which
@@ -1328,6 +1328,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1328 if (want_cookie) { 1328 if (want_cookie) {
1329#ifdef CONFIG_SYN_COOKIES 1329#ifdef CONFIG_SYN_COOKIES
1330 syn_flood_warning(skb); 1330 syn_flood_warning(skb);
1331 req->cookie_ts = tmp_opt.tstamp_ok;
1331#endif 1332#endif
1332 isn = cookie_v4_init_sequence(sk, skb, &req->mss); 1333 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1333 } else if (!isn) { 1334 } else if (!isn) {
@@ -1351,8 +1352,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1351 (s32)(peer->tcp_ts - req->ts_recent) > 1352 (s32)(peer->tcp_ts - req->ts_recent) >
1352 TCP_PAWS_WINDOW) { 1353 TCP_PAWS_WINDOW) {
1353 NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); 1354 NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
1354 dst_release(dst); 1355 goto drop_and_release;
1355 goto drop_and_free;
1356 } 1356 }
1357 } 1357 }
1358 /* Kill the following clause, if you dislike this way. */ 1358 /* Kill the following clause, if you dislike this way. */
@@ -1369,27 +1369,24 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1369 * to the moment of synflood. 1369 * to the moment of synflood.
1370 */ 1370 */
1371 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " 1371 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
1372 "request from %u.%u.%u.%u/%u\n", 1372 "request from " NIPQUAD_FMT "/%u\n",
1373 NIPQUAD(saddr), 1373 NIPQUAD(saddr),
1374 ntohs(tcp_hdr(skb)->source)); 1374 ntohs(tcp_hdr(skb)->source));
1375 dst_release(dst); 1375 goto drop_and_release;
1376 goto drop_and_free;
1377 } 1376 }
1378 1377
1379 isn = tcp_v4_init_sequence(skb); 1378 isn = tcp_v4_init_sequence(skb);
1380 } 1379 }
1381 tcp_rsk(req)->snt_isn = isn; 1380 tcp_rsk(req)->snt_isn = isn;
1382 1381
1383 if (tcp_v4_send_synack(sk, req, dst)) 1382 if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
1384 goto drop_and_free; 1383 goto drop_and_free;
1385 1384
1386 if (want_cookie) { 1385 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1387 reqsk_free(req);
1388 } else {
1389 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1390 }
1391 return 0; 1386 return 0;
1392 1387
1388drop_and_release:
1389 dst_release(dst);
1393drop_and_free: 1390drop_and_free:
1394 reqsk_free(req); 1391 reqsk_free(req);
1395drop: 1392drop:
@@ -1487,7 +1484,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1487 if (req) 1484 if (req)
1488 return tcp_check_req(sk, skb, req, prev); 1485 return tcp_check_req(sk, skb, req, prev);
1489 1486
1490 nsk = inet_lookup_established(sk->sk_net, &tcp_hashinfo, iph->saddr, 1487 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1491 th->source, iph->daddr, th->dest, inet_iif(skb)); 1488 th->source, iph->daddr, th->dest, inet_iif(skb));
1492 1489
1493 if (nsk) { 1490 if (nsk) {
@@ -1645,7 +1642,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
1645 TCP_SKB_CB(skb)->flags = iph->tos; 1642 TCP_SKB_CB(skb)->flags = iph->tos;
1646 TCP_SKB_CB(skb)->sacked = 0; 1643 TCP_SKB_CB(skb)->sacked = 0;
1647 1644
1648 sk = __inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->saddr, 1645 sk = __inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->saddr,
1649 th->source, iph->daddr, th->dest, inet_iif(skb)); 1646 th->source, iph->daddr, th->dest, inet_iif(skb));
1650 if (!sk) 1647 if (!sk)
1651 goto no_tcp_socket; 1648 goto no_tcp_socket;
@@ -1719,7 +1716,7 @@ do_time_wait:
1719 } 1716 }
1720 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1717 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1721 case TCP_TW_SYN: { 1718 case TCP_TW_SYN: {
1722 struct sock *sk2 = inet_lookup_listener(skb->dev->nd_net, 1719 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1723 &tcp_hashinfo, 1720 &tcp_hashinfo,
1724 iph->daddr, th->dest, 1721 iph->daddr, th->dest,
1725 inet_iif(skb)); 1722 inet_iif(skb));
@@ -1921,6 +1918,14 @@ int tcp_v4_destroy_sock(struct sock *sk)
1921 sk->sk_sndmsg_page = NULL; 1918 sk->sk_sndmsg_page = NULL;
1922 } 1919 }
1923 1920
1921 if (tp->defer_tcp_accept.request) {
1922 reqsk_free(tp->defer_tcp_accept.request);
1923 sock_put(tp->defer_tcp_accept.listen_sk);
1924 sock_put(sk);
1925 tp->defer_tcp_accept.listen_sk = NULL;
1926 tp->defer_tcp_accept.request = NULL;
1927 }
1928
1924 atomic_dec(&tcp_sockets_allocated); 1929 atomic_dec(&tcp_sockets_allocated);
1925 1930
1926 return 0; 1931 return 0;
@@ -1949,6 +1954,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
1949 struct hlist_node *node; 1954 struct hlist_node *node;
1950 struct sock *sk = cur; 1955 struct sock *sk = cur;
1951 struct tcp_iter_state* st = seq->private; 1956 struct tcp_iter_state* st = seq->private;
1957 struct net *net = seq_file_net(seq);
1952 1958
1953 if (!sk) { 1959 if (!sk) {
1954 st->bucket = 0; 1960 st->bucket = 0;
@@ -1965,7 +1971,8 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
1965 req = req->dl_next; 1971 req = req->dl_next;
1966 while (1) { 1972 while (1) {
1967 while (req) { 1973 while (req) {
1968 if (req->rsk_ops->family == st->family) { 1974 if (req->rsk_ops->family == st->family &&
1975 net_eq(sock_net(req->sk), net)) {
1969 cur = req; 1976 cur = req;
1970 goto out; 1977 goto out;
1971 } 1978 }
@@ -1989,7 +1996,7 @@ get_req:
1989 } 1996 }
1990get_sk: 1997get_sk:
1991 sk_for_each_from(sk, node) { 1998 sk_for_each_from(sk, node) {
1992 if (sk->sk_family == st->family) { 1999 if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
1993 cur = sk; 2000 cur = sk;
1994 goto out; 2001 goto out;
1995 } 2002 }
@@ -2028,6 +2035,7 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2028static void *established_get_first(struct seq_file *seq) 2035static void *established_get_first(struct seq_file *seq)
2029{ 2036{
2030 struct tcp_iter_state* st = seq->private; 2037 struct tcp_iter_state* st = seq->private;
2038 struct net *net = seq_file_net(seq);
2031 void *rc = NULL; 2039 void *rc = NULL;
2032 2040
2033 for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { 2041 for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
@@ -2038,7 +2046,8 @@ static void *established_get_first(struct seq_file *seq)
2038 2046
2039 read_lock_bh(lock); 2047 read_lock_bh(lock);
2040 sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { 2048 sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2041 if (sk->sk_family != st->family) { 2049 if (sk->sk_family != st->family ||
2050 !net_eq(sock_net(sk), net)) {
2042 continue; 2051 continue;
2043 } 2052 }
2044 rc = sk; 2053 rc = sk;
@@ -2047,7 +2056,8 @@ static void *established_get_first(struct seq_file *seq)
2047 st->state = TCP_SEQ_STATE_TIME_WAIT; 2056 st->state = TCP_SEQ_STATE_TIME_WAIT;
2048 inet_twsk_for_each(tw, node, 2057 inet_twsk_for_each(tw, node,
2049 &tcp_hashinfo.ehash[st->bucket].twchain) { 2058 &tcp_hashinfo.ehash[st->bucket].twchain) {
2050 if (tw->tw_family != st->family) { 2059 if (tw->tw_family != st->family ||
2060 !net_eq(twsk_net(tw), net)) {
2051 continue; 2061 continue;
2052 } 2062 }
2053 rc = tw; 2063 rc = tw;
@@ -2066,6 +2076,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
2066 struct inet_timewait_sock *tw; 2076 struct inet_timewait_sock *tw;
2067 struct hlist_node *node; 2077 struct hlist_node *node;
2068 struct tcp_iter_state* st = seq->private; 2078 struct tcp_iter_state* st = seq->private;
2079 struct net *net = seq_file_net(seq);
2069 2080
2070 ++st->num; 2081 ++st->num;
2071 2082
@@ -2073,7 +2084,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
2073 tw = cur; 2084 tw = cur;
2074 tw = tw_next(tw); 2085 tw = tw_next(tw);
2075get_tw: 2086get_tw:
2076 while (tw && tw->tw_family != st->family) { 2087 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2077 tw = tw_next(tw); 2088 tw = tw_next(tw);
2078 } 2089 }
2079 if (tw) { 2090 if (tw) {
@@ -2094,7 +2105,7 @@ get_tw:
2094 sk = sk_next(sk); 2105 sk = sk_next(sk);
2095 2106
2096 sk_for_each_from(sk, node) { 2107 sk_for_each_from(sk, node) {
2097 if (sk->sk_family == st->family) 2108 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2098 goto found; 2109 goto found;
2099 } 2110 }
2100 2111
@@ -2200,48 +2211,37 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
2200static int tcp_seq_open(struct inode *inode, struct file *file) 2211static int tcp_seq_open(struct inode *inode, struct file *file)
2201{ 2212{
2202 struct tcp_seq_afinfo *afinfo = PDE(inode)->data; 2213 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2203 struct seq_file *seq;
2204 struct tcp_iter_state *s; 2214 struct tcp_iter_state *s;
2205 int rc; 2215 int err;
2206 2216
2207 if (unlikely(afinfo == NULL)) 2217 if (unlikely(afinfo == NULL))
2208 return -EINVAL; 2218 return -EINVAL;
2209 2219
2210 s = kzalloc(sizeof(*s), GFP_KERNEL); 2220 err = seq_open_net(inode, file, &afinfo->seq_ops,
2211 if (!s) 2221 sizeof(struct tcp_iter_state));
2212 return -ENOMEM; 2222 if (err < 0)
2223 return err;
2224
2225 s = ((struct seq_file *)file->private_data)->private;
2213 s->family = afinfo->family; 2226 s->family = afinfo->family;
2214 s->seq_ops.start = tcp_seq_start; 2227 return 0;
2215 s->seq_ops.next = tcp_seq_next;
2216 s->seq_ops.show = afinfo->seq_show;
2217 s->seq_ops.stop = tcp_seq_stop;
2218
2219 rc = seq_open(file, &s->seq_ops);
2220 if (rc)
2221 goto out_kfree;
2222 seq = file->private_data;
2223 seq->private = s;
2224out:
2225 return rc;
2226out_kfree:
2227 kfree(s);
2228 goto out;
2229} 2228}
2230 2229
2231int tcp_proc_register(struct tcp_seq_afinfo *afinfo) 2230int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2232{ 2231{
2233 int rc = 0; 2232 int rc = 0;
2234 struct proc_dir_entry *p; 2233 struct proc_dir_entry *p;
2235 2234
2236 if (!afinfo) 2235 afinfo->seq_fops.open = tcp_seq_open;
2237 return -EINVAL; 2236 afinfo->seq_fops.read = seq_read;
2238 afinfo->seq_fops->owner = afinfo->owner; 2237 afinfo->seq_fops.llseek = seq_lseek;
2239 afinfo->seq_fops->open = tcp_seq_open; 2238 afinfo->seq_fops.release = seq_release_net;
2240 afinfo->seq_fops->read = seq_read; 2239
2241 afinfo->seq_fops->llseek = seq_lseek; 2240 afinfo->seq_ops.start = tcp_seq_start;
2242 afinfo->seq_fops->release = seq_release_private; 2241 afinfo->seq_ops.next = tcp_seq_next;
2242 afinfo->seq_ops.stop = tcp_seq_stop;
2243 2243
2244 p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops); 2244 p = proc_net_fops_create(net, afinfo->name, S_IRUGO, &afinfo->seq_fops);
2245 if (p) 2245 if (p)
2246 p->data = afinfo; 2246 p->data = afinfo;
2247 else 2247 else
@@ -2249,12 +2249,9 @@ int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
2249 return rc; 2249 return rc;
2250} 2250}
2251 2251
2252void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo) 2252void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2253{ 2253{
2254 if (!afinfo) 2254 proc_net_remove(net, afinfo->name);
2255 return;
2256 proc_net_remove(&init_net, afinfo->name);
2257 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
2258} 2255}
2259 2256
2260static void get_openreq4(struct sock *sk, struct request_sock *req, 2257static void get_openreq4(struct sock *sk, struct request_sock *req,
@@ -2383,28 +2380,43 @@ out:
2383 return 0; 2380 return 0;
2384} 2381}
2385 2382
2386static struct file_operations tcp4_seq_fops;
2387static struct tcp_seq_afinfo tcp4_seq_afinfo = { 2383static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2388 .owner = THIS_MODULE,
2389 .name = "tcp", 2384 .name = "tcp",
2390 .family = AF_INET, 2385 .family = AF_INET,
2391 .seq_show = tcp4_seq_show, 2386 .seq_fops = {
2392 .seq_fops = &tcp4_seq_fops, 2387 .owner = THIS_MODULE,
2388 },
2389 .seq_ops = {
2390 .show = tcp4_seq_show,
2391 },
2392};
2393
2394static int tcp4_proc_init_net(struct net *net)
2395{
2396 return tcp_proc_register(net, &tcp4_seq_afinfo);
2397}
2398
2399static void tcp4_proc_exit_net(struct net *net)
2400{
2401 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2402}
2403
2404static struct pernet_operations tcp4_net_ops = {
2405 .init = tcp4_proc_init_net,
2406 .exit = tcp4_proc_exit_net,
2393}; 2407};
2394 2408
2395int __init tcp4_proc_init(void) 2409int __init tcp4_proc_init(void)
2396{ 2410{
2397 return tcp_proc_register(&tcp4_seq_afinfo); 2411 return register_pernet_subsys(&tcp4_net_ops);
2398} 2412}
2399 2413
2400void tcp4_proc_exit(void) 2414void tcp4_proc_exit(void)
2401{ 2415{
2402 tcp_proc_unregister(&tcp4_seq_afinfo); 2416 unregister_pernet_subsys(&tcp4_net_ops);
2403} 2417}
2404#endif /* CONFIG_PROC_FS */ 2418#endif /* CONFIG_PROC_FS */
2405 2419
2406DEFINE_PROTO_INUSE(tcp)
2407
2408struct proto tcp_prot = { 2420struct proto tcp_prot = {
2409 .name = "TCP", 2421 .name = "TCP",
2410 .owner = THIS_MODULE, 2422 .owner = THIS_MODULE,
@@ -2435,18 +2447,33 @@ struct proto tcp_prot = {
2435 .obj_size = sizeof(struct tcp_sock), 2447 .obj_size = sizeof(struct tcp_sock),
2436 .twsk_prot = &tcp_timewait_sock_ops, 2448 .twsk_prot = &tcp_timewait_sock_ops,
2437 .rsk_prot = &tcp_request_sock_ops, 2449 .rsk_prot = &tcp_request_sock_ops,
2438 .hashinfo = &tcp_hashinfo, 2450 .h.hashinfo = &tcp_hashinfo,
2439#ifdef CONFIG_COMPAT 2451#ifdef CONFIG_COMPAT
2440 .compat_setsockopt = compat_tcp_setsockopt, 2452 .compat_setsockopt = compat_tcp_setsockopt,
2441 .compat_getsockopt = compat_tcp_getsockopt, 2453 .compat_getsockopt = compat_tcp_getsockopt,
2442#endif 2454#endif
2443 REF_PROTO_INUSE(tcp)
2444}; 2455};
2445 2456
2446void __init tcp_v4_init(struct net_proto_family *ops) 2457
2458static int __net_init tcp_sk_init(struct net *net)
2459{
2460 return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2461 PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2462}
2463
2464static void __net_exit tcp_sk_exit(struct net *net)
2465{
2466 inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2467}
2468
2469static struct pernet_operations __net_initdata tcp_sk_ops = {
2470 .init = tcp_sk_init,
2471 .exit = tcp_sk_exit,
2472};
2473
2474void __init tcp_v4_init(void)
2447{ 2475{
2448 if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, 2476 if (register_pernet_device(&tcp_sk_ops))
2449 IPPROTO_TCP) < 0)
2450 panic("Failed to create the TCP control socket.\n"); 2477 panic("Failed to create the TCP control socket.\n");
2451} 2478}
2452 2479
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b61b76847ad9..019c8c16e5cc 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -35,6 +35,8 @@
35#endif 35#endif
36 36
37int sysctl_tcp_syncookies __read_mostly = SYNC_INIT; 37int sysctl_tcp_syncookies __read_mostly = SYNC_INIT;
38EXPORT_SYMBOL(sysctl_tcp_syncookies);
39
38int sysctl_tcp_abort_on_overflow __read_mostly; 40int sysctl_tcp_abort_on_overflow __read_mostly;
39 41
40struct inet_timewait_death_row tcp_death_row = { 42struct inet_timewait_death_row tcp_death_row = {
@@ -536,7 +538,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
536 * Enforce "SYN-ACK" according to figure 8, figure 6 538 * Enforce "SYN-ACK" according to figure 8, figure 6
537 * of RFC793, fixed by RFC1122. 539 * of RFC793, fixed by RFC1122.
538 */ 540 */
539 req->rsk_ops->rtx_syn_ack(sk, req, NULL); 541 req->rsk_ops->rtx_syn_ack(sk, req);
540 return NULL; 542 return NULL;
541 } 543 }
542 544
@@ -569,10 +571,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
569 does sequence test, SYN is truncated, and thus we consider 571 does sequence test, SYN is truncated, and thus we consider
570 it a bare ACK. 572 it a bare ACK.
571 573
572 If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this 574 Both ends (listening sockets) accept the new incoming
573 bare ACK. Otherwise, we create an established connection. Both 575 connection and try to talk to each other. 8-)
574 ends (listening sockets) accept the new incoming connection and try
575 to talk to each other. 8-)
576 576
577 Note: This case is both harmless, and rare. Possibility is about the 577 Note: This case is both harmless, and rare. Possibility is about the
578 same as us discovering intelligent life on another plant tomorrow. 578 same as us discovering intelligent life on another plant tomorrow.
@@ -640,13 +640,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
640 if (!(flg & TCP_FLAG_ACK)) 640 if (!(flg & TCP_FLAG_ACK))
641 return NULL; 641 return NULL;
642 642
643 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
644 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
645 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
646 inet_rsk(req)->acked = 1;
647 return NULL;
648 }
649
650 /* OK, ACK is valid, create big socket and 643 /* OK, ACK is valid, create big socket and
651 * feed this segment to it. It will repeat all 644 * feed this segment to it. It will repeat all
652 * the tests. THIS SEGMENT MUST MOVE SOCKET TO 645 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
@@ -685,7 +678,24 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
685 inet_csk_reqsk_queue_unlink(sk, req, prev); 678 inet_csk_reqsk_queue_unlink(sk, req, prev);
686 inet_csk_reqsk_queue_removed(sk, req); 679 inet_csk_reqsk_queue_removed(sk, req);
687 680
688 inet_csk_reqsk_queue_add(sk, req, child); 681 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
682 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
683
684 /* the accept queue handling is done is est recv slow
685 * path so lets make sure to start there
686 */
687 tcp_sk(child)->pred_flags = 0;
688 sock_hold(sk);
689 sock_hold(child);
690 tcp_sk(child)->defer_tcp_accept.listen_sk = sk;
691 tcp_sk(child)->defer_tcp_accept.request = req;
692
693 inet_csk_reset_keepalive_timer(child,
694 inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ);
695 } else {
696 inet_csk_reqsk_queue_add(sk, req, child);
697 }
698
689 return child; 699 return child;
690 700
691 listen_overflow: 701 listen_overflow:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d29ef79c00ca..debf23581606 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -998,7 +998,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
998 xmit_size_goal = mss_now; 998 xmit_size_goal = mss_now;
999 999
1000 if (doing_tso) { 1000 if (doing_tso) {
1001 xmit_size_goal = (65535 - 1001 xmit_size_goal = ((sk->sk_gso_max_size - 1) -
1002 inet_csk(sk)->icsk_af_ops->net_header_len - 1002 inet_csk(sk)->icsk_af_ops->net_header_len -
1003 inet_csk(sk)->icsk_ext_hdr_len - 1003 inet_csk(sk)->icsk_ext_hdr_len -
1004 tp->tcp_header_len); 1004 tp->tcp_header_len);
@@ -1057,7 +1057,7 @@ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
1057 1057
1058 needed = min(skb->len, window); 1058 needed = min(skb->len, window);
1059 1059
1060 if (skb == tcp_write_queue_tail(sk) && cwnd_len <= needed) 1060 if (cwnd_len <= needed)
1061 return cwnd_len; 1061 return cwnd_len;
1062 1062
1063 return needed - needed % mss_now; 1063 return needed - needed % mss_now;
@@ -1282,7 +1282,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1282 limit = min(send_win, cong_win); 1282 limit = min(send_win, cong_win);
1283 1283
1284 /* If a full-sized TSO skb can be sent, do it. */ 1284 /* If a full-sized TSO skb can be sent, do it. */
1285 if (limit >= 65536) 1285 if (limit >= sk->sk_gso_max_size)
1286 goto send_now; 1286 goto send_now;
1287 1287
1288 if (sysctl_tcp_tso_win_divisor) { 1288 if (sysctl_tcp_tso_win_divisor) {
@@ -2236,7 +2236,11 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2236 2236
2237 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ 2237 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
2238 th->window = htons(min(req->rcv_wnd, 65535U)); 2238 th->window = htons(min(req->rcv_wnd, 65535U));
2239 2239#ifdef CONFIG_SYN_COOKIES
2240 if (unlikely(req->cookie_ts))
2241 TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
2242 else
2243#endif
2240 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2244 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2241 tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok, 2245 tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
2242 ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale, 2246 ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
@@ -2571,6 +2575,7 @@ void tcp_send_probe0(struct sock *sk)
2571 } 2575 }
2572} 2576}
2573 2577
2578EXPORT_SYMBOL(tcp_select_initial_window);
2574EXPORT_SYMBOL(tcp_connect); 2579EXPORT_SYMBOL(tcp_connect);
2575EXPORT_SYMBOL(tcp_make_synack); 2580EXPORT_SYMBOL(tcp_make_synack);
2576EXPORT_SYMBOL(tcp_simple_retransmit); 2581EXPORT_SYMBOL(tcp_simple_retransmit);
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 87dd5bff315f..1c509592574a 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -153,7 +153,7 @@ static int tcpprobe_sprint(char *tbuf, int n)
153 = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); 153 = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start));
154 154
155 return snprintf(tbuf, n, 155 return snprintf(tbuf, n,
156 "%lu.%09lu %d.%d.%d.%d:%u %d.%d.%d.%d:%u" 156 "%lu.%09lu " NIPQUAD_FMT ":%u " NIPQUAD_FMT ":%u"
157 " %d %#x %#x %u %u %u %u\n", 157 " %d %#x %#x %u %u %u %u\n",
158 (unsigned long) tv.tv_sec, 158 (unsigned long) tv.tv_sec,
159 (unsigned long) tv.tv_nsec, 159 (unsigned long) tv.tv_nsec,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 803d758a2b12..4de68cf5f2aa 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -299,12 +299,20 @@ static void tcp_retransmit_timer(struct sock *sk)
299 * we cannot allow such beasts to hang infinitely. 299 * we cannot allow such beasts to hang infinitely.
300 */ 300 */
301#ifdef TCP_DEBUG 301#ifdef TCP_DEBUG
302 if (1) { 302 struct inet_sock *inet = inet_sk(sk);
303 struct inet_sock *inet = inet_sk(sk); 303 if (sk->sk_family == AF_INET) {
304 LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n", 304 LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer " NIPQUAD_FMT ":%u/%u shrinks window %u:%u. Repaired.\n",
305 NIPQUAD(inet->daddr), ntohs(inet->dport), 305 NIPQUAD(inet->daddr), ntohs(inet->dport),
306 inet->num, tp->snd_una, tp->snd_nxt); 306 inet->num, tp->snd_una, tp->snd_nxt);
307 } 307 }
308#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
309 else if (sk->sk_family == AF_INET6) {
310 struct ipv6_pinfo *np = inet6_sk(sk);
311 LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer " NIP6_FMT ":%u/%u shrinks window %u:%u. Repaired.\n",
312 NIP6(np->daddr), ntohs(inet->dport),
313 inet->num, tp->snd_una, tp->snd_nxt);
314 }
315#endif
308#endif 316#endif
309 if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) { 317 if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
310 tcp_write_err(sk); 318 tcp_write_err(sk);
@@ -481,6 +489,11 @@ static void tcp_keepalive_timer (unsigned long data)
481 goto death; 489 goto death;
482 } 490 }
483 491
492 if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
493 tcp_send_active_reset(sk, GFP_ATOMIC);
494 goto death;
495 }
496
484 if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) 497 if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
485 goto out; 498 goto out;
486 499
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 978b3fd61e65..d3b709a6f264 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -136,6 +136,7 @@ static struct net_protocol tunnel4_protocol = {
136 .handler = tunnel4_rcv, 136 .handler = tunnel4_rcv,
137 .err_handler = tunnel4_err, 137 .err_handler = tunnel4_err,
138 .no_policy = 1, 138 .no_policy = 1,
139 .netns_ok = 1,
139}; 140};
140 141
141#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 142#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -143,6 +144,7 @@ static struct net_protocol tunnel64_protocol = {
143 .handler = tunnel64_rcv, 144 .handler = tunnel64_rcv,
144 .err_handler = tunnel64_err, 145 .err_handler = tunnel64_err,
145 .no_policy = 1, 146 .no_policy = 1,
147 .netns_ok = 1,
146}; 148};
147#endif 149#endif
148 150
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1704c1474ea1..b053ac795275 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -137,29 +137,28 @@ static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
137 struct hlist_node *node; 137 struct hlist_node *node;
138 138
139 sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) 139 sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
140 if (sk->sk_net == net && sk->sk_hash == num) 140 if (net_eq(sock_net(sk), net) && sk->sk_hash == num)
141 return 1; 141 return 1;
142 return 0; 142 return 0;
143} 143}
144 144
145/** 145/**
146 * __udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 146 * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
147 * 147 *
148 * @sk: socket struct in question 148 * @sk: socket struct in question
149 * @snum: port number to look up 149 * @snum: port number to look up
150 * @udptable: hash list table, must be of UDP_HTABLE_SIZE
151 * @saddr_comp: AF-dependent comparison of bound local IP addresses 150 * @saddr_comp: AF-dependent comparison of bound local IP addresses
152 */ 151 */
153int __udp_lib_get_port(struct sock *sk, unsigned short snum, 152int udp_lib_get_port(struct sock *sk, unsigned short snum,
154 struct hlist_head udptable[],
155 int (*saddr_comp)(const struct sock *sk1, 153 int (*saddr_comp)(const struct sock *sk1,
156 const struct sock *sk2 ) ) 154 const struct sock *sk2 ) )
157{ 155{
156 struct hlist_head *udptable = sk->sk_prot->h.udp_hash;
158 struct hlist_node *node; 157 struct hlist_node *node;
159 struct hlist_head *head; 158 struct hlist_head *head;
160 struct sock *sk2; 159 struct sock *sk2;
161 int error = 1; 160 int error = 1;
162 struct net *net = sk->sk_net; 161 struct net *net = sock_net(sk);
163 162
164 write_lock_bh(&udp_hash_lock); 163 write_lock_bh(&udp_hash_lock);
165 164
@@ -219,7 +218,7 @@ gotit:
219 sk_for_each(sk2, node, head) 218 sk_for_each(sk2, node, head)
220 if (sk2->sk_hash == snum && 219 if (sk2->sk_hash == snum &&
221 sk2 != sk && 220 sk2 != sk &&
222 sk2->sk_net == net && 221 net_eq(sock_net(sk2), net) &&
223 (!sk2->sk_reuse || !sk->sk_reuse) && 222 (!sk2->sk_reuse || !sk->sk_reuse) &&
224 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if 223 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
225 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 224 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
@@ -232,7 +231,7 @@ gotit:
232 if (sk_unhashed(sk)) { 231 if (sk_unhashed(sk)) {
233 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; 232 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
234 sk_add_node(sk, head); 233 sk_add_node(sk, head);
235 sock_prot_inuse_add(sk->sk_prot, 1); 234 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
236 } 235 }
237 error = 0; 236 error = 0;
238fail: 237fail:
@@ -240,13 +239,7 @@ fail:
240 return error; 239 return error;
241} 240}
242 241
243int udp_get_port(struct sock *sk, unsigned short snum, 242static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
244 int (*scmp)(const struct sock *, const struct sock *))
245{
246 return __udp_lib_get_port(sk, snum, udp_hash, scmp);
247}
248
249int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
250{ 243{
251 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 244 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
252 245
@@ -255,9 +248,9 @@ int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
255 inet1->rcv_saddr == inet2->rcv_saddr )); 248 inet1->rcv_saddr == inet2->rcv_saddr ));
256} 249}
257 250
258static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) 251int udp_v4_get_port(struct sock *sk, unsigned short snum)
259{ 252{
260 return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); 253 return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
261} 254}
262 255
263/* UDP is nearly always wildcards out the wazoo, it makes no sense to try 256/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
@@ -276,7 +269,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
276 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { 269 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
277 struct inet_sock *inet = inet_sk(sk); 270 struct inet_sock *inet = inet_sk(sk);
278 271
279 if (sk->sk_net == net && sk->sk_hash == hnum && 272 if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
280 !ipv6_only_sock(sk)) { 273 !ipv6_only_sock(sk)) {
281 int score = (sk->sk_family == PF_INET ? 1 : 0); 274 int score = (sk->sk_family == PF_INET ? 1 : 0);
282 if (inet->rcv_saddr) { 275 if (inet->rcv_saddr) {
@@ -364,7 +357,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
364 int harderr; 357 int harderr;
365 int err; 358 int err;
366 359
367 sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, 360 sk = __udp4_lib_lookup(dev_net(skb->dev), iph->daddr, uh->dest,
368 iph->saddr, uh->source, skb->dev->ifindex, udptable); 361 iph->saddr, uh->source, skb->dev->ifindex, udptable);
369 if (sk == NULL) { 362 if (sk == NULL) {
370 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 363 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
@@ -614,7 +607,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
614 607
615 ipc.oif = sk->sk_bound_dev_if; 608 ipc.oif = sk->sk_bound_dev_if;
616 if (msg->msg_controllen) { 609 if (msg->msg_controllen) {
617 err = ip_cmsg_send(msg, &ipc); 610 err = ip_cmsg_send(sock_net(sk), msg, &ipc);
618 if (err) 611 if (err)
619 return err; 612 return err;
620 if (ipc.opt) 613 if (ipc.opt)
@@ -663,7 +656,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
663 { .sport = inet->sport, 656 { .sport = inet->sport,
664 .dport = dport } } }; 657 .dport = dport } } };
665 security_sk_classify_flow(sk, &fl); 658 security_sk_classify_flow(sk, &fl);
666 err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); 659 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
667 if (err) { 660 if (err) {
668 if (err == -ENETUNREACH) 661 if (err == -ENETUNREACH)
669 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 662 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
@@ -1188,7 +1181,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1188 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) 1181 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
1189 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); 1182 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
1190 1183
1191 sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, 1184 sk = __udp4_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr,
1192 uh->dest, inet_iif(skb), udptable); 1185 uh->dest, inet_iif(skb), udptable);
1193 1186
1194 if (sk != NULL) { 1187 if (sk != NULL) {
@@ -1228,7 +1221,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1228 return 0; 1221 return 0;
1229 1222
1230short_packet: 1223short_packet:
1231 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", 1224 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From " NIPQUAD_FMT ":%u %d/%d to " NIPQUAD_FMT ":%u\n",
1232 proto == IPPROTO_UDPLITE ? "-Lite" : "", 1225 proto == IPPROTO_UDPLITE ? "-Lite" : "",
1233 NIPQUAD(saddr), 1226 NIPQUAD(saddr),
1234 ntohs(uh->source), 1227 ntohs(uh->source),
@@ -1243,7 +1236,7 @@ csum_error:
1243 * RFC1122: OK. Discards the bad packet silently (as far as 1236 * RFC1122: OK. Discards the bad packet silently (as far as
1244 * the network is concerned, anyway) as per 4.1.3.4 (MUST). 1237 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
1245 */ 1238 */
1246 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", 1239 LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From " NIPQUAD_FMT ":%u to " NIPQUAD_FMT ":%u ulen %d\n",
1247 proto == IPPROTO_UDPLITE ? "-Lite" : "", 1240 proto == IPPROTO_UDPLITE ? "-Lite" : "",
1248 NIPQUAD(saddr), 1241 NIPQUAD(saddr),
1249 ntohs(uh->source), 1242 ntohs(uh->source),
@@ -1474,8 +1467,6 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1474 1467
1475} 1468}
1476 1469
1477DEFINE_PROTO_INUSE(udp)
1478
1479struct proto udp_prot = { 1470struct proto udp_prot = {
1480 .name = "UDP", 1471 .name = "UDP",
1481 .owner = THIS_MODULE, 1472 .owner = THIS_MODULE,
@@ -1498,11 +1489,11 @@ struct proto udp_prot = {
1498 .sysctl_wmem = &sysctl_udp_wmem_min, 1489 .sysctl_wmem = &sysctl_udp_wmem_min,
1499 .sysctl_rmem = &sysctl_udp_rmem_min, 1490 .sysctl_rmem = &sysctl_udp_rmem_min,
1500 .obj_size = sizeof(struct udp_sock), 1491 .obj_size = sizeof(struct udp_sock),
1492 .h.udp_hash = udp_hash,
1501#ifdef CONFIG_COMPAT 1493#ifdef CONFIG_COMPAT
1502 .compat_setsockopt = compat_udp_setsockopt, 1494 .compat_setsockopt = compat_udp_setsockopt,
1503 .compat_getsockopt = compat_udp_getsockopt, 1495 .compat_getsockopt = compat_udp_getsockopt,
1504#endif 1496#endif
1505 REF_PROTO_INUSE(udp)
1506}; 1497};
1507 1498
1508/* ------------------------------------------------------------------------ */ 1499/* ------------------------------------------------------------------------ */
@@ -1512,10 +1503,13 @@ static struct sock *udp_get_first(struct seq_file *seq)
1512{ 1503{
1513 struct sock *sk; 1504 struct sock *sk;
1514 struct udp_iter_state *state = seq->private; 1505 struct udp_iter_state *state = seq->private;
1506 struct net *net = seq_file_net(seq);
1515 1507
1516 for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { 1508 for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
1517 struct hlist_node *node; 1509 struct hlist_node *node;
1518 sk_for_each(sk, node, state->hashtable + state->bucket) { 1510 sk_for_each(sk, node, state->hashtable + state->bucket) {
1511 if (!net_eq(sock_net(sk), net))
1512 continue;
1519 if (sk->sk_family == state->family) 1513 if (sk->sk_family == state->family)
1520 goto found; 1514 goto found;
1521 } 1515 }
@@ -1528,12 +1522,13 @@ found:
1528static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) 1522static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
1529{ 1523{
1530 struct udp_iter_state *state = seq->private; 1524 struct udp_iter_state *state = seq->private;
1525 struct net *net = seq_file_net(seq);
1531 1526
1532 do { 1527 do {
1533 sk = sk_next(sk); 1528 sk = sk_next(sk);
1534try_again: 1529try_again:
1535 ; 1530 ;
1536 } while (sk && sk->sk_family != state->family); 1531 } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
1537 1532
1538 if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { 1533 if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
1539 sk = sk_head(state->hashtable + state->bucket); 1534 sk = sk_head(state->hashtable + state->bucket);
@@ -1581,47 +1576,36 @@ static void udp_seq_stop(struct seq_file *seq, void *v)
1581static int udp_seq_open(struct inode *inode, struct file *file) 1576static int udp_seq_open(struct inode *inode, struct file *file)
1582{ 1577{
1583 struct udp_seq_afinfo *afinfo = PDE(inode)->data; 1578 struct udp_seq_afinfo *afinfo = PDE(inode)->data;
1584 struct seq_file *seq; 1579 struct udp_iter_state *s;
1585 int rc = -ENOMEM; 1580 int err;
1586 struct udp_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
1587 1581
1588 if (!s) 1582 err = seq_open_net(inode, file, &afinfo->seq_ops,
1589 goto out; 1583 sizeof(struct udp_iter_state));
1584 if (err < 0)
1585 return err;
1586
1587 s = ((struct seq_file *)file->private_data)->private;
1590 s->family = afinfo->family; 1588 s->family = afinfo->family;
1591 s->hashtable = afinfo->hashtable; 1589 s->hashtable = afinfo->hashtable;
1592 s->seq_ops.start = udp_seq_start; 1590 return err;
1593 s->seq_ops.next = udp_seq_next;
1594 s->seq_ops.show = afinfo->seq_show;
1595 s->seq_ops.stop = udp_seq_stop;
1596
1597 rc = seq_open(file, &s->seq_ops);
1598 if (rc)
1599 goto out_kfree;
1600
1601 seq = file->private_data;
1602 seq->private = s;
1603out:
1604 return rc;
1605out_kfree:
1606 kfree(s);
1607 goto out;
1608} 1591}
1609 1592
1610/* ------------------------------------------------------------------------ */ 1593/* ------------------------------------------------------------------------ */
1611int udp_proc_register(struct udp_seq_afinfo *afinfo) 1594int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
1612{ 1595{
1613 struct proc_dir_entry *p; 1596 struct proc_dir_entry *p;
1614 int rc = 0; 1597 int rc = 0;
1615 1598
1616 if (!afinfo) 1599 afinfo->seq_fops.open = udp_seq_open;
1617 return -EINVAL; 1600 afinfo->seq_fops.read = seq_read;
1618 afinfo->seq_fops->owner = afinfo->owner; 1601 afinfo->seq_fops.llseek = seq_lseek;
1619 afinfo->seq_fops->open = udp_seq_open; 1602 afinfo->seq_fops.release = seq_release_net;
1620 afinfo->seq_fops->read = seq_read; 1603
1621 afinfo->seq_fops->llseek = seq_lseek; 1604 afinfo->seq_ops.start = udp_seq_start;
1622 afinfo->seq_fops->release = seq_release_private; 1605 afinfo->seq_ops.next = udp_seq_next;
1606 afinfo->seq_ops.stop = udp_seq_stop;
1623 1607
1624 p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops); 1608 p = proc_net_fops_create(net, afinfo->name, S_IRUGO, &afinfo->seq_fops);
1625 if (p) 1609 if (p)
1626 p->data = afinfo; 1610 p->data = afinfo;
1627 else 1611 else
@@ -1629,12 +1613,9 @@ int udp_proc_register(struct udp_seq_afinfo *afinfo)
1629 return rc; 1613 return rc;
1630} 1614}
1631 1615
1632void udp_proc_unregister(struct udp_seq_afinfo *afinfo) 1616void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
1633{ 1617{
1634 if (!afinfo) 1618 proc_net_remove(net, afinfo->name);
1635 return;
1636 proc_net_remove(&init_net, afinfo->name);
1637 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
1638} 1619}
1639 1620
1640/* ------------------------------------------------------------------------ */ 1621/* ------------------------------------------------------------------------ */
@@ -1673,24 +1654,41 @@ int udp4_seq_show(struct seq_file *seq, void *v)
1673} 1654}
1674 1655
1675/* ------------------------------------------------------------------------ */ 1656/* ------------------------------------------------------------------------ */
1676static struct file_operations udp4_seq_fops;
1677static struct udp_seq_afinfo udp4_seq_afinfo = { 1657static struct udp_seq_afinfo udp4_seq_afinfo = {
1678 .owner = THIS_MODULE,
1679 .name = "udp", 1658 .name = "udp",
1680 .family = AF_INET, 1659 .family = AF_INET,
1681 .hashtable = udp_hash, 1660 .hashtable = udp_hash,
1682 .seq_show = udp4_seq_show, 1661 .seq_fops = {
1683 .seq_fops = &udp4_seq_fops, 1662 .owner = THIS_MODULE,
1663 },
1664 .seq_ops = {
1665 .show = udp4_seq_show,
1666 },
1667};
1668
1669static int udp4_proc_init_net(struct net *net)
1670{
1671 return udp_proc_register(net, &udp4_seq_afinfo);
1672}
1673
1674static void udp4_proc_exit_net(struct net *net)
1675{
1676 udp_proc_unregister(net, &udp4_seq_afinfo);
1677}
1678
1679static struct pernet_operations udp4_net_ops = {
1680 .init = udp4_proc_init_net,
1681 .exit = udp4_proc_exit_net,
1684}; 1682};
1685 1683
1686int __init udp4_proc_init(void) 1684int __init udp4_proc_init(void)
1687{ 1685{
1688 return udp_proc_register(&udp4_seq_afinfo); 1686 return register_pernet_subsys(&udp4_net_ops);
1689} 1687}
1690 1688
1691void udp4_proc_exit(void) 1689void udp4_proc_exit(void)
1692{ 1690{
1693 udp_proc_unregister(&udp4_seq_afinfo); 1691 unregister_pernet_subsys(&udp4_net_ops);
1694} 1692}
1695#endif /* CONFIG_PROC_FS */ 1693#endif /* CONFIG_PROC_FS */
1696 1694
@@ -1717,12 +1715,12 @@ EXPORT_SYMBOL(udp_disconnect);
1717EXPORT_SYMBOL(udp_hash); 1715EXPORT_SYMBOL(udp_hash);
1718EXPORT_SYMBOL(udp_hash_lock); 1716EXPORT_SYMBOL(udp_hash_lock);
1719EXPORT_SYMBOL(udp_ioctl); 1717EXPORT_SYMBOL(udp_ioctl);
1720EXPORT_SYMBOL(udp_get_port);
1721EXPORT_SYMBOL(udp_prot); 1718EXPORT_SYMBOL(udp_prot);
1722EXPORT_SYMBOL(udp_sendmsg); 1719EXPORT_SYMBOL(udp_sendmsg);
1723EXPORT_SYMBOL(udp_lib_getsockopt); 1720EXPORT_SYMBOL(udp_lib_getsockopt);
1724EXPORT_SYMBOL(udp_lib_setsockopt); 1721EXPORT_SYMBOL(udp_lib_setsockopt);
1725EXPORT_SYMBOL(udp_poll); 1722EXPORT_SYMBOL(udp_poll);
1723EXPORT_SYMBOL(udp_lib_get_port);
1726 1724
1727#ifdef CONFIG_PROC_FS 1725#ifdef CONFIG_PROC_FS
1728EXPORT_SYMBOL(udp_proc_register); 1726EXPORT_SYMBOL(udp_proc_register);
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 6c55828e41ba..7288bf7977fb 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -8,11 +8,7 @@
8extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int ); 8extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int );
9extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []); 9extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []);
10 10
11extern int __udp_lib_get_port(struct sock *sk, unsigned short snum, 11extern int udp_v4_get_port(struct sock *sk, unsigned short snum);
12 struct hlist_head udptable[],
13 int (*)(const struct sock*,const struct sock*));
14extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *);
15
16 12
17extern int udp_setsockopt(struct sock *sk, int level, int optname, 13extern int udp_setsockopt(struct sock *sk, int level, int optname,
18 char __user *optval, int optlen); 14 char __user *optval, int optlen);
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 001b881ca36f..72ce26b6c4d3 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -17,17 +17,6 @@ DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly;
17 17
18struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; 18struct hlist_head udplite_hash[UDP_HTABLE_SIZE];
19 19
20int udplite_get_port(struct sock *sk, unsigned short p,
21 int (*c)(const struct sock *, const struct sock *))
22{
23 return __udp_lib_get_port(sk, p, udplite_hash, c);
24}
25
26static int udplite_v4_get_port(struct sock *sk, unsigned short snum)
27{
28 return udplite_get_port(sk, snum, ipv4_rcv_saddr_equal);
29}
30
31static int udplite_rcv(struct sk_buff *skb) 20static int udplite_rcv(struct sk_buff *skb)
32{ 21{
33 return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); 22 return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
@@ -42,10 +31,9 @@ static struct net_protocol udplite_protocol = {
42 .handler = udplite_rcv, 31 .handler = udplite_rcv,
43 .err_handler = udplite_err, 32 .err_handler = udplite_err,
44 .no_policy = 1, 33 .no_policy = 1,
34 .netns_ok = 1,
45}; 35};
46 36
47DEFINE_PROTO_INUSE(udplite)
48
49struct proto udplite_prot = { 37struct proto udplite_prot = {
50 .name = "UDP-Lite", 38 .name = "UDP-Lite",
51 .owner = THIS_MODULE, 39 .owner = THIS_MODULE,
@@ -63,13 +51,13 @@ struct proto udplite_prot = {
63 .backlog_rcv = udp_queue_rcv_skb, 51 .backlog_rcv = udp_queue_rcv_skb,
64 .hash = udp_lib_hash, 52 .hash = udp_lib_hash,
65 .unhash = udp_lib_unhash, 53 .unhash = udp_lib_unhash,
66 .get_port = udplite_v4_get_port, 54 .get_port = udp_v4_get_port,
67 .obj_size = sizeof(struct udp_sock), 55 .obj_size = sizeof(struct udp_sock),
56 .h.udp_hash = udplite_hash,
68#ifdef CONFIG_COMPAT 57#ifdef CONFIG_COMPAT
69 .compat_setsockopt = compat_udp_setsockopt, 58 .compat_setsockopt = compat_udp_setsockopt,
70 .compat_getsockopt = compat_udp_getsockopt, 59 .compat_getsockopt = compat_udp_getsockopt,
71#endif 60#endif
72 REF_PROTO_INUSE(udplite)
73}; 61};
74 62
75static struct inet_protosw udplite4_protosw = { 63static struct inet_protosw udplite4_protosw = {
@@ -83,15 +71,42 @@ static struct inet_protosw udplite4_protosw = {
83}; 71};
84 72
85#ifdef CONFIG_PROC_FS 73#ifdef CONFIG_PROC_FS
86static struct file_operations udplite4_seq_fops;
87static struct udp_seq_afinfo udplite4_seq_afinfo = { 74static struct udp_seq_afinfo udplite4_seq_afinfo = {
88 .owner = THIS_MODULE,
89 .name = "udplite", 75 .name = "udplite",
90 .family = AF_INET, 76 .family = AF_INET,
91 .hashtable = udplite_hash, 77 .hashtable = udplite_hash,
92 .seq_show = udp4_seq_show, 78 .seq_fops = {
93 .seq_fops = &udplite4_seq_fops, 79 .owner = THIS_MODULE,
80 },
81 .seq_ops = {
82 .show = udp4_seq_show,
83 },
84};
85
86static int udplite4_proc_init_net(struct net *net)
87{
88 return udp_proc_register(net, &udplite4_seq_afinfo);
89}
90
91static void udplite4_proc_exit_net(struct net *net)
92{
93 udp_proc_unregister(net, &udplite4_seq_afinfo);
94}
95
96static struct pernet_operations udplite4_net_ops = {
97 .init = udplite4_proc_init_net,
98 .exit = udplite4_proc_exit_net,
94}; 99};
100
101static __init int udplite4_proc_init(void)
102{
103 return register_pernet_subsys(&udplite4_net_ops);
104}
105#else
106static inline int udplite4_proc_init(void)
107{
108 return 0;
109}
95#endif 110#endif
96 111
97void __init udplite4_register(void) 112void __init udplite4_register(void)
@@ -104,18 +119,15 @@ void __init udplite4_register(void)
104 119
105 inet_register_protosw(&udplite4_protosw); 120 inet_register_protosw(&udplite4_protosw);
106 121
107#ifdef CONFIG_PROC_FS 122 if (udplite4_proc_init())
108 if (udp_proc_register(&udplite4_seq_afinfo)) /* udplite4_proc_init() */ 123 printk(KERN_ERR "%s: Cannot register /proc!\n", __func__);
109 printk(KERN_ERR "%s: Cannot register /proc!\n", __FUNCTION__);
110#endif
111 return; 124 return;
112 125
113out_unregister_proto: 126out_unregister_proto:
114 proto_unregister(&udplite_prot); 127 proto_unregister(&udplite_prot);
115out_register_err: 128out_register_err:
116 printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __FUNCTION__); 129 printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__);
117} 130}
118 131
119EXPORT_SYMBOL(udplite_hash); 132EXPORT_SYMBOL(udplite_hash);
120EXPORT_SYMBOL(udplite_prot); 133EXPORT_SYMBOL(udplite_prot);
121EXPORT_SYMBOL(udplite_get_port);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 10ed70491434..c63de0a72aba 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -221,7 +221,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
221 xdst = (struct xfrm_dst *)dst; 221 xdst = (struct xfrm_dst *)dst;
222 if (xdst->u.rt.idev->dev == dev) { 222 if (xdst->u.rt.idev->dev == dev) {
223 struct in_device *loopback_idev = 223 struct in_device *loopback_idev =
224 in_dev_get(dev->nd_net->loopback_dev); 224 in_dev_get(dev_net(dev)->loopback_dev);
225 BUG_ON(!loopback_idev); 225 BUG_ON(!loopback_idev);
226 226
227 do { 227 do {