diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-18 21:02:35 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-18 21:02:35 -0400 |
commit | 334d094504c2fe1c44211ecb49146ae6bca8c321 (patch) | |
tree | d3c0f68e4b9f8e3d2ccc39e7dfe5de0534a5fad9 /net/ipv4 | |
parent | d1a4be630fb068f251d64b62919f143c49ca8057 (diff) | |
parent | d1643d24c61b725bef399cc1cf2944b4c9c23177 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.26
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.26: (1090 commits)
[NET]: Fix and allocate less memory for ->priv'less netdevices
[IPV6]: Fix dangling references on error in fib6_add().
[NETLABEL]: Fix NULL deref in netlbl_unlabel_staticlist_gen() if ifindex not found
[PKT_SCHED]: Fix datalen check in tcf_simp_init().
[INET]: Uninline the __inet_inherit_port call.
[INET]: Drop the inet_inherit_port() call.
SCTP: Initialize partial_bytes_acked to 0, when all of the data is acked.
[netdrvr] forcedeth: internal simplifications; changelog removal
phylib: factor out get_phy_id from within get_phy_device
PHY: add BCM5464 support to broadcom PHY driver
cxgb3: Fix __must_check warning with dev_dbg.
tc35815: Statistics cleanup
natsemi: fix MMIO for PPC 44x platforms
[TIPC]: Cleanup of TIPC reference table code
[TIPC]: Optimized initialization of TIPC reference table
[TIPC]: Remove inlining of reference table locking routines
e1000: convert uint16_t style integers to u16
ixgb: convert uint16_t style integers to u16
sb1000.c: make const arrays static
sb1000.c: stop inlining largish static functions
...
Diffstat (limited to 'net/ipv4')
83 files changed, 2819 insertions, 1784 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0d109504ed86..f2b5270efdaa 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -243,6 +243,23 @@ void build_ehash_secret(void) | |||
243 | } | 243 | } |
244 | EXPORT_SYMBOL(build_ehash_secret); | 244 | EXPORT_SYMBOL(build_ehash_secret); |
245 | 245 | ||
246 | static inline int inet_netns_ok(struct net *net, int protocol) | ||
247 | { | ||
248 | int hash; | ||
249 | struct net_protocol *ipprot; | ||
250 | |||
251 | if (net == &init_net) | ||
252 | return 1; | ||
253 | |||
254 | hash = protocol & (MAX_INET_PROTOS - 1); | ||
255 | ipprot = rcu_dereference(inet_protos[hash]); | ||
256 | |||
257 | if (ipprot == NULL) | ||
258 | /* raw IP is OK */ | ||
259 | return 1; | ||
260 | return ipprot->netns_ok; | ||
261 | } | ||
262 | |||
246 | /* | 263 | /* |
247 | * Create an inet socket. | 264 | * Create an inet socket. |
248 | */ | 265 | */ |
@@ -259,9 +276,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol) | |||
259 | int try_loading_module = 0; | 276 | int try_loading_module = 0; |
260 | int err; | 277 | int err; |
261 | 278 | ||
262 | if (net != &init_net) | ||
263 | return -EAFNOSUPPORT; | ||
264 | |||
265 | if (sock->type != SOCK_RAW && | 279 | if (sock->type != SOCK_RAW && |
266 | sock->type != SOCK_DGRAM && | 280 | sock->type != SOCK_DGRAM && |
267 | !inet_ehash_secret) | 281 | !inet_ehash_secret) |
@@ -320,6 +334,10 @@ lookup_protocol: | |||
320 | if (answer->capability > 0 && !capable(answer->capability)) | 334 | if (answer->capability > 0 && !capable(answer->capability)) |
321 | goto out_rcu_unlock; | 335 | goto out_rcu_unlock; |
322 | 336 | ||
337 | err = -EAFNOSUPPORT; | ||
338 | if (!inet_netns_ok(net, protocol)) | ||
339 | goto out_rcu_unlock; | ||
340 | |||
323 | sock->ops = answer->ops; | 341 | sock->ops = answer->ops; |
324 | answer_prot = answer->prot; | 342 | answer_prot = answer->prot; |
325 | answer_no_check = answer->no_check; | 343 | answer_no_check = answer->no_check; |
@@ -446,7 +464,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | |||
446 | if (addr_len < sizeof(struct sockaddr_in)) | 464 | if (addr_len < sizeof(struct sockaddr_in)) |
447 | goto out; | 465 | goto out; |
448 | 466 | ||
449 | chk_addr_ret = inet_addr_type(&init_net, addr->sin_addr.s_addr); | 467 | chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); |
450 | 468 | ||
451 | /* Not specified by any standard per-se, however it breaks too | 469 | /* Not specified by any standard per-se, however it breaks too |
452 | * many applications when removed. It is unfortunate since | 470 | * many applications when removed. It is unfortunate since |
@@ -784,6 +802,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
784 | { | 802 | { |
785 | struct sock *sk = sock->sk; | 803 | struct sock *sk = sock->sk; |
786 | int err = 0; | 804 | int err = 0; |
805 | struct net *net = sock_net(sk); | ||
787 | 806 | ||
788 | switch (cmd) { | 807 | switch (cmd) { |
789 | case SIOCGSTAMP: | 808 | case SIOCGSTAMP: |
@@ -795,12 +814,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
795 | case SIOCADDRT: | 814 | case SIOCADDRT: |
796 | case SIOCDELRT: | 815 | case SIOCDELRT: |
797 | case SIOCRTMSG: | 816 | case SIOCRTMSG: |
798 | err = ip_rt_ioctl(sk->sk_net, cmd, (void __user *)arg); | 817 | err = ip_rt_ioctl(net, cmd, (void __user *)arg); |
799 | break; | 818 | break; |
800 | case SIOCDARP: | 819 | case SIOCDARP: |
801 | case SIOCGARP: | 820 | case SIOCGARP: |
802 | case SIOCSARP: | 821 | case SIOCSARP: |
803 | err = arp_ioctl(sk->sk_net, cmd, (void __user *)arg); | 822 | err = arp_ioctl(net, cmd, (void __user *)arg); |
804 | break; | 823 | break; |
805 | case SIOCGIFADDR: | 824 | case SIOCGIFADDR: |
806 | case SIOCSIFADDR: | 825 | case SIOCSIFADDR: |
@@ -813,7 +832,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
813 | case SIOCSIFPFLAGS: | 832 | case SIOCSIFPFLAGS: |
814 | case SIOCGIFPFLAGS: | 833 | case SIOCGIFPFLAGS: |
815 | case SIOCSIFFLAGS: | 834 | case SIOCSIFFLAGS: |
816 | err = devinet_ioctl(cmd, (void __user *)arg); | 835 | err = devinet_ioctl(net, cmd, (void __user *)arg); |
817 | break; | 836 | break; |
818 | default: | 837 | default: |
819 | if (sk->sk_prot->ioctl) | 838 | if (sk->sk_prot->ioctl) |
@@ -1058,8 +1077,8 @@ static int inet_sk_reselect_saddr(struct sock *sk) | |||
1058 | 1077 | ||
1059 | if (sysctl_ip_dynaddr > 1) { | 1078 | if (sysctl_ip_dynaddr > 1) { |
1060 | printk(KERN_INFO "%s(): shifting inet->" | 1079 | printk(KERN_INFO "%s(): shifting inet->" |
1061 | "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", | 1080 | "saddr from " NIPQUAD_FMT " to " NIPQUAD_FMT "\n", |
1062 | __FUNCTION__, | 1081 | __func__, |
1063 | NIPQUAD(old_saddr), | 1082 | NIPQUAD(old_saddr), |
1064 | NIPQUAD(new_saddr)); | 1083 | NIPQUAD(new_saddr)); |
1065 | } | 1084 | } |
@@ -1113,7 +1132,7 @@ int inet_sk_rebuild_header(struct sock *sk) | |||
1113 | }; | 1132 | }; |
1114 | 1133 | ||
1115 | security_sk_classify_flow(sk, &fl); | 1134 | security_sk_classify_flow(sk, &fl); |
1116 | err = ip_route_output_flow(&init_net, &rt, &fl, sk, 0); | 1135 | err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0); |
1117 | } | 1136 | } |
1118 | if (!err) | 1137 | if (!err) |
1119 | sk_setup_caps(sk, &rt->u.dst); | 1138 | sk_setup_caps(sk, &rt->u.dst); |
@@ -1231,6 +1250,29 @@ out: | |||
1231 | return segs; | 1250 | return segs; |
1232 | } | 1251 | } |
1233 | 1252 | ||
1253 | int inet_ctl_sock_create(struct sock **sk, unsigned short family, | ||
1254 | unsigned short type, unsigned char protocol, | ||
1255 | struct net *net) | ||
1256 | { | ||
1257 | struct socket *sock; | ||
1258 | int rc = sock_create_kern(family, type, protocol, &sock); | ||
1259 | |||
1260 | if (rc == 0) { | ||
1261 | *sk = sock->sk; | ||
1262 | (*sk)->sk_allocation = GFP_ATOMIC; | ||
1263 | /* | ||
1264 | * Unhash it so that IP input processing does not even see it, | ||
1265 | * we do not wish this socket to see incoming packets. | ||
1266 | */ | ||
1267 | (*sk)->sk_prot->unhash(*sk); | ||
1268 | |||
1269 | sk_change_net(*sk, net); | ||
1270 | } | ||
1271 | return rc; | ||
1272 | } | ||
1273 | |||
1274 | EXPORT_SYMBOL_GPL(inet_ctl_sock_create); | ||
1275 | |||
1234 | unsigned long snmp_fold_field(void *mib[], int offt) | 1276 | unsigned long snmp_fold_field(void *mib[], int offt) |
1235 | { | 1277 | { |
1236 | unsigned long res = 0; | 1278 | unsigned long res = 0; |
@@ -1283,17 +1325,20 @@ static struct net_protocol tcp_protocol = { | |||
1283 | .gso_send_check = tcp_v4_gso_send_check, | 1325 | .gso_send_check = tcp_v4_gso_send_check, |
1284 | .gso_segment = tcp_tso_segment, | 1326 | .gso_segment = tcp_tso_segment, |
1285 | .no_policy = 1, | 1327 | .no_policy = 1, |
1328 | .netns_ok = 1, | ||
1286 | }; | 1329 | }; |
1287 | 1330 | ||
1288 | static struct net_protocol udp_protocol = { | 1331 | static struct net_protocol udp_protocol = { |
1289 | .handler = udp_rcv, | 1332 | .handler = udp_rcv, |
1290 | .err_handler = udp_err, | 1333 | .err_handler = udp_err, |
1291 | .no_policy = 1, | 1334 | .no_policy = 1, |
1335 | .netns_ok = 1, | ||
1292 | }; | 1336 | }; |
1293 | 1337 | ||
1294 | static struct net_protocol icmp_protocol = { | 1338 | static struct net_protocol icmp_protocol = { |
1295 | .handler = icmp_rcv, | 1339 | .handler = icmp_rcv, |
1296 | .no_policy = 1, | 1340 | .no_policy = 1, |
1341 | .netns_ok = 1, | ||
1297 | }; | 1342 | }; |
1298 | 1343 | ||
1299 | static int __init init_ipv4_mibs(void) | 1344 | static int __init init_ipv4_mibs(void) |
@@ -1414,7 +1459,7 @@ static int __init inet_init(void) | |||
1414 | 1459 | ||
1415 | ip_init(); | 1460 | ip_init(); |
1416 | 1461 | ||
1417 | tcp_v4_init(&inet_family_ops); | 1462 | tcp_v4_init(); |
1418 | 1463 | ||
1419 | /* Setup TCP slab cache for open requests. */ | 1464 | /* Setup TCP slab cache for open requests. */ |
1420 | tcp_init(); | 1465 | tcp_init(); |
@@ -1429,7 +1474,8 @@ static int __init inet_init(void) | |||
1429 | * Set the ICMP layer up | 1474 | * Set the ICMP layer up |
1430 | */ | 1475 | */ |
1431 | 1476 | ||
1432 | icmp_init(&inet_family_ops); | 1477 | if (icmp_init() < 0) |
1478 | panic("Failed to create the ICMP control socket.\n"); | ||
1433 | 1479 | ||
1434 | /* | 1480 | /* |
1435 | * Initialise the multicast router | 1481 | * Initialise the multicast router |
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8e17f65f4002..68b72a7a1806 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
@@ -242,7 +242,7 @@ static int arp_constructor(struct neighbour *neigh) | |||
242 | return -EINVAL; | 242 | return -EINVAL; |
243 | } | 243 | } |
244 | 244 | ||
245 | neigh->type = inet_addr_type(&init_net, addr); | 245 | neigh->type = inet_addr_type(dev_net(dev), addr); |
246 | 246 | ||
247 | parms = in_dev->arp_parms; | 247 | parms = in_dev->arp_parms; |
248 | __neigh_parms_put(neigh->parms); | 248 | __neigh_parms_put(neigh->parms); |
@@ -341,14 +341,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) | |||
341 | switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { | 341 | switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { |
342 | default: | 342 | default: |
343 | case 0: /* By default announce any local IP */ | 343 | case 0: /* By default announce any local IP */ |
344 | if (skb && inet_addr_type(&init_net, ip_hdr(skb)->saddr) == RTN_LOCAL) | 344 | if (skb && inet_addr_type(dev_net(dev), ip_hdr(skb)->saddr) == RTN_LOCAL) |
345 | saddr = ip_hdr(skb)->saddr; | 345 | saddr = ip_hdr(skb)->saddr; |
346 | break; | 346 | break; |
347 | case 1: /* Restrict announcements of saddr in same subnet */ | 347 | case 1: /* Restrict announcements of saddr in same subnet */ |
348 | if (!skb) | 348 | if (!skb) |
349 | break; | 349 | break; |
350 | saddr = ip_hdr(skb)->saddr; | 350 | saddr = ip_hdr(skb)->saddr; |
351 | if (inet_addr_type(&init_net, saddr) == RTN_LOCAL) { | 351 | if (inet_addr_type(dev_net(dev), saddr) == RTN_LOCAL) { |
352 | /* saddr should be known to target */ | 352 | /* saddr should be known to target */ |
353 | if (inet_addr_onlink(in_dev, target, saddr)) | 353 | if (inet_addr_onlink(in_dev, target, saddr)) |
354 | break; | 354 | break; |
@@ -424,7 +424,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) | |||
424 | int flag = 0; | 424 | int flag = 0; |
425 | /*unsigned long now; */ | 425 | /*unsigned long now; */ |
426 | 426 | ||
427 | if (ip_route_output_key(&init_net, &rt, &fl) < 0) | 427 | if (ip_route_output_key(dev_net(dev), &rt, &fl) < 0) |
428 | return 1; | 428 | return 1; |
429 | if (rt->u.dst.dev != dev) { | 429 | if (rt->u.dst.dev != dev) { |
430 | NET_INC_STATS_BH(LINUX_MIB_ARPFILTER); | 430 | NET_INC_STATS_BH(LINUX_MIB_ARPFILTER); |
@@ -475,9 +475,9 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) | |||
475 | return 1; | 475 | return 1; |
476 | } | 476 | } |
477 | 477 | ||
478 | paddr = ((struct rtable*)skb->dst)->rt_gateway; | 478 | paddr = skb->rtable->rt_gateway; |
479 | 479 | ||
480 | if (arp_set_predefined(inet_addr_type(&init_net, paddr), haddr, paddr, dev)) | 480 | if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev)) |
481 | return 0; | 481 | return 0; |
482 | 482 | ||
483 | n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); | 483 | n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); |
@@ -570,14 +570,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, | |||
570 | * Allocate a buffer | 570 | * Allocate a buffer |
571 | */ | 571 | */ |
572 | 572 | ||
573 | skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4) | 573 | skb = alloc_skb(arp_hdr_len(dev) + LL_RESERVED_SPACE(dev), GFP_ATOMIC); |
574 | + LL_RESERVED_SPACE(dev), GFP_ATOMIC); | ||
575 | if (skb == NULL) | 574 | if (skb == NULL) |
576 | return NULL; | 575 | return NULL; |
577 | 576 | ||
578 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); | 577 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); |
579 | skb_reset_network_header(skb); | 578 | skb_reset_network_header(skb); |
580 | arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4)); | 579 | arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev)); |
581 | skb->dev = dev; | 580 | skb->dev = dev; |
582 | skb->protocol = htons(ETH_P_ARP); | 581 | skb->protocol = htons(ETH_P_ARP); |
583 | if (src_hw == NULL) | 582 | if (src_hw == NULL) |
@@ -710,6 +709,7 @@ static int arp_process(struct sk_buff *skb) | |||
710 | u16 dev_type = dev->type; | 709 | u16 dev_type = dev->type; |
711 | int addr_type; | 710 | int addr_type; |
712 | struct neighbour *n; | 711 | struct neighbour *n; |
712 | struct net *net = dev_net(dev); | ||
713 | 713 | ||
714 | /* arp_rcv below verifies the ARP header and verifies the device | 714 | /* arp_rcv below verifies the ARP header and verifies the device |
715 | * is ARP'able. | 715 | * is ARP'able. |
@@ -805,7 +805,7 @@ static int arp_process(struct sk_buff *skb) | |||
805 | /* Special case: IPv4 duplicate address detection packet (RFC2131) */ | 805 | /* Special case: IPv4 duplicate address detection packet (RFC2131) */ |
806 | if (sip == 0) { | 806 | if (sip == 0) { |
807 | if (arp->ar_op == htons(ARPOP_REQUEST) && | 807 | if (arp->ar_op == htons(ARPOP_REQUEST) && |
808 | inet_addr_type(&init_net, tip) == RTN_LOCAL && | 808 | inet_addr_type(net, tip) == RTN_LOCAL && |
809 | !arp_ignore(in_dev, sip, tip)) | 809 | !arp_ignore(in_dev, sip, tip)) |
810 | arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, | 810 | arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, |
811 | dev->dev_addr, sha); | 811 | dev->dev_addr, sha); |
@@ -815,7 +815,7 @@ static int arp_process(struct sk_buff *skb) | |||
815 | if (arp->ar_op == htons(ARPOP_REQUEST) && | 815 | if (arp->ar_op == htons(ARPOP_REQUEST) && |
816 | ip_route_input(skb, tip, sip, 0, dev) == 0) { | 816 | ip_route_input(skb, tip, sip, 0, dev) == 0) { |
817 | 817 | ||
818 | rt = (struct rtable*)skb->dst; | 818 | rt = skb->rtable; |
819 | addr_type = rt->rt_type; | 819 | addr_type = rt->rt_type; |
820 | 820 | ||
821 | if (addr_type == RTN_LOCAL) { | 821 | if (addr_type == RTN_LOCAL) { |
@@ -835,7 +835,7 @@ static int arp_process(struct sk_buff *skb) | |||
835 | goto out; | 835 | goto out; |
836 | } else if (IN_DEV_FORWARD(in_dev)) { | 836 | } else if (IN_DEV_FORWARD(in_dev)) { |
837 | if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && | 837 | if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && |
838 | (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &init_net, &tip, dev, 0))) { | 838 | (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { |
839 | n = neigh_event_ns(&arp_tbl, sha, &sip, dev); | 839 | n = neigh_event_ns(&arp_tbl, sha, &sip, dev); |
840 | if (n) | 840 | if (n) |
841 | neigh_release(n); | 841 | neigh_release(n); |
@@ -858,14 +858,14 @@ static int arp_process(struct sk_buff *skb) | |||
858 | 858 | ||
859 | n = __neigh_lookup(&arp_tbl, &sip, dev, 0); | 859 | n = __neigh_lookup(&arp_tbl, &sip, dev, 0); |
860 | 860 | ||
861 | if (IPV4_DEVCONF_ALL(dev->nd_net, ARP_ACCEPT)) { | 861 | if (IPV4_DEVCONF_ALL(dev_net(dev), ARP_ACCEPT)) { |
862 | /* Unsolicited ARP is not accepted by default. | 862 | /* Unsolicited ARP is not accepted by default. |
863 | It is possible, that this option should be enabled for some | 863 | It is possible, that this option should be enabled for some |
864 | devices (strip is candidate) | 864 | devices (strip is candidate) |
865 | */ | 865 | */ |
866 | if (n == NULL && | 866 | if (n == NULL && |
867 | arp->ar_op == htons(ARPOP_REPLY) && | 867 | arp->ar_op == htons(ARPOP_REPLY) && |
868 | inet_addr_type(&init_net, sip) == RTN_UNICAST) | 868 | inet_addr_type(net, sip) == RTN_UNICAST) |
869 | n = __neigh_lookup(&arp_tbl, &sip, dev, 1); | 869 | n = __neigh_lookup(&arp_tbl, &sip, dev, 1); |
870 | } | 870 | } |
871 | 871 | ||
@@ -912,13 +912,8 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, | |||
912 | { | 912 | { |
913 | struct arphdr *arp; | 913 | struct arphdr *arp; |
914 | 914 | ||
915 | if (dev->nd_net != &init_net) | ||
916 | goto freeskb; | ||
917 | |||
918 | /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ | 915 | /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ |
919 | if (!pskb_may_pull(skb, (sizeof(struct arphdr) + | 916 | if (!pskb_may_pull(skb, arp_hdr_len(dev))) |
920 | (2 * dev->addr_len) + | ||
921 | (2 * sizeof(u32))))) | ||
922 | goto freeskb; | 917 | goto freeskb; |
923 | 918 | ||
924 | arp = arp_hdr(skb); | 919 | arp = arp_hdr(skb); |
@@ -1201,9 +1196,6 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo | |||
1201 | { | 1196 | { |
1202 | struct net_device *dev = ptr; | 1197 | struct net_device *dev = ptr; |
1203 | 1198 | ||
1204 | if (dev->nd_net != &init_net) | ||
1205 | return NOTIFY_DONE; | ||
1206 | |||
1207 | switch (event) { | 1199 | switch (event) { |
1208 | case NETDEV_CHANGEADDR: | 1200 | case NETDEV_CHANGEADDR: |
1209 | neigh_changeaddr(&arp_tbl, dev); | 1201 | neigh_changeaddr(&arp_tbl, dev); |
@@ -1318,7 +1310,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, | |||
1318 | #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) | 1310 | #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) |
1319 | } | 1311 | } |
1320 | #endif | 1312 | #endif |
1321 | sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(*(u32*)n->primary_key)); | 1313 | sprintf(tbuf, NIPQUAD_FMT, NIPQUAD(*(u32*)n->primary_key)); |
1322 | seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", | 1314 | seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", |
1323 | tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name); | 1315 | tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name); |
1324 | read_unlock(&n->lock); | 1316 | read_unlock(&n->lock); |
@@ -1331,7 +1323,7 @@ static void arp_format_pneigh_entry(struct seq_file *seq, | |||
1331 | int hatype = dev ? dev->type : 0; | 1323 | int hatype = dev ? dev->type : 0; |
1332 | char tbuf[16]; | 1324 | char tbuf[16]; |
1333 | 1325 | ||
1334 | sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(*(u32*)n->key)); | 1326 | sprintf(tbuf, NIPQUAD_FMT, NIPQUAD(*(u32*)n->key)); |
1335 | seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", | 1327 | seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", |
1336 | tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00", | 1328 | tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00", |
1337 | dev ? dev->name : "*"); | 1329 | dev ? dev->name : "*"); |
@@ -1385,13 +1377,29 @@ static const struct file_operations arp_seq_fops = { | |||
1385 | .release = seq_release_net, | 1377 | .release = seq_release_net, |
1386 | }; | 1378 | }; |
1387 | 1379 | ||
1388 | static int __init arp_proc_init(void) | 1380 | |
1381 | static int __net_init arp_net_init(struct net *net) | ||
1389 | { | 1382 | { |
1390 | if (!proc_net_fops_create(&init_net, "arp", S_IRUGO, &arp_seq_fops)) | 1383 | if (!proc_net_fops_create(net, "arp", S_IRUGO, &arp_seq_fops)) |
1391 | return -ENOMEM; | 1384 | return -ENOMEM; |
1392 | return 0; | 1385 | return 0; |
1393 | } | 1386 | } |
1394 | 1387 | ||
1388 | static void __net_exit arp_net_exit(struct net *net) | ||
1389 | { | ||
1390 | proc_net_remove(net, "arp"); | ||
1391 | } | ||
1392 | |||
1393 | static struct pernet_operations arp_net_ops = { | ||
1394 | .init = arp_net_init, | ||
1395 | .exit = arp_net_exit, | ||
1396 | }; | ||
1397 | |||
1398 | static int __init arp_proc_init(void) | ||
1399 | { | ||
1400 | return register_pernet_subsys(&arp_net_ops); | ||
1401 | } | ||
1402 | |||
1395 | #else /* CONFIG_PROC_FS */ | 1403 | #else /* CONFIG_PROC_FS */ |
1396 | 1404 | ||
1397 | static int __init arp_proc_init(void) | 1405 | static int __init arp_proc_init(void) |
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 8cd357f41283..4637ded3dba8 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c | |||
@@ -1800,7 +1800,6 @@ int cipso_v4_sock_setattr(struct sock *sk, | |||
1800 | } | 1800 | } |
1801 | memcpy(opt->__data, buf, buf_len); | 1801 | memcpy(opt->__data, buf, buf_len); |
1802 | opt->optlen = opt_len; | 1802 | opt->optlen = opt_len; |
1803 | opt->is_data = 1; | ||
1804 | opt->cipso = sizeof(struct iphdr); | 1803 | opt->cipso = sizeof(struct iphdr); |
1805 | kfree(buf); | 1804 | kfree(buf); |
1806 | buf = NULL; | 1805 | buf = NULL; |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 87490f7bb0f7..6848e4760f34 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -165,7 +165,7 @@ static struct in_device *inetdev_init(struct net_device *dev) | |||
165 | if (!in_dev) | 165 | if (!in_dev) |
166 | goto out; | 166 | goto out; |
167 | INIT_RCU_HEAD(&in_dev->rcu_head); | 167 | INIT_RCU_HEAD(&in_dev->rcu_head); |
168 | memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt, | 168 | memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt, |
169 | sizeof(in_dev->cnf)); | 169 | sizeof(in_dev->cnf)); |
170 | in_dev->cnf.sysctl = NULL; | 170 | in_dev->cnf.sysctl = NULL; |
171 | in_dev->dev = dev; | 171 | in_dev->dev = dev; |
@@ -437,7 +437,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, | |||
437 | 437 | ||
438 | static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | 438 | static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) |
439 | { | 439 | { |
440 | struct net *net = skb->sk->sk_net; | 440 | struct net *net = sock_net(skb->sk); |
441 | struct nlattr *tb[IFA_MAX+1]; | 441 | struct nlattr *tb[IFA_MAX+1]; |
442 | struct in_device *in_dev; | 442 | struct in_device *in_dev; |
443 | struct ifaddrmsg *ifm; | 443 | struct ifaddrmsg *ifm; |
@@ -446,9 +446,6 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg | |||
446 | 446 | ||
447 | ASSERT_RTNL(); | 447 | ASSERT_RTNL(); |
448 | 448 | ||
449 | if (net != &init_net) | ||
450 | return -EINVAL; | ||
451 | |||
452 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); | 449 | err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); |
453 | if (err < 0) | 450 | if (err < 0) |
454 | goto errout; | 451 | goto errout; |
@@ -555,14 +552,11 @@ errout: | |||
555 | 552 | ||
556 | static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | 553 | static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) |
557 | { | 554 | { |
558 | struct net *net = skb->sk->sk_net; | 555 | struct net *net = sock_net(skb->sk); |
559 | struct in_ifaddr *ifa; | 556 | struct in_ifaddr *ifa; |
560 | 557 | ||
561 | ASSERT_RTNL(); | 558 | ASSERT_RTNL(); |
562 | 559 | ||
563 | if (net != &init_net) | ||
564 | return -EINVAL; | ||
565 | |||
566 | ifa = rtm_to_ifaddr(net, nlh); | 560 | ifa = rtm_to_ifaddr(net, nlh); |
567 | if (IS_ERR(ifa)) | 561 | if (IS_ERR(ifa)) |
568 | return PTR_ERR(ifa); | 562 | return PTR_ERR(ifa); |
@@ -595,7 +589,7 @@ static __inline__ int inet_abc_len(__be32 addr) | |||
595 | } | 589 | } |
596 | 590 | ||
597 | 591 | ||
598 | int devinet_ioctl(unsigned int cmd, void __user *arg) | 592 | int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) |
599 | { | 593 | { |
600 | struct ifreq ifr; | 594 | struct ifreq ifr; |
601 | struct sockaddr_in sin_orig; | 595 | struct sockaddr_in sin_orig; |
@@ -624,7 +618,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) | |||
624 | *colon = 0; | 618 | *colon = 0; |
625 | 619 | ||
626 | #ifdef CONFIG_KMOD | 620 | #ifdef CONFIG_KMOD |
627 | dev_load(&init_net, ifr.ifr_name); | 621 | dev_load(net, ifr.ifr_name); |
628 | #endif | 622 | #endif |
629 | 623 | ||
630 | switch (cmd) { | 624 | switch (cmd) { |
@@ -665,7 +659,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) | |||
665 | rtnl_lock(); | 659 | rtnl_lock(); |
666 | 660 | ||
667 | ret = -ENODEV; | 661 | ret = -ENODEV; |
668 | if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL) | 662 | if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL) |
669 | goto done; | 663 | goto done; |
670 | 664 | ||
671 | if (colon) | 665 | if (colon) |
@@ -878,6 +872,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) | |||
878 | { | 872 | { |
879 | __be32 addr = 0; | 873 | __be32 addr = 0; |
880 | struct in_device *in_dev; | 874 | struct in_device *in_dev; |
875 | struct net *net = dev_net(dev); | ||
881 | 876 | ||
882 | rcu_read_lock(); | 877 | rcu_read_lock(); |
883 | in_dev = __in_dev_get_rcu(dev); | 878 | in_dev = __in_dev_get_rcu(dev); |
@@ -906,7 +901,7 @@ no_in_dev: | |||
906 | */ | 901 | */ |
907 | read_lock(&dev_base_lock); | 902 | read_lock(&dev_base_lock); |
908 | rcu_read_lock(); | 903 | rcu_read_lock(); |
909 | for_each_netdev(&init_net, dev) { | 904 | for_each_netdev(net, dev) { |
910 | if ((in_dev = __in_dev_get_rcu(dev)) == NULL) | 905 | if ((in_dev = __in_dev_get_rcu(dev)) == NULL) |
911 | continue; | 906 | continue; |
912 | 907 | ||
@@ -979,7 +974,7 @@ __be32 inet_confirm_addr(struct in_device *in_dev, | |||
979 | if (scope != RT_SCOPE_LINK) | 974 | if (scope != RT_SCOPE_LINK) |
980 | return confirm_addr_indev(in_dev, dst, local, scope); | 975 | return confirm_addr_indev(in_dev, dst, local, scope); |
981 | 976 | ||
982 | net = in_dev->dev->nd_net; | 977 | net = dev_net(in_dev->dev); |
983 | read_lock(&dev_base_lock); | 978 | read_lock(&dev_base_lock); |
984 | rcu_read_lock(); | 979 | rcu_read_lock(); |
985 | for_each_netdev(net, dev) { | 980 | for_each_netdev(net, dev) { |
@@ -1045,9 +1040,6 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, | |||
1045 | struct net_device *dev = ptr; | 1040 | struct net_device *dev = ptr; |
1046 | struct in_device *in_dev = __in_dev_get_rtnl(dev); | 1041 | struct in_device *in_dev = __in_dev_get_rtnl(dev); |
1047 | 1042 | ||
1048 | if (dev->nd_net != &init_net) | ||
1049 | return NOTIFY_DONE; | ||
1050 | |||
1051 | ASSERT_RTNL(); | 1043 | ASSERT_RTNL(); |
1052 | 1044 | ||
1053 | if (!in_dev) { | 1045 | if (!in_dev) { |
@@ -1166,16 +1158,13 @@ nla_put_failure: | |||
1166 | 1158 | ||
1167 | static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) | 1159 | static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) |
1168 | { | 1160 | { |
1169 | struct net *net = skb->sk->sk_net; | 1161 | struct net *net = sock_net(skb->sk); |
1170 | int idx, ip_idx; | 1162 | int idx, ip_idx; |
1171 | struct net_device *dev; | 1163 | struct net_device *dev; |
1172 | struct in_device *in_dev; | 1164 | struct in_device *in_dev; |
1173 | struct in_ifaddr *ifa; | 1165 | struct in_ifaddr *ifa; |
1174 | int s_ip_idx, s_idx = cb->args[0]; | 1166 | int s_ip_idx, s_idx = cb->args[0]; |
1175 | 1167 | ||
1176 | if (net != &init_net) | ||
1177 | return 0; | ||
1178 | |||
1179 | s_ip_idx = ip_idx = cb->args[1]; | 1168 | s_ip_idx = ip_idx = cb->args[1]; |
1180 | idx = 0; | 1169 | idx = 0; |
1181 | for_each_netdev(net, dev) { | 1170 | for_each_netdev(net, dev) { |
@@ -1214,7 +1203,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, | |||
1214 | int err = -ENOBUFS; | 1203 | int err = -ENOBUFS; |
1215 | struct net *net; | 1204 | struct net *net; |
1216 | 1205 | ||
1217 | net = ifa->ifa_dev->dev->nd_net; | 1206 | net = dev_net(ifa->ifa_dev->dev); |
1218 | skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); | 1207 | skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); |
1219 | if (skb == NULL) | 1208 | if (skb == NULL) |
1220 | goto errout; | 1209 | goto errout; |
@@ -1528,7 +1517,7 @@ static void devinet_sysctl_register(struct in_device *idev) | |||
1528 | { | 1517 | { |
1529 | neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4, | 1518 | neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4, |
1530 | NET_IPV4_NEIGH, "ipv4", NULL, NULL); | 1519 | NET_IPV4_NEIGH, "ipv4", NULL, NULL); |
1531 | __devinet_sysctl_register(idev->dev->nd_net, idev->dev->name, | 1520 | __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, |
1532 | idev->dev->ifindex, &idev->cnf); | 1521 | idev->dev->ifindex, &idev->cnf); |
1533 | } | 1522 | } |
1534 | 1523 | ||
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 86ff2711fc95..0f1557a4ac7a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -257,7 +257,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
257 | if (in_dev == NULL) | 257 | if (in_dev == NULL) |
258 | goto e_inval; | 258 | goto e_inval; |
259 | 259 | ||
260 | net = dev->nd_net; | 260 | net = dev_net(dev); |
261 | if (fib_lookup(net, &fl, &res)) | 261 | if (fib_lookup(net, &fl, &res)) |
262 | goto last_resort; | 262 | goto last_resort; |
263 | if (res.type != RTN_UNICAST) | 263 | if (res.type != RTN_UNICAST) |
@@ -583,7 +583,7 @@ errout: | |||
583 | 583 | ||
584 | static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | 584 | static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) |
585 | { | 585 | { |
586 | struct net *net = skb->sk->sk_net; | 586 | struct net *net = sock_net(skb->sk); |
587 | struct fib_config cfg; | 587 | struct fib_config cfg; |
588 | struct fib_table *tb; | 588 | struct fib_table *tb; |
589 | int err; | 589 | int err; |
@@ -605,7 +605,7 @@ errout: | |||
605 | 605 | ||
606 | static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) | 606 | static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) |
607 | { | 607 | { |
608 | struct net *net = skb->sk->sk_net; | 608 | struct net *net = sock_net(skb->sk); |
609 | struct fib_config cfg; | 609 | struct fib_config cfg; |
610 | struct fib_table *tb; | 610 | struct fib_table *tb; |
611 | int err; | 611 | int err; |
@@ -627,7 +627,7 @@ errout: | |||
627 | 627 | ||
628 | static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) | 628 | static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) |
629 | { | 629 | { |
630 | struct net *net = skb->sk->sk_net; | 630 | struct net *net = sock_net(skb->sk); |
631 | unsigned int h, s_h; | 631 | unsigned int h, s_h; |
632 | unsigned int e = 0, s_e; | 632 | unsigned int e = 0, s_e; |
633 | struct fib_table *tb; | 633 | struct fib_table *tb; |
@@ -674,7 +674,7 @@ out: | |||
674 | 674 | ||
675 | static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) | 675 | static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) |
676 | { | 676 | { |
677 | struct net *net = ifa->ifa_dev->dev->nd_net; | 677 | struct net *net = dev_net(ifa->ifa_dev->dev); |
678 | struct fib_table *tb; | 678 | struct fib_table *tb; |
679 | struct fib_config cfg = { | 679 | struct fib_config cfg = { |
680 | .fc_protocol = RTPROT_KERNEL, | 680 | .fc_protocol = RTPROT_KERNEL, |
@@ -801,15 +801,15 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) | |||
801 | fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); | 801 | fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); |
802 | 802 | ||
803 | /* Check, that this local address finally disappeared. */ | 803 | /* Check, that this local address finally disappeared. */ |
804 | if (inet_addr_type(dev->nd_net, ifa->ifa_local) != RTN_LOCAL) { | 804 | if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { |
805 | /* And the last, but not the least thing. | 805 | /* And the last, but not the least thing. |
806 | We must flush stray FIB entries. | 806 | We must flush stray FIB entries. |
807 | 807 | ||
808 | First of all, we scan fib_info list searching | 808 | First of all, we scan fib_info list searching |
809 | for stray nexthop entries, then ignite fib_flush. | 809 | for stray nexthop entries, then ignite fib_flush. |
810 | */ | 810 | */ |
811 | if (fib_sync_down_addr(dev->nd_net, ifa->ifa_local)) | 811 | if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local)) |
812 | fib_flush(dev->nd_net); | 812 | fib_flush(dev_net(dev)); |
813 | } | 813 | } |
814 | } | 814 | } |
815 | #undef LOCAL_OK | 815 | #undef LOCAL_OK |
@@ -857,7 +857,7 @@ static void nl_fib_input(struct sk_buff *skb) | |||
857 | struct fib_table *tb; | 857 | struct fib_table *tb; |
858 | u32 pid; | 858 | u32 pid; |
859 | 859 | ||
860 | net = skb->sk->sk_net; | 860 | net = sock_net(skb->sk); |
861 | nlh = nlmsg_hdr(skb); | 861 | nlh = nlmsg_hdr(skb); |
862 | if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || | 862 | if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || |
863 | nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) | 863 | nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) |
@@ -899,7 +899,7 @@ static void nl_fib_lookup_exit(struct net *net) | |||
899 | static void fib_disable_ip(struct net_device *dev, int force) | 899 | static void fib_disable_ip(struct net_device *dev, int force) |
900 | { | 900 | { |
901 | if (fib_sync_down_dev(dev, force)) | 901 | if (fib_sync_down_dev(dev, force)) |
902 | fib_flush(dev->nd_net); | 902 | fib_flush(dev_net(dev)); |
903 | rt_cache_flush(0); | 903 | rt_cache_flush(0); |
904 | arp_ifdown(dev); | 904 | arp_ifdown(dev); |
905 | } | 905 | } |
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 8d58d85dfac6..02088deb0461 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c | |||
@@ -821,7 +821,7 @@ static struct fib_alias *fib_get_first(struct seq_file *seq) | |||
821 | struct fib_table *main_table; | 821 | struct fib_table *main_table; |
822 | struct fn_hash *table; | 822 | struct fn_hash *table; |
823 | 823 | ||
824 | main_table = fib_get_table(iter->p.net, RT_TABLE_MAIN); | 824 | main_table = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); |
825 | table = (struct fn_hash *)main_table->tb_data; | 825 | table = (struct fn_hash *)main_table->tb_data; |
826 | 826 | ||
827 | iter->bucket = 0; | 827 | iter->bucket = 0; |
@@ -959,11 +959,10 @@ static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos) | |||
959 | static void *fib_seq_start(struct seq_file *seq, loff_t *pos) | 959 | static void *fib_seq_start(struct seq_file *seq, loff_t *pos) |
960 | __acquires(fib_hash_lock) | 960 | __acquires(fib_hash_lock) |
961 | { | 961 | { |
962 | struct fib_iter_state *iter = seq->private; | ||
963 | void *v = NULL; | 962 | void *v = NULL; |
964 | 963 | ||
965 | read_lock(&fib_hash_lock); | 964 | read_lock(&fib_hash_lock); |
966 | if (fib_get_table(iter->p.net, RT_TABLE_MAIN)) | 965 | if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN)) |
967 | v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; | 966 | v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; |
968 | return v; | 967 | return v; |
969 | } | 968 | } |
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 19274d01afa4..1fb56876be54 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c | |||
@@ -137,7 +137,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, | |||
137 | struct nlmsghdr *nlh, struct fib_rule_hdr *frh, | 137 | struct nlmsghdr *nlh, struct fib_rule_hdr *frh, |
138 | struct nlattr **tb) | 138 | struct nlattr **tb) |
139 | { | 139 | { |
140 | struct net *net = skb->sk->sk_net; | 140 | struct net *net = sock_net(skb->sk); |
141 | int err = -EINVAL; | 141 | int err = -EINVAL; |
142 | struct fib4_rule *rule4 = (struct fib4_rule *) rule; | 142 | struct fib4_rule *rule4 = (struct fib4_rule *) rule; |
143 | 143 | ||
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a13c84763d4c..3b83c34019fc 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -152,6 +152,7 @@ void free_fib_info(struct fib_info *fi) | |||
152 | nh->nh_dev = NULL; | 152 | nh->nh_dev = NULL; |
153 | } endfor_nexthops(fi); | 153 | } endfor_nexthops(fi); |
154 | fib_info_cnt--; | 154 | fib_info_cnt--; |
155 | release_net(fi->fib_net); | ||
155 | kfree(fi); | 156 | kfree(fi); |
156 | } | 157 | } |
157 | 158 | ||
@@ -730,7 +731,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
730 | goto failure; | 731 | goto failure; |
731 | fib_info_cnt++; | 732 | fib_info_cnt++; |
732 | 733 | ||
733 | fi->fib_net = net; | 734 | fi->fib_net = hold_net(net); |
734 | fi->fib_protocol = cfg->fc_protocol; | 735 | fi->fib_protocol = cfg->fc_protocol; |
735 | fi->fib_flags = cfg->fc_flags; | 736 | fi->fib_flags = cfg->fc_flags; |
736 | fi->fib_priority = cfg->fc_priority; | 737 | fi->fib_priority = cfg->fc_priority; |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index f6cdc012eec5..ea294fffb9ce 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -122,7 +122,10 @@ struct tnode { | |||
122 | unsigned char bits; /* 2log(KEYLENGTH) bits needed */ | 122 | unsigned char bits; /* 2log(KEYLENGTH) bits needed */ |
123 | unsigned int full_children; /* KEYLENGTH bits needed */ | 123 | unsigned int full_children; /* KEYLENGTH bits needed */ |
124 | unsigned int empty_children; /* KEYLENGTH bits needed */ | 124 | unsigned int empty_children; /* KEYLENGTH bits needed */ |
125 | struct rcu_head rcu; | 125 | union { |
126 | struct rcu_head rcu; | ||
127 | struct work_struct work; | ||
128 | }; | ||
126 | struct node *child[0]; | 129 | struct node *child[0]; |
127 | }; | 130 | }; |
128 | 131 | ||
@@ -160,7 +163,6 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, | |||
160 | static struct node *resize(struct trie *t, struct tnode *tn); | 163 | static struct node *resize(struct trie *t, struct tnode *tn); |
161 | static struct tnode *inflate(struct trie *t, struct tnode *tn); | 164 | static struct tnode *inflate(struct trie *t, struct tnode *tn); |
162 | static struct tnode *halve(struct trie *t, struct tnode *tn); | 165 | static struct tnode *halve(struct trie *t, struct tnode *tn); |
163 | static void tnode_free(struct tnode *tn); | ||
164 | 166 | ||
165 | static struct kmem_cache *fn_alias_kmem __read_mostly; | 167 | static struct kmem_cache *fn_alias_kmem __read_mostly; |
166 | static struct kmem_cache *trie_leaf_kmem __read_mostly; | 168 | static struct kmem_cache *trie_leaf_kmem __read_mostly; |
@@ -334,6 +336,11 @@ static void __leaf_free_rcu(struct rcu_head *head) | |||
334 | kmem_cache_free(trie_leaf_kmem, l); | 336 | kmem_cache_free(trie_leaf_kmem, l); |
335 | } | 337 | } |
336 | 338 | ||
339 | static inline void free_leaf(struct leaf *l) | ||
340 | { | ||
341 | call_rcu_bh(&l->rcu, __leaf_free_rcu); | ||
342 | } | ||
343 | |||
337 | static void __leaf_info_free_rcu(struct rcu_head *head) | 344 | static void __leaf_info_free_rcu(struct rcu_head *head) |
338 | { | 345 | { |
339 | kfree(container_of(head, struct leaf_info, rcu)); | 346 | kfree(container_of(head, struct leaf_info, rcu)); |
@@ -346,16 +353,16 @@ static inline void free_leaf_info(struct leaf_info *leaf) | |||
346 | 353 | ||
347 | static struct tnode *tnode_alloc(size_t size) | 354 | static struct tnode *tnode_alloc(size_t size) |
348 | { | 355 | { |
349 | struct page *pages; | ||
350 | |||
351 | if (size <= PAGE_SIZE) | 356 | if (size <= PAGE_SIZE) |
352 | return kzalloc(size, GFP_KERNEL); | 357 | return kzalloc(size, GFP_KERNEL); |
358 | else | ||
359 | return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); | ||
360 | } | ||
353 | 361 | ||
354 | pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size)); | 362 | static void __tnode_vfree(struct work_struct *arg) |
355 | if (!pages) | 363 | { |
356 | return NULL; | 364 | struct tnode *tn = container_of(arg, struct tnode, work); |
357 | 365 | vfree(tn); | |
358 | return page_address(pages); | ||
359 | } | 366 | } |
360 | 367 | ||
361 | static void __tnode_free_rcu(struct rcu_head *head) | 368 | static void __tnode_free_rcu(struct rcu_head *head) |
@@ -366,16 +373,17 @@ static void __tnode_free_rcu(struct rcu_head *head) | |||
366 | 373 | ||
367 | if (size <= PAGE_SIZE) | 374 | if (size <= PAGE_SIZE) |
368 | kfree(tn); | 375 | kfree(tn); |
369 | else | 376 | else { |
370 | free_pages((unsigned long)tn, get_order(size)); | 377 | INIT_WORK(&tn->work, __tnode_vfree); |
378 | schedule_work(&tn->work); | ||
379 | } | ||
371 | } | 380 | } |
372 | 381 | ||
373 | static inline void tnode_free(struct tnode *tn) | 382 | static inline void tnode_free(struct tnode *tn) |
374 | { | 383 | { |
375 | if (IS_LEAF(tn)) { | 384 | if (IS_LEAF(tn)) |
376 | struct leaf *l = (struct leaf *) tn; | 385 | free_leaf((struct leaf *) tn); |
377 | call_rcu_bh(&l->rcu, __leaf_free_rcu); | 386 | else |
378 | } else | ||
379 | call_rcu(&tn->rcu, __tnode_free_rcu); | 387 | call_rcu(&tn->rcu, __tnode_free_rcu); |
380 | } | 388 | } |
381 | 389 | ||
@@ -1086,7 +1094,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1086 | li = leaf_info_new(plen); | 1094 | li = leaf_info_new(plen); |
1087 | 1095 | ||
1088 | if (!li) { | 1096 | if (!li) { |
1089 | tnode_free((struct tnode *) l); | 1097 | free_leaf(l); |
1090 | return NULL; | 1098 | return NULL; |
1091 | } | 1099 | } |
1092 | 1100 | ||
@@ -1122,7 +1130,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1122 | 1130 | ||
1123 | if (!tn) { | 1131 | if (!tn) { |
1124 | free_leaf_info(li); | 1132 | free_leaf_info(li); |
1125 | tnode_free((struct tnode *) l); | 1133 | free_leaf(l); |
1126 | return NULL; | 1134 | return NULL; |
1127 | } | 1135 | } |
1128 | 1136 | ||
@@ -1578,7 +1586,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) | |||
1578 | } else | 1586 | } else |
1579 | rcu_assign_pointer(t->trie, NULL); | 1587 | rcu_assign_pointer(t->trie, NULL); |
1580 | 1588 | ||
1581 | tnode_free((struct tnode *) l); | 1589 | free_leaf(l); |
1582 | } | 1590 | } |
1583 | 1591 | ||
1584 | /* | 1592 | /* |
@@ -1665,7 +1673,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) | |||
1665 | return 0; | 1673 | return 0; |
1666 | } | 1674 | } |
1667 | 1675 | ||
1668 | static int trie_flush_list(struct trie *t, struct list_head *head) | 1676 | static int trie_flush_list(struct list_head *head) |
1669 | { | 1677 | { |
1670 | struct fib_alias *fa, *fa_node; | 1678 | struct fib_alias *fa, *fa_node; |
1671 | int found = 0; | 1679 | int found = 0; |
@@ -1683,7 +1691,7 @@ static int trie_flush_list(struct trie *t, struct list_head *head) | |||
1683 | return found; | 1691 | return found; |
1684 | } | 1692 | } |
1685 | 1693 | ||
1686 | static int trie_flush_leaf(struct trie *t, struct leaf *l) | 1694 | static int trie_flush_leaf(struct leaf *l) |
1687 | { | 1695 | { |
1688 | int found = 0; | 1696 | int found = 0; |
1689 | struct hlist_head *lih = &l->list; | 1697 | struct hlist_head *lih = &l->list; |
@@ -1691,7 +1699,7 @@ static int trie_flush_leaf(struct trie *t, struct leaf *l) | |||
1691 | struct leaf_info *li = NULL; | 1699 | struct leaf_info *li = NULL; |
1692 | 1700 | ||
1693 | hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { | 1701 | hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { |
1694 | found += trie_flush_list(t, &li->falh); | 1702 | found += trie_flush_list(&li->falh); |
1695 | 1703 | ||
1696 | if (list_empty(&li->falh)) { | 1704 | if (list_empty(&li->falh)) { |
1697 | hlist_del_rcu(&li->hlist); | 1705 | hlist_del_rcu(&li->hlist); |
@@ -1782,7 +1790,7 @@ static int fn_trie_flush(struct fib_table *tb) | |||
1782 | int found = 0; | 1790 | int found = 0; |
1783 | 1791 | ||
1784 | for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) { | 1792 | for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) { |
1785 | found += trie_flush_leaf(t, l); | 1793 | found += trie_flush_leaf(l); |
1786 | 1794 | ||
1787 | if (ll && hlist_empty(&ll->list)) | 1795 | if (ll && hlist_empty(&ll->list)) |
1788 | trie_leaf_remove(t, ll); | 1796 | trie_leaf_remove(t, ll); |
@@ -2029,9 +2037,8 @@ struct fib_table *fib_hash_table(u32 id) | |||
2029 | /* Depth first Trie walk iterator */ | 2037 | /* Depth first Trie walk iterator */ |
2030 | struct fib_trie_iter { | 2038 | struct fib_trie_iter { |
2031 | struct seq_net_private p; | 2039 | struct seq_net_private p; |
2032 | struct trie *trie_local, *trie_main; | 2040 | struct fib_table *tb; |
2033 | struct tnode *tnode; | 2041 | struct tnode *tnode; |
2034 | struct trie *trie; | ||
2035 | unsigned index; | 2042 | unsigned index; |
2036 | unsigned depth; | 2043 | unsigned depth; |
2037 | }; | 2044 | }; |
@@ -2084,31 +2091,26 @@ rescan: | |||
2084 | static struct node *fib_trie_get_first(struct fib_trie_iter *iter, | 2091 | static struct node *fib_trie_get_first(struct fib_trie_iter *iter, |
2085 | struct trie *t) | 2092 | struct trie *t) |
2086 | { | 2093 | { |
2087 | struct node *n ; | 2094 | struct node *n; |
2088 | 2095 | ||
2089 | if (!t) | 2096 | if (!t) |
2090 | return NULL; | 2097 | return NULL; |
2091 | 2098 | ||
2092 | n = rcu_dereference(t->trie); | 2099 | n = rcu_dereference(t->trie); |
2093 | 2100 | if (!n) | |
2094 | if (!iter) | ||
2095 | return NULL; | 2101 | return NULL; |
2096 | 2102 | ||
2097 | if (n) { | 2103 | if (IS_TNODE(n)) { |
2098 | if (IS_TNODE(n)) { | 2104 | iter->tnode = (struct tnode *) n; |
2099 | iter->tnode = (struct tnode *) n; | 2105 | iter->index = 0; |
2100 | iter->trie = t; | 2106 | iter->depth = 1; |
2101 | iter->index = 0; | 2107 | } else { |
2102 | iter->depth = 1; | 2108 | iter->tnode = NULL; |
2103 | } else { | 2109 | iter->index = 0; |
2104 | iter->tnode = NULL; | 2110 | iter->depth = 0; |
2105 | iter->trie = t; | ||
2106 | iter->index = 0; | ||
2107 | iter->depth = 0; | ||
2108 | } | ||
2109 | return n; | ||
2110 | } | 2111 | } |
2111 | return NULL; | 2112 | |
2113 | return n; | ||
2112 | } | 2114 | } |
2113 | 2115 | ||
2114 | static void trie_collect_stats(struct trie *t, struct trie_stat *s) | 2116 | static void trie_collect_stats(struct trie *t, struct trie_stat *s) |
@@ -2119,8 +2121,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) | |||
2119 | memset(s, 0, sizeof(*s)); | 2121 | memset(s, 0, sizeof(*s)); |
2120 | 2122 | ||
2121 | rcu_read_lock(); | 2123 | rcu_read_lock(); |
2122 | for (n = fib_trie_get_first(&iter, t); n; | 2124 | for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { |
2123 | n = fib_trie_get_next(&iter)) { | ||
2124 | if (IS_LEAF(n)) { | 2125 | if (IS_LEAF(n)) { |
2125 | struct leaf *l = (struct leaf *)n; | 2126 | struct leaf *l = (struct leaf *)n; |
2126 | struct leaf_info *li; | 2127 | struct leaf_info *li; |
@@ -2209,36 +2210,48 @@ static void trie_show_usage(struct seq_file *seq, | |||
2209 | } | 2210 | } |
2210 | #endif /* CONFIG_IP_FIB_TRIE_STATS */ | 2211 | #endif /* CONFIG_IP_FIB_TRIE_STATS */ |
2211 | 2212 | ||
2212 | static void fib_trie_show(struct seq_file *seq, const char *name, | 2213 | static void fib_table_print(struct seq_file *seq, struct fib_table *tb) |
2213 | struct trie *trie) | ||
2214 | { | 2214 | { |
2215 | struct trie_stat stat; | 2215 | if (tb->tb_id == RT_TABLE_LOCAL) |
2216 | 2216 | seq_puts(seq, "Local:\n"); | |
2217 | trie_collect_stats(trie, &stat); | 2217 | else if (tb->tb_id == RT_TABLE_MAIN) |
2218 | seq_printf(seq, "%s:\n", name); | 2218 | seq_puts(seq, "Main:\n"); |
2219 | trie_show_stats(seq, &stat); | 2219 | else |
2220 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 2220 | seq_printf(seq, "Id %d:\n", tb->tb_id); |
2221 | trie_show_usage(seq, &trie->stats); | ||
2222 | #endif | ||
2223 | } | 2221 | } |
2224 | 2222 | ||
2223 | |||
2225 | static int fib_triestat_seq_show(struct seq_file *seq, void *v) | 2224 | static int fib_triestat_seq_show(struct seq_file *seq, void *v) |
2226 | { | 2225 | { |
2227 | struct net *net = (struct net *)seq->private; | 2226 | struct net *net = (struct net *)seq->private; |
2228 | struct fib_table *tb; | 2227 | unsigned int h; |
2229 | 2228 | ||
2230 | seq_printf(seq, | 2229 | seq_printf(seq, |
2231 | "Basic info: size of leaf:" | 2230 | "Basic info: size of leaf:" |
2232 | " %Zd bytes, size of tnode: %Zd bytes.\n", | 2231 | " %Zd bytes, size of tnode: %Zd bytes.\n", |
2233 | sizeof(struct leaf), sizeof(struct tnode)); | 2232 | sizeof(struct leaf), sizeof(struct tnode)); |
2234 | 2233 | ||
2235 | tb = fib_get_table(net, RT_TABLE_LOCAL); | 2234 | for (h = 0; h < FIB_TABLE_HASHSZ; h++) { |
2236 | if (tb) | 2235 | struct hlist_head *head = &net->ipv4.fib_table_hash[h]; |
2237 | fib_trie_show(seq, "Local", (struct trie *) tb->tb_data); | 2236 | struct hlist_node *node; |
2237 | struct fib_table *tb; | ||
2238 | |||
2239 | hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { | ||
2240 | struct trie *t = (struct trie *) tb->tb_data; | ||
2241 | struct trie_stat stat; | ||
2242 | |||
2243 | if (!t) | ||
2244 | continue; | ||
2238 | 2245 | ||
2239 | tb = fib_get_table(net, RT_TABLE_MAIN); | 2246 | fib_table_print(seq, tb); |
2240 | if (tb) | 2247 | |
2241 | fib_trie_show(seq, "Main", (struct trie *) tb->tb_data); | 2248 | trie_collect_stats(t, &stat); |
2249 | trie_show_stats(seq, &stat); | ||
2250 | #ifdef CONFIG_IP_FIB_TRIE_STATS | ||
2251 | trie_show_usage(seq, &t->stats); | ||
2252 | #endif | ||
2253 | } | ||
2254 | } | ||
2242 | 2255 | ||
2243 | return 0; | 2256 | return 0; |
2244 | } | 2257 | } |
@@ -2274,67 +2287,79 @@ static const struct file_operations fib_triestat_fops = { | |||
2274 | .release = fib_triestat_seq_release, | 2287 | .release = fib_triestat_seq_release, |
2275 | }; | 2288 | }; |
2276 | 2289 | ||
2277 | static struct node *fib_trie_get_idx(struct fib_trie_iter *iter, | 2290 | static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) |
2278 | loff_t pos) | ||
2279 | { | 2291 | { |
2292 | struct fib_trie_iter *iter = seq->private; | ||
2293 | struct net *net = seq_file_net(seq); | ||
2280 | loff_t idx = 0; | 2294 | loff_t idx = 0; |
2281 | struct node *n; | 2295 | unsigned int h; |
2282 | 2296 | ||
2283 | for (n = fib_trie_get_first(iter, iter->trie_local); | 2297 | for (h = 0; h < FIB_TABLE_HASHSZ; h++) { |
2284 | n; ++idx, n = fib_trie_get_next(iter)) { | 2298 | struct hlist_head *head = &net->ipv4.fib_table_hash[h]; |
2285 | if (pos == idx) | 2299 | struct hlist_node *node; |
2286 | return n; | 2300 | struct fib_table *tb; |
2287 | } | ||
2288 | 2301 | ||
2289 | for (n = fib_trie_get_first(iter, iter->trie_main); | 2302 | hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { |
2290 | n; ++idx, n = fib_trie_get_next(iter)) { | 2303 | struct node *n; |
2291 | if (pos == idx) | 2304 | |
2292 | return n; | 2305 | for (n = fib_trie_get_first(iter, |
2306 | (struct trie *) tb->tb_data); | ||
2307 | n; n = fib_trie_get_next(iter)) | ||
2308 | if (pos == idx++) { | ||
2309 | iter->tb = tb; | ||
2310 | return n; | ||
2311 | } | ||
2312 | } | ||
2293 | } | 2313 | } |
2314 | |||
2294 | return NULL; | 2315 | return NULL; |
2295 | } | 2316 | } |
2296 | 2317 | ||
2297 | static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) | 2318 | static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) |
2298 | __acquires(RCU) | 2319 | __acquires(RCU) |
2299 | { | 2320 | { |
2300 | struct fib_trie_iter *iter = seq->private; | ||
2301 | struct fib_table *tb; | ||
2302 | |||
2303 | if (!iter->trie_local) { | ||
2304 | tb = fib_get_table(iter->p.net, RT_TABLE_LOCAL); | ||
2305 | if (tb) | ||
2306 | iter->trie_local = (struct trie *) tb->tb_data; | ||
2307 | } | ||
2308 | if (!iter->trie_main) { | ||
2309 | tb = fib_get_table(iter->p.net, RT_TABLE_MAIN); | ||
2310 | if (tb) | ||
2311 | iter->trie_main = (struct trie *) tb->tb_data; | ||
2312 | } | ||
2313 | rcu_read_lock(); | 2321 | rcu_read_lock(); |
2314 | if (*pos == 0) | 2322 | return fib_trie_get_idx(seq, *pos); |
2315 | return SEQ_START_TOKEN; | ||
2316 | return fib_trie_get_idx(iter, *pos - 1); | ||
2317 | } | 2323 | } |
2318 | 2324 | ||
2319 | static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 2325 | static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
2320 | { | 2326 | { |
2321 | struct fib_trie_iter *iter = seq->private; | 2327 | struct fib_trie_iter *iter = seq->private; |
2322 | void *l = v; | 2328 | struct net *net = seq_file_net(seq); |
2329 | struct fib_table *tb = iter->tb; | ||
2330 | struct hlist_node *tb_node; | ||
2331 | unsigned int h; | ||
2332 | struct node *n; | ||
2323 | 2333 | ||
2324 | ++*pos; | 2334 | ++*pos; |
2325 | if (v == SEQ_START_TOKEN) | 2335 | /* next node in same table */ |
2326 | return fib_trie_get_idx(iter, 0); | 2336 | n = fib_trie_get_next(iter); |
2327 | 2337 | if (n) | |
2328 | v = fib_trie_get_next(iter); | 2338 | return n; |
2329 | BUG_ON(v == l); | ||
2330 | if (v) | ||
2331 | return v; | ||
2332 | 2339 | ||
2333 | /* continue scan in next trie */ | 2340 | /* walk rest of this hash chain */ |
2334 | if (iter->trie == iter->trie_local) | 2341 | h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); |
2335 | return fib_trie_get_first(iter, iter->trie_main); | 2342 | while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) { |
2343 | tb = hlist_entry(tb_node, struct fib_table, tb_hlist); | ||
2344 | n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); | ||
2345 | if (n) | ||
2346 | goto found; | ||
2347 | } | ||
2336 | 2348 | ||
2349 | /* new hash chain */ | ||
2350 | while (++h < FIB_TABLE_HASHSZ) { | ||
2351 | struct hlist_head *head = &net->ipv4.fib_table_hash[h]; | ||
2352 | hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) { | ||
2353 | n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); | ||
2354 | if (n) | ||
2355 | goto found; | ||
2356 | } | ||
2357 | } | ||
2337 | return NULL; | 2358 | return NULL; |
2359 | |||
2360 | found: | ||
2361 | iter->tb = tb; | ||
2362 | return n; | ||
2338 | } | 2363 | } |
2339 | 2364 | ||
2340 | static void fib_trie_seq_stop(struct seq_file *seq, void *v) | 2365 | static void fib_trie_seq_stop(struct seq_file *seq, void *v) |
@@ -2391,22 +2416,15 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) | |||
2391 | const struct fib_trie_iter *iter = seq->private; | 2416 | const struct fib_trie_iter *iter = seq->private; |
2392 | struct node *n = v; | 2417 | struct node *n = v; |
2393 | 2418 | ||
2394 | if (v == SEQ_START_TOKEN) | 2419 | if (!node_parent_rcu(n)) |
2395 | return 0; | 2420 | fib_table_print(seq, iter->tb); |
2396 | |||
2397 | if (!node_parent_rcu(n)) { | ||
2398 | if (iter->trie == iter->trie_local) | ||
2399 | seq_puts(seq, "<local>:\n"); | ||
2400 | else | ||
2401 | seq_puts(seq, "<main>:\n"); | ||
2402 | } | ||
2403 | 2421 | ||
2404 | if (IS_TNODE(n)) { | 2422 | if (IS_TNODE(n)) { |
2405 | struct tnode *tn = (struct tnode *) n; | 2423 | struct tnode *tn = (struct tnode *) n; |
2406 | __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); | 2424 | __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); |
2407 | 2425 | ||
2408 | seq_indent(seq, iter->depth-1); | 2426 | seq_indent(seq, iter->depth-1); |
2409 | seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n", | 2427 | seq_printf(seq, " +-- " NIPQUAD_FMT "/%d %d %d %d\n", |
2410 | NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, | 2428 | NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, |
2411 | tn->empty_children); | 2429 | tn->empty_children); |
2412 | 2430 | ||
@@ -2417,7 +2435,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) | |||
2417 | __be32 val = htonl(l->key); | 2435 | __be32 val = htonl(l->key); |
2418 | 2436 | ||
2419 | seq_indent(seq, iter->depth); | 2437 | seq_indent(seq, iter->depth); |
2420 | seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); | 2438 | seq_printf(seq, " |-- " NIPQUAD_FMT "\n", NIPQUAD(val)); |
2421 | 2439 | ||
2422 | hlist_for_each_entry_rcu(li, node, &l->list, hlist) { | 2440 | hlist_for_each_entry_rcu(li, node, &l->list, hlist) { |
2423 | struct fib_alias *fa; | 2441 | struct fib_alias *fa; |
@@ -2502,7 +2520,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) | |||
2502 | struct fib_table *tb; | 2520 | struct fib_table *tb; |
2503 | 2521 | ||
2504 | rcu_read_lock(); | 2522 | rcu_read_lock(); |
2505 | tb = fib_get_table(iter->p.net, RT_TABLE_MAIN); | 2523 | tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); |
2506 | if (!tb) | 2524 | if (!tb) |
2507 | return NULL; | 2525 | return NULL; |
2508 | 2526 | ||
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 40508babad8c..f064031f2031 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -93,6 +93,7 @@ | |||
93 | #include <asm/uaccess.h> | 93 | #include <asm/uaccess.h> |
94 | #include <net/checksum.h> | 94 | #include <net/checksum.h> |
95 | #include <net/xfrm.h> | 95 | #include <net/xfrm.h> |
96 | #include <net/inet_common.h> | ||
96 | 97 | ||
97 | /* | 98 | /* |
98 | * Build xmit assembly blocks | 99 | * Build xmit assembly blocks |
@@ -188,29 +189,6 @@ struct icmp_err icmp_err_convert[] = { | |||
188 | }, | 189 | }, |
189 | }; | 190 | }; |
190 | 191 | ||
191 | /* Control parameters for ECHO replies. */ | ||
192 | int sysctl_icmp_echo_ignore_all __read_mostly; | ||
193 | int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1; | ||
194 | |||
195 | /* Control parameter - ignore bogus broadcast responses? */ | ||
196 | int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1; | ||
197 | |||
198 | /* | ||
199 | * Configurable global rate limit. | ||
200 | * | ||
201 | * ratelimit defines tokens/packet consumed for dst->rate_token bucket | ||
202 | * ratemask defines which icmp types are ratelimited by setting | ||
203 | * it's bit position. | ||
204 | * | ||
205 | * default: | ||
206 | * dest unreachable (3), source quench (4), | ||
207 | * time exceeded (11), parameter problem (12) | ||
208 | */ | ||
209 | |||
210 | int sysctl_icmp_ratelimit __read_mostly = 1 * HZ; | ||
211 | int sysctl_icmp_ratemask __read_mostly = 0x1818; | ||
212 | int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly; | ||
213 | |||
214 | /* | 192 | /* |
215 | * ICMP control array. This specifies what to do with each ICMP. | 193 | * ICMP control array. This specifies what to do with each ICMP. |
216 | */ | 194 | */ |
@@ -229,14 +207,16 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; | |||
229 | * | 207 | * |
230 | * On SMP we have one ICMP socket per-cpu. | 208 | * On SMP we have one ICMP socket per-cpu. |
231 | */ | 209 | */ |
232 | static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL; | 210 | static struct sock *icmp_sk(struct net *net) |
233 | #define icmp_socket __get_cpu_var(__icmp_socket) | 211 | { |
212 | return net->ipv4.icmp_sk[smp_processor_id()]; | ||
213 | } | ||
234 | 214 | ||
235 | static inline int icmp_xmit_lock(void) | 215 | static inline int icmp_xmit_lock(struct sock *sk) |
236 | { | 216 | { |
237 | local_bh_disable(); | 217 | local_bh_disable(); |
238 | 218 | ||
239 | if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) { | 219 | if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { |
240 | /* This can happen if the output path signals a | 220 | /* This can happen if the output path signals a |
241 | * dst_link_failure() for an outgoing ICMP packet. | 221 | * dst_link_failure() for an outgoing ICMP packet. |
242 | */ | 222 | */ |
@@ -246,9 +226,9 @@ static inline int icmp_xmit_lock(void) | |||
246 | return 0; | 226 | return 0; |
247 | } | 227 | } |
248 | 228 | ||
249 | static inline void icmp_xmit_unlock(void) | 229 | static inline void icmp_xmit_unlock(struct sock *sk) |
250 | { | 230 | { |
251 | spin_unlock_bh(&icmp_socket->sk->sk_lock.slock); | 231 | spin_unlock_bh(&sk->sk_lock.slock); |
252 | } | 232 | } |
253 | 233 | ||
254 | /* | 234 | /* |
@@ -291,7 +271,8 @@ int xrlim_allow(struct dst_entry *dst, int timeout) | |||
291 | return rc; | 271 | return rc; |
292 | } | 272 | } |
293 | 273 | ||
294 | static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code) | 274 | static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, |
275 | int type, int code) | ||
295 | { | 276 | { |
296 | struct dst_entry *dst = &rt->u.dst; | 277 | struct dst_entry *dst = &rt->u.dst; |
297 | int rc = 1; | 278 | int rc = 1; |
@@ -308,8 +289,8 @@ static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code) | |||
308 | goto out; | 289 | goto out; |
309 | 290 | ||
310 | /* Limit if icmp type is enabled in ratemask. */ | 291 | /* Limit if icmp type is enabled in ratemask. */ |
311 | if ((1 << type) & sysctl_icmp_ratemask) | 292 | if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) |
312 | rc = xrlim_allow(dst, sysctl_icmp_ratelimit); | 293 | rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit); |
313 | out: | 294 | out: |
314 | return rc; | 295 | return rc; |
315 | } | 296 | } |
@@ -346,19 +327,21 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, | |||
346 | static void icmp_push_reply(struct icmp_bxm *icmp_param, | 327 | static void icmp_push_reply(struct icmp_bxm *icmp_param, |
347 | struct ipcm_cookie *ipc, struct rtable *rt) | 328 | struct ipcm_cookie *ipc, struct rtable *rt) |
348 | { | 329 | { |
330 | struct sock *sk; | ||
349 | struct sk_buff *skb; | 331 | struct sk_buff *skb; |
350 | 332 | ||
351 | if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, | 333 | sk = icmp_sk(dev_net(rt->u.dst.dev)); |
334 | if (ip_append_data(sk, icmp_glue_bits, icmp_param, | ||
352 | icmp_param->data_len+icmp_param->head_len, | 335 | icmp_param->data_len+icmp_param->head_len, |
353 | icmp_param->head_len, | 336 | icmp_param->head_len, |
354 | ipc, rt, MSG_DONTWAIT) < 0) | 337 | ipc, rt, MSG_DONTWAIT) < 0) |
355 | ip_flush_pending_frames(icmp_socket->sk); | 338 | ip_flush_pending_frames(sk); |
356 | else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { | 339 | else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { |
357 | struct icmphdr *icmph = icmp_hdr(skb); | 340 | struct icmphdr *icmph = icmp_hdr(skb); |
358 | __wsum csum = 0; | 341 | __wsum csum = 0; |
359 | struct sk_buff *skb1; | 342 | struct sk_buff *skb1; |
360 | 343 | ||
361 | skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) { | 344 | skb_queue_walk(&sk->sk_write_queue, skb1) { |
362 | csum = csum_add(csum, skb1->csum); | 345 | csum = csum_add(csum, skb1->csum); |
363 | } | 346 | } |
364 | csum = csum_partial_copy_nocheck((void *)&icmp_param->data, | 347 | csum = csum_partial_copy_nocheck((void *)&icmp_param->data, |
@@ -366,7 +349,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, | |||
366 | icmp_param->head_len, csum); | 349 | icmp_param->head_len, csum); |
367 | icmph->checksum = csum_fold(csum); | 350 | icmph->checksum = csum_fold(csum); |
368 | skb->ip_summed = CHECKSUM_NONE; | 351 | skb->ip_summed = CHECKSUM_NONE; |
369 | ip_push_pending_frames(icmp_socket->sk); | 352 | ip_push_pending_frames(sk); |
370 | } | 353 | } |
371 | } | 354 | } |
372 | 355 | ||
@@ -376,16 +359,17 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, | |||
376 | 359 | ||
377 | static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | 360 | static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) |
378 | { | 361 | { |
379 | struct sock *sk = icmp_socket->sk; | ||
380 | struct inet_sock *inet = inet_sk(sk); | ||
381 | struct ipcm_cookie ipc; | 362 | struct ipcm_cookie ipc; |
382 | struct rtable *rt = (struct rtable *)skb->dst; | 363 | struct rtable *rt = skb->rtable; |
364 | struct net *net = dev_net(rt->u.dst.dev); | ||
365 | struct sock *sk = icmp_sk(net); | ||
366 | struct inet_sock *inet = inet_sk(sk); | ||
383 | __be32 daddr; | 367 | __be32 daddr; |
384 | 368 | ||
385 | if (ip_options_echo(&icmp_param->replyopts, skb)) | 369 | if (ip_options_echo(&icmp_param->replyopts, skb)) |
386 | return; | 370 | return; |
387 | 371 | ||
388 | if (icmp_xmit_lock()) | 372 | if (icmp_xmit_lock(sk)) |
389 | return; | 373 | return; |
390 | 374 | ||
391 | icmp_param->data.icmph.checksum = 0; | 375 | icmp_param->data.icmph.checksum = 0; |
@@ -405,15 +389,15 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
405 | .tos = RT_TOS(ip_hdr(skb)->tos) } }, | 389 | .tos = RT_TOS(ip_hdr(skb)->tos) } }, |
406 | .proto = IPPROTO_ICMP }; | 390 | .proto = IPPROTO_ICMP }; |
407 | security_skb_classify_flow(skb, &fl); | 391 | security_skb_classify_flow(skb, &fl); |
408 | if (ip_route_output_key(rt->u.dst.dev->nd_net, &rt, &fl)) | 392 | if (ip_route_output_key(net, &rt, &fl)) |
409 | goto out_unlock; | 393 | goto out_unlock; |
410 | } | 394 | } |
411 | if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, | 395 | if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, |
412 | icmp_param->data.icmph.code)) | 396 | icmp_param->data.icmph.code)) |
413 | icmp_push_reply(icmp_param, &ipc, rt); | 397 | icmp_push_reply(icmp_param, &ipc, rt); |
414 | ip_rt_put(rt); | 398 | ip_rt_put(rt); |
415 | out_unlock: | 399 | out_unlock: |
416 | icmp_xmit_unlock(); | 400 | icmp_xmit_unlock(sk); |
417 | } | 401 | } |
418 | 402 | ||
419 | 403 | ||
@@ -433,15 +417,17 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
433 | struct iphdr *iph; | 417 | struct iphdr *iph; |
434 | int room; | 418 | int room; |
435 | struct icmp_bxm icmp_param; | 419 | struct icmp_bxm icmp_param; |
436 | struct rtable *rt = (struct rtable *)skb_in->dst; | 420 | struct rtable *rt = skb_in->rtable; |
437 | struct ipcm_cookie ipc; | 421 | struct ipcm_cookie ipc; |
438 | __be32 saddr; | 422 | __be32 saddr; |
439 | u8 tos; | 423 | u8 tos; |
440 | struct net *net; | 424 | struct net *net; |
425 | struct sock *sk; | ||
441 | 426 | ||
442 | if (!rt) | 427 | if (!rt) |
443 | goto out; | 428 | goto out; |
444 | net = rt->u.dst.dev->nd_net; | 429 | net = dev_net(rt->u.dst.dev); |
430 | sk = icmp_sk(net); | ||
445 | 431 | ||
446 | /* | 432 | /* |
447 | * Find the original header. It is expected to be valid, of course. | 433 | * Find the original header. It is expected to be valid, of course. |
@@ -505,7 +491,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
505 | } | 491 | } |
506 | } | 492 | } |
507 | 493 | ||
508 | if (icmp_xmit_lock()) | 494 | if (icmp_xmit_lock(sk)) |
509 | return; | 495 | return; |
510 | 496 | ||
511 | /* | 497 | /* |
@@ -516,7 +502,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
516 | if (!(rt->rt_flags & RTCF_LOCAL)) { | 502 | if (!(rt->rt_flags & RTCF_LOCAL)) { |
517 | struct net_device *dev = NULL; | 503 | struct net_device *dev = NULL; |
518 | 504 | ||
519 | if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr) | 505 | if (rt->fl.iif && |
506 | net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) | ||
520 | dev = dev_get_by_index(net, rt->fl.iif); | 507 | dev = dev_get_by_index(net, rt->fl.iif); |
521 | 508 | ||
522 | if (dev) { | 509 | if (dev) { |
@@ -544,7 +531,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
544 | icmp_param.data.icmph.checksum = 0; | 531 | icmp_param.data.icmph.checksum = 0; |
545 | icmp_param.skb = skb_in; | 532 | icmp_param.skb = skb_in; |
546 | icmp_param.offset = skb_network_offset(skb_in); | 533 | icmp_param.offset = skb_network_offset(skb_in); |
547 | inet_sk(icmp_socket->sk)->tos = tos; | 534 | inet_sk(sk)->tos = tos; |
548 | ipc.addr = iph->saddr; | 535 | ipc.addr = iph->saddr; |
549 | ipc.opt = &icmp_param.replyopts; | 536 | ipc.opt = &icmp_param.replyopts; |
550 | 537 | ||
@@ -609,7 +596,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
609 | RT_TOS(tos), rt2->u.dst.dev); | 596 | RT_TOS(tos), rt2->u.dst.dev); |
610 | 597 | ||
611 | dst_release(&rt2->u.dst); | 598 | dst_release(&rt2->u.dst); |
612 | rt2 = (struct rtable *)skb_in->dst; | 599 | rt2 = skb_in->rtable; |
613 | skb_in->dst = odst; | 600 | skb_in->dst = odst; |
614 | } | 601 | } |
615 | 602 | ||
@@ -634,7 +621,7 @@ relookup_failed: | |||
634 | } | 621 | } |
635 | 622 | ||
636 | route_done: | 623 | route_done: |
637 | if (!icmpv4_xrlim_allow(rt, type, code)) | 624 | if (!icmpv4_xrlim_allow(net, rt, type, code)) |
638 | goto ende; | 625 | goto ende; |
639 | 626 | ||
640 | /* RFC says return as much as we can without exceeding 576 bytes. */ | 627 | /* RFC says return as much as we can without exceeding 576 bytes. */ |
@@ -654,7 +641,7 @@ route_done: | |||
654 | ende: | 641 | ende: |
655 | ip_rt_put(rt); | 642 | ip_rt_put(rt); |
656 | out_unlock: | 643 | out_unlock: |
657 | icmp_xmit_unlock(); | 644 | icmp_xmit_unlock(sk); |
658 | out:; | 645 | out:; |
659 | } | 646 | } |
660 | 647 | ||
@@ -672,7 +659,7 @@ static void icmp_unreach(struct sk_buff *skb) | |||
672 | u32 info = 0; | 659 | u32 info = 0; |
673 | struct net *net; | 660 | struct net *net; |
674 | 661 | ||
675 | net = skb->dst->dev->nd_net; | 662 | net = dev_net(skb->dst->dev); |
676 | 663 | ||
677 | /* | 664 | /* |
678 | * Incomplete header ? | 665 | * Incomplete header ? |
@@ -698,7 +685,7 @@ static void icmp_unreach(struct sk_buff *skb) | |||
698 | break; | 685 | break; |
699 | case ICMP_FRAG_NEEDED: | 686 | case ICMP_FRAG_NEEDED: |
700 | if (ipv4_config.no_pmtu_disc) { | 687 | if (ipv4_config.no_pmtu_disc) { |
701 | LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: " | 688 | LIMIT_NETDEBUG(KERN_INFO "ICMP: " NIPQUAD_FMT ": " |
702 | "fragmentation needed " | 689 | "fragmentation needed " |
703 | "and DF set.\n", | 690 | "and DF set.\n", |
704 | NIPQUAD(iph->daddr)); | 691 | NIPQUAD(iph->daddr)); |
@@ -710,7 +697,7 @@ static void icmp_unreach(struct sk_buff *skb) | |||
710 | } | 697 | } |
711 | break; | 698 | break; |
712 | case ICMP_SR_FAILED: | 699 | case ICMP_SR_FAILED: |
713 | LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: Source " | 700 | LIMIT_NETDEBUG(KERN_INFO "ICMP: " NIPQUAD_FMT ": Source " |
714 | "Route Failed.\n", | 701 | "Route Failed.\n", |
715 | NIPQUAD(iph->daddr)); | 702 | NIPQUAD(iph->daddr)); |
716 | break; | 703 | break; |
@@ -740,12 +727,12 @@ static void icmp_unreach(struct sk_buff *skb) | |||
740 | * get the other vendor to fix their kit. | 727 | * get the other vendor to fix their kit. |
741 | */ | 728 | */ |
742 | 729 | ||
743 | if (!sysctl_icmp_ignore_bogus_error_responses && | 730 | if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && |
744 | inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { | 731 | inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { |
745 | if (net_ratelimit()) | 732 | if (net_ratelimit()) |
746 | printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP " | 733 | printk(KERN_WARNING NIPQUAD_FMT " sent an invalid ICMP " |
747 | "type %u, code %u " | 734 | "type %u, code %u " |
748 | "error to a broadcast: %u.%u.%u.%u on %s\n", | 735 | "error to a broadcast: " NIPQUAD_FMT " on %s\n", |
749 | NIPQUAD(ip_hdr(skb)->saddr), | 736 | NIPQUAD(ip_hdr(skb)->saddr), |
750 | icmph->type, icmph->code, | 737 | icmph->type, icmph->code, |
751 | NIPQUAD(iph->daddr), | 738 | NIPQUAD(iph->daddr), |
@@ -835,7 +822,10 @@ out_err: | |||
835 | 822 | ||
836 | static void icmp_echo(struct sk_buff *skb) | 823 | static void icmp_echo(struct sk_buff *skb) |
837 | { | 824 | { |
838 | if (!sysctl_icmp_echo_ignore_all) { | 825 | struct net *net; |
826 | |||
827 | net = dev_net(skb->dst->dev); | ||
828 | if (!net->ipv4.sysctl_icmp_echo_ignore_all) { | ||
839 | struct icmp_bxm icmp_param; | 829 | struct icmp_bxm icmp_param; |
840 | 830 | ||
841 | icmp_param.data.icmph = *icmp_hdr(skb); | 831 | icmp_param.data.icmph = *icmp_hdr(skb); |
@@ -938,7 +928,7 @@ static void icmp_address(struct sk_buff *skb) | |||
938 | 928 | ||
939 | static void icmp_address_reply(struct sk_buff *skb) | 929 | static void icmp_address_reply(struct sk_buff *skb) |
940 | { | 930 | { |
941 | struct rtable *rt = (struct rtable *)skb->dst; | 931 | struct rtable *rt = skb->rtable; |
942 | struct net_device *dev = skb->dev; | 932 | struct net_device *dev = skb->dev; |
943 | struct in_device *in_dev; | 933 | struct in_device *in_dev; |
944 | struct in_ifaddr *ifa; | 934 | struct in_ifaddr *ifa; |
@@ -963,8 +953,8 @@ static void icmp_address_reply(struct sk_buff *skb) | |||
963 | break; | 953 | break; |
964 | } | 954 | } |
965 | if (!ifa && net_ratelimit()) { | 955 | if (!ifa && net_ratelimit()) { |
966 | printk(KERN_INFO "Wrong address mask %u.%u.%u.%u from " | 956 | printk(KERN_INFO "Wrong address mask " NIPQUAD_FMT " from " |
967 | "%s/%u.%u.%u.%u\n", | 957 | "%s/" NIPQUAD_FMT "\n", |
968 | NIPQUAD(*mp), dev->name, NIPQUAD(rt->rt_src)); | 958 | NIPQUAD(*mp), dev->name, NIPQUAD(rt->rt_src)); |
969 | } | 959 | } |
970 | } | 960 | } |
@@ -983,7 +973,7 @@ static void icmp_discard(struct sk_buff *skb) | |||
983 | int icmp_rcv(struct sk_buff *skb) | 973 | int icmp_rcv(struct sk_buff *skb) |
984 | { | 974 | { |
985 | struct icmphdr *icmph; | 975 | struct icmphdr *icmph; |
986 | struct rtable *rt = (struct rtable *)skb->dst; | 976 | struct rtable *rt = skb->rtable; |
987 | 977 | ||
988 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { | 978 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { |
989 | int nh; | 979 | int nh; |
@@ -1038,6 +1028,9 @@ int icmp_rcv(struct sk_buff *skb) | |||
1038 | */ | 1028 | */ |
1039 | 1029 | ||
1040 | if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { | 1030 | if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { |
1031 | struct net *net; | ||
1032 | |||
1033 | net = dev_net(rt->u.dst.dev); | ||
1041 | /* | 1034 | /* |
1042 | * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be | 1035 | * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be |
1043 | * silently ignored (we let user decide with a sysctl). | 1036 | * silently ignored (we let user decide with a sysctl). |
@@ -1046,7 +1039,7 @@ int icmp_rcv(struct sk_buff *skb) | |||
1046 | */ | 1039 | */ |
1047 | if ((icmph->type == ICMP_ECHO || | 1040 | if ((icmph->type == ICMP_ECHO || |
1048 | icmph->type == ICMP_TIMESTAMP) && | 1041 | icmph->type == ICMP_TIMESTAMP) && |
1049 | sysctl_icmp_echo_ignore_broadcasts) { | 1042 | net->ipv4.sysctl_icmp_echo_ignore_broadcasts) { |
1050 | goto error; | 1043 | goto error; |
1051 | } | 1044 | } |
1052 | if (icmph->type != ICMP_ECHO && | 1045 | if (icmph->type != ICMP_ECHO && |
@@ -1141,38 +1134,84 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = { | |||
1141 | }, | 1134 | }, |
1142 | }; | 1135 | }; |
1143 | 1136 | ||
1144 | void __init icmp_init(struct net_proto_family *ops) | 1137 | static void __net_exit icmp_sk_exit(struct net *net) |
1145 | { | 1138 | { |
1146 | struct inet_sock *inet; | ||
1147 | int i; | 1139 | int i; |
1148 | 1140 | ||
1149 | for_each_possible_cpu(i) { | 1141 | for_each_possible_cpu(i) |
1150 | int err; | 1142 | inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); |
1143 | kfree(net->ipv4.icmp_sk); | ||
1144 | net->ipv4.icmp_sk = NULL; | ||
1145 | } | ||
1146 | |||
1147 | int __net_init icmp_sk_init(struct net *net) | ||
1148 | { | ||
1149 | int i, err; | ||
1151 | 1150 | ||
1152 | err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, | 1151 | net->ipv4.icmp_sk = |
1153 | &per_cpu(__icmp_socket, i)); | 1152 | kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); |
1153 | if (net->ipv4.icmp_sk == NULL) | ||
1154 | return -ENOMEM; | ||
1154 | 1155 | ||
1156 | for_each_possible_cpu(i) { | ||
1157 | struct sock *sk; | ||
1158 | |||
1159 | err = inet_ctl_sock_create(&sk, PF_INET, | ||
1160 | SOCK_RAW, IPPROTO_ICMP, net); | ||
1155 | if (err < 0) | 1161 | if (err < 0) |
1156 | panic("Failed to create the ICMP control socket.\n"); | 1162 | goto fail; |
1157 | 1163 | ||
1158 | per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC; | 1164 | net->ipv4.icmp_sk[i] = sk; |
1159 | 1165 | ||
1160 | /* Enough space for 2 64K ICMP packets, including | 1166 | /* Enough space for 2 64K ICMP packets, including |
1161 | * sk_buff struct overhead. | 1167 | * sk_buff struct overhead. |
1162 | */ | 1168 | */ |
1163 | per_cpu(__icmp_socket, i)->sk->sk_sndbuf = | 1169 | sk->sk_sndbuf = |
1164 | (2 * ((64 * 1024) + sizeof(struct sk_buff))); | 1170 | (2 * ((64 * 1024) + sizeof(struct sk_buff))); |
1165 | 1171 | ||
1166 | inet = inet_sk(per_cpu(__icmp_socket, i)->sk); | 1172 | inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT; |
1167 | inet->uc_ttl = -1; | ||
1168 | inet->pmtudisc = IP_PMTUDISC_DONT; | ||
1169 | |||
1170 | /* Unhash it so that IP input processing does not even | ||
1171 | * see it, we do not wish this socket to see incoming | ||
1172 | * packets. | ||
1173 | */ | ||
1174 | per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk); | ||
1175 | } | 1173 | } |
1174 | |||
1175 | /* Control parameters for ECHO replies. */ | ||
1176 | net->ipv4.sysctl_icmp_echo_ignore_all = 0; | ||
1177 | net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1; | ||
1178 | |||
1179 | /* Control parameter - ignore bogus broadcast responses? */ | ||
1180 | net->ipv4.sysctl_icmp_ignore_bogus_error_responses = 1; | ||
1181 | |||
1182 | /* | ||
1183 | * Configurable global rate limit. | ||
1184 | * | ||
1185 | * ratelimit defines tokens/packet consumed for dst->rate_token | ||
1186 | * bucket ratemask defines which icmp types are ratelimited by | ||
1187 | * setting it's bit position. | ||
1188 | * | ||
1189 | * default: | ||
1190 | * dest unreachable (3), source quench (4), | ||
1191 | * time exceeded (11), parameter problem (12) | ||
1192 | */ | ||
1193 | |||
1194 | net->ipv4.sysctl_icmp_ratelimit = 1 * HZ; | ||
1195 | net->ipv4.sysctl_icmp_ratemask = 0x1818; | ||
1196 | net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0; | ||
1197 | |||
1198 | return 0; | ||
1199 | |||
1200 | fail: | ||
1201 | for_each_possible_cpu(i) | ||
1202 | inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); | ||
1203 | kfree(net->ipv4.icmp_sk); | ||
1204 | return err; | ||
1205 | } | ||
1206 | |||
1207 | static struct pernet_operations __net_initdata icmp_sk_ops = { | ||
1208 | .init = icmp_sk_init, | ||
1209 | .exit = icmp_sk_exit, | ||
1210 | }; | ||
1211 | |||
1212 | int __init icmp_init(void) | ||
1213 | { | ||
1214 | return register_pernet_device(&icmp_sk_ops); | ||
1176 | } | 1215 | } |
1177 | 1216 | ||
1178 | EXPORT_SYMBOL(icmp_err_convert); | 1217 | EXPORT_SYMBOL(icmp_err_convert); |
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 732cd07e6071..6250f4239b61 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -130,12 +130,12 @@ | |||
130 | */ | 130 | */ |
131 | 131 | ||
132 | #define IGMP_V1_SEEN(in_dev) \ | 132 | #define IGMP_V1_SEEN(in_dev) \ |
133 | (IPV4_DEVCONF_ALL(in_dev->dev->nd_net, FORCE_IGMP_VERSION) == 1 || \ | 133 | (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 1 || \ |
134 | IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ | 134 | IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ |
135 | ((in_dev)->mr_v1_seen && \ | 135 | ((in_dev)->mr_v1_seen && \ |
136 | time_before(jiffies, (in_dev)->mr_v1_seen))) | 136 | time_before(jiffies, (in_dev)->mr_v1_seen))) |
137 | #define IGMP_V2_SEEN(in_dev) \ | 137 | #define IGMP_V2_SEEN(in_dev) \ |
138 | (IPV4_DEVCONF_ALL(in_dev->dev->nd_net, FORCE_IGMP_VERSION) == 2 || \ | 138 | (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), FORCE_IGMP_VERSION) == 2 || \ |
139 | IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ | 139 | IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ |
140 | ((in_dev)->mr_v2_seen && \ | 140 | ((in_dev)->mr_v2_seen && \ |
141 | time_before(jiffies, (in_dev)->mr_v2_seen))) | 141 | time_before(jiffies, (in_dev)->mr_v2_seen))) |
@@ -948,7 +948,7 @@ int igmp_rcv(struct sk_buff *skb) | |||
948 | case IGMPV2_HOST_MEMBERSHIP_REPORT: | 948 | case IGMPV2_HOST_MEMBERSHIP_REPORT: |
949 | case IGMPV3_HOST_MEMBERSHIP_REPORT: | 949 | case IGMPV3_HOST_MEMBERSHIP_REPORT: |
950 | /* Is it our report looped back? */ | 950 | /* Is it our report looped back? */ |
951 | if (((struct rtable*)skb->dst)->fl.iif == 0) | 951 | if (skb->rtable->fl.iif == 0) |
952 | break; | 952 | break; |
953 | /* don't rely on MC router hearing unicast reports */ | 953 | /* don't rely on MC router hearing unicast reports */ |
954 | if (skb->pkt_type == PACKET_MULTICAST || | 954 | if (skb->pkt_type == PACKET_MULTICAST || |
@@ -1198,6 +1198,9 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) | |||
1198 | 1198 | ||
1199 | ASSERT_RTNL(); | 1199 | ASSERT_RTNL(); |
1200 | 1200 | ||
1201 | if (dev_net(in_dev->dev) != &init_net) | ||
1202 | return; | ||
1203 | |||
1201 | for (im=in_dev->mc_list; im; im=im->next) { | 1204 | for (im=in_dev->mc_list; im; im=im->next) { |
1202 | if (im->multiaddr == addr) { | 1205 | if (im->multiaddr == addr) { |
1203 | im->users++; | 1206 | im->users++; |
@@ -1277,6 +1280,9 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) | |||
1277 | 1280 | ||
1278 | ASSERT_RTNL(); | 1281 | ASSERT_RTNL(); |
1279 | 1282 | ||
1283 | if (dev_net(in_dev->dev) != &init_net) | ||
1284 | return; | ||
1285 | |||
1280 | for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { | 1286 | for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { |
1281 | if (i->multiaddr==addr) { | 1287 | if (i->multiaddr==addr) { |
1282 | if (--i->users == 0) { | 1288 | if (--i->users == 0) { |
@@ -1304,6 +1310,9 @@ void ip_mc_down(struct in_device *in_dev) | |||
1304 | 1310 | ||
1305 | ASSERT_RTNL(); | 1311 | ASSERT_RTNL(); |
1306 | 1312 | ||
1313 | if (dev_net(in_dev->dev) != &init_net) | ||
1314 | return; | ||
1315 | |||
1307 | for (i=in_dev->mc_list; i; i=i->next) | 1316 | for (i=in_dev->mc_list; i; i=i->next) |
1308 | igmp_group_dropped(i); | 1317 | igmp_group_dropped(i); |
1309 | 1318 | ||
@@ -1324,6 +1333,9 @@ void ip_mc_init_dev(struct in_device *in_dev) | |||
1324 | { | 1333 | { |
1325 | ASSERT_RTNL(); | 1334 | ASSERT_RTNL(); |
1326 | 1335 | ||
1336 | if (dev_net(in_dev->dev) != &init_net) | ||
1337 | return; | ||
1338 | |||
1327 | in_dev->mc_tomb = NULL; | 1339 | in_dev->mc_tomb = NULL; |
1328 | #ifdef CONFIG_IP_MULTICAST | 1340 | #ifdef CONFIG_IP_MULTICAST |
1329 | in_dev->mr_gq_running = 0; | 1341 | in_dev->mr_gq_running = 0; |
@@ -1347,6 +1359,9 @@ void ip_mc_up(struct in_device *in_dev) | |||
1347 | 1359 | ||
1348 | ASSERT_RTNL(); | 1360 | ASSERT_RTNL(); |
1349 | 1361 | ||
1362 | if (dev_net(in_dev->dev) != &init_net) | ||
1363 | return; | ||
1364 | |||
1350 | ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); | 1365 | ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); |
1351 | 1366 | ||
1352 | for (i=in_dev->mc_list; i; i=i->next) | 1367 | for (i=in_dev->mc_list; i; i=i->next) |
@@ -1363,6 +1378,9 @@ void ip_mc_destroy_dev(struct in_device *in_dev) | |||
1363 | 1378 | ||
1364 | ASSERT_RTNL(); | 1379 | ASSERT_RTNL(); |
1365 | 1380 | ||
1381 | if (dev_net(in_dev->dev) != &init_net) | ||
1382 | return; | ||
1383 | |||
1366 | /* Deactivate timers */ | 1384 | /* Deactivate timers */ |
1367 | ip_mc_down(in_dev); | 1385 | ip_mc_down(in_dev); |
1368 | 1386 | ||
@@ -1744,6 +1762,9 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) | |||
1744 | if (!ipv4_is_multicast(addr)) | 1762 | if (!ipv4_is_multicast(addr)) |
1745 | return -EINVAL; | 1763 | return -EINVAL; |
1746 | 1764 | ||
1765 | if (sock_net(sk) != &init_net) | ||
1766 | return -EPROTONOSUPPORT; | ||
1767 | |||
1747 | rtnl_lock(); | 1768 | rtnl_lock(); |
1748 | 1769 | ||
1749 | in_dev = ip_mc_find_dev(imr); | 1770 | in_dev = ip_mc_find_dev(imr); |
@@ -1812,6 +1833,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) | |||
1812 | u32 ifindex; | 1833 | u32 ifindex; |
1813 | int ret = -EADDRNOTAVAIL; | 1834 | int ret = -EADDRNOTAVAIL; |
1814 | 1835 | ||
1836 | if (sock_net(sk) != &init_net) | ||
1837 | return -EPROTONOSUPPORT; | ||
1838 | |||
1815 | rtnl_lock(); | 1839 | rtnl_lock(); |
1816 | in_dev = ip_mc_find_dev(imr); | 1840 | in_dev = ip_mc_find_dev(imr); |
1817 | ifindex = imr->imr_ifindex; | 1841 | ifindex = imr->imr_ifindex; |
@@ -1857,6 +1881,9 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct | |||
1857 | if (!ipv4_is_multicast(addr)) | 1881 | if (!ipv4_is_multicast(addr)) |
1858 | return -EINVAL; | 1882 | return -EINVAL; |
1859 | 1883 | ||
1884 | if (sock_net(sk) != &init_net) | ||
1885 | return -EPROTONOSUPPORT; | ||
1886 | |||
1860 | rtnl_lock(); | 1887 | rtnl_lock(); |
1861 | 1888 | ||
1862 | imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; | 1889 | imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; |
@@ -1990,6 +2017,9 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) | |||
1990 | msf->imsf_fmode != MCAST_EXCLUDE) | 2017 | msf->imsf_fmode != MCAST_EXCLUDE) |
1991 | return -EINVAL; | 2018 | return -EINVAL; |
1992 | 2019 | ||
2020 | if (sock_net(sk) != &init_net) | ||
2021 | return -EPROTONOSUPPORT; | ||
2022 | |||
1993 | rtnl_lock(); | 2023 | rtnl_lock(); |
1994 | 2024 | ||
1995 | imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; | 2025 | imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; |
@@ -2070,6 +2100,9 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, | |||
2070 | if (!ipv4_is_multicast(addr)) | 2100 | if (!ipv4_is_multicast(addr)) |
2071 | return -EINVAL; | 2101 | return -EINVAL; |
2072 | 2102 | ||
2103 | if (sock_net(sk) != &init_net) | ||
2104 | return -EPROTONOSUPPORT; | ||
2105 | |||
2073 | rtnl_lock(); | 2106 | rtnl_lock(); |
2074 | 2107 | ||
2075 | imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; | 2108 | imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; |
@@ -2132,6 +2165,9 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, | |||
2132 | if (!ipv4_is_multicast(addr)) | 2165 | if (!ipv4_is_multicast(addr)) |
2133 | return -EINVAL; | 2166 | return -EINVAL; |
2134 | 2167 | ||
2168 | if (sock_net(sk) != &init_net) | ||
2169 | return -EPROTONOSUPPORT; | ||
2170 | |||
2135 | rtnl_lock(); | 2171 | rtnl_lock(); |
2136 | 2172 | ||
2137 | err = -EADDRNOTAVAIL; | 2173 | err = -EADDRNOTAVAIL; |
@@ -2216,6 +2252,9 @@ void ip_mc_drop_socket(struct sock *sk) | |||
2216 | if (inet->mc_list == NULL) | 2252 | if (inet->mc_list == NULL) |
2217 | return; | 2253 | return; |
2218 | 2254 | ||
2255 | if (sock_net(sk) != &init_net) | ||
2256 | return; | ||
2257 | |||
2219 | rtnl_lock(); | 2258 | rtnl_lock(); |
2220 | while ((iml = inet->mc_list) != NULL) { | 2259 | while ((iml = inet->mc_list) != NULL) { |
2221 | struct in_device *in_dev; | 2260 | struct in_device *in_dev; |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b189278c7bc1..828ea211ff21 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -55,6 +55,13 @@ int inet_csk_bind_conflict(const struct sock *sk, | |||
55 | struct hlist_node *node; | 55 | struct hlist_node *node; |
56 | int reuse = sk->sk_reuse; | 56 | int reuse = sk->sk_reuse; |
57 | 57 | ||
58 | /* | ||
59 | * Unlike other sk lookup places we do not check | ||
60 | * for sk_net here, since _all_ the socks listed | ||
61 | * in tb->owners list belong to the same net - the | ||
62 | * one this bucket belongs to. | ||
63 | */ | ||
64 | |||
58 | sk_for_each_bound(sk2, node, &tb->owners) { | 65 | sk_for_each_bound(sk2, node, &tb->owners) { |
59 | if (sk != sk2 && | 66 | if (sk != sk2 && |
60 | !inet_v6_ipv6only(sk2) && | 67 | !inet_v6_ipv6only(sk2) && |
@@ -80,12 +87,12 @@ EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); | |||
80 | */ | 87 | */ |
81 | int inet_csk_get_port(struct sock *sk, unsigned short snum) | 88 | int inet_csk_get_port(struct sock *sk, unsigned short snum) |
82 | { | 89 | { |
83 | struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; | 90 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
84 | struct inet_bind_hashbucket *head; | 91 | struct inet_bind_hashbucket *head; |
85 | struct hlist_node *node; | 92 | struct hlist_node *node; |
86 | struct inet_bind_bucket *tb; | 93 | struct inet_bind_bucket *tb; |
87 | int ret; | 94 | int ret; |
88 | struct net *net = sk->sk_net; | 95 | struct net *net = sock_net(sk); |
89 | 96 | ||
90 | local_bh_disable(); | 97 | local_bh_disable(); |
91 | if (!snum) { | 98 | if (!snum) { |
@@ -133,8 +140,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) | |||
133 | goto tb_not_found; | 140 | goto tb_not_found; |
134 | tb_found: | 141 | tb_found: |
135 | if (!hlist_empty(&tb->owners)) { | 142 | if (!hlist_empty(&tb->owners)) { |
136 | if (sk->sk_reuse > 1) | ||
137 | goto success; | ||
138 | if (tb->fastreuse > 0 && | 143 | if (tb->fastreuse > 0 && |
139 | sk->sk_reuse && sk->sk_state != TCP_LISTEN) { | 144 | sk->sk_reuse && sk->sk_state != TCP_LISTEN) { |
140 | goto success; | 145 | goto success; |
@@ -333,7 +338,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, | |||
333 | .dport = ireq->rmt_port } } }; | 338 | .dport = ireq->rmt_port } } }; |
334 | 339 | ||
335 | security_req_classify_flow(req, &fl); | 340 | security_req_classify_flow(req, &fl); |
336 | if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) { | 341 | if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) { |
337 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | 342 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); |
338 | return NULL; | 343 | return NULL; |
339 | } | 344 | } |
@@ -414,8 +419,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, | |||
414 | struct inet_connection_sock *icsk = inet_csk(parent); | 419 | struct inet_connection_sock *icsk = inet_csk(parent); |
415 | struct request_sock_queue *queue = &icsk->icsk_accept_queue; | 420 | struct request_sock_queue *queue = &icsk->icsk_accept_queue; |
416 | struct listen_sock *lopt = queue->listen_opt; | 421 | struct listen_sock *lopt = queue->listen_opt; |
417 | int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; | 422 | int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; |
418 | int thresh = max_retries; | ||
419 | unsigned long now = jiffies; | 423 | unsigned long now = jiffies; |
420 | struct request_sock **reqp, *req; | 424 | struct request_sock **reqp, *req; |
421 | int i, budget; | 425 | int i, budget; |
@@ -451,9 +455,6 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, | |||
451 | } | 455 | } |
452 | } | 456 | } |
453 | 457 | ||
454 | if (queue->rskq_defer_accept) | ||
455 | max_retries = queue->rskq_defer_accept; | ||
456 | |||
457 | budget = 2 * (lopt->nr_table_entries / (timeout / interval)); | 458 | budget = 2 * (lopt->nr_table_entries / (timeout / interval)); |
458 | i = lopt->clock_hand; | 459 | i = lopt->clock_hand; |
459 | 460 | ||
@@ -461,9 +462,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, | |||
461 | reqp=&lopt->syn_table[i]; | 462 | reqp=&lopt->syn_table[i]; |
462 | while ((req = *reqp) != NULL) { | 463 | while ((req = *reqp) != NULL) { |
463 | if (time_after_eq(now, req->expires)) { | 464 | if (time_after_eq(now, req->expires)) { |
464 | if ((req->retrans < thresh || | 465 | if (req->retrans < thresh && |
465 | (inet_rsk(req)->acked && req->retrans < max_retries)) | 466 | !req->rsk_ops->rtx_syn_ack(parent, req)) { |
466 | && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) { | ||
467 | unsigned long timeo; | 467 | unsigned long timeo; |
468 | 468 | ||
469 | if (req->retrans++ == 0) | 469 | if (req->retrans++ == 0) |
@@ -656,25 +656,6 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) | |||
656 | 656 | ||
657 | EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); | 657 | EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); |
658 | 658 | ||
659 | int inet_csk_ctl_sock_create(struct socket **sock, unsigned short family, | ||
660 | unsigned short type, unsigned char protocol) | ||
661 | { | ||
662 | int rc = sock_create_kern(family, type, protocol, sock); | ||
663 | |||
664 | if (rc == 0) { | ||
665 | (*sock)->sk->sk_allocation = GFP_ATOMIC; | ||
666 | inet_sk((*sock)->sk)->uc_ttl = -1; | ||
667 | /* | ||
668 | * Unhash it so that IP input processing does not even see it, | ||
669 | * we do not wish this socket to see incoming packets. | ||
670 | */ | ||
671 | (*sock)->sk->sk_prot->unhash((*sock)->sk); | ||
672 | } | ||
673 | return rc; | ||
674 | } | ||
675 | |||
676 | EXPORT_SYMBOL_GPL(inet_csk_ctl_sock_create); | ||
677 | |||
678 | #ifdef CONFIG_COMPAT | 659 | #ifdef CONFIG_COMPAT |
679 | int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, | 660 | int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, |
680 | char __user *optval, int __user *optlen) | 661 | char __user *optval, int __user *optlen) |
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index a0a3c78cb5e0..4ed429bd5951 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c | |||
@@ -107,10 +107,10 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) | |||
107 | if (del_timer(&fq->timer)) | 107 | if (del_timer(&fq->timer)) |
108 | atomic_dec(&fq->refcnt); | 108 | atomic_dec(&fq->refcnt); |
109 | 109 | ||
110 | if (!(fq->last_in & COMPLETE)) { | 110 | if (!(fq->last_in & INET_FRAG_COMPLETE)) { |
111 | fq_unlink(fq, f); | 111 | fq_unlink(fq, f); |
112 | atomic_dec(&fq->refcnt); | 112 | atomic_dec(&fq->refcnt); |
113 | fq->last_in |= COMPLETE; | 113 | fq->last_in |= INET_FRAG_COMPLETE; |
114 | } | 114 | } |
115 | } | 115 | } |
116 | 116 | ||
@@ -134,7 +134,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, | |||
134 | struct sk_buff *fp; | 134 | struct sk_buff *fp; |
135 | struct netns_frags *nf; | 135 | struct netns_frags *nf; |
136 | 136 | ||
137 | BUG_TRAP(q->last_in & COMPLETE); | 137 | BUG_TRAP(q->last_in & INET_FRAG_COMPLETE); |
138 | BUG_TRAP(del_timer(&q->timer) == 0); | 138 | BUG_TRAP(del_timer(&q->timer) == 0); |
139 | 139 | ||
140 | /* Release all fragment data. */ | 140 | /* Release all fragment data. */ |
@@ -177,7 +177,7 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f) | |||
177 | read_unlock(&f->lock); | 177 | read_unlock(&f->lock); |
178 | 178 | ||
179 | spin_lock(&q->lock); | 179 | spin_lock(&q->lock); |
180 | if (!(q->last_in & COMPLETE)) | 180 | if (!(q->last_in & INET_FRAG_COMPLETE)) |
181 | inet_frag_kill(q, f); | 181 | inet_frag_kill(q, f); |
182 | spin_unlock(&q->lock); | 182 | spin_unlock(&q->lock); |
183 | 183 | ||
@@ -209,7 +209,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, | |||
209 | if (qp->net == nf && f->match(qp, arg)) { | 209 | if (qp->net == nf && f->match(qp, arg)) { |
210 | atomic_inc(&qp->refcnt); | 210 | atomic_inc(&qp->refcnt); |
211 | write_unlock(&f->lock); | 211 | write_unlock(&f->lock); |
212 | qp_in->last_in |= COMPLETE; | 212 | qp_in->last_in |= INET_FRAG_COMPLETE; |
213 | inet_frag_put(qp_in, f); | 213 | inet_frag_put(qp_in, f); |
214 | return qp; | 214 | return qp; |
215 | } | 215 | } |
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 1aba606f6bbb..2023d37b2708 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -35,7 +35,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, | |||
35 | struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); | 35 | struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); |
36 | 36 | ||
37 | if (tb != NULL) { | 37 | if (tb != NULL) { |
38 | tb->ib_net = net; | 38 | tb->ib_net = hold_net(net); |
39 | tb->port = snum; | 39 | tb->port = snum; |
40 | tb->fastreuse = 0; | 40 | tb->fastreuse = 0; |
41 | INIT_HLIST_HEAD(&tb->owners); | 41 | INIT_HLIST_HEAD(&tb->owners); |
@@ -51,6 +51,7 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket | |||
51 | { | 51 | { |
52 | if (hlist_empty(&tb->owners)) { | 52 | if (hlist_empty(&tb->owners)) { |
53 | __hlist_del(&tb->node); | 53 | __hlist_del(&tb->node); |
54 | release_net(tb->ib_net); | ||
54 | kmem_cache_free(cachep, tb); | 55 | kmem_cache_free(cachep, tb); |
55 | } | 56 | } |
56 | } | 57 | } |
@@ -68,7 +69,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, | |||
68 | */ | 69 | */ |
69 | static void __inet_put_port(struct sock *sk) | 70 | static void __inet_put_port(struct sock *sk) |
70 | { | 71 | { |
71 | struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; | 72 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
72 | const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); | 73 | const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); |
73 | struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; | 74 | struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; |
74 | struct inet_bind_bucket *tb; | 75 | struct inet_bind_bucket *tb; |
@@ -91,6 +92,22 @@ void inet_put_port(struct sock *sk) | |||
91 | 92 | ||
92 | EXPORT_SYMBOL(inet_put_port); | 93 | EXPORT_SYMBOL(inet_put_port); |
93 | 94 | ||
95 | void __inet_inherit_port(struct sock *sk, struct sock *child) | ||
96 | { | ||
97 | struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; | ||
98 | const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size); | ||
99 | struct inet_bind_hashbucket *head = &table->bhash[bhash]; | ||
100 | struct inet_bind_bucket *tb; | ||
101 | |||
102 | spin_lock(&head->lock); | ||
103 | tb = inet_csk(sk)->icsk_bind_hash; | ||
104 | sk_add_bind_node(child, &tb->owners); | ||
105 | inet_csk(child)->icsk_bind_hash = tb; | ||
106 | spin_unlock(&head->lock); | ||
107 | } | ||
108 | |||
109 | EXPORT_SYMBOL_GPL(__inet_inherit_port); | ||
110 | |||
94 | /* | 111 | /* |
95 | * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. | 112 | * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. |
96 | * Look, when several writers sleep and reader wakes them up, all but one | 113 | * Look, when several writers sleep and reader wakes them up, all but one |
@@ -139,7 +156,7 @@ static struct sock *inet_lookup_listener_slow(struct net *net, | |||
139 | sk_for_each(sk, node, head) { | 156 | sk_for_each(sk, node, head) { |
140 | const struct inet_sock *inet = inet_sk(sk); | 157 | const struct inet_sock *inet = inet_sk(sk); |
141 | 158 | ||
142 | if (sk->sk_net == net && inet->num == hnum && | 159 | if (net_eq(sock_net(sk), net) && inet->num == hnum && |
143 | !ipv6_only_sock(sk)) { | 160 | !ipv6_only_sock(sk)) { |
144 | const __be32 rcv_saddr = inet->rcv_saddr; | 161 | const __be32 rcv_saddr = inet->rcv_saddr; |
145 | int score = sk->sk_family == PF_INET ? 1 : 0; | 162 | int score = sk->sk_family == PF_INET ? 1 : 0; |
@@ -182,7 +199,7 @@ struct sock *__inet_lookup_listener(struct net *net, | |||
182 | if (inet->num == hnum && !sk->sk_node.next && | 199 | if (inet->num == hnum && !sk->sk_node.next && |
183 | (!inet->rcv_saddr || inet->rcv_saddr == daddr) && | 200 | (!inet->rcv_saddr || inet->rcv_saddr == daddr) && |
184 | (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && | 201 | (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && |
185 | !sk->sk_bound_dev_if && sk->sk_net == net) | 202 | !sk->sk_bound_dev_if && net_eq(sock_net(sk), net)) |
186 | goto sherry_cache; | 203 | goto sherry_cache; |
187 | sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); | 204 | sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); |
188 | } | 205 | } |
@@ -254,7 +271,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, | |||
254 | struct sock *sk2; | 271 | struct sock *sk2; |
255 | const struct hlist_node *node; | 272 | const struct hlist_node *node; |
256 | struct inet_timewait_sock *tw; | 273 | struct inet_timewait_sock *tw; |
257 | struct net *net = sk->sk_net; | 274 | struct net *net = sock_net(sk); |
258 | 275 | ||
259 | prefetch(head->chain.first); | 276 | prefetch(head->chain.first); |
260 | write_lock(lock); | 277 | write_lock(lock); |
@@ -288,7 +305,7 @@ unique: | |||
288 | sk->sk_hash = hash; | 305 | sk->sk_hash = hash; |
289 | BUG_TRAP(sk_unhashed(sk)); | 306 | BUG_TRAP(sk_unhashed(sk)); |
290 | __sk_add_node(sk, &head->chain); | 307 | __sk_add_node(sk, &head->chain); |
291 | sock_prot_inuse_add(sk->sk_prot, 1); | 308 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
292 | write_unlock(lock); | 309 | write_unlock(lock); |
293 | 310 | ||
294 | if (twp) { | 311 | if (twp) { |
@@ -318,7 +335,7 @@ static inline u32 inet_sk_port_offset(const struct sock *sk) | |||
318 | 335 | ||
319 | void __inet_hash_nolisten(struct sock *sk) | 336 | void __inet_hash_nolisten(struct sock *sk) |
320 | { | 337 | { |
321 | struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; | 338 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
322 | struct hlist_head *list; | 339 | struct hlist_head *list; |
323 | rwlock_t *lock; | 340 | rwlock_t *lock; |
324 | struct inet_ehash_bucket *head; | 341 | struct inet_ehash_bucket *head; |
@@ -332,14 +349,14 @@ void __inet_hash_nolisten(struct sock *sk) | |||
332 | 349 | ||
333 | write_lock(lock); | 350 | write_lock(lock); |
334 | __sk_add_node(sk, list); | 351 | __sk_add_node(sk, list); |
335 | sock_prot_inuse_add(sk->sk_prot, 1); | 352 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
336 | write_unlock(lock); | 353 | write_unlock(lock); |
337 | } | 354 | } |
338 | EXPORT_SYMBOL_GPL(__inet_hash_nolisten); | 355 | EXPORT_SYMBOL_GPL(__inet_hash_nolisten); |
339 | 356 | ||
340 | static void __inet_hash(struct sock *sk) | 357 | static void __inet_hash(struct sock *sk) |
341 | { | 358 | { |
342 | struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; | 359 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
343 | struct hlist_head *list; | 360 | struct hlist_head *list; |
344 | rwlock_t *lock; | 361 | rwlock_t *lock; |
345 | 362 | ||
@@ -354,7 +371,7 @@ static void __inet_hash(struct sock *sk) | |||
354 | 371 | ||
355 | inet_listen_wlock(hashinfo); | 372 | inet_listen_wlock(hashinfo); |
356 | __sk_add_node(sk, list); | 373 | __sk_add_node(sk, list); |
357 | sock_prot_inuse_add(sk->sk_prot, 1); | 374 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
358 | write_unlock(lock); | 375 | write_unlock(lock); |
359 | wake_up(&hashinfo->lhash_wait); | 376 | wake_up(&hashinfo->lhash_wait); |
360 | } | 377 | } |
@@ -372,7 +389,7 @@ EXPORT_SYMBOL_GPL(inet_hash); | |||
372 | void inet_unhash(struct sock *sk) | 389 | void inet_unhash(struct sock *sk) |
373 | { | 390 | { |
374 | rwlock_t *lock; | 391 | rwlock_t *lock; |
375 | struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; | 392 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
376 | 393 | ||
377 | if (sk_unhashed(sk)) | 394 | if (sk_unhashed(sk)) |
378 | goto out; | 395 | goto out; |
@@ -387,7 +404,7 @@ void inet_unhash(struct sock *sk) | |||
387 | } | 404 | } |
388 | 405 | ||
389 | if (__sk_del_node_init(sk)) | 406 | if (__sk_del_node_init(sk)) |
390 | sock_prot_inuse_add(sk->sk_prot, -1); | 407 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
391 | write_unlock_bh(lock); | 408 | write_unlock_bh(lock); |
392 | out: | 409 | out: |
393 | if (sk->sk_state == TCP_LISTEN) | 410 | if (sk->sk_state == TCP_LISTEN) |
@@ -406,7 +423,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
406 | struct inet_bind_hashbucket *head; | 423 | struct inet_bind_hashbucket *head; |
407 | struct inet_bind_bucket *tb; | 424 | struct inet_bind_bucket *tb; |
408 | int ret; | 425 | int ret; |
409 | struct net *net = sk->sk_net; | 426 | struct net *net = sock_net(sk); |
410 | 427 | ||
411 | if (!snum) { | 428 | if (!snum) { |
412 | int i, remaining, low, high, port; | 429 | int i, remaining, low, high, port; |
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 717c411a5c6b..ce16e9ac24c1 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
@@ -57,6 +57,7 @@ void inet_twsk_put(struct inet_timewait_sock *tw) | |||
57 | printk(KERN_DEBUG "%s timewait_sock %p released\n", | 57 | printk(KERN_DEBUG "%s timewait_sock %p released\n", |
58 | tw->tw_prot->name, tw); | 58 | tw->tw_prot->name, tw); |
59 | #endif | 59 | #endif |
60 | release_net(twsk_net(tw)); | ||
60 | kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); | 61 | kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); |
61 | module_put(owner); | 62 | module_put(owner); |
62 | } | 63 | } |
@@ -91,7 +92,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, | |||
91 | 92 | ||
92 | /* Step 2: Remove SK from established hash. */ | 93 | /* Step 2: Remove SK from established hash. */ |
93 | if (__sk_del_node_init(sk)) | 94 | if (__sk_del_node_init(sk)) |
94 | sock_prot_inuse_add(sk->sk_prot, -1); | 95 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
95 | 96 | ||
96 | /* Step 3: Hash TW into TIMEWAIT chain. */ | 97 | /* Step 3: Hash TW into TIMEWAIT chain. */ |
97 | inet_twsk_add_node(tw, &ehead->twchain); | 98 | inet_twsk_add_node(tw, &ehead->twchain); |
@@ -124,7 +125,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat | |||
124 | tw->tw_hash = sk->sk_hash; | 125 | tw->tw_hash = sk->sk_hash; |
125 | tw->tw_ipv6only = 0; | 126 | tw->tw_ipv6only = 0; |
126 | tw->tw_prot = sk->sk_prot_creator; | 127 | tw->tw_prot = sk->sk_prot_creator; |
127 | tw->tw_net = sk->sk_net; | 128 | twsk_net_set(tw, hold_net(sock_net(sk))); |
128 | atomic_set(&tw->tw_refcnt, 1); | 129 | atomic_set(&tw->tw_refcnt, 1); |
129 | inet_twsk_dead_node_init(tw); | 130 | inet_twsk_dead_node_init(tw); |
130 | __module_get(tw->tw_prot->owner); | 131 | __module_get(tw->tw_prot->owner); |
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index a4506c8cfef0..4813c39b438b 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c | |||
@@ -80,7 +80,7 @@ int ip_forward(struct sk_buff *skb) | |||
80 | if (!xfrm4_route_forward(skb)) | 80 | if (!xfrm4_route_forward(skb)) |
81 | goto drop; | 81 | goto drop; |
82 | 82 | ||
83 | rt = (struct rtable*)skb->dst; | 83 | rt = skb->rtable; |
84 | 84 | ||
85 | if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) | 85 | if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) |
86 | goto sr_failed; | 86 | goto sr_failed; |
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 3b2e5adca838..cd6ce6ac6358 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -194,7 +194,7 @@ static void ip_expire(unsigned long arg) | |||
194 | 194 | ||
195 | spin_lock(&qp->q.lock); | 195 | spin_lock(&qp->q.lock); |
196 | 196 | ||
197 | if (qp->q.last_in & COMPLETE) | 197 | if (qp->q.last_in & INET_FRAG_COMPLETE) |
198 | goto out; | 198 | goto out; |
199 | 199 | ||
200 | ipq_kill(qp); | 200 | ipq_kill(qp); |
@@ -202,10 +202,13 @@ static void ip_expire(unsigned long arg) | |||
202 | IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT); | 202 | IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT); |
203 | IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); | 203 | IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); |
204 | 204 | ||
205 | if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) { | 205 | if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { |
206 | struct sk_buff *head = qp->q.fragments; | 206 | struct sk_buff *head = qp->q.fragments; |
207 | struct net *net; | ||
208 | |||
209 | net = container_of(qp->q.net, struct net, ipv4.frags); | ||
207 | /* Send an ICMP "Fragment Reassembly Timeout" message. */ | 210 | /* Send an ICMP "Fragment Reassembly Timeout" message. */ |
208 | if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) { | 211 | if ((head->dev = dev_get_by_index(net, qp->iif)) != NULL) { |
209 | icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); | 212 | icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); |
210 | dev_put(head->dev); | 213 | dev_put(head->dev); |
211 | } | 214 | } |
@@ -298,7 +301,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
298 | int ihl, end; | 301 | int ihl, end; |
299 | int err = -ENOENT; | 302 | int err = -ENOENT; |
300 | 303 | ||
301 | if (qp->q.last_in & COMPLETE) | 304 | if (qp->q.last_in & INET_FRAG_COMPLETE) |
302 | goto err; | 305 | goto err; |
303 | 306 | ||
304 | if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && | 307 | if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && |
@@ -324,9 +327,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
324 | * or have different end, the segment is corrrupted. | 327 | * or have different end, the segment is corrrupted. |
325 | */ | 328 | */ |
326 | if (end < qp->q.len || | 329 | if (end < qp->q.len || |
327 | ((qp->q.last_in & LAST_IN) && end != qp->q.len)) | 330 | ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len)) |
328 | goto err; | 331 | goto err; |
329 | qp->q.last_in |= LAST_IN; | 332 | qp->q.last_in |= INET_FRAG_LAST_IN; |
330 | qp->q.len = end; | 333 | qp->q.len = end; |
331 | } else { | 334 | } else { |
332 | if (end&7) { | 335 | if (end&7) { |
@@ -336,7 +339,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
336 | } | 339 | } |
337 | if (end > qp->q.len) { | 340 | if (end > qp->q.len) { |
338 | /* Some bits beyond end -> corruption. */ | 341 | /* Some bits beyond end -> corruption. */ |
339 | if (qp->q.last_in & LAST_IN) | 342 | if (qp->q.last_in & INET_FRAG_LAST_IN) |
340 | goto err; | 343 | goto err; |
341 | qp->q.len = end; | 344 | qp->q.len = end; |
342 | } | 345 | } |
@@ -435,9 +438,10 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
435 | qp->q.meat += skb->len; | 438 | qp->q.meat += skb->len; |
436 | atomic_add(skb->truesize, &qp->q.net->mem); | 439 | atomic_add(skb->truesize, &qp->q.net->mem); |
437 | if (offset == 0) | 440 | if (offset == 0) |
438 | qp->q.last_in |= FIRST_IN; | 441 | qp->q.last_in |= INET_FRAG_FIRST_IN; |
439 | 442 | ||
440 | if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len) | 443 | if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && |
444 | qp->q.meat == qp->q.len) | ||
441 | return ip_frag_reasm(qp, prev, dev); | 445 | return ip_frag_reasm(qp, prev, dev); |
442 | 446 | ||
443 | write_lock(&ip4_frags.lock); | 447 | write_lock(&ip4_frags.lock); |
@@ -553,7 +557,7 @@ out_nomem: | |||
553 | out_oversize: | 557 | out_oversize: |
554 | if (net_ratelimit()) | 558 | if (net_ratelimit()) |
555 | printk(KERN_INFO | 559 | printk(KERN_INFO |
556 | "Oversized IP packet from %d.%d.%d.%d.\n", | 560 | "Oversized IP packet from " NIPQUAD_FMT ".\n", |
557 | NIPQUAD(qp->saddr)); | 561 | NIPQUAD(qp->saddr)); |
558 | out_fail: | 562 | out_fail: |
559 | IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); | 563 | IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); |
@@ -568,7 +572,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) | |||
568 | 572 | ||
569 | IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); | 573 | IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); |
570 | 574 | ||
571 | net = skb->dev ? skb->dev->nd_net : skb->dst->dev->nd_net; | 575 | net = skb->dev ? dev_net(skb->dev) : dev_net(skb->dst->dev); |
572 | /* Start by cleaning up the memory. */ | 576 | /* Start by cleaning up the memory. */ |
573 | if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh) | 577 | if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh) |
574 | ip_evictor(net); | 578 | ip_evictor(net); |
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index e7821ba7a9a0..2ada033406de 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -39,6 +39,8 @@ | |||
39 | #include <net/dsfield.h> | 39 | #include <net/dsfield.h> |
40 | #include <net/inet_ecn.h> | 40 | #include <net/inet_ecn.h> |
41 | #include <net/xfrm.h> | 41 | #include <net/xfrm.h> |
42 | #include <net/net_namespace.h> | ||
43 | #include <net/netns/generic.h> | ||
42 | 44 | ||
43 | #ifdef CONFIG_IPV6 | 45 | #ifdef CONFIG_IPV6 |
44 | #include <net/ipv6.h> | 46 | #include <net/ipv6.h> |
@@ -122,7 +124,14 @@ static void ipgre_tunnel_setup(struct net_device *dev); | |||
122 | 124 | ||
123 | static int ipgre_fb_tunnel_init(struct net_device *dev); | 125 | static int ipgre_fb_tunnel_init(struct net_device *dev); |
124 | 126 | ||
125 | static struct net_device *ipgre_fb_tunnel_dev; | 127 | #define HASH_SIZE 16 |
128 | |||
129 | static int ipgre_net_id; | ||
130 | struct ipgre_net { | ||
131 | struct ip_tunnel *tunnels[4][HASH_SIZE]; | ||
132 | |||
133 | struct net_device *fb_tunnel_dev; | ||
134 | }; | ||
126 | 135 | ||
127 | /* Tunnel hash table */ | 136 | /* Tunnel hash table */ |
128 | 137 | ||
@@ -142,39 +151,38 @@ static struct net_device *ipgre_fb_tunnel_dev; | |||
142 | will match fallback tunnel. | 151 | will match fallback tunnel. |
143 | */ | 152 | */ |
144 | 153 | ||
145 | #define HASH_SIZE 16 | ||
146 | #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) | 154 | #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) |
147 | 155 | ||
148 | static struct ip_tunnel *tunnels[4][HASH_SIZE]; | 156 | #define tunnels_r_l tunnels[3] |
149 | 157 | #define tunnels_r tunnels[2] | |
150 | #define tunnels_r_l (tunnels[3]) | 158 | #define tunnels_l tunnels[1] |
151 | #define tunnels_r (tunnels[2]) | 159 | #define tunnels_wc tunnels[0] |
152 | #define tunnels_l (tunnels[1]) | ||
153 | #define tunnels_wc (tunnels[0]) | ||
154 | 160 | ||
155 | static DEFINE_RWLOCK(ipgre_lock); | 161 | static DEFINE_RWLOCK(ipgre_lock); |
156 | 162 | ||
157 | /* Given src, dst and key, find appropriate for input tunnel. */ | 163 | /* Given src, dst and key, find appropriate for input tunnel. */ |
158 | 164 | ||
159 | static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be32 key) | 165 | static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net, |
166 | __be32 remote, __be32 local, __be32 key) | ||
160 | { | 167 | { |
161 | unsigned h0 = HASH(remote); | 168 | unsigned h0 = HASH(remote); |
162 | unsigned h1 = HASH(key); | 169 | unsigned h1 = HASH(key); |
163 | struct ip_tunnel *t; | 170 | struct ip_tunnel *t; |
171 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); | ||
164 | 172 | ||
165 | for (t = tunnels_r_l[h0^h1]; t; t = t->next) { | 173 | for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { |
166 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { | 174 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { |
167 | if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) | 175 | if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) |
168 | return t; | 176 | return t; |
169 | } | 177 | } |
170 | } | 178 | } |
171 | for (t = tunnels_r[h0^h1]; t; t = t->next) { | 179 | for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { |
172 | if (remote == t->parms.iph.daddr) { | 180 | if (remote == t->parms.iph.daddr) { |
173 | if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) | 181 | if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) |
174 | return t; | 182 | return t; |
175 | } | 183 | } |
176 | } | 184 | } |
177 | for (t = tunnels_l[h1]; t; t = t->next) { | 185 | for (t = ign->tunnels_l[h1]; t; t = t->next) { |
178 | if (local == t->parms.iph.saddr || | 186 | if (local == t->parms.iph.saddr || |
179 | (local == t->parms.iph.daddr && | 187 | (local == t->parms.iph.daddr && |
180 | ipv4_is_multicast(local))) { | 188 | ipv4_is_multicast(local))) { |
@@ -182,17 +190,18 @@ static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be3 | |||
182 | return t; | 190 | return t; |
183 | } | 191 | } |
184 | } | 192 | } |
185 | for (t = tunnels_wc[h1]; t; t = t->next) { | 193 | for (t = ign->tunnels_wc[h1]; t; t = t->next) { |
186 | if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) | 194 | if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) |
187 | return t; | 195 | return t; |
188 | } | 196 | } |
189 | 197 | ||
190 | if (ipgre_fb_tunnel_dev->flags&IFF_UP) | 198 | if (ign->fb_tunnel_dev->flags&IFF_UP) |
191 | return netdev_priv(ipgre_fb_tunnel_dev); | 199 | return netdev_priv(ign->fb_tunnel_dev); |
192 | return NULL; | 200 | return NULL; |
193 | } | 201 | } |
194 | 202 | ||
195 | static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms) | 203 | static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign, |
204 | struct ip_tunnel_parm *parms) | ||
196 | { | 205 | { |
197 | __be32 remote = parms->iph.daddr; | 206 | __be32 remote = parms->iph.daddr; |
198 | __be32 local = parms->iph.saddr; | 207 | __be32 local = parms->iph.saddr; |
@@ -207,17 +216,18 @@ static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms) | |||
207 | h ^= HASH(remote); | 216 | h ^= HASH(remote); |
208 | } | 217 | } |
209 | 218 | ||
210 | return &tunnels[prio][h]; | 219 | return &ign->tunnels[prio][h]; |
211 | } | 220 | } |
212 | 221 | ||
213 | static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t) | 222 | static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign, |
223 | struct ip_tunnel *t) | ||
214 | { | 224 | { |
215 | return __ipgre_bucket(&t->parms); | 225 | return __ipgre_bucket(ign, &t->parms); |
216 | } | 226 | } |
217 | 227 | ||
218 | static void ipgre_tunnel_link(struct ip_tunnel *t) | 228 | static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) |
219 | { | 229 | { |
220 | struct ip_tunnel **tp = ipgre_bucket(t); | 230 | struct ip_tunnel **tp = ipgre_bucket(ign, t); |
221 | 231 | ||
222 | t->next = *tp; | 232 | t->next = *tp; |
223 | write_lock_bh(&ipgre_lock); | 233 | write_lock_bh(&ipgre_lock); |
@@ -225,11 +235,11 @@ static void ipgre_tunnel_link(struct ip_tunnel *t) | |||
225 | write_unlock_bh(&ipgre_lock); | 235 | write_unlock_bh(&ipgre_lock); |
226 | } | 236 | } |
227 | 237 | ||
228 | static void ipgre_tunnel_unlink(struct ip_tunnel *t) | 238 | static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) |
229 | { | 239 | { |
230 | struct ip_tunnel **tp; | 240 | struct ip_tunnel **tp; |
231 | 241 | ||
232 | for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) { | 242 | for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { |
233 | if (t == *tp) { | 243 | if (t == *tp) { |
234 | write_lock_bh(&ipgre_lock); | 244 | write_lock_bh(&ipgre_lock); |
235 | *tp = t->next; | 245 | *tp = t->next; |
@@ -239,7 +249,8 @@ static void ipgre_tunnel_unlink(struct ip_tunnel *t) | |||
239 | } | 249 | } |
240 | } | 250 | } |
241 | 251 | ||
242 | static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create) | 252 | static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, |
253 | struct ip_tunnel_parm *parms, int create) | ||
243 | { | 254 | { |
244 | __be32 remote = parms->iph.daddr; | 255 | __be32 remote = parms->iph.daddr; |
245 | __be32 local = parms->iph.saddr; | 256 | __be32 local = parms->iph.saddr; |
@@ -247,8 +258,9 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int | |||
247 | struct ip_tunnel *t, **tp, *nt; | 258 | struct ip_tunnel *t, **tp, *nt; |
248 | struct net_device *dev; | 259 | struct net_device *dev; |
249 | char name[IFNAMSIZ]; | 260 | char name[IFNAMSIZ]; |
261 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); | ||
250 | 262 | ||
251 | for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) { | 263 | for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) { |
252 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { | 264 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { |
253 | if (key == t->parms.i_key) | 265 | if (key == t->parms.i_key) |
254 | return t; | 266 | return t; |
@@ -266,6 +278,8 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int | |||
266 | if (!dev) | 278 | if (!dev) |
267 | return NULL; | 279 | return NULL; |
268 | 280 | ||
281 | dev_net_set(dev, net); | ||
282 | |||
269 | if (strchr(name, '%')) { | 283 | if (strchr(name, '%')) { |
270 | if (dev_alloc_name(dev, name) < 0) | 284 | if (dev_alloc_name(dev, name) < 0) |
271 | goto failed_free; | 285 | goto failed_free; |
@@ -279,7 +293,7 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int | |||
279 | goto failed_free; | 293 | goto failed_free; |
280 | 294 | ||
281 | dev_hold(dev); | 295 | dev_hold(dev); |
282 | ipgre_tunnel_link(nt); | 296 | ipgre_tunnel_link(ign, nt); |
283 | return nt; | 297 | return nt; |
284 | 298 | ||
285 | failed_free: | 299 | failed_free: |
@@ -289,7 +303,10 @@ failed_free: | |||
289 | 303 | ||
290 | static void ipgre_tunnel_uninit(struct net_device *dev) | 304 | static void ipgre_tunnel_uninit(struct net_device *dev) |
291 | { | 305 | { |
292 | ipgre_tunnel_unlink(netdev_priv(dev)); | 306 | struct net *net = dev_net(dev); |
307 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); | ||
308 | |||
309 | ipgre_tunnel_unlink(ign, netdev_priv(dev)); | ||
293 | dev_put(dev); | 310 | dev_put(dev); |
294 | } | 311 | } |
295 | 312 | ||
@@ -363,7 +380,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
363 | } | 380 | } |
364 | 381 | ||
365 | read_lock(&ipgre_lock); | 382 | read_lock(&ipgre_lock); |
366 | t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0); | 383 | t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr, |
384 | (flags&GRE_KEY) ? | ||
385 | *(((__be32*)p) + (grehlen>>2) - 1) : 0); | ||
367 | if (t == NULL || t->parms.iph.daddr == 0 || | 386 | if (t == NULL || t->parms.iph.daddr == 0 || |
368 | ipv4_is_multicast(t->parms.iph.daddr)) | 387 | ipv4_is_multicast(t->parms.iph.daddr)) |
369 | goto out; | 388 | goto out; |
@@ -476,7 +495,7 @@ out: | |||
476 | fl.fl4_dst = eiph->saddr; | 495 | fl.fl4_dst = eiph->saddr; |
477 | fl.fl4_tos = RT_TOS(eiph->tos); | 496 | fl.fl4_tos = RT_TOS(eiph->tos); |
478 | fl.proto = IPPROTO_GRE; | 497 | fl.proto = IPPROTO_GRE; |
479 | if (ip_route_output_key(&init_net, &rt, &fl)) { | 498 | if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) { |
480 | kfree_skb(skb2); | 499 | kfree_skb(skb2); |
481 | return; | 500 | return; |
482 | } | 501 | } |
@@ -489,7 +508,7 @@ out: | |||
489 | fl.fl4_dst = eiph->daddr; | 508 | fl.fl4_dst = eiph->daddr; |
490 | fl.fl4_src = eiph->saddr; | 509 | fl.fl4_src = eiph->saddr; |
491 | fl.fl4_tos = eiph->tos; | 510 | fl.fl4_tos = eiph->tos; |
492 | if (ip_route_output_key(&init_net, &rt, &fl) || | 511 | if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || |
493 | rt->u.dst.dev->type != ARPHRD_IPGRE) { | 512 | rt->u.dst.dev->type != ARPHRD_IPGRE) { |
494 | ip_rt_put(rt); | 513 | ip_rt_put(rt); |
495 | kfree_skb(skb2); | 514 | kfree_skb(skb2); |
@@ -596,7 +615,8 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
596 | } | 615 | } |
597 | 616 | ||
598 | read_lock(&ipgre_lock); | 617 | read_lock(&ipgre_lock); |
599 | if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) { | 618 | if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev), |
619 | iph->saddr, iph->daddr, key)) != NULL) { | ||
600 | secpath_reset(skb); | 620 | secpath_reset(skb); |
601 | 621 | ||
602 | skb->protocol = *(__be16*)(h + 2); | 622 | skb->protocol = *(__be16*)(h + 2); |
@@ -619,7 +639,7 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
619 | #ifdef CONFIG_NET_IPGRE_BROADCAST | 639 | #ifdef CONFIG_NET_IPGRE_BROADCAST |
620 | if (ipv4_is_multicast(iph->daddr)) { | 640 | if (ipv4_is_multicast(iph->daddr)) { |
621 | /* Looped back packet, drop it! */ | 641 | /* Looped back packet, drop it! */ |
622 | if (((struct rtable*)skb->dst)->fl.iif == 0) | 642 | if (skb->rtable->fl.iif == 0) |
623 | goto drop; | 643 | goto drop; |
624 | tunnel->stat.multicast++; | 644 | tunnel->stat.multicast++; |
625 | skb->pkt_type = PACKET_BROADCAST; | 645 | skb->pkt_type = PACKET_BROADCAST; |
@@ -699,7 +719,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
699 | } | 719 | } |
700 | 720 | ||
701 | if (skb->protocol == htons(ETH_P_IP)) { | 721 | if (skb->protocol == htons(ETH_P_IP)) { |
702 | rt = (struct rtable*)skb->dst; | 722 | rt = skb->rtable; |
703 | if ((dst = rt->rt_gateway) == 0) | 723 | if ((dst = rt->rt_gateway) == 0) |
704 | goto tx_error_icmp; | 724 | goto tx_error_icmp; |
705 | } | 725 | } |
@@ -744,7 +764,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
744 | .saddr = tiph->saddr, | 764 | .saddr = tiph->saddr, |
745 | .tos = RT_TOS(tos) } }, | 765 | .tos = RT_TOS(tos) } }, |
746 | .proto = IPPROTO_GRE }; | 766 | .proto = IPPROTO_GRE }; |
747 | if (ip_route_output_key(&init_net, &rt, &fl)) { | 767 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) { |
748 | tunnel->stat.tx_carrier_errors++; | 768 | tunnel->stat.tx_carrier_errors++; |
749 | goto tx_error; | 769 | goto tx_error; |
750 | } | 770 | } |
@@ -917,7 +937,7 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev) | |||
917 | .tos = RT_TOS(iph->tos) } }, | 937 | .tos = RT_TOS(iph->tos) } }, |
918 | .proto = IPPROTO_GRE }; | 938 | .proto = IPPROTO_GRE }; |
919 | struct rtable *rt; | 939 | struct rtable *rt; |
920 | if (!ip_route_output_key(&init_net, &rt, &fl)) { | 940 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { |
921 | tdev = rt->u.dst.dev; | 941 | tdev = rt->u.dst.dev; |
922 | ip_rt_put(rt); | 942 | ip_rt_put(rt); |
923 | } | 943 | } |
@@ -925,7 +945,7 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev) | |||
925 | } | 945 | } |
926 | 946 | ||
927 | if (!tdev && tunnel->parms.link) | 947 | if (!tdev && tunnel->parms.link) |
928 | tdev = __dev_get_by_index(&init_net, tunnel->parms.link); | 948 | tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); |
929 | 949 | ||
930 | if (tdev) { | 950 | if (tdev) { |
931 | hlen = tdev->hard_header_len; | 951 | hlen = tdev->hard_header_len; |
@@ -954,16 +974,18 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | |||
954 | int err = 0; | 974 | int err = 0; |
955 | struct ip_tunnel_parm p; | 975 | struct ip_tunnel_parm p; |
956 | struct ip_tunnel *t; | 976 | struct ip_tunnel *t; |
977 | struct net *net = dev_net(dev); | ||
978 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); | ||
957 | 979 | ||
958 | switch (cmd) { | 980 | switch (cmd) { |
959 | case SIOCGETTUNNEL: | 981 | case SIOCGETTUNNEL: |
960 | t = NULL; | 982 | t = NULL; |
961 | if (dev == ipgre_fb_tunnel_dev) { | 983 | if (dev == ign->fb_tunnel_dev) { |
962 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { | 984 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { |
963 | err = -EFAULT; | 985 | err = -EFAULT; |
964 | break; | 986 | break; |
965 | } | 987 | } |
966 | t = ipgre_tunnel_locate(&p, 0); | 988 | t = ipgre_tunnel_locate(net, &p, 0); |
967 | } | 989 | } |
968 | if (t == NULL) | 990 | if (t == NULL) |
969 | t = netdev_priv(dev); | 991 | t = netdev_priv(dev); |
@@ -995,9 +1017,9 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | |||
995 | if (!(p.o_flags&GRE_KEY)) | 1017 | if (!(p.o_flags&GRE_KEY)) |
996 | p.o_key = 0; | 1018 | p.o_key = 0; |
997 | 1019 | ||
998 | t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL); | 1020 | t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); |
999 | 1021 | ||
1000 | if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { | 1022 | if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { |
1001 | if (t != NULL) { | 1023 | if (t != NULL) { |
1002 | if (t->dev != dev) { | 1024 | if (t->dev != dev) { |
1003 | err = -EEXIST; | 1025 | err = -EEXIST; |
@@ -1017,14 +1039,14 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | |||
1017 | err = -EINVAL; | 1039 | err = -EINVAL; |
1018 | break; | 1040 | break; |
1019 | } | 1041 | } |
1020 | ipgre_tunnel_unlink(t); | 1042 | ipgre_tunnel_unlink(ign, t); |
1021 | t->parms.iph.saddr = p.iph.saddr; | 1043 | t->parms.iph.saddr = p.iph.saddr; |
1022 | t->parms.iph.daddr = p.iph.daddr; | 1044 | t->parms.iph.daddr = p.iph.daddr; |
1023 | t->parms.i_key = p.i_key; | 1045 | t->parms.i_key = p.i_key; |
1024 | t->parms.o_key = p.o_key; | 1046 | t->parms.o_key = p.o_key; |
1025 | memcpy(dev->dev_addr, &p.iph.saddr, 4); | 1047 | memcpy(dev->dev_addr, &p.iph.saddr, 4); |
1026 | memcpy(dev->broadcast, &p.iph.daddr, 4); | 1048 | memcpy(dev->broadcast, &p.iph.daddr, 4); |
1027 | ipgre_tunnel_link(t); | 1049 | ipgre_tunnel_link(ign, t); |
1028 | netdev_state_change(dev); | 1050 | netdev_state_change(dev); |
1029 | } | 1051 | } |
1030 | } | 1052 | } |
@@ -1052,15 +1074,15 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | |||
1052 | if (!capable(CAP_NET_ADMIN)) | 1074 | if (!capable(CAP_NET_ADMIN)) |
1053 | goto done; | 1075 | goto done; |
1054 | 1076 | ||
1055 | if (dev == ipgre_fb_tunnel_dev) { | 1077 | if (dev == ign->fb_tunnel_dev) { |
1056 | err = -EFAULT; | 1078 | err = -EFAULT; |
1057 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) | 1079 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) |
1058 | goto done; | 1080 | goto done; |
1059 | err = -ENOENT; | 1081 | err = -ENOENT; |
1060 | if ((t = ipgre_tunnel_locate(&p, 0)) == NULL) | 1082 | if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL) |
1061 | goto done; | 1083 | goto done; |
1062 | err = -EPERM; | 1084 | err = -EPERM; |
1063 | if (t == netdev_priv(ipgre_fb_tunnel_dev)) | 1085 | if (t == netdev_priv(ign->fb_tunnel_dev)) |
1064 | goto done; | 1086 | goto done; |
1065 | dev = t->dev; | 1087 | dev = t->dev; |
1066 | } | 1088 | } |
@@ -1173,7 +1195,7 @@ static int ipgre_open(struct net_device *dev) | |||
1173 | .tos = RT_TOS(t->parms.iph.tos) } }, | 1195 | .tos = RT_TOS(t->parms.iph.tos) } }, |
1174 | .proto = IPPROTO_GRE }; | 1196 | .proto = IPPROTO_GRE }; |
1175 | struct rtable *rt; | 1197 | struct rtable *rt; |
1176 | if (ip_route_output_key(&init_net, &rt, &fl)) | 1198 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) |
1177 | return -EADDRNOTAVAIL; | 1199 | return -EADDRNOTAVAIL; |
1178 | dev = rt->u.dst.dev; | 1200 | dev = rt->u.dst.dev; |
1179 | ip_rt_put(rt); | 1201 | ip_rt_put(rt); |
@@ -1190,7 +1212,7 @@ static int ipgre_close(struct net_device *dev) | |||
1190 | struct ip_tunnel *t = netdev_priv(dev); | 1212 | struct ip_tunnel *t = netdev_priv(dev); |
1191 | if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { | 1213 | if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { |
1192 | struct in_device *in_dev; | 1214 | struct in_device *in_dev; |
1193 | in_dev = inetdev_by_index(dev->nd_net, t->mlink); | 1215 | in_dev = inetdev_by_index(dev_net(dev), t->mlink); |
1194 | if (in_dev) { | 1216 | if (in_dev) { |
1195 | ip_mc_dec_group(in_dev, t->parms.iph.daddr); | 1217 | ip_mc_dec_group(in_dev, t->parms.iph.daddr); |
1196 | in_dev_put(in_dev); | 1218 | in_dev_put(in_dev); |
@@ -1216,6 +1238,7 @@ static void ipgre_tunnel_setup(struct net_device *dev) | |||
1216 | dev->flags = IFF_NOARP; | 1238 | dev->flags = IFF_NOARP; |
1217 | dev->iflink = 0; | 1239 | dev->iflink = 0; |
1218 | dev->addr_len = 4; | 1240 | dev->addr_len = 4; |
1241 | dev->features |= NETIF_F_NETNS_LOCAL; | ||
1219 | } | 1242 | } |
1220 | 1243 | ||
1221 | static int ipgre_tunnel_init(struct net_device *dev) | 1244 | static int ipgre_tunnel_init(struct net_device *dev) |
@@ -1251,10 +1274,11 @@ static int ipgre_tunnel_init(struct net_device *dev) | |||
1251 | return 0; | 1274 | return 0; |
1252 | } | 1275 | } |
1253 | 1276 | ||
1254 | static int __init ipgre_fb_tunnel_init(struct net_device *dev) | 1277 | static int ipgre_fb_tunnel_init(struct net_device *dev) |
1255 | { | 1278 | { |
1256 | struct ip_tunnel *tunnel = netdev_priv(dev); | 1279 | struct ip_tunnel *tunnel = netdev_priv(dev); |
1257 | struct iphdr *iph = &tunnel->parms.iph; | 1280 | struct iphdr *iph = &tunnel->parms.iph; |
1281 | struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id); | ||
1258 | 1282 | ||
1259 | tunnel->dev = dev; | 1283 | tunnel->dev = dev; |
1260 | strcpy(tunnel->parms.name, dev->name); | 1284 | strcpy(tunnel->parms.name, dev->name); |
@@ -1265,7 +1289,7 @@ static int __init ipgre_fb_tunnel_init(struct net_device *dev) | |||
1265 | tunnel->hlen = sizeof(struct iphdr) + 4; | 1289 | tunnel->hlen = sizeof(struct iphdr) + 4; |
1266 | 1290 | ||
1267 | dev_hold(dev); | 1291 | dev_hold(dev); |
1268 | tunnels_wc[0] = tunnel; | 1292 | ign->tunnels_wc[0] = tunnel; |
1269 | return 0; | 1293 | return 0; |
1270 | } | 1294 | } |
1271 | 1295 | ||
@@ -1273,56 +1297,98 @@ static int __init ipgre_fb_tunnel_init(struct net_device *dev) | |||
1273 | static struct net_protocol ipgre_protocol = { | 1297 | static struct net_protocol ipgre_protocol = { |
1274 | .handler = ipgre_rcv, | 1298 | .handler = ipgre_rcv, |
1275 | .err_handler = ipgre_err, | 1299 | .err_handler = ipgre_err, |
1300 | .netns_ok = 1, | ||
1276 | }; | 1301 | }; |
1277 | 1302 | ||
1303 | static void ipgre_destroy_tunnels(struct ipgre_net *ign) | ||
1304 | { | ||
1305 | int prio; | ||
1278 | 1306 | ||
1279 | /* | 1307 | for (prio = 0; prio < 4; prio++) { |
1280 | * And now the modules code and kernel interface. | 1308 | int h; |
1281 | */ | 1309 | for (h = 0; h < HASH_SIZE; h++) { |
1310 | struct ip_tunnel *t; | ||
1311 | while ((t = ign->tunnels[prio][h]) != NULL) | ||
1312 | unregister_netdevice(t->dev); | ||
1313 | } | ||
1314 | } | ||
1315 | } | ||
1282 | 1316 | ||
1283 | static int __init ipgre_init(void) | 1317 | static int ipgre_init_net(struct net *net) |
1284 | { | 1318 | { |
1285 | int err; | 1319 | int err; |
1320 | struct ipgre_net *ign; | ||
1286 | 1321 | ||
1287 | printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); | 1322 | err = -ENOMEM; |
1323 | ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL); | ||
1324 | if (ign == NULL) | ||
1325 | goto err_alloc; | ||
1288 | 1326 | ||
1289 | if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) { | 1327 | err = net_assign_generic(net, ipgre_net_id, ign); |
1290 | printk(KERN_INFO "ipgre init: can't add protocol\n"); | 1328 | if (err < 0) |
1291 | return -EAGAIN; | 1329 | goto err_assign; |
1292 | } | ||
1293 | 1330 | ||
1294 | ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", | 1331 | ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", |
1295 | ipgre_tunnel_setup); | 1332 | ipgre_tunnel_setup); |
1296 | if (!ipgre_fb_tunnel_dev) { | 1333 | if (!ign->fb_tunnel_dev) { |
1297 | err = -ENOMEM; | 1334 | err = -ENOMEM; |
1298 | goto err1; | 1335 | goto err_alloc_dev; |
1299 | } | 1336 | } |
1300 | 1337 | ||
1301 | ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init; | 1338 | ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init; |
1339 | dev_net_set(ign->fb_tunnel_dev, net); | ||
1302 | 1340 | ||
1303 | if ((err = register_netdev(ipgre_fb_tunnel_dev))) | 1341 | if ((err = register_netdev(ign->fb_tunnel_dev))) |
1304 | goto err2; | 1342 | goto err_reg_dev; |
1305 | out: | 1343 | |
1344 | return 0; | ||
1345 | |||
1346 | err_reg_dev: | ||
1347 | free_netdev(ign->fb_tunnel_dev); | ||
1348 | err_alloc_dev: | ||
1349 | /* nothing */ | ||
1350 | err_assign: | ||
1351 | kfree(ign); | ||
1352 | err_alloc: | ||
1306 | return err; | 1353 | return err; |
1307 | err2: | ||
1308 | free_netdev(ipgre_fb_tunnel_dev); | ||
1309 | err1: | ||
1310 | inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); | ||
1311 | goto out; | ||
1312 | } | 1354 | } |
1313 | 1355 | ||
1314 | static void __exit ipgre_destroy_tunnels(void) | 1356 | static void ipgre_exit_net(struct net *net) |
1315 | { | 1357 | { |
1316 | int prio; | 1358 | struct ipgre_net *ign; |
1317 | 1359 | ||
1318 | for (prio = 0; prio < 4; prio++) { | 1360 | ign = net_generic(net, ipgre_net_id); |
1319 | int h; | 1361 | rtnl_lock(); |
1320 | for (h = 0; h < HASH_SIZE; h++) { | 1362 | ipgre_destroy_tunnels(ign); |
1321 | struct ip_tunnel *t; | 1363 | rtnl_unlock(); |
1322 | while ((t = tunnels[prio][h]) != NULL) | 1364 | kfree(ign); |
1323 | unregister_netdevice(t->dev); | 1365 | } |
1324 | } | 1366 | |
1367 | static struct pernet_operations ipgre_net_ops = { | ||
1368 | .init = ipgre_init_net, | ||
1369 | .exit = ipgre_exit_net, | ||
1370 | }; | ||
1371 | |||
1372 | /* | ||
1373 | * And now the modules code and kernel interface. | ||
1374 | */ | ||
1375 | |||
1376 | static int __init ipgre_init(void) | ||
1377 | { | ||
1378 | int err; | ||
1379 | |||
1380 | printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); | ||
1381 | |||
1382 | if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) { | ||
1383 | printk(KERN_INFO "ipgre init: can't add protocol\n"); | ||
1384 | return -EAGAIN; | ||
1325 | } | 1385 | } |
1386 | |||
1387 | err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops); | ||
1388 | if (err < 0) | ||
1389 | inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); | ||
1390 | |||
1391 | return err; | ||
1326 | } | 1392 | } |
1327 | 1393 | ||
1328 | static void __exit ipgre_fini(void) | 1394 | static void __exit ipgre_fini(void) |
@@ -1330,9 +1396,7 @@ static void __exit ipgre_fini(void) | |||
1330 | if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) | 1396 | if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) |
1331 | printk(KERN_INFO "ipgre close: can't remove protocol\n"); | 1397 | printk(KERN_INFO "ipgre close: can't remove protocol\n"); |
1332 | 1398 | ||
1333 | rtnl_lock(); | 1399 | unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); |
1334 | ipgre_destroy_tunnels(); | ||
1335 | rtnl_unlock(); | ||
1336 | } | 1400 | } |
1337 | 1401 | ||
1338 | module_init(ipgre_init); | 1402 | module_init(ipgre_init); |
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 65631391d479..7b4bad6d572f 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -160,6 +160,7 @@ int ip_call_ra_chain(struct sk_buff *skb) | |||
160 | struct ip_ra_chain *ra; | 160 | struct ip_ra_chain *ra; |
161 | u8 protocol = ip_hdr(skb)->protocol; | 161 | u8 protocol = ip_hdr(skb)->protocol; |
162 | struct sock *last = NULL; | 162 | struct sock *last = NULL; |
163 | struct net_device *dev = skb->dev; | ||
163 | 164 | ||
164 | read_lock(&ip_ra_lock); | 165 | read_lock(&ip_ra_lock); |
165 | for (ra = ip_ra_chain; ra; ra = ra->next) { | 166 | for (ra = ip_ra_chain; ra; ra = ra->next) { |
@@ -170,7 +171,8 @@ int ip_call_ra_chain(struct sk_buff *skb) | |||
170 | */ | 171 | */ |
171 | if (sk && inet_sk(sk)->num == protocol && | 172 | if (sk && inet_sk(sk)->num == protocol && |
172 | (!sk->sk_bound_dev_if || | 173 | (!sk->sk_bound_dev_if || |
173 | sk->sk_bound_dev_if == skb->dev->ifindex)) { | 174 | sk->sk_bound_dev_if == dev->ifindex) && |
175 | sock_net(sk) == dev_net(dev)) { | ||
174 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | 176 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { |
175 | if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { | 177 | if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { |
176 | read_unlock(&ip_ra_lock); | 178 | read_unlock(&ip_ra_lock); |
@@ -197,6 +199,8 @@ int ip_call_ra_chain(struct sk_buff *skb) | |||
197 | 199 | ||
198 | static int ip_local_deliver_finish(struct sk_buff *skb) | 200 | static int ip_local_deliver_finish(struct sk_buff *skb) |
199 | { | 201 | { |
202 | struct net *net = dev_net(skb->dev); | ||
203 | |||
200 | __skb_pull(skb, ip_hdrlen(skb)); | 204 | __skb_pull(skb, ip_hdrlen(skb)); |
201 | 205 | ||
202 | /* Point into the IP datagram, just past the header. */ | 206 | /* Point into the IP datagram, just past the header. */ |
@@ -212,7 +216,8 @@ static int ip_local_deliver_finish(struct sk_buff *skb) | |||
212 | raw = raw_local_deliver(skb, protocol); | 216 | raw = raw_local_deliver(skb, protocol); |
213 | 217 | ||
214 | hash = protocol & (MAX_INET_PROTOS - 1); | 218 | hash = protocol & (MAX_INET_PROTOS - 1); |
215 | if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { | 219 | ipprot = rcu_dereference(inet_protos[hash]); |
220 | if (ipprot != NULL && (net == &init_net || ipprot->netns_ok)) { | ||
216 | int ret; | 221 | int ret; |
217 | 222 | ||
218 | if (!ipprot->no_policy) { | 223 | if (!ipprot->no_policy) { |
@@ -283,13 +288,14 @@ static inline int ip_rcv_options(struct sk_buff *skb) | |||
283 | } | 288 | } |
284 | 289 | ||
285 | iph = ip_hdr(skb); | 290 | iph = ip_hdr(skb); |
291 | opt = &(IPCB(skb)->opt); | ||
292 | opt->optlen = iph->ihl*4 - sizeof(struct iphdr); | ||
286 | 293 | ||
287 | if (ip_options_compile(NULL, skb)) { | 294 | if (ip_options_compile(dev_net(dev), opt, skb)) { |
288 | IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); | 295 | IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); |
289 | goto drop; | 296 | goto drop; |
290 | } | 297 | } |
291 | 298 | ||
292 | opt = &(IPCB(skb)->opt); | ||
293 | if (unlikely(opt->srr)) { | 299 | if (unlikely(opt->srr)) { |
294 | struct in_device *in_dev = in_dev_get(dev); | 300 | struct in_device *in_dev = in_dev_get(dev); |
295 | if (in_dev) { | 301 | if (in_dev) { |
@@ -297,7 +303,7 @@ static inline int ip_rcv_options(struct sk_buff *skb) | |||
297 | if (IN_DEV_LOG_MARTIANS(in_dev) && | 303 | if (IN_DEV_LOG_MARTIANS(in_dev) && |
298 | net_ratelimit()) | 304 | net_ratelimit()) |
299 | printk(KERN_INFO "source route option " | 305 | printk(KERN_INFO "source route option " |
300 | "%u.%u.%u.%u -> %u.%u.%u.%u\n", | 306 | NIPQUAD_FMT " -> " NIPQUAD_FMT "\n", |
301 | NIPQUAD(iph->saddr), | 307 | NIPQUAD(iph->saddr), |
302 | NIPQUAD(iph->daddr)); | 308 | NIPQUAD(iph->daddr)); |
303 | in_dev_put(in_dev); | 309 | in_dev_put(in_dev); |
@@ -351,7 +357,7 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
351 | if (iph->ihl > 5 && ip_rcv_options(skb)) | 357 | if (iph->ihl > 5 && ip_rcv_options(skb)) |
352 | goto drop; | 358 | goto drop; |
353 | 359 | ||
354 | rt = (struct rtable*)skb->dst; | 360 | rt = skb->rtable; |
355 | if (rt->rt_type == RTN_MULTICAST) | 361 | if (rt->rt_type == RTN_MULTICAST) |
356 | IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS); | 362 | IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS); |
357 | else if (rt->rt_type == RTN_BROADCAST) | 363 | else if (rt->rt_type == RTN_BROADCAST) |
@@ -372,9 +378,6 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, | |||
372 | struct iphdr *iph; | 378 | struct iphdr *iph; |
373 | u32 len; | 379 | u32 len; |
374 | 380 | ||
375 | if (dev->nd_net != &init_net) | ||
376 | goto drop; | ||
377 | |||
378 | /* When the interface is in promisc. mode, drop all the crap | 381 | /* When the interface is in promisc. mode, drop all the crap |
379 | * that it receives, do not try to analyse it. | 382 | * that it receives, do not try to analyse it. |
380 | */ | 383 | */ |
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 4d315158fd3c..d107543d3f81 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
@@ -45,7 +45,6 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt, | |||
45 | memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options)); | 45 | memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options)); |
46 | memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen); | 46 | memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen); |
47 | opt = &(IPCB(skb)->opt); | 47 | opt = &(IPCB(skb)->opt); |
48 | opt->is_data = 0; | ||
49 | 48 | ||
50 | if (opt->srr) | 49 | if (opt->srr) |
51 | memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4); | 50 | memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4); |
@@ -95,8 +94,6 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) | |||
95 | 94 | ||
96 | memset(dopt, 0, sizeof(struct ip_options)); | 95 | memset(dopt, 0, sizeof(struct ip_options)); |
97 | 96 | ||
98 | dopt->is_data = 1; | ||
99 | |||
100 | sopt = &(IPCB(skb)->opt); | 97 | sopt = &(IPCB(skb)->opt); |
101 | 98 | ||
102 | if (sopt->optlen == 0) { | 99 | if (sopt->optlen == 0) { |
@@ -107,10 +104,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) | |||
107 | sptr = skb_network_header(skb); | 104 | sptr = skb_network_header(skb); |
108 | dptr = dopt->__data; | 105 | dptr = dopt->__data; |
109 | 106 | ||
110 | if (skb->dst) | 107 | daddr = skb->rtable->rt_spec_dst; |
111 | daddr = ((struct rtable*)skb->dst)->rt_spec_dst; | ||
112 | else | ||
113 | daddr = ip_hdr(skb)->daddr; | ||
114 | 108 | ||
115 | if (sopt->rr) { | 109 | if (sopt->rr) { |
116 | optlen = sptr[sopt->rr+1]; | 110 | optlen = sptr[sopt->rr+1]; |
@@ -151,7 +145,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) | |||
151 | __be32 addr; | 145 | __be32 addr; |
152 | 146 | ||
153 | memcpy(&addr, sptr+soffset-1, 4); | 147 | memcpy(&addr, sptr+soffset-1, 4); |
154 | if (inet_addr_type(&init_net, addr) != RTN_LOCAL) { | 148 | if (inet_addr_type(dev_net(skb->dst->dev), addr) != RTN_LOCAL) { |
155 | dopt->ts_needtime = 1; | 149 | dopt->ts_needtime = 1; |
156 | soffset += 8; | 150 | soffset += 8; |
157 | } | 151 | } |
@@ -254,26 +248,22 @@ void ip_options_fragment(struct sk_buff * skb) | |||
254 | * If opt == NULL, then skb->data should point to IP header. | 248 | * If opt == NULL, then skb->data should point to IP header. |
255 | */ | 249 | */ |
256 | 250 | ||
257 | int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) | 251 | int ip_options_compile(struct net *net, |
252 | struct ip_options * opt, struct sk_buff * skb) | ||
258 | { | 253 | { |
259 | int l; | 254 | int l; |
260 | unsigned char * iph; | 255 | unsigned char * iph; |
261 | unsigned char * optptr; | 256 | unsigned char * optptr; |
262 | int optlen; | 257 | int optlen; |
263 | unsigned char * pp_ptr = NULL; | 258 | unsigned char * pp_ptr = NULL; |
264 | struct rtable *rt = skb ? (struct rtable*)skb->dst : NULL; | 259 | struct rtable *rt = NULL; |
265 | 260 | ||
266 | if (!opt) { | 261 | if (skb != NULL) { |
267 | opt = &(IPCB(skb)->opt); | 262 | rt = skb->rtable; |
268 | iph = skb_network_header(skb); | 263 | optptr = (unsigned char *)&(ip_hdr(skb)[1]); |
269 | opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr); | 264 | } else |
270 | optptr = iph + sizeof(struct iphdr); | 265 | optptr = opt->__data; |
271 | opt->is_data = 0; | 266 | iph = optptr - sizeof(struct iphdr); |
272 | } else { | ||
273 | optptr = opt->is_data ? opt->__data : | ||
274 | (unsigned char *)&(ip_hdr(skb)[1]); | ||
275 | iph = optptr - sizeof(struct iphdr); | ||
276 | } | ||
277 | 267 | ||
278 | for (l = opt->optlen; l > 0; ) { | 268 | for (l = opt->optlen; l > 0; ) { |
279 | switch (*optptr) { | 269 | switch (*optptr) { |
@@ -400,7 +390,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) | |||
400 | { | 390 | { |
401 | __be32 addr; | 391 | __be32 addr; |
402 | memcpy(&addr, &optptr[optptr[2]-1], 4); | 392 | memcpy(&addr, &optptr[optptr[2]-1], 4); |
403 | if (inet_addr_type(&init_net, addr) == RTN_UNICAST) | 393 | if (inet_addr_type(net, addr) == RTN_UNICAST) |
404 | break; | 394 | break; |
405 | if (skb) | 395 | if (skb) |
406 | timeptr = (__be32*)&optptr[optptr[2]+3]; | 396 | timeptr = (__be32*)&optptr[optptr[2]+3]; |
@@ -517,14 +507,13 @@ static struct ip_options *ip_options_get_alloc(const int optlen) | |||
517 | GFP_KERNEL); | 507 | GFP_KERNEL); |
518 | } | 508 | } |
519 | 509 | ||
520 | static int ip_options_get_finish(struct ip_options **optp, | 510 | static int ip_options_get_finish(struct net *net, struct ip_options **optp, |
521 | struct ip_options *opt, int optlen) | 511 | struct ip_options *opt, int optlen) |
522 | { | 512 | { |
523 | while (optlen & 3) | 513 | while (optlen & 3) |
524 | opt->__data[optlen++] = IPOPT_END; | 514 | opt->__data[optlen++] = IPOPT_END; |
525 | opt->optlen = optlen; | 515 | opt->optlen = optlen; |
526 | opt->is_data = 1; | 516 | if (optlen && ip_options_compile(net, opt, NULL)) { |
527 | if (optlen && ip_options_compile(opt, NULL)) { | ||
528 | kfree(opt); | 517 | kfree(opt); |
529 | return -EINVAL; | 518 | return -EINVAL; |
530 | } | 519 | } |
@@ -533,7 +522,8 @@ static int ip_options_get_finish(struct ip_options **optp, | |||
533 | return 0; | 522 | return 0; |
534 | } | 523 | } |
535 | 524 | ||
536 | int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *data, int optlen) | 525 | int ip_options_get_from_user(struct net *net, struct ip_options **optp, |
526 | unsigned char __user *data, int optlen) | ||
537 | { | 527 | { |
538 | struct ip_options *opt = ip_options_get_alloc(optlen); | 528 | struct ip_options *opt = ip_options_get_alloc(optlen); |
539 | 529 | ||
@@ -543,10 +533,11 @@ int ip_options_get_from_user(struct ip_options **optp, unsigned char __user *dat | |||
543 | kfree(opt); | 533 | kfree(opt); |
544 | return -EFAULT; | 534 | return -EFAULT; |
545 | } | 535 | } |
546 | return ip_options_get_finish(optp, opt, optlen); | 536 | return ip_options_get_finish(net, optp, opt, optlen); |
547 | } | 537 | } |
548 | 538 | ||
549 | int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen) | 539 | int ip_options_get(struct net *net, struct ip_options **optp, |
540 | unsigned char *data, int optlen) | ||
550 | { | 541 | { |
551 | struct ip_options *opt = ip_options_get_alloc(optlen); | 542 | struct ip_options *opt = ip_options_get_alloc(optlen); |
552 | 543 | ||
@@ -554,14 +545,14 @@ int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen) | |||
554 | return -ENOMEM; | 545 | return -ENOMEM; |
555 | if (optlen) | 546 | if (optlen) |
556 | memcpy(opt->__data, data, optlen); | 547 | memcpy(opt->__data, data, optlen); |
557 | return ip_options_get_finish(optp, opt, optlen); | 548 | return ip_options_get_finish(net, optp, opt, optlen); |
558 | } | 549 | } |
559 | 550 | ||
560 | void ip_forward_options(struct sk_buff *skb) | 551 | void ip_forward_options(struct sk_buff *skb) |
561 | { | 552 | { |
562 | struct ip_options * opt = &(IPCB(skb)->opt); | 553 | struct ip_options * opt = &(IPCB(skb)->opt); |
563 | unsigned char * optptr; | 554 | unsigned char * optptr; |
564 | struct rtable *rt = (struct rtable*)skb->dst; | 555 | struct rtable *rt = skb->rtable; |
565 | unsigned char *raw = skb_network_header(skb); | 556 | unsigned char *raw = skb_network_header(skb); |
566 | 557 | ||
567 | if (opt->rr_needaddr) { | 558 | if (opt->rr_needaddr) { |
@@ -609,7 +600,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) | |||
609 | __be32 nexthop; | 600 | __be32 nexthop; |
610 | struct iphdr *iph = ip_hdr(skb); | 601 | struct iphdr *iph = ip_hdr(skb); |
611 | unsigned char *optptr = skb_network_header(skb) + opt->srr; | 602 | unsigned char *optptr = skb_network_header(skb) + opt->srr; |
612 | struct rtable *rt = (struct rtable*)skb->dst; | 603 | struct rtable *rt = skb->rtable; |
613 | struct rtable *rt2; | 604 | struct rtable *rt2; |
614 | int err; | 605 | int err; |
615 | 606 | ||
@@ -634,13 +625,13 @@ int ip_options_rcv_srr(struct sk_buff *skb) | |||
634 | } | 625 | } |
635 | memcpy(&nexthop, &optptr[srrptr-1], 4); | 626 | memcpy(&nexthop, &optptr[srrptr-1], 4); |
636 | 627 | ||
637 | rt = (struct rtable*)skb->dst; | 628 | rt = skb->rtable; |
638 | skb->dst = NULL; | 629 | skb->rtable = NULL; |
639 | err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); | 630 | err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); |
640 | rt2 = (struct rtable*)skb->dst; | 631 | rt2 = skb->rtable; |
641 | if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { | 632 | if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { |
642 | ip_rt_put(rt2); | 633 | ip_rt_put(rt2); |
643 | skb->dst = &rt->u.dst; | 634 | skb->rtable = rt; |
644 | return -EINVAL; | 635 | return -EINVAL; |
645 | } | 636 | } |
646 | ip_rt_put(rt); | 637 | ip_rt_put(rt); |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 341779e685d9..08349267ceb4 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -142,7 +142,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
142 | __be32 saddr, __be32 daddr, struct ip_options *opt) | 142 | __be32 saddr, __be32 daddr, struct ip_options *opt) |
143 | { | 143 | { |
144 | struct inet_sock *inet = inet_sk(sk); | 144 | struct inet_sock *inet = inet_sk(sk); |
145 | struct rtable *rt = (struct rtable *)skb->dst; | 145 | struct rtable *rt = skb->rtable; |
146 | struct iphdr *iph; | 146 | struct iphdr *iph; |
147 | 147 | ||
148 | /* Build the IP header. */ | 148 | /* Build the IP header. */ |
@@ -240,7 +240,7 @@ static int ip_finish_output(struct sk_buff *skb) | |||
240 | int ip_mc_output(struct sk_buff *skb) | 240 | int ip_mc_output(struct sk_buff *skb) |
241 | { | 241 | { |
242 | struct sock *sk = skb->sk; | 242 | struct sock *sk = skb->sk; |
243 | struct rtable *rt = (struct rtable*)skb->dst; | 243 | struct rtable *rt = skb->rtable; |
244 | struct net_device *dev = rt->u.dst.dev; | 244 | struct net_device *dev = rt->u.dst.dev; |
245 | 245 | ||
246 | /* | 246 | /* |
@@ -321,7 +321,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) | |||
321 | /* Skip all of this if the packet is already routed, | 321 | /* Skip all of this if the packet is already routed, |
322 | * f.e. by something like SCTP. | 322 | * f.e. by something like SCTP. |
323 | */ | 323 | */ |
324 | rt = (struct rtable *) skb->dst; | 324 | rt = skb->rtable; |
325 | if (rt != NULL) | 325 | if (rt != NULL) |
326 | goto packet_routed; | 326 | goto packet_routed; |
327 | 327 | ||
@@ -351,7 +351,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) | |||
351 | * itself out. | 351 | * itself out. |
352 | */ | 352 | */ |
353 | security_sk_classify_flow(sk, &fl); | 353 | security_sk_classify_flow(sk, &fl); |
354 | if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0)) | 354 | if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) |
355 | goto no_route; | 355 | goto no_route; |
356 | } | 356 | } |
357 | sk_setup_caps(sk, &rt->u.dst); | 357 | sk_setup_caps(sk, &rt->u.dst); |
@@ -441,7 +441,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) | |||
441 | unsigned int mtu, hlen, left, len, ll_rs, pad; | 441 | unsigned int mtu, hlen, left, len, ll_rs, pad; |
442 | int offset; | 442 | int offset; |
443 | __be16 not_last_frag; | 443 | __be16 not_last_frag; |
444 | struct rtable *rt = (struct rtable*)skb->dst; | 444 | struct rtable *rt = skb->rtable; |
445 | int err = 0; | 445 | int err = 0; |
446 | 446 | ||
447 | dev = rt->u.dst.dev; | 447 | dev = rt->u.dst.dev; |
@@ -825,7 +825,7 @@ int ip_append_data(struct sock *sk, | |||
825 | inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? | 825 | inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? |
826 | rt->u.dst.dev->mtu : | 826 | rt->u.dst.dev->mtu : |
827 | dst_mtu(rt->u.dst.path); | 827 | dst_mtu(rt->u.dst.path); |
828 | inet->cork.rt = rt; | 828 | inet->cork.dst = &rt->u.dst; |
829 | inet->cork.length = 0; | 829 | inet->cork.length = 0; |
830 | sk->sk_sndmsg_page = NULL; | 830 | sk->sk_sndmsg_page = NULL; |
831 | sk->sk_sndmsg_off = 0; | 831 | sk->sk_sndmsg_off = 0; |
@@ -834,7 +834,7 @@ int ip_append_data(struct sock *sk, | |||
834 | transhdrlen += exthdrlen; | 834 | transhdrlen += exthdrlen; |
835 | } | 835 | } |
836 | } else { | 836 | } else { |
837 | rt = inet->cork.rt; | 837 | rt = (struct rtable *)inet->cork.dst; |
838 | if (inet->cork.flags & IPCORK_OPT) | 838 | if (inet->cork.flags & IPCORK_OPT) |
839 | opt = inet->cork.opt; | 839 | opt = inet->cork.opt; |
840 | 840 | ||
@@ -1083,7 +1083,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, | |||
1083 | if (skb_queue_empty(&sk->sk_write_queue)) | 1083 | if (skb_queue_empty(&sk->sk_write_queue)) |
1084 | return -EINVAL; | 1084 | return -EINVAL; |
1085 | 1085 | ||
1086 | rt = inet->cork.rt; | 1086 | rt = (struct rtable *)inet->cork.dst; |
1087 | if (inet->cork.flags & IPCORK_OPT) | 1087 | if (inet->cork.flags & IPCORK_OPT) |
1088 | opt = inet->cork.opt; | 1088 | opt = inet->cork.opt; |
1089 | 1089 | ||
@@ -1208,10 +1208,8 @@ static void ip_cork_release(struct inet_sock *inet) | |||
1208 | inet->cork.flags &= ~IPCORK_OPT; | 1208 | inet->cork.flags &= ~IPCORK_OPT; |
1209 | kfree(inet->cork.opt); | 1209 | kfree(inet->cork.opt); |
1210 | inet->cork.opt = NULL; | 1210 | inet->cork.opt = NULL; |
1211 | if (inet->cork.rt) { | 1211 | dst_release(inet->cork.dst); |
1212 | ip_rt_put(inet->cork.rt); | 1212 | inet->cork.dst = NULL; |
1213 | inet->cork.rt = NULL; | ||
1214 | } | ||
1215 | } | 1213 | } |
1216 | 1214 | ||
1217 | /* | 1215 | /* |
@@ -1224,7 +1222,7 @@ int ip_push_pending_frames(struct sock *sk) | |||
1224 | struct sk_buff **tail_skb; | 1222 | struct sk_buff **tail_skb; |
1225 | struct inet_sock *inet = inet_sk(sk); | 1223 | struct inet_sock *inet = inet_sk(sk); |
1226 | struct ip_options *opt = NULL; | 1224 | struct ip_options *opt = NULL; |
1227 | struct rtable *rt = inet->cork.rt; | 1225 | struct rtable *rt = (struct rtable *)inet->cork.dst; |
1228 | struct iphdr *iph; | 1226 | struct iphdr *iph; |
1229 | __be16 df = 0; | 1227 | __be16 df = 0; |
1230 | __u8 ttl; | 1228 | __u8 ttl; |
@@ -1357,7 +1355,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1357 | } replyopts; | 1355 | } replyopts; |
1358 | struct ipcm_cookie ipc; | 1356 | struct ipcm_cookie ipc; |
1359 | __be32 daddr; | 1357 | __be32 daddr; |
1360 | struct rtable *rt = (struct rtable*)skb->dst; | 1358 | struct rtable *rt = skb->rtable; |
1361 | 1359 | ||
1362 | if (ip_options_echo(&replyopts.opt, skb)) | 1360 | if (ip_options_echo(&replyopts.opt, skb)) |
1363 | return; | 1361 | return; |
@@ -1384,7 +1382,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1384 | .dport = tcp_hdr(skb)->source } }, | 1382 | .dport = tcp_hdr(skb)->source } }, |
1385 | .proto = sk->sk_protocol }; | 1383 | .proto = sk->sk_protocol }; |
1386 | security_skb_classify_flow(skb, &fl); | 1384 | security_skb_classify_flow(skb, &fl); |
1387 | if (ip_route_output_key(sk->sk_net, &rt, &fl)) | 1385 | if (ip_route_output_key(sock_net(sk), &rt, &fl)) |
1388 | return; | 1386 | return; |
1389 | } | 1387 | } |
1390 | 1388 | ||
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c2921d01e925..d8adfd4972e2 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -57,7 +57,7 @@ | |||
57 | static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) | 57 | static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) |
58 | { | 58 | { |
59 | struct in_pktinfo info; | 59 | struct in_pktinfo info; |
60 | struct rtable *rt = (struct rtable *)skb->dst; | 60 | struct rtable *rt = skb->rtable; |
61 | 61 | ||
62 | info.ipi_addr.s_addr = ip_hdr(skb)->daddr; | 62 | info.ipi_addr.s_addr = ip_hdr(skb)->daddr; |
63 | if (rt) { | 63 | if (rt) { |
@@ -163,7 +163,7 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) | |||
163 | ip_cmsg_recv_security(msg, skb); | 163 | ip_cmsg_recv_security(msg, skb); |
164 | } | 164 | } |
165 | 165 | ||
166 | int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc) | 166 | int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) |
167 | { | 167 | { |
168 | int err; | 168 | int err; |
169 | struct cmsghdr *cmsg; | 169 | struct cmsghdr *cmsg; |
@@ -176,7 +176,7 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc) | |||
176 | switch (cmsg->cmsg_type) { | 176 | switch (cmsg->cmsg_type) { |
177 | case IP_RETOPTS: | 177 | case IP_RETOPTS: |
178 | err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); | 178 | err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); |
179 | err = ip_options_get(&ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40); | 179 | err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40); |
180 | if (err) | 180 | if (err) |
181 | return err; | 181 | return err; |
182 | break; | 182 | break; |
@@ -449,7 +449,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
449 | struct ip_options * opt = NULL; | 449 | struct ip_options * opt = NULL; |
450 | if (optlen > 40 || optlen < 0) | 450 | if (optlen > 40 || optlen < 0) |
451 | goto e_inval; | 451 | goto e_inval; |
452 | err = ip_options_get_from_user(&opt, optval, optlen); | 452 | err = ip_options_get_from_user(sock_net(sk), &opt, |
453 | optval, optlen); | ||
453 | if (err) | 454 | if (err) |
454 | break; | 455 | break; |
455 | if (inet->is_icsk) { | 456 | if (inet->is_icsk) { |
@@ -589,13 +590,13 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
589 | err = 0; | 590 | err = 0; |
590 | break; | 591 | break; |
591 | } | 592 | } |
592 | dev = ip_dev_find(&init_net, mreq.imr_address.s_addr); | 593 | dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr); |
593 | if (dev) { | 594 | if (dev) { |
594 | mreq.imr_ifindex = dev->ifindex; | 595 | mreq.imr_ifindex = dev->ifindex; |
595 | dev_put(dev); | 596 | dev_put(dev); |
596 | } | 597 | } |
597 | } else | 598 | } else |
598 | dev = __dev_get_by_index(&init_net, mreq.imr_ifindex); | 599 | dev = __dev_get_by_index(sock_net(sk), mreq.imr_ifindex); |
599 | 600 | ||
600 | 601 | ||
601 | err = -EADDRNOTAVAIL; | 602 | err = -EADDRNOTAVAIL; |
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 58b60b2fb011..fb53ddfea5b5 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c | |||
@@ -179,7 +179,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) | |||
179 | spi, IPPROTO_COMP, AF_INET); | 179 | spi, IPPROTO_COMP, AF_INET); |
180 | if (!x) | 180 | if (!x) |
181 | return; | 181 | return; |
182 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n", | 182 | NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/" NIPQUAD_FMT "\n", |
183 | spi, NIPQUAD(iph->daddr)); | 183 | spi, NIPQUAD(iph->daddr)); |
184 | xfrm_state_put(x); | 184 | xfrm_state_put(x); |
185 | } | 185 | } |
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 4824fe8996bf..0f42d1c1f690 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -292,7 +292,7 @@ static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg) | |||
292 | 292 | ||
293 | mm_segment_t oldfs = get_fs(); | 293 | mm_segment_t oldfs = get_fs(); |
294 | set_fs(get_ds()); | 294 | set_fs(get_ds()); |
295 | res = devinet_ioctl(cmd, (struct ifreq __user *) arg); | 295 | res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg); |
296 | set_fs(oldfs); | 296 | set_fs(oldfs); |
297 | return res; | 297 | return res; |
298 | } | 298 | } |
@@ -376,7 +376,7 @@ static int __init ic_defaults(void) | |||
376 | */ | 376 | */ |
377 | 377 | ||
378 | if (!ic_host_name_set) | 378 | if (!ic_host_name_set) |
379 | sprintf(init_utsname()->nodename, "%u.%u.%u.%u", NIPQUAD(ic_myaddr)); | 379 | sprintf(init_utsname()->nodename, NIPQUAD_FMT, NIPQUAD(ic_myaddr)); |
380 | 380 | ||
381 | if (root_server_addr == NONE) | 381 | if (root_server_addr == NONE) |
382 | root_server_addr = ic_servaddr; | 382 | root_server_addr = ic_servaddr; |
@@ -389,11 +389,11 @@ static int __init ic_defaults(void) | |||
389 | else if (IN_CLASSC(ntohl(ic_myaddr))) | 389 | else if (IN_CLASSC(ntohl(ic_myaddr))) |
390 | ic_netmask = htonl(IN_CLASSC_NET); | 390 | ic_netmask = htonl(IN_CLASSC_NET); |
391 | else { | 391 | else { |
392 | printk(KERN_ERR "IP-Config: Unable to guess netmask for address %u.%u.%u.%u\n", | 392 | printk(KERN_ERR "IP-Config: Unable to guess netmask for address " NIPQUAD_FMT "\n", |
393 | NIPQUAD(ic_myaddr)); | 393 | NIPQUAD(ic_myaddr)); |
394 | return -1; | 394 | return -1; |
395 | } | 395 | } |
396 | printk("IP-Config: Guessing netmask %u.%u.%u.%u\n", NIPQUAD(ic_netmask)); | 396 | printk("IP-Config: Guessing netmask " NIPQUAD_FMT "\n", NIPQUAD(ic_netmask)); |
397 | } | 397 | } |
398 | 398 | ||
399 | return 0; | 399 | return 0; |
@@ -434,7 +434,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt | |||
434 | unsigned char *sha, *tha; /* s for "source", t for "target" */ | 434 | unsigned char *sha, *tha; /* s for "source", t for "target" */ |
435 | struct ic_device *d; | 435 | struct ic_device *d; |
436 | 436 | ||
437 | if (dev->nd_net != &init_net) | 437 | if (dev_net(dev) != &init_net) |
438 | goto drop; | 438 | goto drop; |
439 | 439 | ||
440 | if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) | 440 | if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) |
@@ -460,10 +460,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt | |||
460 | if (rarp->ar_pro != htons(ETH_P_IP)) | 460 | if (rarp->ar_pro != htons(ETH_P_IP)) |
461 | goto drop; | 461 | goto drop; |
462 | 462 | ||
463 | if (!pskb_may_pull(skb, | 463 | if (!pskb_may_pull(skb, arp_hdr_len(dev))) |
464 | sizeof(struct arphdr) + | ||
465 | (2 * dev->addr_len) + | ||
466 | (2 * 4))) | ||
467 | goto drop; | 464 | goto drop; |
468 | 465 | ||
469 | /* OK, it is all there and looks valid, process... */ | 466 | /* OK, it is all there and looks valid, process... */ |
@@ -857,7 +854,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
857 | struct ic_device *d; | 854 | struct ic_device *d; |
858 | int len, ext_len; | 855 | int len, ext_len; |
859 | 856 | ||
860 | if (dev->nd_net != &init_net) | 857 | if (dev_net(dev) != &init_net) |
861 | goto drop; | 858 | goto drop; |
862 | 859 | ||
863 | /* Perform verifications before taking the lock. */ | 860 | /* Perform verifications before taking the lock. */ |
@@ -984,9 +981,9 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
984 | ic_myaddr = b->your_ip; | 981 | ic_myaddr = b->your_ip; |
985 | ic_servaddr = server_id; | 982 | ic_servaddr = server_id; |
986 | #ifdef IPCONFIG_DEBUG | 983 | #ifdef IPCONFIG_DEBUG |
987 | printk("DHCP: Offered address %u.%u.%u.%u", | 984 | printk("DHCP: Offered address " NIPQUAD_FMT, |
988 | NIPQUAD(ic_myaddr)); | 985 | NIPQUAD(ic_myaddr)); |
989 | printk(" by server %u.%u.%u.%u\n", | 986 | printk(" by server " NIPQUAD_FMT "\n", |
990 | NIPQUAD(ic_servaddr)); | 987 | NIPQUAD(ic_servaddr)); |
991 | #endif | 988 | #endif |
992 | /* The DHCP indicated server address takes | 989 | /* The DHCP indicated server address takes |
@@ -1182,11 +1179,11 @@ static int __init ic_dynamic(void) | |||
1182 | return -1; | 1179 | return -1; |
1183 | } | 1180 | } |
1184 | 1181 | ||
1185 | printk("IP-Config: Got %s answer from %u.%u.%u.%u, ", | 1182 | printk("IP-Config: Got %s answer from " NIPQUAD_FMT ", ", |
1186 | ((ic_got_reply & IC_RARP) ? "RARP" | 1183 | ((ic_got_reply & IC_RARP) ? "RARP" |
1187 | : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), | 1184 | : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), |
1188 | NIPQUAD(ic_servaddr)); | 1185 | NIPQUAD(ic_servaddr)); |
1189 | printk("my address is %u.%u.%u.%u\n", NIPQUAD(ic_myaddr)); | 1186 | printk("my address is " NIPQUAD_FMT "\n", NIPQUAD(ic_myaddr)); |
1190 | 1187 | ||
1191 | return 0; | 1188 | return 0; |
1192 | } | 1189 | } |
@@ -1212,12 +1209,12 @@ static int pnp_seq_show(struct seq_file *seq, void *v) | |||
1212 | for (i = 0; i < CONF_NAMESERVERS_MAX; i++) { | 1209 | for (i = 0; i < CONF_NAMESERVERS_MAX; i++) { |
1213 | if (ic_nameservers[i] != NONE) | 1210 | if (ic_nameservers[i] != NONE) |
1214 | seq_printf(seq, | 1211 | seq_printf(seq, |
1215 | "nameserver %u.%u.%u.%u\n", | 1212 | "nameserver " NIPQUAD_FMT "\n", |
1216 | NIPQUAD(ic_nameservers[i])); | 1213 | NIPQUAD(ic_nameservers[i])); |
1217 | } | 1214 | } |
1218 | if (ic_servaddr != NONE) | 1215 | if (ic_servaddr != NONE) |
1219 | seq_printf(seq, | 1216 | seq_printf(seq, |
1220 | "bootserver %u.%u.%u.%u\n", | 1217 | "bootserver " NIPQUAD_FMT "\n", |
1221 | NIPQUAD(ic_servaddr)); | 1218 | NIPQUAD(ic_servaddr)); |
1222 | return 0; | 1219 | return 0; |
1223 | } | 1220 | } |
@@ -1392,13 +1389,13 @@ static int __init ip_auto_config(void) | |||
1392 | */ | 1389 | */ |
1393 | printk("IP-Config: Complete:"); | 1390 | printk("IP-Config: Complete:"); |
1394 | printk("\n device=%s", ic_dev->name); | 1391 | printk("\n device=%s", ic_dev->name); |
1395 | printk(", addr=%u.%u.%u.%u", NIPQUAD(ic_myaddr)); | 1392 | printk(", addr=" NIPQUAD_FMT, NIPQUAD(ic_myaddr)); |
1396 | printk(", mask=%u.%u.%u.%u", NIPQUAD(ic_netmask)); | 1393 | printk(", mask=" NIPQUAD_FMT, NIPQUAD(ic_netmask)); |
1397 | printk(", gw=%u.%u.%u.%u", NIPQUAD(ic_gateway)); | 1394 | printk(", gw=" NIPQUAD_FMT, NIPQUAD(ic_gateway)); |
1398 | printk(",\n host=%s, domain=%s, nis-domain=%s", | 1395 | printk(",\n host=%s, domain=%s, nis-domain=%s", |
1399 | utsname()->nodename, ic_domain, utsname()->domainname); | 1396 | utsname()->nodename, ic_domain, utsname()->domainname); |
1400 | printk(",\n bootserver=%u.%u.%u.%u", NIPQUAD(ic_servaddr)); | 1397 | printk(",\n bootserver=" NIPQUAD_FMT, NIPQUAD(ic_servaddr)); |
1401 | printk(", rootserver=%u.%u.%u.%u", NIPQUAD(root_server_addr)); | 1398 | printk(", rootserver=" NIPQUAD_FMT, NIPQUAD(root_server_addr)); |
1402 | printk(", rootpath=%s", root_server_path); | 1399 | printk(", rootpath=%s", root_server_path); |
1403 | printk("\n"); | 1400 | printk("\n"); |
1404 | #endif /* !SILENT */ | 1401 | #endif /* !SILENT */ |
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index dbaed69de06a..149111f08e8d 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -115,49 +115,57 @@ | |||
115 | #include <net/ipip.h> | 115 | #include <net/ipip.h> |
116 | #include <net/inet_ecn.h> | 116 | #include <net/inet_ecn.h> |
117 | #include <net/xfrm.h> | 117 | #include <net/xfrm.h> |
118 | #include <net/net_namespace.h> | ||
119 | #include <net/netns/generic.h> | ||
118 | 120 | ||
119 | #define HASH_SIZE 16 | 121 | #define HASH_SIZE 16 |
120 | #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) | 122 | #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) |
121 | 123 | ||
124 | static int ipip_net_id; | ||
125 | struct ipip_net { | ||
126 | struct ip_tunnel *tunnels_r_l[HASH_SIZE]; | ||
127 | struct ip_tunnel *tunnels_r[HASH_SIZE]; | ||
128 | struct ip_tunnel *tunnels_l[HASH_SIZE]; | ||
129 | struct ip_tunnel *tunnels_wc[1]; | ||
130 | struct ip_tunnel **tunnels[4]; | ||
131 | |||
132 | struct net_device *fb_tunnel_dev; | ||
133 | }; | ||
134 | |||
122 | static int ipip_fb_tunnel_init(struct net_device *dev); | 135 | static int ipip_fb_tunnel_init(struct net_device *dev); |
123 | static int ipip_tunnel_init(struct net_device *dev); | 136 | static int ipip_tunnel_init(struct net_device *dev); |
124 | static void ipip_tunnel_setup(struct net_device *dev); | 137 | static void ipip_tunnel_setup(struct net_device *dev); |
125 | 138 | ||
126 | static struct net_device *ipip_fb_tunnel_dev; | ||
127 | |||
128 | static struct ip_tunnel *tunnels_r_l[HASH_SIZE]; | ||
129 | static struct ip_tunnel *tunnels_r[HASH_SIZE]; | ||
130 | static struct ip_tunnel *tunnels_l[HASH_SIZE]; | ||
131 | static struct ip_tunnel *tunnels_wc[1]; | ||
132 | static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l }; | ||
133 | |||
134 | static DEFINE_RWLOCK(ipip_lock); | 139 | static DEFINE_RWLOCK(ipip_lock); |
135 | 140 | ||
136 | static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local) | 141 | static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, |
142 | __be32 remote, __be32 local) | ||
137 | { | 143 | { |
138 | unsigned h0 = HASH(remote); | 144 | unsigned h0 = HASH(remote); |
139 | unsigned h1 = HASH(local); | 145 | unsigned h1 = HASH(local); |
140 | struct ip_tunnel *t; | 146 | struct ip_tunnel *t; |
147 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
141 | 148 | ||
142 | for (t = tunnels_r_l[h0^h1]; t; t = t->next) { | 149 | for (t = ipn->tunnels_r_l[h0^h1]; t; t = t->next) { |
143 | if (local == t->parms.iph.saddr && | 150 | if (local == t->parms.iph.saddr && |
144 | remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) | 151 | remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) |
145 | return t; | 152 | return t; |
146 | } | 153 | } |
147 | for (t = tunnels_r[h0]; t; t = t->next) { | 154 | for (t = ipn->tunnels_r[h0]; t; t = t->next) { |
148 | if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) | 155 | if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) |
149 | return t; | 156 | return t; |
150 | } | 157 | } |
151 | for (t = tunnels_l[h1]; t; t = t->next) { | 158 | for (t = ipn->tunnels_l[h1]; t; t = t->next) { |
152 | if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) | 159 | if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) |
153 | return t; | 160 | return t; |
154 | } | 161 | } |
155 | if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) | 162 | if ((t = ipn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) |
156 | return t; | 163 | return t; |
157 | return NULL; | 164 | return NULL; |
158 | } | 165 | } |
159 | 166 | ||
160 | static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms) | 167 | static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn, |
168 | struct ip_tunnel_parm *parms) | ||
161 | { | 169 | { |
162 | __be32 remote = parms->iph.daddr; | 170 | __be32 remote = parms->iph.daddr; |
163 | __be32 local = parms->iph.saddr; | 171 | __be32 local = parms->iph.saddr; |
@@ -172,19 +180,20 @@ static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms) | |||
172 | prio |= 1; | 180 | prio |= 1; |
173 | h ^= HASH(local); | 181 | h ^= HASH(local); |
174 | } | 182 | } |
175 | return &tunnels[prio][h]; | 183 | return &ipn->tunnels[prio][h]; |
176 | } | 184 | } |
177 | 185 | ||
178 | static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t) | 186 | static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn, |
187 | struct ip_tunnel *t) | ||
179 | { | 188 | { |
180 | return __ipip_bucket(&t->parms); | 189 | return __ipip_bucket(ipn, &t->parms); |
181 | } | 190 | } |
182 | 191 | ||
183 | static void ipip_tunnel_unlink(struct ip_tunnel *t) | 192 | static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) |
184 | { | 193 | { |
185 | struct ip_tunnel **tp; | 194 | struct ip_tunnel **tp; |
186 | 195 | ||
187 | for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) { | 196 | for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) { |
188 | if (t == *tp) { | 197 | if (t == *tp) { |
189 | write_lock_bh(&ipip_lock); | 198 | write_lock_bh(&ipip_lock); |
190 | *tp = t->next; | 199 | *tp = t->next; |
@@ -194,9 +203,9 @@ static void ipip_tunnel_unlink(struct ip_tunnel *t) | |||
194 | } | 203 | } |
195 | } | 204 | } |
196 | 205 | ||
197 | static void ipip_tunnel_link(struct ip_tunnel *t) | 206 | static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) |
198 | { | 207 | { |
199 | struct ip_tunnel **tp = ipip_bucket(t); | 208 | struct ip_tunnel **tp = ipip_bucket(ipn, t); |
200 | 209 | ||
201 | t->next = *tp; | 210 | t->next = *tp; |
202 | write_lock_bh(&ipip_lock); | 211 | write_lock_bh(&ipip_lock); |
@@ -204,15 +213,17 @@ static void ipip_tunnel_link(struct ip_tunnel *t) | |||
204 | write_unlock_bh(&ipip_lock); | 213 | write_unlock_bh(&ipip_lock); |
205 | } | 214 | } |
206 | 215 | ||
207 | static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create) | 216 | static struct ip_tunnel * ipip_tunnel_locate(struct net *net, |
217 | struct ip_tunnel_parm *parms, int create) | ||
208 | { | 218 | { |
209 | __be32 remote = parms->iph.daddr; | 219 | __be32 remote = parms->iph.daddr; |
210 | __be32 local = parms->iph.saddr; | 220 | __be32 local = parms->iph.saddr; |
211 | struct ip_tunnel *t, **tp, *nt; | 221 | struct ip_tunnel *t, **tp, *nt; |
212 | struct net_device *dev; | 222 | struct net_device *dev; |
213 | char name[IFNAMSIZ]; | 223 | char name[IFNAMSIZ]; |
224 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
214 | 225 | ||
215 | for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) { | 226 | for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) { |
216 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) | 227 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) |
217 | return t; | 228 | return t; |
218 | } | 229 | } |
@@ -228,6 +239,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c | |||
228 | if (dev == NULL) | 239 | if (dev == NULL) |
229 | return NULL; | 240 | return NULL; |
230 | 241 | ||
242 | dev_net_set(dev, net); | ||
243 | |||
231 | if (strchr(name, '%')) { | 244 | if (strchr(name, '%')) { |
232 | if (dev_alloc_name(dev, name) < 0) | 245 | if (dev_alloc_name(dev, name) < 0) |
233 | goto failed_free; | 246 | goto failed_free; |
@@ -241,7 +254,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c | |||
241 | goto failed_free; | 254 | goto failed_free; |
242 | 255 | ||
243 | dev_hold(dev); | 256 | dev_hold(dev); |
244 | ipip_tunnel_link(nt); | 257 | ipip_tunnel_link(ipn, nt); |
245 | return nt; | 258 | return nt; |
246 | 259 | ||
247 | failed_free: | 260 | failed_free: |
@@ -251,12 +264,15 @@ failed_free: | |||
251 | 264 | ||
252 | static void ipip_tunnel_uninit(struct net_device *dev) | 265 | static void ipip_tunnel_uninit(struct net_device *dev) |
253 | { | 266 | { |
254 | if (dev == ipip_fb_tunnel_dev) { | 267 | struct net *net = dev_net(dev); |
268 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
269 | |||
270 | if (dev == ipn->fb_tunnel_dev) { | ||
255 | write_lock_bh(&ipip_lock); | 271 | write_lock_bh(&ipip_lock); |
256 | tunnels_wc[0] = NULL; | 272 | ipn->tunnels_wc[0] = NULL; |
257 | write_unlock_bh(&ipip_lock); | 273 | write_unlock_bh(&ipip_lock); |
258 | } else | 274 | } else |
259 | ipip_tunnel_unlink(netdev_priv(dev)); | 275 | ipip_tunnel_unlink(ipn, netdev_priv(dev)); |
260 | dev_put(dev); | 276 | dev_put(dev); |
261 | } | 277 | } |
262 | 278 | ||
@@ -305,7 +321,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
305 | err = -ENOENT; | 321 | err = -ENOENT; |
306 | 322 | ||
307 | read_lock(&ipip_lock); | 323 | read_lock(&ipip_lock); |
308 | t = ipip_tunnel_lookup(iph->daddr, iph->saddr); | 324 | t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); |
309 | if (t == NULL || t->parms.iph.daddr == 0) | 325 | if (t == NULL || t->parms.iph.daddr == 0) |
310 | goto out; | 326 | goto out; |
311 | 327 | ||
@@ -401,7 +417,7 @@ out: | |||
401 | fl.fl4_daddr = eiph->saddr; | 417 | fl.fl4_daddr = eiph->saddr; |
402 | fl.fl4_tos = RT_TOS(eiph->tos); | 418 | fl.fl4_tos = RT_TOS(eiph->tos); |
403 | fl.proto = IPPROTO_IPIP; | 419 | fl.proto = IPPROTO_IPIP; |
404 | if (ip_route_output_key(&init_net, &rt, &key)) { | 420 | if (ip_route_output_key(dev_net(skb->dev), &rt, &key)) { |
405 | kfree_skb(skb2); | 421 | kfree_skb(skb2); |
406 | return 0; | 422 | return 0; |
407 | } | 423 | } |
@@ -414,7 +430,7 @@ out: | |||
414 | fl.fl4_daddr = eiph->daddr; | 430 | fl.fl4_daddr = eiph->daddr; |
415 | fl.fl4_src = eiph->saddr; | 431 | fl.fl4_src = eiph->saddr; |
416 | fl.fl4_tos = eiph->tos; | 432 | fl.fl4_tos = eiph->tos; |
417 | if (ip_route_output_key(&init_net, &rt, &fl) || | 433 | if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || |
418 | rt->u.dst.dev->type != ARPHRD_TUNNEL) { | 434 | rt->u.dst.dev->type != ARPHRD_TUNNEL) { |
419 | ip_rt_put(rt); | 435 | ip_rt_put(rt); |
420 | kfree_skb(skb2); | 436 | kfree_skb(skb2); |
@@ -465,7 +481,8 @@ static int ipip_rcv(struct sk_buff *skb) | |||
465 | const struct iphdr *iph = ip_hdr(skb); | 481 | const struct iphdr *iph = ip_hdr(skb); |
466 | 482 | ||
467 | read_lock(&ipip_lock); | 483 | read_lock(&ipip_lock); |
468 | if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) { | 484 | if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev), |
485 | iph->saddr, iph->daddr)) != NULL) { | ||
469 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { | 486 | if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { |
470 | read_unlock(&ipip_lock); | 487 | read_unlock(&ipip_lock); |
471 | kfree_skb(skb); | 488 | kfree_skb(skb); |
@@ -528,7 +545,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
528 | 545 | ||
529 | if (!dst) { | 546 | if (!dst) { |
530 | /* NBMA tunnel */ | 547 | /* NBMA tunnel */ |
531 | if ((rt = (struct rtable*)skb->dst) == NULL) { | 548 | if ((rt = skb->rtable) == NULL) { |
532 | tunnel->stat.tx_fifo_errors++; | 549 | tunnel->stat.tx_fifo_errors++; |
533 | goto tx_error; | 550 | goto tx_error; |
534 | } | 551 | } |
@@ -543,7 +560,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
543 | .saddr = tiph->saddr, | 560 | .saddr = tiph->saddr, |
544 | .tos = RT_TOS(tos) } }, | 561 | .tos = RT_TOS(tos) } }, |
545 | .proto = IPPROTO_IPIP }; | 562 | .proto = IPPROTO_IPIP }; |
546 | if (ip_route_output_key(&init_net, &rt, &fl)) { | 563 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) { |
547 | tunnel->stat.tx_carrier_errors++; | 564 | tunnel->stat.tx_carrier_errors++; |
548 | goto tx_error_icmp; | 565 | goto tx_error_icmp; |
549 | } | 566 | } |
@@ -664,7 +681,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) | |||
664 | .tos = RT_TOS(iph->tos) } }, | 681 | .tos = RT_TOS(iph->tos) } }, |
665 | .proto = IPPROTO_IPIP }; | 682 | .proto = IPPROTO_IPIP }; |
666 | struct rtable *rt; | 683 | struct rtable *rt; |
667 | if (!ip_route_output_key(&init_net, &rt, &fl)) { | 684 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { |
668 | tdev = rt->u.dst.dev; | 685 | tdev = rt->u.dst.dev; |
669 | ip_rt_put(rt); | 686 | ip_rt_put(rt); |
670 | } | 687 | } |
@@ -672,7 +689,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) | |||
672 | } | 689 | } |
673 | 690 | ||
674 | if (!tdev && tunnel->parms.link) | 691 | if (!tdev && tunnel->parms.link) |
675 | tdev = __dev_get_by_index(&init_net, tunnel->parms.link); | 692 | tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); |
676 | 693 | ||
677 | if (tdev) { | 694 | if (tdev) { |
678 | dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); | 695 | dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); |
@@ -687,16 +704,18 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | |||
687 | int err = 0; | 704 | int err = 0; |
688 | struct ip_tunnel_parm p; | 705 | struct ip_tunnel_parm p; |
689 | struct ip_tunnel *t; | 706 | struct ip_tunnel *t; |
707 | struct net *net = dev_net(dev); | ||
708 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | ||
690 | 709 | ||
691 | switch (cmd) { | 710 | switch (cmd) { |
692 | case SIOCGETTUNNEL: | 711 | case SIOCGETTUNNEL: |
693 | t = NULL; | 712 | t = NULL; |
694 | if (dev == ipip_fb_tunnel_dev) { | 713 | if (dev == ipn->fb_tunnel_dev) { |
695 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { | 714 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { |
696 | err = -EFAULT; | 715 | err = -EFAULT; |
697 | break; | 716 | break; |
698 | } | 717 | } |
699 | t = ipip_tunnel_locate(&p, 0); | 718 | t = ipip_tunnel_locate(net, &p, 0); |
700 | } | 719 | } |
701 | if (t == NULL) | 720 | if (t == NULL) |
702 | t = netdev_priv(dev); | 721 | t = netdev_priv(dev); |
@@ -722,9 +741,9 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | |||
722 | if (p.iph.ttl) | 741 | if (p.iph.ttl) |
723 | p.iph.frag_off |= htons(IP_DF); | 742 | p.iph.frag_off |= htons(IP_DF); |
724 | 743 | ||
725 | t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL); | 744 | t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); |
726 | 745 | ||
727 | if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { | 746 | if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { |
728 | if (t != NULL) { | 747 | if (t != NULL) { |
729 | if (t->dev != dev) { | 748 | if (t->dev != dev) { |
730 | err = -EEXIST; | 749 | err = -EEXIST; |
@@ -737,12 +756,12 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | |||
737 | break; | 756 | break; |
738 | } | 757 | } |
739 | t = netdev_priv(dev); | 758 | t = netdev_priv(dev); |
740 | ipip_tunnel_unlink(t); | 759 | ipip_tunnel_unlink(ipn, t); |
741 | t->parms.iph.saddr = p.iph.saddr; | 760 | t->parms.iph.saddr = p.iph.saddr; |
742 | t->parms.iph.daddr = p.iph.daddr; | 761 | t->parms.iph.daddr = p.iph.daddr; |
743 | memcpy(dev->dev_addr, &p.iph.saddr, 4); | 762 | memcpy(dev->dev_addr, &p.iph.saddr, 4); |
744 | memcpy(dev->broadcast, &p.iph.daddr, 4); | 763 | memcpy(dev->broadcast, &p.iph.daddr, 4); |
745 | ipip_tunnel_link(t); | 764 | ipip_tunnel_link(ipn, t); |
746 | netdev_state_change(dev); | 765 | netdev_state_change(dev); |
747 | } | 766 | } |
748 | } | 767 | } |
@@ -770,15 +789,15 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) | |||
770 | if (!capable(CAP_NET_ADMIN)) | 789 | if (!capable(CAP_NET_ADMIN)) |
771 | goto done; | 790 | goto done; |
772 | 791 | ||
773 | if (dev == ipip_fb_tunnel_dev) { | 792 | if (dev == ipn->fb_tunnel_dev) { |
774 | err = -EFAULT; | 793 | err = -EFAULT; |
775 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) | 794 | if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) |
776 | goto done; | 795 | goto done; |
777 | err = -ENOENT; | 796 | err = -ENOENT; |
778 | if ((t = ipip_tunnel_locate(&p, 0)) == NULL) | 797 | if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL) |
779 | goto done; | 798 | goto done; |
780 | err = -EPERM; | 799 | err = -EPERM; |
781 | if (t->dev == ipip_fb_tunnel_dev) | 800 | if (t->dev == ipn->fb_tunnel_dev) |
782 | goto done; | 801 | goto done; |
783 | dev = t->dev; | 802 | dev = t->dev; |
784 | } | 803 | } |
@@ -822,6 +841,7 @@ static void ipip_tunnel_setup(struct net_device *dev) | |||
822 | dev->flags = IFF_NOARP; | 841 | dev->flags = IFF_NOARP; |
823 | dev->iflink = 0; | 842 | dev->iflink = 0; |
824 | dev->addr_len = 4; | 843 | dev->addr_len = 4; |
844 | dev->features |= NETIF_F_NETNS_LOCAL; | ||
825 | } | 845 | } |
826 | 846 | ||
827 | static int ipip_tunnel_init(struct net_device *dev) | 847 | static int ipip_tunnel_init(struct net_device *dev) |
@@ -841,10 +861,11 @@ static int ipip_tunnel_init(struct net_device *dev) | |||
841 | return 0; | 861 | return 0; |
842 | } | 862 | } |
843 | 863 | ||
844 | static int __init ipip_fb_tunnel_init(struct net_device *dev) | 864 | static int ipip_fb_tunnel_init(struct net_device *dev) |
845 | { | 865 | { |
846 | struct ip_tunnel *tunnel = netdev_priv(dev); | 866 | struct ip_tunnel *tunnel = netdev_priv(dev); |
847 | struct iphdr *iph = &tunnel->parms.iph; | 867 | struct iphdr *iph = &tunnel->parms.iph; |
868 | struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id); | ||
848 | 869 | ||
849 | tunnel->dev = dev; | 870 | tunnel->dev = dev; |
850 | strcpy(tunnel->parms.name, dev->name); | 871 | strcpy(tunnel->parms.name, dev->name); |
@@ -854,7 +875,7 @@ static int __init ipip_fb_tunnel_init(struct net_device *dev) | |||
854 | iph->ihl = 5; | 875 | iph->ihl = 5; |
855 | 876 | ||
856 | dev_hold(dev); | 877 | dev_hold(dev); |
857 | tunnels_wc[0] = tunnel; | 878 | ipn->tunnels_wc[0] = tunnel; |
858 | return 0; | 879 | return 0; |
859 | } | 880 | } |
860 | 881 | ||
@@ -867,50 +888,98 @@ static struct xfrm_tunnel ipip_handler = { | |||
867 | static char banner[] __initdata = | 888 | static char banner[] __initdata = |
868 | KERN_INFO "IPv4 over IPv4 tunneling driver\n"; | 889 | KERN_INFO "IPv4 over IPv4 tunneling driver\n"; |
869 | 890 | ||
870 | static int __init ipip_init(void) | 891 | static void ipip_destroy_tunnels(struct ipip_net *ipn) |
892 | { | ||
893 | int prio; | ||
894 | |||
895 | for (prio = 1; prio < 4; prio++) { | ||
896 | int h; | ||
897 | for (h = 0; h < HASH_SIZE; h++) { | ||
898 | struct ip_tunnel *t; | ||
899 | while ((t = ipn->tunnels[prio][h]) != NULL) | ||
900 | unregister_netdevice(t->dev); | ||
901 | } | ||
902 | } | ||
903 | } | ||
904 | |||
905 | static int ipip_init_net(struct net *net) | ||
871 | { | 906 | { |
872 | int err; | 907 | int err; |
908 | struct ipip_net *ipn; | ||
873 | 909 | ||
874 | printk(banner); | 910 | err = -ENOMEM; |
911 | ipn = kzalloc(sizeof(struct ipip_net), GFP_KERNEL); | ||
912 | if (ipn == NULL) | ||
913 | goto err_alloc; | ||
875 | 914 | ||
876 | if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) { | 915 | err = net_assign_generic(net, ipip_net_id, ipn); |
877 | printk(KERN_INFO "ipip init: can't register tunnel\n"); | 916 | if (err < 0) |
878 | return -EAGAIN; | 917 | goto err_assign; |
879 | } | ||
880 | 918 | ||
881 | ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), | 919 | ipn->tunnels[0] = ipn->tunnels_wc; |
920 | ipn->tunnels[1] = ipn->tunnels_l; | ||
921 | ipn->tunnels[2] = ipn->tunnels_r; | ||
922 | ipn->tunnels[3] = ipn->tunnels_r_l; | ||
923 | |||
924 | ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), | ||
882 | "tunl0", | 925 | "tunl0", |
883 | ipip_tunnel_setup); | 926 | ipip_tunnel_setup); |
884 | if (!ipip_fb_tunnel_dev) { | 927 | if (!ipn->fb_tunnel_dev) { |
885 | err = -ENOMEM; | 928 | err = -ENOMEM; |
886 | goto err1; | 929 | goto err_alloc_dev; |
887 | } | 930 | } |
888 | 931 | ||
889 | ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init; | 932 | ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init; |
933 | dev_net_set(ipn->fb_tunnel_dev, net); | ||
934 | |||
935 | if ((err = register_netdev(ipn->fb_tunnel_dev))) | ||
936 | goto err_reg_dev; | ||
937 | |||
938 | return 0; | ||
890 | 939 | ||
891 | if ((err = register_netdev(ipip_fb_tunnel_dev))) | 940 | err_reg_dev: |
892 | goto err2; | 941 | free_netdev(ipn->fb_tunnel_dev); |
893 | out: | 942 | err_alloc_dev: |
943 | /* nothing */ | ||
944 | err_assign: | ||
945 | kfree(ipn); | ||
946 | err_alloc: | ||
894 | return err; | 947 | return err; |
895 | err2: | ||
896 | free_netdev(ipip_fb_tunnel_dev); | ||
897 | err1: | ||
898 | xfrm4_tunnel_deregister(&ipip_handler, AF_INET); | ||
899 | goto out; | ||
900 | } | 948 | } |
901 | 949 | ||
902 | static void __exit ipip_destroy_tunnels(void) | 950 | static void ipip_exit_net(struct net *net) |
903 | { | 951 | { |
904 | int prio; | 952 | struct ipip_net *ipn; |
905 | 953 | ||
906 | for (prio = 1; prio < 4; prio++) { | 954 | ipn = net_generic(net, ipip_net_id); |
907 | int h; | 955 | rtnl_lock(); |
908 | for (h = 0; h < HASH_SIZE; h++) { | 956 | ipip_destroy_tunnels(ipn); |
909 | struct ip_tunnel *t; | 957 | unregister_netdevice(ipn->fb_tunnel_dev); |
910 | while ((t = tunnels[prio][h]) != NULL) | 958 | rtnl_unlock(); |
911 | unregister_netdevice(t->dev); | 959 | kfree(ipn); |
912 | } | 960 | } |
961 | |||
962 | static struct pernet_operations ipip_net_ops = { | ||
963 | .init = ipip_init_net, | ||
964 | .exit = ipip_exit_net, | ||
965 | }; | ||
966 | |||
967 | static int __init ipip_init(void) | ||
968 | { | ||
969 | int err; | ||
970 | |||
971 | printk(banner); | ||
972 | |||
973 | if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) { | ||
974 | printk(KERN_INFO "ipip init: can't register tunnel\n"); | ||
975 | return -EAGAIN; | ||
913 | } | 976 | } |
977 | |||
978 | err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops); | ||
979 | if (err) | ||
980 | xfrm4_tunnel_deregister(&ipip_handler, AF_INET); | ||
981 | |||
982 | return err; | ||
914 | } | 983 | } |
915 | 984 | ||
916 | static void __exit ipip_fini(void) | 985 | static void __exit ipip_fini(void) |
@@ -918,10 +987,7 @@ static void __exit ipip_fini(void) | |||
918 | if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) | 987 | if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) |
919 | printk(KERN_INFO "ipip close: can't deregister tunnel\n"); | 988 | printk(KERN_INFO "ipip close: can't deregister tunnel\n"); |
920 | 989 | ||
921 | rtnl_lock(); | 990 | unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops); |
922 | ipip_destroy_tunnels(); | ||
923 | unregister_netdevice(ipip_fb_tunnel_dev); | ||
924 | rtnl_unlock(); | ||
925 | } | 991 | } |
926 | 992 | ||
927 | module_init(ipip_init); | 993 | module_init(ipip_init); |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a94f52c207a7..11700a4dcd95 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -849,7 +849,7 @@ static void mrtsock_destruct(struct sock *sk) | |||
849 | { | 849 | { |
850 | rtnl_lock(); | 850 | rtnl_lock(); |
851 | if (sk == mroute_socket) { | 851 | if (sk == mroute_socket) { |
852 | IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)--; | 852 | IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--; |
853 | 853 | ||
854 | write_lock_bh(&mrt_lock); | 854 | write_lock_bh(&mrt_lock); |
855 | mroute_socket=NULL; | 855 | mroute_socket=NULL; |
@@ -898,7 +898,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt | |||
898 | mroute_socket=sk; | 898 | mroute_socket=sk; |
899 | write_unlock_bh(&mrt_lock); | 899 | write_unlock_bh(&mrt_lock); |
900 | 900 | ||
901 | IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)++; | 901 | IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++; |
902 | } | 902 | } |
903 | rtnl_unlock(); | 903 | rtnl_unlock(); |
904 | return ret; | 904 | return ret; |
@@ -1089,7 +1089,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v | |||
1089 | struct vif_device *v; | 1089 | struct vif_device *v; |
1090 | int ct; | 1090 | int ct; |
1091 | 1091 | ||
1092 | if (dev->nd_net != &init_net) | 1092 | if (dev_net(dev) != &init_net) |
1093 | return NOTIFY_DONE; | 1093 | return NOTIFY_DONE; |
1094 | 1094 | ||
1095 | if (event != NETDEV_UNREGISTER) | 1095 | if (event != NETDEV_UNREGISTER) |
@@ -1283,7 +1283,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local | |||
1283 | if (vif_table[vif].dev != skb->dev) { | 1283 | if (vif_table[vif].dev != skb->dev) { |
1284 | int true_vifi; | 1284 | int true_vifi; |
1285 | 1285 | ||
1286 | if (((struct rtable*)skb->dst)->fl.iif == 0) { | 1286 | if (skb->rtable->fl.iif == 0) { |
1287 | /* It is our own packet, looped back. | 1287 | /* It is our own packet, looped back. |
1288 | Very complicated situation... | 1288 | Very complicated situation... |
1289 | 1289 | ||
@@ -1357,7 +1357,7 @@ dont_forward: | |||
1357 | int ip_mr_input(struct sk_buff *skb) | 1357 | int ip_mr_input(struct sk_buff *skb) |
1358 | { | 1358 | { |
1359 | struct mfc_cache *cache; | 1359 | struct mfc_cache *cache; |
1360 | int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL; | 1360 | int local = skb->rtable->rt_flags&RTCF_LOCAL; |
1361 | 1361 | ||
1362 | /* Packet is looped back after forward, it should not be | 1362 | /* Packet is looped back after forward, it should not be |
1363 | forwarded second time, but still can be delivered locally. | 1363 | forwarded second time, but still can be delivered locally. |
@@ -1594,7 +1594,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) | |||
1594 | { | 1594 | { |
1595 | int err; | 1595 | int err; |
1596 | struct mfc_cache *cache; | 1596 | struct mfc_cache *cache; |
1597 | struct rtable *rt = (struct rtable*)skb->dst; | 1597 | struct rtable *rt = skb->rtable; |
1598 | 1598 | ||
1599 | read_lock(&mrt_lock); | 1599 | read_lock(&mrt_lock); |
1600 | cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); | 1600 | cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); |
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index 12dc0d640b6d..620e40ff79a9 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c | |||
@@ -550,7 +550,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) | |||
550 | 550 | ||
551 | IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" | 551 | IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" |
552 | "%u.%u.%u.%u:%u to app %s on port %u\n", | 552 | "%u.%u.%u.%u:%u to app %s on port %u\n", |
553 | __FUNCTION__, | 553 | __func__, |
554 | NIPQUAD(cp->caddr), ntohs(cp->cport), | 554 | NIPQUAD(cp->caddr), ntohs(cp->cport), |
555 | NIPQUAD(cp->vaddr), ntohs(cp->vport), | 555 | NIPQUAD(cp->vaddr), ntohs(cp->vport), |
556 | inc->name, ntohs(inc->port)); | 556 | inc->name, ntohs(inc->port)); |
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 1fa7b330b9ac..1caa2908373f 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c | |||
@@ -344,7 +344,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) | |||
344 | 344 | ||
345 | IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" | 345 | IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" |
346 | "%u.%u.%u.%u:%u to app %s on port %u\n", | 346 | "%u.%u.%u.%u:%u to app %s on port %u\n", |
347 | __FUNCTION__, | 347 | __func__, |
348 | NIPQUAD(cp->caddr), ntohs(cp->cport), | 348 | NIPQUAD(cp->caddr), ntohs(cp->cport), |
349 | NIPQUAD(cp->vaddr), ntohs(cp->vport), | 349 | NIPQUAD(cp->vaddr), ntohs(cp->vport), |
350 | inc->name, ntohs(inc->port)); | 350 | inc->name, ntohs(inc->port)); |
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 948378d0a755..69c56663cc9a 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c | |||
@@ -916,7 +916,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) | |||
916 | if (!tinfo) | 916 | if (!tinfo) |
917 | return -ENOMEM; | 917 | return -ENOMEM; |
918 | 918 | ||
919 | IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current)); | 919 | IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); |
920 | IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n", | 920 | IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n", |
921 | sizeof(struct ip_vs_sync_conn)); | 921 | sizeof(struct ip_vs_sync_conn)); |
922 | 922 | ||
@@ -956,7 +956,7 @@ int stop_sync_thread(int state) | |||
956 | (state == IP_VS_STATE_BACKUP && !sync_backup_pid)) | 956 | (state == IP_VS_STATE_BACKUP && !sync_backup_pid)) |
957 | return -ESRCH; | 957 | return -ESRCH; |
958 | 958 | ||
959 | IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current)); | 959 | IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); |
960 | IP_VS_INFO("stopping sync thread %d ...\n", | 960 | IP_VS_INFO("stopping sync thread %d ...\n", |
961 | (state == IP_VS_STATE_MASTER) ? | 961 | (state == IP_VS_STATE_MASTER) ? |
962 | sync_master_pid : sync_backup_pid); | 962 | sync_master_pid : sync_backup_pid); |
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 9a904c6c0dc8..f8edacdf991d 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
@@ -182,21 +182,44 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, | |||
182 | } | 182 | } |
183 | return csum; | 183 | return csum; |
184 | } | 184 | } |
185 | |||
186 | EXPORT_SYMBOL(nf_ip_checksum); | 185 | EXPORT_SYMBOL(nf_ip_checksum); |
187 | 186 | ||
187 | static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, | ||
188 | unsigned int dataoff, unsigned int len, | ||
189 | u_int8_t protocol) | ||
190 | { | ||
191 | const struct iphdr *iph = ip_hdr(skb); | ||
192 | __sum16 csum = 0; | ||
193 | |||
194 | switch (skb->ip_summed) { | ||
195 | case CHECKSUM_COMPLETE: | ||
196 | if (len == skb->len - dataoff) | ||
197 | return nf_ip_checksum(skb, hook, dataoff, protocol); | ||
198 | /* fall through */ | ||
199 | case CHECKSUM_NONE: | ||
200 | skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, | ||
201 | skb->len - dataoff, 0); | ||
202 | skb->ip_summed = CHECKSUM_NONE; | ||
203 | csum = __skb_checksum_complete_head(skb, dataoff + len); | ||
204 | if (!csum) | ||
205 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
206 | } | ||
207 | return csum; | ||
208 | } | ||
209 | |||
188 | static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) | 210 | static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) |
189 | { | 211 | { |
190 | return ip_route_output_key(&init_net, (struct rtable **)dst, fl); | 212 | return ip_route_output_key(&init_net, (struct rtable **)dst, fl); |
191 | } | 213 | } |
192 | 214 | ||
193 | static const struct nf_afinfo nf_ip_afinfo = { | 215 | static const struct nf_afinfo nf_ip_afinfo = { |
194 | .family = AF_INET, | 216 | .family = AF_INET, |
195 | .checksum = nf_ip_checksum, | 217 | .checksum = nf_ip_checksum, |
196 | .route = nf_ip_route, | 218 | .checksum_partial = nf_ip_checksum_partial, |
197 | .saveroute = nf_ip_saveroute, | 219 | .route = nf_ip_route, |
198 | .reroute = nf_ip_reroute, | 220 | .saveroute = nf_ip_saveroute, |
199 | .route_key_size = sizeof(struct ip_rt_info), | 221 | .reroute = nf_ip_reroute, |
222 | .route_key_size = sizeof(struct ip_rt_info), | ||
200 | }; | 223 | }; |
201 | 224 | ||
202 | static int ipv4_netfilter_init(void) | 225 | static int ipv4_netfilter_init(void) |
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 9a077cb24798..0c95cd5872f3 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
@@ -241,10 +241,25 @@ config NF_NAT_SNMP_BASIC | |||
241 | # <expr> '&&' <expr> (6) | 241 | # <expr> '&&' <expr> (6) |
242 | # | 242 | # |
243 | # (6) Returns the result of min(/expr/, /expr/). | 243 | # (6) Returns the result of min(/expr/, /expr/). |
244 | config NF_NAT_PROTO_DCCP | ||
245 | tristate | ||
246 | depends on NF_NAT && NF_CT_PROTO_DCCP | ||
247 | default NF_NAT && NF_CT_PROTO_DCCP | ||
248 | |||
244 | config NF_NAT_PROTO_GRE | 249 | config NF_NAT_PROTO_GRE |
245 | tristate | 250 | tristate |
246 | depends on NF_NAT && NF_CT_PROTO_GRE | 251 | depends on NF_NAT && NF_CT_PROTO_GRE |
247 | 252 | ||
253 | config NF_NAT_PROTO_UDPLITE | ||
254 | tristate | ||
255 | depends on NF_NAT && NF_CT_PROTO_UDPLITE | ||
256 | default NF_NAT && NF_CT_PROTO_UDPLITE | ||
257 | |||
258 | config NF_NAT_PROTO_SCTP | ||
259 | tristate | ||
260 | default NF_NAT && NF_CT_PROTO_SCTP | ||
261 | depends on NF_NAT && NF_CT_PROTO_SCTP | ||
262 | |||
248 | config NF_NAT_FTP | 263 | config NF_NAT_FTP |
249 | tristate | 264 | tristate |
250 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT | 265 | depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT |
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 0c7dc78a62e9..d9b92fbf5579 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
@@ -10,7 +10,7 @@ nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o | |||
10 | endif | 10 | endif |
11 | endif | 11 | endif |
12 | 12 | ||
13 | nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o | 13 | nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o |
14 | iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o | 14 | iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o |
15 | 15 | ||
16 | # connection tracking | 16 | # connection tracking |
@@ -29,7 +29,10 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o | |||
29 | obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o | 29 | obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o |
30 | 30 | ||
31 | # NAT protocols (nf_nat) | 31 | # NAT protocols (nf_nat) |
32 | obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o | ||
32 | obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o | 33 | obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o |
34 | obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o | ||
35 | obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o | ||
33 | 36 | ||
34 | # generic IP tables | 37 | # generic IP tables |
35 | obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o | 38 | obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index a7591ce344d2..03e83a65aec5 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -52,14 +52,14 @@ MODULE_DESCRIPTION("arptables core"); | |||
52 | do { \ | 52 | do { \ |
53 | if (!(x)) \ | 53 | if (!(x)) \ |
54 | printk("ARP_NF_ASSERT: %s:%s:%u\n", \ | 54 | printk("ARP_NF_ASSERT: %s:%s:%u\n", \ |
55 | __FUNCTION__, __FILE__, __LINE__); \ | 55 | __func__, __FILE__, __LINE__); \ |
56 | } while(0) | 56 | } while(0) |
57 | #else | 57 | #else |
58 | #define ARP_NF_ASSERT(x) | 58 | #define ARP_NF_ASSERT(x) |
59 | #endif | 59 | #endif |
60 | 60 | ||
61 | static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, | 61 | static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, |
62 | char *hdr_addr, int len) | 62 | const char *hdr_addr, int len) |
63 | { | 63 | { |
64 | int i, ret; | 64 | int i, ret; |
65 | 65 | ||
@@ -80,8 +80,8 @@ static inline int arp_packet_match(const struct arphdr *arphdr, | |||
80 | const char *outdev, | 80 | const char *outdev, |
81 | const struct arpt_arp *arpinfo) | 81 | const struct arpt_arp *arpinfo) |
82 | { | 82 | { |
83 | char *arpptr = (char *)(arphdr + 1); | 83 | const char *arpptr = (char *)(arphdr + 1); |
84 | char *src_devaddr, *tgt_devaddr; | 84 | const char *src_devaddr, *tgt_devaddr; |
85 | __be32 src_ipaddr, tgt_ipaddr; | 85 | __be32 src_ipaddr, tgt_ipaddr; |
86 | int i, ret; | 86 | int i, ret; |
87 | 87 | ||
@@ -222,21 +222,18 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
222 | unsigned int hook, | 222 | unsigned int hook, |
223 | const struct net_device *in, | 223 | const struct net_device *in, |
224 | const struct net_device *out, | 224 | const struct net_device *out, |
225 | struct arpt_table *table) | 225 | struct xt_table *table) |
226 | { | 226 | { |
227 | static const char nulldevname[IFNAMSIZ]; | 227 | static const char nulldevname[IFNAMSIZ]; |
228 | unsigned int verdict = NF_DROP; | 228 | unsigned int verdict = NF_DROP; |
229 | struct arphdr *arp; | 229 | const struct arphdr *arp; |
230 | bool hotdrop = false; | 230 | bool hotdrop = false; |
231 | struct arpt_entry *e, *back; | 231 | struct arpt_entry *e, *back; |
232 | const char *indev, *outdev; | 232 | const char *indev, *outdev; |
233 | void *table_base; | 233 | void *table_base; |
234 | struct xt_table_info *private; | 234 | const struct xt_table_info *private; |
235 | 235 | ||
236 | /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ | 236 | if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) |
237 | if (!pskb_may_pull(skb, (sizeof(struct arphdr) + | ||
238 | (2 * skb->dev->addr_len) + | ||
239 | (2 * sizeof(u32))))) | ||
240 | return NF_DROP; | 237 | return NF_DROP; |
241 | 238 | ||
242 | indev = in ? in->name : nulldevname; | 239 | indev = in ? in->name : nulldevname; |
@@ -355,7 +352,7 @@ static int mark_source_chains(struct xt_table_info *newinfo, | |||
355 | e->counters.pcnt = pos; | 352 | e->counters.pcnt = pos; |
356 | 353 | ||
357 | for (;;) { | 354 | for (;;) { |
358 | struct arpt_standard_target *t | 355 | const struct arpt_standard_target *t |
359 | = (void *)arpt_get_target(e); | 356 | = (void *)arpt_get_target(e); |
360 | int visited = e->comefrom & (1 << hook); | 357 | int visited = e->comefrom & (1 << hook); |
361 | 358 | ||
@@ -440,7 +437,7 @@ static int mark_source_chains(struct xt_table_info *newinfo, | |||
440 | 437 | ||
441 | static inline int check_entry(struct arpt_entry *e, const char *name) | 438 | static inline int check_entry(struct arpt_entry *e, const char *name) |
442 | { | 439 | { |
443 | struct arpt_entry_target *t; | 440 | const struct arpt_entry_target *t; |
444 | 441 | ||
445 | if (!arp_checkentry(&e->arp)) { | 442 | if (!arp_checkentry(&e->arp)) { |
446 | duprintf("arp_tables: arp check failed %p %s.\n", e, name); | 443 | duprintf("arp_tables: arp check failed %p %s.\n", e, name); |
@@ -460,7 +457,7 @@ static inline int check_entry(struct arpt_entry *e, const char *name) | |||
460 | static inline int check_target(struct arpt_entry *e, const char *name) | 457 | static inline int check_target(struct arpt_entry *e, const char *name) |
461 | { | 458 | { |
462 | struct arpt_entry_target *t; | 459 | struct arpt_entry_target *t; |
463 | struct arpt_target *target; | 460 | struct xt_target *target; |
464 | int ret; | 461 | int ret; |
465 | 462 | ||
466 | t = arpt_get_target(e); | 463 | t = arpt_get_target(e); |
@@ -483,7 +480,7 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, | |||
483 | unsigned int *i) | 480 | unsigned int *i) |
484 | { | 481 | { |
485 | struct arpt_entry_target *t; | 482 | struct arpt_entry_target *t; |
486 | struct arpt_target *target; | 483 | struct xt_target *target; |
487 | int ret; | 484 | int ret; |
488 | 485 | ||
489 | ret = check_entry(e, name); | 486 | ret = check_entry(e, name); |
@@ -709,11 +706,11 @@ static void get_counters(const struct xt_table_info *t, | |||
709 | } | 706 | } |
710 | } | 707 | } |
711 | 708 | ||
712 | static inline struct xt_counters *alloc_counters(struct arpt_table *table) | 709 | static inline struct xt_counters *alloc_counters(struct xt_table *table) |
713 | { | 710 | { |
714 | unsigned int countersize; | 711 | unsigned int countersize; |
715 | struct xt_counters *counters; | 712 | struct xt_counters *counters; |
716 | struct xt_table_info *private = table->private; | 713 | const struct xt_table_info *private = table->private; |
717 | 714 | ||
718 | /* We need atomic snapshot of counters: rest doesn't change | 715 | /* We need atomic snapshot of counters: rest doesn't change |
719 | * (other than comefrom, which userspace doesn't care | 716 | * (other than comefrom, which userspace doesn't care |
@@ -734,7 +731,7 @@ static inline struct xt_counters *alloc_counters(struct arpt_table *table) | |||
734 | } | 731 | } |
735 | 732 | ||
736 | static int copy_entries_to_user(unsigned int total_size, | 733 | static int copy_entries_to_user(unsigned int total_size, |
737 | struct arpt_table *table, | 734 | struct xt_table *table, |
738 | void __user *userptr) | 735 | void __user *userptr) |
739 | { | 736 | { |
740 | unsigned int off, num; | 737 | unsigned int off, num; |
@@ -854,7 +851,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
854 | static int get_info(struct net *net, void __user *user, int *len, int compat) | 851 | static int get_info(struct net *net, void __user *user, int *len, int compat) |
855 | { | 852 | { |
856 | char name[ARPT_TABLE_MAXNAMELEN]; | 853 | char name[ARPT_TABLE_MAXNAMELEN]; |
857 | struct arpt_table *t; | 854 | struct xt_table *t; |
858 | int ret; | 855 | int ret; |
859 | 856 | ||
860 | if (*len != sizeof(struct arpt_getinfo)) { | 857 | if (*len != sizeof(struct arpt_getinfo)) { |
@@ -875,7 +872,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) | |||
875 | "arptable_%s", name); | 872 | "arptable_%s", name); |
876 | if (t && !IS_ERR(t)) { | 873 | if (t && !IS_ERR(t)) { |
877 | struct arpt_getinfo info; | 874 | struct arpt_getinfo info; |
878 | struct xt_table_info *private = t->private; | 875 | const struct xt_table_info *private = t->private; |
879 | 876 | ||
880 | #ifdef CONFIG_COMPAT | 877 | #ifdef CONFIG_COMPAT |
881 | if (compat) { | 878 | if (compat) { |
@@ -914,7 +911,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, | |||
914 | { | 911 | { |
915 | int ret; | 912 | int ret; |
916 | struct arpt_get_entries get; | 913 | struct arpt_get_entries get; |
917 | struct arpt_table *t; | 914 | struct xt_table *t; |
918 | 915 | ||
919 | if (*len < sizeof(get)) { | 916 | if (*len < sizeof(get)) { |
920 | duprintf("get_entries: %u < %Zu\n", *len, sizeof(get)); | 917 | duprintf("get_entries: %u < %Zu\n", *len, sizeof(get)); |
@@ -930,7 +927,8 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, | |||
930 | 927 | ||
931 | t = xt_find_table_lock(net, NF_ARP, get.name); | 928 | t = xt_find_table_lock(net, NF_ARP, get.name); |
932 | if (t && !IS_ERR(t)) { | 929 | if (t && !IS_ERR(t)) { |
933 | struct xt_table_info *private = t->private; | 930 | const struct xt_table_info *private = t->private; |
931 | |||
934 | duprintf("t->private->number = %u\n", | 932 | duprintf("t->private->number = %u\n", |
935 | private->number); | 933 | private->number); |
936 | if (get.size == private->size) | 934 | if (get.size == private->size) |
@@ -939,7 +937,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, | |||
939 | else { | 937 | else { |
940 | duprintf("get_entries: I've got %u not %u!\n", | 938 | duprintf("get_entries: I've got %u not %u!\n", |
941 | private->size, get.size); | 939 | private->size, get.size); |
942 | ret = -EINVAL; | 940 | ret = -EAGAIN; |
943 | } | 941 | } |
944 | module_put(t->me); | 942 | module_put(t->me); |
945 | xt_table_unlock(t); | 943 | xt_table_unlock(t); |
@@ -956,7 +954,7 @@ static int __do_replace(struct net *net, const char *name, | |||
956 | void __user *counters_ptr) | 954 | void __user *counters_ptr) |
957 | { | 955 | { |
958 | int ret; | 956 | int ret; |
959 | struct arpt_table *t; | 957 | struct xt_table *t; |
960 | struct xt_table_info *oldinfo; | 958 | struct xt_table_info *oldinfo; |
961 | struct xt_counters *counters; | 959 | struct xt_counters *counters; |
962 | void *loc_cpu_old_entry; | 960 | void *loc_cpu_old_entry; |
@@ -1090,11 +1088,11 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, | |||
1090 | struct xt_counters_info tmp; | 1088 | struct xt_counters_info tmp; |
1091 | struct xt_counters *paddc; | 1089 | struct xt_counters *paddc; |
1092 | unsigned int num_counters; | 1090 | unsigned int num_counters; |
1093 | char *name; | 1091 | const char *name; |
1094 | int size; | 1092 | int size; |
1095 | void *ptmp; | 1093 | void *ptmp; |
1096 | struct arpt_table *t; | 1094 | struct xt_table *t; |
1097 | struct xt_table_info *private; | 1095 | const struct xt_table_info *private; |
1098 | int ret = 0; | 1096 | int ret = 0; |
1099 | void *loc_cpu_entry; | 1097 | void *loc_cpu_entry; |
1100 | #ifdef CONFIG_COMPAT | 1098 | #ifdef CONFIG_COMPAT |
@@ -1499,11 +1497,11 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, | |||
1499 | 1497 | ||
1500 | switch (cmd) { | 1498 | switch (cmd) { |
1501 | case ARPT_SO_SET_REPLACE: | 1499 | case ARPT_SO_SET_REPLACE: |
1502 | ret = compat_do_replace(sk->sk_net, user, len); | 1500 | ret = compat_do_replace(sock_net(sk), user, len); |
1503 | break; | 1501 | break; |
1504 | 1502 | ||
1505 | case ARPT_SO_SET_ADD_COUNTERS: | 1503 | case ARPT_SO_SET_ADD_COUNTERS: |
1506 | ret = do_add_counters(sk->sk_net, user, len, 1); | 1504 | ret = do_add_counters(sock_net(sk), user, len, 1); |
1507 | break; | 1505 | break; |
1508 | 1506 | ||
1509 | default: | 1507 | default: |
@@ -1557,11 +1555,11 @@ out: | |||
1557 | } | 1555 | } |
1558 | 1556 | ||
1559 | static int compat_copy_entries_to_user(unsigned int total_size, | 1557 | static int compat_copy_entries_to_user(unsigned int total_size, |
1560 | struct arpt_table *table, | 1558 | struct xt_table *table, |
1561 | void __user *userptr) | 1559 | void __user *userptr) |
1562 | { | 1560 | { |
1563 | struct xt_counters *counters; | 1561 | struct xt_counters *counters; |
1564 | struct xt_table_info *private = table->private; | 1562 | const struct xt_table_info *private = table->private; |
1565 | void __user *pos; | 1563 | void __user *pos; |
1566 | unsigned int size; | 1564 | unsigned int size; |
1567 | int ret = 0; | 1565 | int ret = 0; |
@@ -1595,7 +1593,7 @@ static int compat_get_entries(struct net *net, | |||
1595 | { | 1593 | { |
1596 | int ret; | 1594 | int ret; |
1597 | struct compat_arpt_get_entries get; | 1595 | struct compat_arpt_get_entries get; |
1598 | struct arpt_table *t; | 1596 | struct xt_table *t; |
1599 | 1597 | ||
1600 | if (*len < sizeof(get)) { | 1598 | if (*len < sizeof(get)) { |
1601 | duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get)); | 1599 | duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get)); |
@@ -1612,7 +1610,7 @@ static int compat_get_entries(struct net *net, | |||
1612 | xt_compat_lock(NF_ARP); | 1610 | xt_compat_lock(NF_ARP); |
1613 | t = xt_find_table_lock(net, NF_ARP, get.name); | 1611 | t = xt_find_table_lock(net, NF_ARP, get.name); |
1614 | if (t && !IS_ERR(t)) { | 1612 | if (t && !IS_ERR(t)) { |
1615 | struct xt_table_info *private = t->private; | 1613 | const struct xt_table_info *private = t->private; |
1616 | struct xt_table_info info; | 1614 | struct xt_table_info info; |
1617 | 1615 | ||
1618 | duprintf("t->private->number = %u\n", private->number); | 1616 | duprintf("t->private->number = %u\n", private->number); |
@@ -1623,7 +1621,7 @@ static int compat_get_entries(struct net *net, | |||
1623 | } else if (!ret) { | 1621 | } else if (!ret) { |
1624 | duprintf("compat_get_entries: I've got %u not %u!\n", | 1622 | duprintf("compat_get_entries: I've got %u not %u!\n", |
1625 | private->size, get.size); | 1623 | private->size, get.size); |
1626 | ret = -EINVAL; | 1624 | ret = -EAGAIN; |
1627 | } | 1625 | } |
1628 | xt_compat_flush_offsets(NF_ARP); | 1626 | xt_compat_flush_offsets(NF_ARP); |
1629 | module_put(t->me); | 1627 | module_put(t->me); |
@@ -1647,10 +1645,10 @@ static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, | |||
1647 | 1645 | ||
1648 | switch (cmd) { | 1646 | switch (cmd) { |
1649 | case ARPT_SO_GET_INFO: | 1647 | case ARPT_SO_GET_INFO: |
1650 | ret = get_info(sk->sk_net, user, len, 1); | 1648 | ret = get_info(sock_net(sk), user, len, 1); |
1651 | break; | 1649 | break; |
1652 | case ARPT_SO_GET_ENTRIES: | 1650 | case ARPT_SO_GET_ENTRIES: |
1653 | ret = compat_get_entries(sk->sk_net, user, len); | 1651 | ret = compat_get_entries(sock_net(sk), user, len); |
1654 | break; | 1652 | break; |
1655 | default: | 1653 | default: |
1656 | ret = do_arpt_get_ctl(sk, cmd, user, len); | 1654 | ret = do_arpt_get_ctl(sk, cmd, user, len); |
@@ -1668,11 +1666,11 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned | |||
1668 | 1666 | ||
1669 | switch (cmd) { | 1667 | switch (cmd) { |
1670 | case ARPT_SO_SET_REPLACE: | 1668 | case ARPT_SO_SET_REPLACE: |
1671 | ret = do_replace(sk->sk_net, user, len); | 1669 | ret = do_replace(sock_net(sk), user, len); |
1672 | break; | 1670 | break; |
1673 | 1671 | ||
1674 | case ARPT_SO_SET_ADD_COUNTERS: | 1672 | case ARPT_SO_SET_ADD_COUNTERS: |
1675 | ret = do_add_counters(sk->sk_net, user, len, 0); | 1673 | ret = do_add_counters(sock_net(sk), user, len, 0); |
1676 | break; | 1674 | break; |
1677 | 1675 | ||
1678 | default: | 1676 | default: |
@@ -1692,11 +1690,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len | |||
1692 | 1690 | ||
1693 | switch (cmd) { | 1691 | switch (cmd) { |
1694 | case ARPT_SO_GET_INFO: | 1692 | case ARPT_SO_GET_INFO: |
1695 | ret = get_info(sk->sk_net, user, len, 0); | 1693 | ret = get_info(sock_net(sk), user, len, 0); |
1696 | break; | 1694 | break; |
1697 | 1695 | ||
1698 | case ARPT_SO_GET_ENTRIES: | 1696 | case ARPT_SO_GET_ENTRIES: |
1699 | ret = get_entries(sk->sk_net, user, len); | 1697 | ret = get_entries(sock_net(sk), user, len); |
1700 | break; | 1698 | break; |
1701 | 1699 | ||
1702 | case ARPT_SO_GET_REVISION_TARGET: { | 1700 | case ARPT_SO_GET_REVISION_TARGET: { |
@@ -1725,9 +1723,8 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len | |||
1725 | return ret; | 1723 | return ret; |
1726 | } | 1724 | } |
1727 | 1725 | ||
1728 | struct arpt_table *arpt_register_table(struct net *net, | 1726 | struct xt_table *arpt_register_table(struct net *net, struct xt_table *table, |
1729 | struct arpt_table *table, | 1727 | const struct arpt_replace *repl) |
1730 | const struct arpt_replace *repl) | ||
1731 | { | 1728 | { |
1732 | int ret; | 1729 | int ret; |
1733 | struct xt_table_info *newinfo; | 1730 | struct xt_table_info *newinfo; |
@@ -1769,7 +1766,7 @@ out: | |||
1769 | return ERR_PTR(ret); | 1766 | return ERR_PTR(ret); |
1770 | } | 1767 | } |
1771 | 1768 | ||
1772 | void arpt_unregister_table(struct arpt_table *table) | 1769 | void arpt_unregister_table(struct xt_table *table) |
1773 | { | 1770 | { |
1774 | struct xt_table_info *private; | 1771 | struct xt_table_info *private; |
1775 | void *loc_cpu_entry; | 1772 | void *loc_cpu_entry; |
@@ -1787,7 +1784,7 @@ void arpt_unregister_table(struct arpt_table *table) | |||
1787 | } | 1784 | } |
1788 | 1785 | ||
1789 | /* The built-in targets: standard (NULL) and error. */ | 1786 | /* The built-in targets: standard (NULL) and error. */ |
1790 | static struct arpt_target arpt_standard_target __read_mostly = { | 1787 | static struct xt_target arpt_standard_target __read_mostly = { |
1791 | .name = ARPT_STANDARD_TARGET, | 1788 | .name = ARPT_STANDARD_TARGET, |
1792 | .targetsize = sizeof(int), | 1789 | .targetsize = sizeof(int), |
1793 | .family = NF_ARP, | 1790 | .family = NF_ARP, |
@@ -1798,7 +1795,7 @@ static struct arpt_target arpt_standard_target __read_mostly = { | |||
1798 | #endif | 1795 | #endif |
1799 | }; | 1796 | }; |
1800 | 1797 | ||
1801 | static struct arpt_target arpt_error_target __read_mostly = { | 1798 | static struct xt_target arpt_error_target __read_mostly = { |
1802 | .name = ARPT_ERROR_TARGET, | 1799 | .name = ARPT_ERROR_TARGET, |
1803 | .target = arpt_error, | 1800 | .target = arpt_error, |
1804 | .targetsize = ARPT_FUNCTION_MAXNAMELEN, | 1801 | .targetsize = ARPT_FUNCTION_MAXNAMELEN, |
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index 3f4222b0a803..a385959d2655 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c | |||
@@ -15,7 +15,7 @@ target(struct sk_buff *skb, | |||
15 | const void *targinfo) | 15 | const void *targinfo) |
16 | { | 16 | { |
17 | const struct arpt_mangle *mangle = targinfo; | 17 | const struct arpt_mangle *mangle = targinfo; |
18 | struct arphdr *arp; | 18 | const struct arphdr *arp; |
19 | unsigned char *arpptr; | 19 | unsigned char *arpptr; |
20 | int pln, hln; | 20 | int pln, hln; |
21 | 21 | ||
@@ -73,8 +73,9 @@ checkentry(const char *tablename, const void *e, const struct xt_target *target, | |||
73 | return true; | 73 | return true; |
74 | } | 74 | } |
75 | 75 | ||
76 | static struct arpt_target arpt_mangle_reg __read_mostly = { | 76 | static struct xt_target arpt_mangle_reg __read_mostly = { |
77 | .name = "mangle", | 77 | .name = "mangle", |
78 | .family = NF_ARP, | ||
78 | .target = target, | 79 | .target = target, |
79 | .targetsize = sizeof(struct arpt_mangle), | 80 | .targetsize = sizeof(struct arpt_mangle), |
80 | .checkentry = checkentry, | 81 | .checkentry = checkentry, |
@@ -83,15 +84,12 @@ static struct arpt_target arpt_mangle_reg __read_mostly = { | |||
83 | 84 | ||
84 | static int __init arpt_mangle_init(void) | 85 | static int __init arpt_mangle_init(void) |
85 | { | 86 | { |
86 | if (arpt_register_target(&arpt_mangle_reg)) | 87 | return xt_register_target(&arpt_mangle_reg); |
87 | return -EINVAL; | ||
88 | |||
89 | return 0; | ||
90 | } | 88 | } |
91 | 89 | ||
92 | static void __exit arpt_mangle_fini(void) | 90 | static void __exit arpt_mangle_fini(void) |
93 | { | 91 | { |
94 | arpt_unregister_target(&arpt_mangle_reg); | 92 | xt_unregister_target(&arpt_mangle_reg); |
95 | } | 93 | } |
96 | 94 | ||
97 | module_init(arpt_mangle_init); | 95 | module_init(arpt_mangle_init); |
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 4e9c496a30c2..3be4d07e7ed9 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c | |||
@@ -45,10 +45,10 @@ static struct | |||
45 | .term = ARPT_ERROR_INIT, | 45 | .term = ARPT_ERROR_INIT, |
46 | }; | 46 | }; |
47 | 47 | ||
48 | static struct arpt_table packet_filter = { | 48 | static struct xt_table packet_filter = { |
49 | .name = "filter", | 49 | .name = "filter", |
50 | .valid_hooks = FILTER_VALID_HOOKS, | 50 | .valid_hooks = FILTER_VALID_HOOKS, |
51 | .lock = RW_LOCK_UNLOCKED, | 51 | .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), |
52 | .private = NULL, | 52 | .private = NULL, |
53 | .me = THIS_MODULE, | 53 | .me = THIS_MODULE, |
54 | .af = NF_ARP, | 54 | .af = NF_ARP, |
@@ -70,18 +70,21 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = { | |||
70 | .owner = THIS_MODULE, | 70 | .owner = THIS_MODULE, |
71 | .pf = NF_ARP, | 71 | .pf = NF_ARP, |
72 | .hooknum = NF_ARP_IN, | 72 | .hooknum = NF_ARP_IN, |
73 | .priority = NF_IP_PRI_FILTER, | ||
73 | }, | 74 | }, |
74 | { | 75 | { |
75 | .hook = arpt_hook, | 76 | .hook = arpt_hook, |
76 | .owner = THIS_MODULE, | 77 | .owner = THIS_MODULE, |
77 | .pf = NF_ARP, | 78 | .pf = NF_ARP, |
78 | .hooknum = NF_ARP_OUT, | 79 | .hooknum = NF_ARP_OUT, |
80 | .priority = NF_IP_PRI_FILTER, | ||
79 | }, | 81 | }, |
80 | { | 82 | { |
81 | .hook = arpt_hook, | 83 | .hook = arpt_hook, |
82 | .owner = THIS_MODULE, | 84 | .owner = THIS_MODULE, |
83 | .pf = NF_ARP, | 85 | .pf = NF_ARP, |
84 | .hooknum = NF_ARP_FORWARD, | 86 | .hooknum = NF_ARP_FORWARD, |
87 | .priority = NF_IP_PRI_FILTER, | ||
85 | }, | 88 | }, |
86 | }; | 89 | }; |
87 | 90 | ||
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 4dc162894cb2..719be29f7506 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c | |||
@@ -481,7 +481,7 @@ ipq_rcv_dev_event(struct notifier_block *this, | |||
481 | { | 481 | { |
482 | struct net_device *dev = ptr; | 482 | struct net_device *dev = ptr; |
483 | 483 | ||
484 | if (dev->nd_net != &init_net) | 484 | if (dev_net(dev) != &init_net) |
485 | return NOTIFY_DONE; | 485 | return NOTIFY_DONE; |
486 | 486 | ||
487 | /* Drop any packets associated with the downed device */ | 487 | /* Drop any packets associated with the downed device */ |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 600737f122d2..4e7c719445c2 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -53,7 +53,7 @@ MODULE_DESCRIPTION("IPv4 packet filter"); | |||
53 | do { \ | 53 | do { \ |
54 | if (!(x)) \ | 54 | if (!(x)) \ |
55 | printk("IP_NF_ASSERT: %s:%s:%u\n", \ | 55 | printk("IP_NF_ASSERT: %s:%s:%u\n", \ |
56 | __FUNCTION__, __FILE__, __LINE__); \ | 56 | __func__, __FILE__, __LINE__); \ |
57 | } while(0) | 57 | } while(0) |
58 | #else | 58 | #else |
59 | #define IP_NF_ASSERT(x) | 59 | #define IP_NF_ASSERT(x) |
@@ -296,7 +296,7 @@ static void trace_packet(struct sk_buff *skb, | |||
296 | struct ipt_entry *e) | 296 | struct ipt_entry *e) |
297 | { | 297 | { |
298 | void *table_base; | 298 | void *table_base; |
299 | struct ipt_entry *root; | 299 | const struct ipt_entry *root; |
300 | char *hookname, *chainname, *comment; | 300 | char *hookname, *chainname, *comment; |
301 | unsigned int rulenum = 0; | 301 | unsigned int rulenum = 0; |
302 | 302 | ||
@@ -327,7 +327,7 @@ ipt_do_table(struct sk_buff *skb, | |||
327 | { | 327 | { |
328 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); | 328 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); |
329 | u_int16_t offset; | 329 | u_int16_t offset; |
330 | struct iphdr *ip; | 330 | const struct iphdr *ip; |
331 | u_int16_t datalen; | 331 | u_int16_t datalen; |
332 | bool hotdrop = false; | 332 | bool hotdrop = false; |
333 | /* Initializing verdict to NF_DROP keeps gcc happy. */ | 333 | /* Initializing verdict to NF_DROP keeps gcc happy. */ |
@@ -926,7 +926,7 @@ static struct xt_counters * alloc_counters(struct xt_table *table) | |||
926 | { | 926 | { |
927 | unsigned int countersize; | 927 | unsigned int countersize; |
928 | struct xt_counters *counters; | 928 | struct xt_counters *counters; |
929 | struct xt_table_info *private = table->private; | 929 | const struct xt_table_info *private = table->private; |
930 | 930 | ||
931 | /* We need atomic snapshot of counters: rest doesn't change | 931 | /* We need atomic snapshot of counters: rest doesn't change |
932 | (other than comefrom, which userspace doesn't care | 932 | (other than comefrom, which userspace doesn't care |
@@ -953,9 +953,9 @@ copy_entries_to_user(unsigned int total_size, | |||
953 | unsigned int off, num; | 953 | unsigned int off, num; |
954 | struct ipt_entry *e; | 954 | struct ipt_entry *e; |
955 | struct xt_counters *counters; | 955 | struct xt_counters *counters; |
956 | struct xt_table_info *private = table->private; | 956 | const struct xt_table_info *private = table->private; |
957 | int ret = 0; | 957 | int ret = 0; |
958 | void *loc_cpu_entry; | 958 | const void *loc_cpu_entry; |
959 | 959 | ||
960 | counters = alloc_counters(table); | 960 | counters = alloc_counters(table); |
961 | if (IS_ERR(counters)) | 961 | if (IS_ERR(counters)) |
@@ -975,8 +975,8 @@ copy_entries_to_user(unsigned int total_size, | |||
975 | /* ... then go back and fix counters and names */ | 975 | /* ... then go back and fix counters and names */ |
976 | for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ | 976 | for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ |
977 | unsigned int i; | 977 | unsigned int i; |
978 | struct ipt_entry_match *m; | 978 | const struct ipt_entry_match *m; |
979 | struct ipt_entry_target *t; | 979 | const struct ipt_entry_target *t; |
980 | 980 | ||
981 | e = (struct ipt_entry *)(loc_cpu_entry + off); | 981 | e = (struct ipt_entry *)(loc_cpu_entry + off); |
982 | if (copy_to_user(userptr + off | 982 | if (copy_to_user(userptr + off |
@@ -1116,7 +1116,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) | |||
1116 | "iptable_%s", name); | 1116 | "iptable_%s", name); |
1117 | if (t && !IS_ERR(t)) { | 1117 | if (t && !IS_ERR(t)) { |
1118 | struct ipt_getinfo info; | 1118 | struct ipt_getinfo info; |
1119 | struct xt_table_info *private = t->private; | 1119 | const struct xt_table_info *private = t->private; |
1120 | 1120 | ||
1121 | #ifdef CONFIG_COMPAT | 1121 | #ifdef CONFIG_COMPAT |
1122 | if (compat) { | 1122 | if (compat) { |
@@ -1172,7 +1172,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len) | |||
1172 | 1172 | ||
1173 | t = xt_find_table_lock(net, AF_INET, get.name); | 1173 | t = xt_find_table_lock(net, AF_INET, get.name); |
1174 | if (t && !IS_ERR(t)) { | 1174 | if (t && !IS_ERR(t)) { |
1175 | struct xt_table_info *private = t->private; | 1175 | const struct xt_table_info *private = t->private; |
1176 | duprintf("t->private->number = %u\n", private->number); | 1176 | duprintf("t->private->number = %u\n", private->number); |
1177 | if (get.size == private->size) | 1177 | if (get.size == private->size) |
1178 | ret = copy_entries_to_user(private->size, | 1178 | ret = copy_entries_to_user(private->size, |
@@ -1180,7 +1180,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len) | |||
1180 | else { | 1180 | else { |
1181 | duprintf("get_entries: I've got %u not %u!\n", | 1181 | duprintf("get_entries: I've got %u not %u!\n", |
1182 | private->size, get.size); | 1182 | private->size, get.size); |
1183 | ret = -EINVAL; | 1183 | ret = -EAGAIN; |
1184 | } | 1184 | } |
1185 | module_put(t->me); | 1185 | module_put(t->me); |
1186 | xt_table_unlock(t); | 1186 | xt_table_unlock(t); |
@@ -1337,11 +1337,11 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat | |||
1337 | struct xt_counters_info tmp; | 1337 | struct xt_counters_info tmp; |
1338 | struct xt_counters *paddc; | 1338 | struct xt_counters *paddc; |
1339 | unsigned int num_counters; | 1339 | unsigned int num_counters; |
1340 | char *name; | 1340 | const char *name; |
1341 | int size; | 1341 | int size; |
1342 | void *ptmp; | 1342 | void *ptmp; |
1343 | struct xt_table *t; | 1343 | struct xt_table *t; |
1344 | struct xt_table_info *private; | 1344 | const struct xt_table_info *private; |
1345 | int ret = 0; | 1345 | int ret = 0; |
1346 | void *loc_cpu_entry; | 1346 | void *loc_cpu_entry; |
1347 | #ifdef CONFIG_COMPAT | 1347 | #ifdef CONFIG_COMPAT |
@@ -1852,11 +1852,11 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, | |||
1852 | 1852 | ||
1853 | switch (cmd) { | 1853 | switch (cmd) { |
1854 | case IPT_SO_SET_REPLACE: | 1854 | case IPT_SO_SET_REPLACE: |
1855 | ret = compat_do_replace(sk->sk_net, user, len); | 1855 | ret = compat_do_replace(sock_net(sk), user, len); |
1856 | break; | 1856 | break; |
1857 | 1857 | ||
1858 | case IPT_SO_SET_ADD_COUNTERS: | 1858 | case IPT_SO_SET_ADD_COUNTERS: |
1859 | ret = do_add_counters(sk->sk_net, user, len, 1); | 1859 | ret = do_add_counters(sock_net(sk), user, len, 1); |
1860 | break; | 1860 | break; |
1861 | 1861 | ||
1862 | default: | 1862 | default: |
@@ -1878,11 +1878,11 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, | |||
1878 | void __user *userptr) | 1878 | void __user *userptr) |
1879 | { | 1879 | { |
1880 | struct xt_counters *counters; | 1880 | struct xt_counters *counters; |
1881 | struct xt_table_info *private = table->private; | 1881 | const struct xt_table_info *private = table->private; |
1882 | void __user *pos; | 1882 | void __user *pos; |
1883 | unsigned int size; | 1883 | unsigned int size; |
1884 | int ret = 0; | 1884 | int ret = 0; |
1885 | void *loc_cpu_entry; | 1885 | const void *loc_cpu_entry; |
1886 | unsigned int i = 0; | 1886 | unsigned int i = 0; |
1887 | 1887 | ||
1888 | counters = alloc_counters(table); | 1888 | counters = alloc_counters(table); |
@@ -1929,7 +1929,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, | |||
1929 | xt_compat_lock(AF_INET); | 1929 | xt_compat_lock(AF_INET); |
1930 | t = xt_find_table_lock(net, AF_INET, get.name); | 1930 | t = xt_find_table_lock(net, AF_INET, get.name); |
1931 | if (t && !IS_ERR(t)) { | 1931 | if (t && !IS_ERR(t)) { |
1932 | struct xt_table_info *private = t->private; | 1932 | const struct xt_table_info *private = t->private; |
1933 | struct xt_table_info info; | 1933 | struct xt_table_info info; |
1934 | duprintf("t->private->number = %u\n", private->number); | 1934 | duprintf("t->private->number = %u\n", private->number); |
1935 | ret = compat_table_info(private, &info); | 1935 | ret = compat_table_info(private, &info); |
@@ -1939,7 +1939,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, | |||
1939 | } else if (!ret) { | 1939 | } else if (!ret) { |
1940 | duprintf("compat_get_entries: I've got %u not %u!\n", | 1940 | duprintf("compat_get_entries: I've got %u not %u!\n", |
1941 | private->size, get.size); | 1941 | private->size, get.size); |
1942 | ret = -EINVAL; | 1942 | ret = -EAGAIN; |
1943 | } | 1943 | } |
1944 | xt_compat_flush_offsets(AF_INET); | 1944 | xt_compat_flush_offsets(AF_INET); |
1945 | module_put(t->me); | 1945 | module_put(t->me); |
@@ -1963,10 +1963,10 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
1963 | 1963 | ||
1964 | switch (cmd) { | 1964 | switch (cmd) { |
1965 | case IPT_SO_GET_INFO: | 1965 | case IPT_SO_GET_INFO: |
1966 | ret = get_info(sk->sk_net, user, len, 1); | 1966 | ret = get_info(sock_net(sk), user, len, 1); |
1967 | break; | 1967 | break; |
1968 | case IPT_SO_GET_ENTRIES: | 1968 | case IPT_SO_GET_ENTRIES: |
1969 | ret = compat_get_entries(sk->sk_net, user, len); | 1969 | ret = compat_get_entries(sock_net(sk), user, len); |
1970 | break; | 1970 | break; |
1971 | default: | 1971 | default: |
1972 | ret = do_ipt_get_ctl(sk, cmd, user, len); | 1972 | ret = do_ipt_get_ctl(sk, cmd, user, len); |
@@ -1985,11 +1985,11 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | |||
1985 | 1985 | ||
1986 | switch (cmd) { | 1986 | switch (cmd) { |
1987 | case IPT_SO_SET_REPLACE: | 1987 | case IPT_SO_SET_REPLACE: |
1988 | ret = do_replace(sk->sk_net, user, len); | 1988 | ret = do_replace(sock_net(sk), user, len); |
1989 | break; | 1989 | break; |
1990 | 1990 | ||
1991 | case IPT_SO_SET_ADD_COUNTERS: | 1991 | case IPT_SO_SET_ADD_COUNTERS: |
1992 | ret = do_add_counters(sk->sk_net, user, len, 0); | 1992 | ret = do_add_counters(sock_net(sk), user, len, 0); |
1993 | break; | 1993 | break; |
1994 | 1994 | ||
1995 | default: | 1995 | default: |
@@ -2010,11 +2010,11 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2010 | 2010 | ||
2011 | switch (cmd) { | 2011 | switch (cmd) { |
2012 | case IPT_SO_GET_INFO: | 2012 | case IPT_SO_GET_INFO: |
2013 | ret = get_info(sk->sk_net, user, len, 0); | 2013 | ret = get_info(sock_net(sk), user, len, 0); |
2014 | break; | 2014 | break; |
2015 | 2015 | ||
2016 | case IPT_SO_GET_ENTRIES: | 2016 | case IPT_SO_GET_ENTRIES: |
2017 | ret = get_entries(sk->sk_net, user, len); | 2017 | ret = get_entries(sock_net(sk), user, len); |
2018 | break; | 2018 | break; |
2019 | 2019 | ||
2020 | case IPT_SO_GET_REVISION_MATCH: | 2020 | case IPT_SO_GET_REVISION_MATCH: |
@@ -2130,7 +2130,8 @@ icmp_match(const struct sk_buff *skb, | |||
2130 | unsigned int protoff, | 2130 | unsigned int protoff, |
2131 | bool *hotdrop) | 2131 | bool *hotdrop) |
2132 | { | 2132 | { |
2133 | struct icmphdr _icmph, *ic; | 2133 | const struct icmphdr *ic; |
2134 | struct icmphdr _icmph; | ||
2134 | const struct ipt_icmp *icmpinfo = matchinfo; | 2135 | const struct ipt_icmp *icmpinfo = matchinfo; |
2135 | 2136 | ||
2136 | /* Must not be a fragment. */ | 2137 | /* Must not be a fragment. */ |
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index a12dd329e208..22d8e7cd9197 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -144,7 +144,7 @@ clusterip_config_init_nodelist(struct clusterip_config *c, | |||
144 | } | 144 | } |
145 | 145 | ||
146 | static struct clusterip_config * | 146 | static struct clusterip_config * |
147 | clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip, | 147 | clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, |
148 | struct net_device *dev) | 148 | struct net_device *dev) |
149 | { | 149 | { |
150 | struct clusterip_config *c; | 150 | struct clusterip_config *c; |
@@ -333,7 +333,7 @@ clusterip_tg(struct sk_buff *skb, const struct net_device *in, | |||
333 | } | 333 | } |
334 | 334 | ||
335 | #ifdef DEBUG | 335 | #ifdef DEBUG |
336 | DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | 336 | nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); |
337 | #endif | 337 | #endif |
338 | pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); | 338 | pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); |
339 | if (!clusterip_responsible(cipinfo->config, hash)) { | 339 | if (!clusterip_responsible(cipinfo->config, hash)) { |
@@ -418,7 +418,7 @@ clusterip_tg_check(const char *tablename, const void *e_void, | |||
418 | /* drop reference count of cluster config when rule is deleted */ | 418 | /* drop reference count of cluster config when rule is deleted */ |
419 | static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo) | 419 | static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo) |
420 | { | 420 | { |
421 | struct ipt_clusterip_tgt_info *cipinfo = targinfo; | 421 | const struct ipt_clusterip_tgt_info *cipinfo = targinfo; |
422 | 422 | ||
423 | /* if no more entries are referencing the config, remove it | 423 | /* if no more entries are referencing the config, remove it |
424 | * from the list and destroy the proc entry */ | 424 | * from the list and destroy the proc entry */ |
@@ -567,7 +567,7 @@ struct clusterip_seq_position { | |||
567 | 567 | ||
568 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) | 568 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) |
569 | { | 569 | { |
570 | struct proc_dir_entry *pde = s->private; | 570 | const struct proc_dir_entry *pde = s->private; |
571 | struct clusterip_config *c = pde->data; | 571 | struct clusterip_config *c = pde->data; |
572 | unsigned int weight; | 572 | unsigned int weight; |
573 | u_int32_t local_nodes; | 573 | u_int32_t local_nodes; |
@@ -594,7 +594,7 @@ static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) | |||
594 | 594 | ||
595 | static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) | 595 | static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) |
596 | { | 596 | { |
597 | struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; | 597 | struct clusterip_seq_position *idx = v; |
598 | 598 | ||
599 | *pos = ++idx->pos; | 599 | *pos = ++idx->pos; |
600 | if (*pos >= idx->weight) { | 600 | if (*pos >= idx->weight) { |
@@ -613,7 +613,7 @@ static void clusterip_seq_stop(struct seq_file *s, void *v) | |||
613 | 613 | ||
614 | static int clusterip_seq_show(struct seq_file *s, void *v) | 614 | static int clusterip_seq_show(struct seq_file *s, void *v) |
615 | { | 615 | { |
616 | struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; | 616 | struct clusterip_seq_position *idx = v; |
617 | 617 | ||
618 | if (idx->pos != 0) | 618 | if (idx->pos != 0) |
619 | seq_putc(s, ','); | 619 | seq_putc(s, ','); |
@@ -669,7 +669,7 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input, | |||
669 | { | 669 | { |
670 | #define PROC_WRITELEN 10 | 670 | #define PROC_WRITELEN 10 |
671 | char buffer[PROC_WRITELEN+1]; | 671 | char buffer[PROC_WRITELEN+1]; |
672 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | 672 | const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); |
673 | struct clusterip_config *c = pde->data; | 673 | struct clusterip_config *c = pde->data; |
674 | unsigned long nodenum; | 674 | unsigned long nodenum; |
675 | 675 | ||
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 21395bc2b27f..d60139c134ca 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c | |||
@@ -100,7 +100,7 @@ ecn_tg_check(const char *tablename, const void *e_void, | |||
100 | const struct xt_target *target, void *targinfo, | 100 | const struct xt_target *target, void *targinfo, |
101 | unsigned int hook_mask) | 101 | unsigned int hook_mask) |
102 | { | 102 | { |
103 | const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo; | 103 | const struct ipt_ECN_info *einfo = targinfo; |
104 | const struct ipt_entry *e = e_void; | 104 | const struct ipt_entry *e = e_void; |
105 | 105 | ||
106 | if (einfo->operation & IPT_ECN_OP_MASK) { | 106 | if (einfo->operation & IPT_ECN_OP_MASK) { |
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index b38d7850f506..0af14137137b 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c | |||
@@ -76,7 +76,8 @@ static void dump_packet(const struct nf_loginfo *info, | |||
76 | 76 | ||
77 | if ((logflags & IPT_LOG_IPOPT) | 77 | if ((logflags & IPT_LOG_IPOPT) |
78 | && ih->ihl * 4 > sizeof(struct iphdr)) { | 78 | && ih->ihl * 4 > sizeof(struct iphdr)) { |
79 | unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op; | 79 | const unsigned char *op; |
80 | unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; | ||
80 | unsigned int i, optsize; | 81 | unsigned int i, optsize; |
81 | 82 | ||
82 | optsize = ih->ihl * 4 - sizeof(struct iphdr); | 83 | optsize = ih->ihl * 4 - sizeof(struct iphdr); |
@@ -338,12 +339,16 @@ static void dump_packet(const struct nf_loginfo *info, | |||
338 | if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) { | 339 | if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) { |
339 | read_lock_bh(&skb->sk->sk_callback_lock); | 340 | read_lock_bh(&skb->sk->sk_callback_lock); |
340 | if (skb->sk->sk_socket && skb->sk->sk_socket->file) | 341 | if (skb->sk->sk_socket && skb->sk->sk_socket->file) |
341 | printk("UID=%u GID=%u", | 342 | printk("UID=%u GID=%u ", |
342 | skb->sk->sk_socket->file->f_uid, | 343 | skb->sk->sk_socket->file->f_uid, |
343 | skb->sk->sk_socket->file->f_gid); | 344 | skb->sk->sk_socket->file->f_gid); |
344 | read_unlock_bh(&skb->sk->sk_callback_lock); | 345 | read_unlock_bh(&skb->sk->sk_callback_lock); |
345 | } | 346 | } |
346 | 347 | ||
348 | /* Max length: 16 "MARK=0xFFFFFFFF " */ | ||
349 | if (!iphoff && skb->mark) | ||
350 | printk("MARK=0x%x ", skb->mark); | ||
351 | |||
347 | /* Proto Max log string length */ | 352 | /* Proto Max log string length */ |
348 | /* IP: 40+46+6+11+127 = 230 */ | 353 | /* IP: 40+46+6+11+127 = 230 */ |
349 | /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ | 354 | /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ |
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index d80fee8327e4..84c26dd27d81 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c | |||
@@ -77,7 +77,7 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in, | |||
77 | return NF_ACCEPT; | 77 | return NF_ACCEPT; |
78 | 78 | ||
79 | mr = targinfo; | 79 | mr = targinfo; |
80 | rt = (struct rtable *)skb->dst; | 80 | rt = skb->rtable; |
81 | newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); | 81 | newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); |
82 | if (!newsrc) { | 82 | if (!newsrc) { |
83 | printk("MASQUERADE: %s ate my IP address\n", out->name); | 83 | printk("MASQUERADE: %s ate my IP address\n", out->name); |
@@ -120,7 +120,7 @@ static int masq_device_event(struct notifier_block *this, | |||
120 | { | 120 | { |
121 | const struct net_device *dev = ptr; | 121 | const struct net_device *dev = ptr; |
122 | 122 | ||
123 | if (dev->nd_net != &init_net) | 123 | if (dev_net(dev) != &init_net) |
124 | return NOTIFY_DONE; | 124 | return NOTIFY_DONE; |
125 | 125 | ||
126 | if (event == NETDEV_DOWN) { | 126 | if (event == NETDEV_DOWN) { |
@@ -139,18 +139,8 @@ static int masq_inet_event(struct notifier_block *this, | |||
139 | unsigned long event, | 139 | unsigned long event, |
140 | void *ptr) | 140 | void *ptr) |
141 | { | 141 | { |
142 | const struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; | 142 | struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; |
143 | 143 | return masq_device_event(this, event, dev); | |
144 | if (event == NETDEV_DOWN) { | ||
145 | /* IP address was deleted. Search entire table for | ||
146 | conntracks which were associated with that device, | ||
147 | and forget them. */ | ||
148 | NF_CT_ASSERT(dev->ifindex != 0); | ||
149 | |||
150 | nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex); | ||
151 | } | ||
152 | |||
153 | return NOTIFY_DONE; | ||
154 | } | 144 | } |
155 | 145 | ||
156 | static struct notifier_block masq_dev_notifier = { | 146 | static struct notifier_block masq_dev_notifier = { |
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 22606e2baa16..2639872849da 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c | |||
@@ -35,8 +35,10 @@ MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4"); | |||
35 | static void send_reset(struct sk_buff *oldskb, int hook) | 35 | static void send_reset(struct sk_buff *oldskb, int hook) |
36 | { | 36 | { |
37 | struct sk_buff *nskb; | 37 | struct sk_buff *nskb; |
38 | struct iphdr *oiph, *niph; | 38 | const struct iphdr *oiph; |
39 | struct tcphdr _otcph, *oth, *tcph; | 39 | struct iphdr *niph; |
40 | const struct tcphdr *oth; | ||
41 | struct tcphdr _otcph, *tcph; | ||
40 | unsigned int addr_type; | 42 | unsigned int addr_type; |
41 | 43 | ||
42 | /* IP header checks: fragment. */ | 44 | /* IP header checks: fragment. */ |
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 50e06690eb5b..21cb053f5d7d 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c | |||
@@ -340,7 +340,7 @@ static void *recent_seq_start(struct seq_file *seq, loff_t *pos) | |||
340 | static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 340 | static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
341 | { | 341 | { |
342 | struct recent_iter_state *st = seq->private; | 342 | struct recent_iter_state *st = seq->private; |
343 | struct recent_table *t = st->table; | 343 | const struct recent_table *t = st->table; |
344 | struct recent_entry *e = v; | 344 | struct recent_entry *e = v; |
345 | struct list_head *head = e->list.next; | 345 | struct list_head *head = e->list.next; |
346 | 346 | ||
@@ -361,7 +361,7 @@ static void recent_seq_stop(struct seq_file *s, void *v) | |||
361 | 361 | ||
362 | static int recent_seq_show(struct seq_file *seq, void *v) | 362 | static int recent_seq_show(struct seq_file *seq, void *v) |
363 | { | 363 | { |
364 | struct recent_entry *e = v; | 364 | const struct recent_entry *e = v; |
365 | unsigned int i; | 365 | unsigned int i; |
366 | 366 | ||
367 | i = (e->index - 1) % ip_pkt_list_tot; | 367 | i = (e->index - 1) % ip_pkt_list_tot; |
@@ -396,7 +396,7 @@ static int recent_seq_open(struct inode *inode, struct file *file) | |||
396 | static ssize_t recent_proc_write(struct file *file, const char __user *input, | 396 | static ssize_t recent_proc_write(struct file *file, const char __user *input, |
397 | size_t size, loff_t *loff) | 397 | size_t size, loff_t *loff) |
398 | { | 398 | { |
399 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); | 399 | const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); |
400 | struct recent_table *t = pde->data; | 400 | struct recent_table *t = pde->data; |
401 | struct recent_entry *e; | 401 | struct recent_entry *e; |
402 | char buf[sizeof("+255.255.255.255")], *c = buf; | 402 | char buf[sizeof("+255.255.255.255")], *c = buf; |
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 69f3d7e6e96f..1ea677dcf845 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c | |||
@@ -56,20 +56,32 @@ static struct | |||
56 | static struct xt_table packet_filter = { | 56 | static struct xt_table packet_filter = { |
57 | .name = "filter", | 57 | .name = "filter", |
58 | .valid_hooks = FILTER_VALID_HOOKS, | 58 | .valid_hooks = FILTER_VALID_HOOKS, |
59 | .lock = RW_LOCK_UNLOCKED, | 59 | .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), |
60 | .me = THIS_MODULE, | 60 | .me = THIS_MODULE, |
61 | .af = AF_INET, | 61 | .af = AF_INET, |
62 | }; | 62 | }; |
63 | 63 | ||
64 | /* The work comes in here from netfilter.c. */ | 64 | /* The work comes in here from netfilter.c. */ |
65 | static unsigned int | 65 | static unsigned int |
66 | ipt_local_in_hook(unsigned int hook, | ||
67 | struct sk_buff *skb, | ||
68 | const struct net_device *in, | ||
69 | const struct net_device *out, | ||
70 | int (*okfn)(struct sk_buff *)) | ||
71 | { | ||
72 | return ipt_do_table(skb, hook, in, out, | ||
73 | nf_local_in_net(in, out)->ipv4.iptable_filter); | ||
74 | } | ||
75 | |||
76 | static unsigned int | ||
66 | ipt_hook(unsigned int hook, | 77 | ipt_hook(unsigned int hook, |
67 | struct sk_buff *skb, | 78 | struct sk_buff *skb, |
68 | const struct net_device *in, | 79 | const struct net_device *in, |
69 | const struct net_device *out, | 80 | const struct net_device *out, |
70 | int (*okfn)(struct sk_buff *)) | 81 | int (*okfn)(struct sk_buff *)) |
71 | { | 82 | { |
72 | return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter); | 83 | return ipt_do_table(skb, hook, in, out, |
84 | nf_forward_net(in, out)->ipv4.iptable_filter); | ||
73 | } | 85 | } |
74 | 86 | ||
75 | static unsigned int | 87 | static unsigned int |
@@ -88,12 +100,13 @@ ipt_local_out_hook(unsigned int hook, | |||
88 | return NF_ACCEPT; | 100 | return NF_ACCEPT; |
89 | } | 101 | } |
90 | 102 | ||
91 | return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter); | 103 | return ipt_do_table(skb, hook, in, out, |
104 | nf_local_out_net(in, out)->ipv4.iptable_filter); | ||
92 | } | 105 | } |
93 | 106 | ||
94 | static struct nf_hook_ops ipt_ops[] __read_mostly = { | 107 | static struct nf_hook_ops ipt_ops[] __read_mostly = { |
95 | { | 108 | { |
96 | .hook = ipt_hook, | 109 | .hook = ipt_local_in_hook, |
97 | .owner = THIS_MODULE, | 110 | .owner = THIS_MODULE, |
98 | .pf = PF_INET, | 111 | .pf = PF_INET, |
99 | .hooknum = NF_INET_LOCAL_IN, | 112 | .hooknum = NF_INET_LOCAL_IN, |
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index c55a210853a7..da59182f2226 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c | |||
@@ -67,20 +67,54 @@ static struct | |||
67 | static struct xt_table packet_mangler = { | 67 | static struct xt_table packet_mangler = { |
68 | .name = "mangle", | 68 | .name = "mangle", |
69 | .valid_hooks = MANGLE_VALID_HOOKS, | 69 | .valid_hooks = MANGLE_VALID_HOOKS, |
70 | .lock = RW_LOCK_UNLOCKED, | 70 | .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock), |
71 | .me = THIS_MODULE, | 71 | .me = THIS_MODULE, |
72 | .af = AF_INET, | 72 | .af = AF_INET, |
73 | }; | 73 | }; |
74 | 74 | ||
75 | /* The work comes in here from netfilter.c. */ | 75 | /* The work comes in here from netfilter.c. */ |
76 | static unsigned int | 76 | static unsigned int |
77 | ipt_route_hook(unsigned int hook, | 77 | ipt_pre_routing_hook(unsigned int hook, |
78 | struct sk_buff *skb, | ||
79 | const struct net_device *in, | ||
80 | const struct net_device *out, | ||
81 | int (*okfn)(struct sk_buff *)) | ||
82 | { | ||
83 | return ipt_do_table(skb, hook, in, out, | ||
84 | nf_pre_routing_net(in, out)->ipv4.iptable_mangle); | ||
85 | } | ||
86 | |||
87 | static unsigned int | ||
88 | ipt_post_routing_hook(unsigned int hook, | ||
89 | struct sk_buff *skb, | ||
90 | const struct net_device *in, | ||
91 | const struct net_device *out, | ||
92 | int (*okfn)(struct sk_buff *)) | ||
93 | { | ||
94 | return ipt_do_table(skb, hook, in, out, | ||
95 | nf_post_routing_net(in, out)->ipv4.iptable_mangle); | ||
96 | } | ||
97 | |||
98 | static unsigned int | ||
99 | ipt_local_in_hook(unsigned int hook, | ||
100 | struct sk_buff *skb, | ||
101 | const struct net_device *in, | ||
102 | const struct net_device *out, | ||
103 | int (*okfn)(struct sk_buff *)) | ||
104 | { | ||
105 | return ipt_do_table(skb, hook, in, out, | ||
106 | nf_local_in_net(in, out)->ipv4.iptable_mangle); | ||
107 | } | ||
108 | |||
109 | static unsigned int | ||
110 | ipt_forward_hook(unsigned int hook, | ||
78 | struct sk_buff *skb, | 111 | struct sk_buff *skb, |
79 | const struct net_device *in, | 112 | const struct net_device *in, |
80 | const struct net_device *out, | 113 | const struct net_device *out, |
81 | int (*okfn)(struct sk_buff *)) | 114 | int (*okfn)(struct sk_buff *)) |
82 | { | 115 | { |
83 | return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle); | 116 | return ipt_do_table(skb, hook, in, out, |
117 | nf_forward_net(in, out)->ipv4.iptable_mangle); | ||
84 | } | 118 | } |
85 | 119 | ||
86 | static unsigned int | 120 | static unsigned int |
@@ -112,7 +146,8 @@ ipt_local_hook(unsigned int hook, | |||
112 | daddr = iph->daddr; | 146 | daddr = iph->daddr; |
113 | tos = iph->tos; | 147 | tos = iph->tos; |
114 | 148 | ||
115 | ret = ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle); | 149 | ret = ipt_do_table(skb, hook, in, out, |
150 | nf_local_out_net(in, out)->ipv4.iptable_mangle); | ||
116 | /* Reroute for ANY change. */ | 151 | /* Reroute for ANY change. */ |
117 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { | 152 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { |
118 | iph = ip_hdr(skb); | 153 | iph = ip_hdr(skb); |
@@ -130,21 +165,21 @@ ipt_local_hook(unsigned int hook, | |||
130 | 165 | ||
131 | static struct nf_hook_ops ipt_ops[] __read_mostly = { | 166 | static struct nf_hook_ops ipt_ops[] __read_mostly = { |
132 | { | 167 | { |
133 | .hook = ipt_route_hook, | 168 | .hook = ipt_pre_routing_hook, |
134 | .owner = THIS_MODULE, | 169 | .owner = THIS_MODULE, |
135 | .pf = PF_INET, | 170 | .pf = PF_INET, |
136 | .hooknum = NF_INET_PRE_ROUTING, | 171 | .hooknum = NF_INET_PRE_ROUTING, |
137 | .priority = NF_IP_PRI_MANGLE, | 172 | .priority = NF_IP_PRI_MANGLE, |
138 | }, | 173 | }, |
139 | { | 174 | { |
140 | .hook = ipt_route_hook, | 175 | .hook = ipt_local_in_hook, |
141 | .owner = THIS_MODULE, | 176 | .owner = THIS_MODULE, |
142 | .pf = PF_INET, | 177 | .pf = PF_INET, |
143 | .hooknum = NF_INET_LOCAL_IN, | 178 | .hooknum = NF_INET_LOCAL_IN, |
144 | .priority = NF_IP_PRI_MANGLE, | 179 | .priority = NF_IP_PRI_MANGLE, |
145 | }, | 180 | }, |
146 | { | 181 | { |
147 | .hook = ipt_route_hook, | 182 | .hook = ipt_forward_hook, |
148 | .owner = THIS_MODULE, | 183 | .owner = THIS_MODULE, |
149 | .pf = PF_INET, | 184 | .pf = PF_INET, |
150 | .hooknum = NF_INET_FORWARD, | 185 | .hooknum = NF_INET_FORWARD, |
@@ -158,7 +193,7 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { | |||
158 | .priority = NF_IP_PRI_MANGLE, | 193 | .priority = NF_IP_PRI_MANGLE, |
159 | }, | 194 | }, |
160 | { | 195 | { |
161 | .hook = ipt_route_hook, | 196 | .hook = ipt_post_routing_hook, |
162 | .owner = THIS_MODULE, | 197 | .owner = THIS_MODULE, |
163 | .pf = PF_INET, | 198 | .pf = PF_INET, |
164 | .hooknum = NF_INET_POST_ROUTING, | 199 | .hooknum = NF_INET_POST_ROUTING, |
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index e41fe8ca4e1c..fddce7754b72 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c | |||
@@ -39,7 +39,7 @@ static struct | |||
39 | static struct xt_table packet_raw = { | 39 | static struct xt_table packet_raw = { |
40 | .name = "raw", | 40 | .name = "raw", |
41 | .valid_hooks = RAW_VALID_HOOKS, | 41 | .valid_hooks = RAW_VALID_HOOKS, |
42 | .lock = RW_LOCK_UNLOCKED, | 42 | .lock = __RW_LOCK_UNLOCKED(packet_raw.lock), |
43 | .me = THIS_MODULE, | 43 | .me = THIS_MODULE, |
44 | .af = AF_INET, | 44 | .af = AF_INET, |
45 | }; | 45 | }; |
@@ -52,7 +52,8 @@ ipt_hook(unsigned int hook, | |||
52 | const struct net_device *out, | 52 | const struct net_device *out, |
53 | int (*okfn)(struct sk_buff *)) | 53 | int (*okfn)(struct sk_buff *)) |
54 | { | 54 | { |
55 | return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw); | 55 | return ipt_do_table(skb, hook, in, out, |
56 | nf_pre_routing_net(in, out)->ipv4.iptable_raw); | ||
56 | } | 57 | } |
57 | 58 | ||
58 | static unsigned int | 59 | static unsigned int |
@@ -70,7 +71,8 @@ ipt_local_hook(unsigned int hook, | |||
70 | "packet.\n"); | 71 | "packet.\n"); |
71 | return NF_ACCEPT; | 72 | return NF_ACCEPT; |
72 | } | 73 | } |
73 | return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw); | 74 | return ipt_do_table(skb, hook, in, out, |
75 | nf_local_out_net(in, out)->ipv4.iptable_raw); | ||
74 | } | 76 | } |
75 | 77 | ||
76 | /* 'raw' is the very first table. */ | 78 | /* 'raw' is the very first table. */ |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index a65b845c5f15..cacb9cb27dab 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |||
@@ -23,30 +23,36 @@ | |||
23 | #include <net/netfilter/nf_conntrack_l3proto.h> | 23 | #include <net/netfilter/nf_conntrack_l3proto.h> |
24 | #include <net/netfilter/nf_conntrack_core.h> | 24 | #include <net/netfilter/nf_conntrack_core.h> |
25 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> | 25 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> |
26 | #include <net/netfilter/nf_nat_helper.h> | ||
26 | 27 | ||
27 | static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, | 28 | int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, |
28 | struct nf_conntrack_tuple *tuple) | 29 | struct nf_conn *ct, |
30 | enum ip_conntrack_info ctinfo); | ||
31 | EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook); | ||
32 | |||
33 | static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, | ||
34 | struct nf_conntrack_tuple *tuple) | ||
29 | { | 35 | { |
30 | const __be32 *ap; | 36 | const __be32 *ap; |
31 | __be32 _addrs[2]; | 37 | __be32 _addrs[2]; |
32 | ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), | 38 | ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), |
33 | sizeof(u_int32_t) * 2, _addrs); | 39 | sizeof(u_int32_t) * 2, _addrs); |
34 | if (ap == NULL) | 40 | if (ap == NULL) |
35 | return 0; | 41 | return false; |
36 | 42 | ||
37 | tuple->src.u3.ip = ap[0]; | 43 | tuple->src.u3.ip = ap[0]; |
38 | tuple->dst.u3.ip = ap[1]; | 44 | tuple->dst.u3.ip = ap[1]; |
39 | 45 | ||
40 | return 1; | 46 | return true; |
41 | } | 47 | } |
42 | 48 | ||
43 | static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, | 49 | static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, |
44 | const struct nf_conntrack_tuple *orig) | 50 | const struct nf_conntrack_tuple *orig) |
45 | { | 51 | { |
46 | tuple->src.u3.ip = orig->dst.u3.ip; | 52 | tuple->src.u3.ip = orig->dst.u3.ip; |
47 | tuple->dst.u3.ip = orig->src.u3.ip; | 53 | tuple->dst.u3.ip = orig->src.u3.ip; |
48 | 54 | ||
49 | return 1; | 55 | return true; |
50 | } | 56 | } |
51 | 57 | ||
52 | static int ipv4_print_tuple(struct seq_file *s, | 58 | static int ipv4_print_tuple(struct seq_file *s, |
@@ -101,35 +107,41 @@ static unsigned int ipv4_confirm(unsigned int hooknum, | |||
101 | const struct net_device *out, | 107 | const struct net_device *out, |
102 | int (*okfn)(struct sk_buff *)) | 108 | int (*okfn)(struct sk_buff *)) |
103 | { | 109 | { |
104 | /* We've seen it coming out the other side: confirm it */ | ||
105 | return nf_conntrack_confirm(skb); | ||
106 | } | ||
107 | |||
108 | static unsigned int ipv4_conntrack_help(unsigned int hooknum, | ||
109 | struct sk_buff *skb, | ||
110 | const struct net_device *in, | ||
111 | const struct net_device *out, | ||
112 | int (*okfn)(struct sk_buff *)) | ||
113 | { | ||
114 | struct nf_conn *ct; | 110 | struct nf_conn *ct; |
115 | enum ip_conntrack_info ctinfo; | 111 | enum ip_conntrack_info ctinfo; |
116 | const struct nf_conn_help *help; | 112 | const struct nf_conn_help *help; |
117 | const struct nf_conntrack_helper *helper; | 113 | const struct nf_conntrack_helper *helper; |
114 | unsigned int ret; | ||
118 | 115 | ||
119 | /* This is where we call the helper: as the packet goes out. */ | 116 | /* This is where we call the helper: as the packet goes out. */ |
120 | ct = nf_ct_get(skb, &ctinfo); | 117 | ct = nf_ct_get(skb, &ctinfo); |
121 | if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) | 118 | if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) |
122 | return NF_ACCEPT; | 119 | goto out; |
123 | 120 | ||
124 | help = nfct_help(ct); | 121 | help = nfct_help(ct); |
125 | if (!help) | 122 | if (!help) |
126 | return NF_ACCEPT; | 123 | goto out; |
124 | |||
127 | /* rcu_read_lock()ed by nf_hook_slow */ | 125 | /* rcu_read_lock()ed by nf_hook_slow */ |
128 | helper = rcu_dereference(help->helper); | 126 | helper = rcu_dereference(help->helper); |
129 | if (!helper) | 127 | if (!helper) |
130 | return NF_ACCEPT; | 128 | goto out; |
131 | return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), | 129 | |
132 | ct, ctinfo); | 130 | ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), |
131 | ct, ctinfo); | ||
132 | if (ret != NF_ACCEPT) | ||
133 | return ret; | ||
134 | |||
135 | if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { | ||
136 | typeof(nf_nat_seq_adjust_hook) seq_adjust; | ||
137 | |||
138 | seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); | ||
139 | if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) | ||
140 | return NF_DROP; | ||
141 | } | ||
142 | out: | ||
143 | /* We've seen it coming out the other side: confirm it */ | ||
144 | return nf_conntrack_confirm(skb); | ||
133 | } | 145 | } |
134 | 146 | ||
135 | static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, | 147 | static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, |
@@ -211,20 +223,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { | |||
211 | .priority = NF_IP_PRI_CONNTRACK, | 223 | .priority = NF_IP_PRI_CONNTRACK, |
212 | }, | 224 | }, |
213 | { | 225 | { |
214 | .hook = ipv4_conntrack_help, | ||
215 | .owner = THIS_MODULE, | ||
216 | .pf = PF_INET, | ||
217 | .hooknum = NF_INET_POST_ROUTING, | ||
218 | .priority = NF_IP_PRI_CONNTRACK_HELPER, | ||
219 | }, | ||
220 | { | ||
221 | .hook = ipv4_conntrack_help, | ||
222 | .owner = THIS_MODULE, | ||
223 | .pf = PF_INET, | ||
224 | .hooknum = NF_INET_LOCAL_IN, | ||
225 | .priority = NF_IP_PRI_CONNTRACK_HELPER, | ||
226 | }, | ||
227 | { | ||
228 | .hook = ipv4_confirm, | 226 | .hook = ipv4_confirm, |
229 | .owner = THIS_MODULE, | 227 | .owner = THIS_MODULE, |
230 | .pf = PF_INET, | 228 | .pf = PF_INET, |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index f500b0fdaef4..40a46d482490 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | |||
@@ -106,21 +106,16 @@ static int ct_seq_show(struct seq_file *s, void *v) | |||
106 | /* we only want to print DIR_ORIGINAL */ | 106 | /* we only want to print DIR_ORIGINAL */ |
107 | if (NF_CT_DIRECTION(hash)) | 107 | if (NF_CT_DIRECTION(hash)) |
108 | return 0; | 108 | return 0; |
109 | if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num != AF_INET) | 109 | if (nf_ct_l3num(ct) != AF_INET) |
110 | return 0; | 110 | return 0; |
111 | 111 | ||
112 | l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_ORIGINAL] | 112 | l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); |
113 | .tuple.src.l3num); | ||
114 | NF_CT_ASSERT(l3proto); | 113 | NF_CT_ASSERT(l3proto); |
115 | l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_ORIGINAL] | 114 | l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); |
116 | .tuple.src.l3num, | ||
117 | ct->tuplehash[IP_CT_DIR_ORIGINAL] | ||
118 | .tuple.dst.protonum); | ||
119 | NF_CT_ASSERT(l4proto); | 115 | NF_CT_ASSERT(l4proto); |
120 | 116 | ||
121 | if (seq_printf(s, "%-8s %u %ld ", | 117 | if (seq_printf(s, "%-8s %u %ld ", |
122 | l4proto->name, | 118 | l4proto->name, nf_ct_protonum(ct), |
123 | ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum, | ||
124 | timer_pending(&ct->timeout) | 119 | timer_pending(&ct->timeout) |
125 | ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) | 120 | ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) |
126 | return -ENOSPC; | 121 | return -ENOSPC; |
@@ -379,7 +374,7 @@ static const struct file_operations ct_cpu_seq_fops = { | |||
379 | .open = ct_cpu_seq_open, | 374 | .open = ct_cpu_seq_open, |
380 | .read = seq_read, | 375 | .read = seq_read, |
381 | .llseek = seq_lseek, | 376 | .llseek = seq_lseek, |
382 | .release = seq_release_private, | 377 | .release = seq_release, |
383 | }; | 378 | }; |
384 | 379 | ||
385 | int __init nf_conntrack_ipv4_compat_init(void) | 380 | int __init nf_conntrack_ipv4_compat_init(void) |
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 6873fddb3529..78ab19accace 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c | |||
@@ -22,22 +22,21 @@ | |||
22 | 22 | ||
23 | static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; | 23 | static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; |
24 | 24 | ||
25 | static int icmp_pkt_to_tuple(const struct sk_buff *skb, | 25 | static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, |
26 | unsigned int dataoff, | 26 | struct nf_conntrack_tuple *tuple) |
27 | struct nf_conntrack_tuple *tuple) | ||
28 | { | 27 | { |
29 | const struct icmphdr *hp; | 28 | const struct icmphdr *hp; |
30 | struct icmphdr _hdr; | 29 | struct icmphdr _hdr; |
31 | 30 | ||
32 | hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); | 31 | hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); |
33 | if (hp == NULL) | 32 | if (hp == NULL) |
34 | return 0; | 33 | return false; |
35 | 34 | ||
36 | tuple->dst.u.icmp.type = hp->type; | 35 | tuple->dst.u.icmp.type = hp->type; |
37 | tuple->src.u.icmp.id = hp->un.echo.id; | 36 | tuple->src.u.icmp.id = hp->un.echo.id; |
38 | tuple->dst.u.icmp.code = hp->code; | 37 | tuple->dst.u.icmp.code = hp->code; |
39 | 38 | ||
40 | return 1; | 39 | return true; |
41 | } | 40 | } |
42 | 41 | ||
43 | /* Add 1; spaces filled with 0. */ | 42 | /* Add 1; spaces filled with 0. */ |
@@ -52,17 +51,17 @@ static const u_int8_t invmap[] = { | |||
52 | [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1 | 51 | [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1 |
53 | }; | 52 | }; |
54 | 53 | ||
55 | static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple, | 54 | static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, |
56 | const struct nf_conntrack_tuple *orig) | 55 | const struct nf_conntrack_tuple *orig) |
57 | { | 56 | { |
58 | if (orig->dst.u.icmp.type >= sizeof(invmap) | 57 | if (orig->dst.u.icmp.type >= sizeof(invmap) |
59 | || !invmap[orig->dst.u.icmp.type]) | 58 | || !invmap[orig->dst.u.icmp.type]) |
60 | return 0; | 59 | return false; |
61 | 60 | ||
62 | tuple->src.u.icmp.id = orig->src.u.icmp.id; | 61 | tuple->src.u.icmp.id = orig->src.u.icmp.id; |
63 | tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1; | 62 | tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1; |
64 | tuple->dst.u.icmp.code = orig->dst.u.icmp.code; | 63 | tuple->dst.u.icmp.code = orig->dst.u.icmp.code; |
65 | return 1; | 64 | return true; |
66 | } | 65 | } |
67 | 66 | ||
68 | /* Print out the per-protocol part of the tuple. */ | 67 | /* Print out the per-protocol part of the tuple. */ |
@@ -101,8 +100,8 @@ static int icmp_packet(struct nf_conn *ct, | |||
101 | } | 100 | } |
102 | 101 | ||
103 | /* Called when a new connection for this protocol found. */ | 102 | /* Called when a new connection for this protocol found. */ |
104 | static int icmp_new(struct nf_conn *ct, | 103 | static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, |
105 | const struct sk_buff *skb, unsigned int dataoff) | 104 | unsigned int dataoff) |
106 | { | 105 | { |
107 | static const u_int8_t valid_new[] = { | 106 | static const u_int8_t valid_new[] = { |
108 | [ICMP_ECHO] = 1, | 107 | [ICMP_ECHO] = 1, |
@@ -116,11 +115,11 @@ static int icmp_new(struct nf_conn *ct, | |||
116 | /* Can't create a new ICMP `conn' with this. */ | 115 | /* Can't create a new ICMP `conn' with this. */ |
117 | pr_debug("icmp: can't create new conn with type %u\n", | 116 | pr_debug("icmp: can't create new conn with type %u\n", |
118 | ct->tuplehash[0].tuple.dst.u.icmp.type); | 117 | ct->tuplehash[0].tuple.dst.u.icmp.type); |
119 | NF_CT_DUMP_TUPLE(&ct->tuplehash[0].tuple); | 118 | nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple); |
120 | return 0; | 119 | return false; |
121 | } | 120 | } |
122 | atomic_set(&ct->proto.icmp.count, 0); | 121 | atomic_set(&ct->proto.icmp.count, 0); |
123 | return 1; | 122 | return true; |
124 | } | 123 | } |
125 | 124 | ||
126 | /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ | 125 | /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ |
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 36b4e3bb056f..04578593e100 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -150,9 +150,9 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple, | |||
150 | const struct nf_nat_range *range) | 150 | const struct nf_nat_range *range) |
151 | { | 151 | { |
152 | unsigned int h = hash_by_src(tuple); | 152 | unsigned int h = hash_by_src(tuple); |
153 | struct nf_conn_nat *nat; | 153 | const struct nf_conn_nat *nat; |
154 | struct nf_conn *ct; | 154 | const struct nf_conn *ct; |
155 | struct hlist_node *n; | 155 | const struct hlist_node *n; |
156 | 156 | ||
157 | rcu_read_lock(); | 157 | rcu_read_lock(); |
158 | hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) { | 158 | hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) { |
@@ -349,7 +349,7 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
349 | EXPORT_SYMBOL(nf_nat_setup_info); | 349 | EXPORT_SYMBOL(nf_nat_setup_info); |
350 | 350 | ||
351 | /* Returns true if succeeded. */ | 351 | /* Returns true if succeeded. */ |
352 | static int | 352 | static bool |
353 | manip_pkt(u_int16_t proto, | 353 | manip_pkt(u_int16_t proto, |
354 | struct sk_buff *skb, | 354 | struct sk_buff *skb, |
355 | unsigned int iphdroff, | 355 | unsigned int iphdroff, |
@@ -360,7 +360,7 @@ manip_pkt(u_int16_t proto, | |||
360 | const struct nf_nat_protocol *p; | 360 | const struct nf_nat_protocol *p; |
361 | 361 | ||
362 | if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) | 362 | if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) |
363 | return 0; | 363 | return false; |
364 | 364 | ||
365 | iph = (void *)skb->data + iphdroff; | 365 | iph = (void *)skb->data + iphdroff; |
366 | 366 | ||
@@ -369,7 +369,7 @@ manip_pkt(u_int16_t proto, | |||
369 | /* rcu_read_lock()ed by nf_hook_slow */ | 369 | /* rcu_read_lock()ed by nf_hook_slow */ |
370 | p = __nf_nat_proto_find(proto); | 370 | p = __nf_nat_proto_find(proto); |
371 | if (!p->manip_pkt(skb, iphdroff, target, maniptype)) | 371 | if (!p->manip_pkt(skb, iphdroff, target, maniptype)) |
372 | return 0; | 372 | return false; |
373 | 373 | ||
374 | iph = (void *)skb->data + iphdroff; | 374 | iph = (void *)skb->data + iphdroff; |
375 | 375 | ||
@@ -380,7 +380,7 @@ manip_pkt(u_int16_t proto, | |||
380 | csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); | 380 | csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); |
381 | iph->daddr = target->dst.u3.ip; | 381 | iph->daddr = target->dst.u3.ip; |
382 | } | 382 | } |
383 | return 1; | 383 | return true; |
384 | } | 384 | } |
385 | 385 | ||
386 | /* Do packet manipulations according to nf_nat_setup_info. */ | 386 | /* Do packet manipulations according to nf_nat_setup_info. */ |
@@ -426,7 +426,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, | |||
426 | struct icmphdr icmp; | 426 | struct icmphdr icmp; |
427 | struct iphdr ip; | 427 | struct iphdr ip; |
428 | } *inside; | 428 | } *inside; |
429 | struct nf_conntrack_l4proto *l4proto; | 429 | const struct nf_conntrack_l4proto *l4proto; |
430 | struct nf_conntrack_tuple inner, target; | 430 | struct nf_conntrack_tuple inner, target; |
431 | int hdrlen = ip_hdrlen(skb); | 431 | int hdrlen = ip_hdrlen(skb); |
432 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | 432 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
@@ -544,46 +544,6 @@ void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) | |||
544 | } | 544 | } |
545 | EXPORT_SYMBOL(nf_nat_protocol_unregister); | 545 | EXPORT_SYMBOL(nf_nat_protocol_unregister); |
546 | 546 | ||
547 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | ||
548 | int | ||
549 | nf_nat_port_range_to_nlattr(struct sk_buff *skb, | ||
550 | const struct nf_nat_range *range) | ||
551 | { | ||
552 | NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.tcp.port); | ||
553 | NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.tcp.port); | ||
554 | |||
555 | return 0; | ||
556 | |||
557 | nla_put_failure: | ||
558 | return -1; | ||
559 | } | ||
560 | EXPORT_SYMBOL_GPL(nf_nat_port_nlattr_to_range); | ||
561 | |||
562 | int | ||
563 | nf_nat_port_nlattr_to_range(struct nlattr *tb[], struct nf_nat_range *range) | ||
564 | { | ||
565 | int ret = 0; | ||
566 | |||
567 | /* we have to return whether we actually parsed something or not */ | ||
568 | |||
569 | if (tb[CTA_PROTONAT_PORT_MIN]) { | ||
570 | ret = 1; | ||
571 | range->min.tcp.port = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); | ||
572 | } | ||
573 | |||
574 | if (!tb[CTA_PROTONAT_PORT_MAX]) { | ||
575 | if (ret) | ||
576 | range->max.tcp.port = range->min.tcp.port; | ||
577 | } else { | ||
578 | ret = 1; | ||
579 | range->max.tcp.port = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); | ||
580 | } | ||
581 | |||
582 | return ret; | ||
583 | } | ||
584 | EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nlattr); | ||
585 | #endif | ||
586 | |||
587 | /* Noone using conntrack by the time this called. */ | 547 | /* Noone using conntrack by the time this called. */ |
588 | static void nf_nat_cleanup_conntrack(struct nf_conn *ct) | 548 | static void nf_nat_cleanup_conntrack(struct nf_conn *ct) |
589 | { | 549 | { |
@@ -660,6 +620,9 @@ static int __init nf_nat_init(void) | |||
660 | nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; | 620 | nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; |
661 | 621 | ||
662 | l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); | 622 | l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); |
623 | |||
624 | BUG_ON(nf_nat_seq_adjust_hook != NULL); | ||
625 | rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); | ||
663 | return 0; | 626 | return 0; |
664 | 627 | ||
665 | cleanup_extend: | 628 | cleanup_extend: |
@@ -686,6 +649,8 @@ static void __exit nf_nat_cleanup(void) | |||
686 | nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size); | 649 | nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size); |
687 | nf_ct_l3proto_put(l3proto); | 650 | nf_ct_l3proto_put(l3proto); |
688 | nf_ct_extend_unregister(&nat_extend); | 651 | nf_ct_extend_unregister(&nat_extend); |
652 | rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); | ||
653 | synchronize_net(); | ||
689 | } | 654 | } |
690 | 655 | ||
691 | MODULE_LICENSE("GPL"); | 656 | MODULE_LICENSE("GPL"); |
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index ca57f47bbd25..11976ea29884 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c | |||
@@ -139,7 +139,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, | |||
139 | const char *rep_buffer, | 139 | const char *rep_buffer, |
140 | unsigned int rep_len) | 140 | unsigned int rep_len) |
141 | { | 141 | { |
142 | struct rtable *rt = (struct rtable *)skb->dst; | 142 | struct rtable *rt = skb->rtable; |
143 | struct iphdr *iph; | 143 | struct iphdr *iph; |
144 | struct tcphdr *tcph; | 144 | struct tcphdr *tcph; |
145 | int oldlen, datalen; | 145 | int oldlen, datalen; |
@@ -217,7 +217,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, | |||
217 | const char *rep_buffer, | 217 | const char *rep_buffer, |
218 | unsigned int rep_len) | 218 | unsigned int rep_len) |
219 | { | 219 | { |
220 | struct rtable *rt = (struct rtable *)skb->dst; | 220 | struct rtable *rt = skb->rtable; |
221 | struct iphdr *iph; | 221 | struct iphdr *iph; |
222 | struct udphdr *udph; | 222 | struct udphdr *udph; |
223 | int datalen, oldlen; | 223 | int datalen, oldlen; |
@@ -416,7 +416,6 @@ nf_nat_seq_adjust(struct sk_buff *skb, | |||
416 | 416 | ||
417 | return 1; | 417 | return 1; |
418 | } | 418 | } |
419 | EXPORT_SYMBOL(nf_nat_seq_adjust); | ||
420 | 419 | ||
421 | /* Setup NAT on this expected conntrack so it follows master. */ | 420 | /* Setup NAT on this expected conntrack so it follows master. */ |
422 | /* If we fail to get a free NAT slot, we'll get dropped on confirm */ | 421 | /* If we fail to get a free NAT slot, we'll get dropped on confirm */ |
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 3a1e6d6afc0a..da3d91a5ef5c 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c | |||
@@ -72,7 +72,7 @@ static void pptp_nat_expected(struct nf_conn *ct, | |||
72 | } | 72 | } |
73 | 73 | ||
74 | pr_debug("trying to unexpect other dir: "); | 74 | pr_debug("trying to unexpect other dir: "); |
75 | NF_CT_DUMP_TUPLE(&t); | 75 | nf_ct_dump_tuple_ip(&t); |
76 | other_exp = nf_ct_expect_find_get(&t); | 76 | other_exp = nf_ct_expect_find_get(&t); |
77 | if (other_exp) { | 77 | if (other_exp) { |
78 | nf_ct_unexpect_related(other_exp); | 78 | nf_ct_unexpect_related(other_exp); |
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c new file mode 100644 index 000000000000..91537f11273f --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_proto_common.c | |||
@@ -0,0 +1,120 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * (C) 2008 Patrick McHardy <kaber@trash.net> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/types.h> | ||
11 | #include <linux/random.h> | ||
12 | #include <linux/ip.h> | ||
13 | |||
14 | #include <linux/netfilter.h> | ||
15 | #include <net/netfilter/nf_nat.h> | ||
16 | #include <net/netfilter/nf_nat_core.h> | ||
17 | #include <net/netfilter/nf_nat_rule.h> | ||
18 | #include <net/netfilter/nf_nat_protocol.h> | ||
19 | |||
20 | bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, | ||
21 | enum nf_nat_manip_type maniptype, | ||
22 | const union nf_conntrack_man_proto *min, | ||
23 | const union nf_conntrack_man_proto *max) | ||
24 | { | ||
25 | __be16 port; | ||
26 | |||
27 | if (maniptype == IP_NAT_MANIP_SRC) | ||
28 | port = tuple->src.u.all; | ||
29 | else | ||
30 | port = tuple->dst.u.all; | ||
31 | |||
32 | return ntohs(port) >= ntohs(min->all) && | ||
33 | ntohs(port) <= ntohs(max->all); | ||
34 | } | ||
35 | EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); | ||
36 | |||
37 | bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, | ||
38 | const struct nf_nat_range *range, | ||
39 | enum nf_nat_manip_type maniptype, | ||
40 | const struct nf_conn *ct, | ||
41 | u_int16_t *rover) | ||
42 | { | ||
43 | unsigned int range_size, min, i; | ||
44 | __be16 *portptr; | ||
45 | u_int16_t off; | ||
46 | |||
47 | if (maniptype == IP_NAT_MANIP_SRC) | ||
48 | portptr = &tuple->src.u.all; | ||
49 | else | ||
50 | portptr = &tuple->dst.u.all; | ||
51 | |||
52 | /* If no range specified... */ | ||
53 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { | ||
54 | /* If it's dst rewrite, can't change port */ | ||
55 | if (maniptype == IP_NAT_MANIP_DST) | ||
56 | return false; | ||
57 | |||
58 | if (ntohs(*portptr) < 1024) { | ||
59 | /* Loose convention: >> 512 is credential passing */ | ||
60 | if (ntohs(*portptr) < 512) { | ||
61 | min = 1; | ||
62 | range_size = 511 - min + 1; | ||
63 | } else { | ||
64 | min = 600; | ||
65 | range_size = 1023 - min + 1; | ||
66 | } | ||
67 | } else { | ||
68 | min = 1024; | ||
69 | range_size = 65535 - 1024 + 1; | ||
70 | } | ||
71 | } else { | ||
72 | min = ntohs(range->min.all); | ||
73 | range_size = ntohs(range->max.all) - min + 1; | ||
74 | } | ||
75 | |||
76 | off = *rover; | ||
77 | if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) | ||
78 | off = net_random(); | ||
79 | |||
80 | for (i = 0; i < range_size; i++, off++) { | ||
81 | *portptr = htons(min + off % range_size); | ||
82 | if (nf_nat_used_tuple(tuple, ct)) | ||
83 | continue; | ||
84 | if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) | ||
85 | *rover = off; | ||
86 | return true; | ||
87 | } | ||
88 | return false; | ||
89 | } | ||
90 | EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); | ||
91 | |||
92 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | ||
93 | int nf_nat_proto_range_to_nlattr(struct sk_buff *skb, | ||
94 | const struct nf_nat_range *range) | ||
95 | { | ||
96 | NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.all); | ||
97 | NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.all); | ||
98 | return 0; | ||
99 | |||
100 | nla_put_failure: | ||
101 | return -1; | ||
102 | } | ||
103 | EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range); | ||
104 | |||
105 | int nf_nat_proto_nlattr_to_range(struct nlattr *tb[], | ||
106 | struct nf_nat_range *range) | ||
107 | { | ||
108 | if (tb[CTA_PROTONAT_PORT_MIN]) { | ||
109 | range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); | ||
110 | range->max.all = range->min.tcp.port; | ||
111 | range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; | ||
112 | } | ||
113 | if (tb[CTA_PROTONAT_PORT_MAX]) { | ||
114 | range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); | ||
115 | range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; | ||
116 | } | ||
117 | return 0; | ||
118 | } | ||
119 | EXPORT_SYMBOL_GPL(nf_nat_proto_range_to_nlattr); | ||
120 | #endif | ||
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c new file mode 100644 index 000000000000..22485ce306d4 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c | |||
@@ -0,0 +1,108 @@ | |||
1 | /* | ||
2 | * DCCP NAT protocol helper | ||
3 | * | ||
4 | * Copyright (c) 2005, 2006. 2008 Patrick McHardy <kaber@trash.net> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/skbuff.h> | ||
16 | #include <linux/ip.h> | ||
17 | #include <linux/dccp.h> | ||
18 | |||
19 | #include <net/netfilter/nf_conntrack.h> | ||
20 | #include <net/netfilter/nf_nat.h> | ||
21 | #include <net/netfilter/nf_nat_protocol.h> | ||
22 | |||
23 | static u_int16_t dccp_port_rover; | ||
24 | |||
25 | static bool | ||
26 | dccp_unique_tuple(struct nf_conntrack_tuple *tuple, | ||
27 | const struct nf_nat_range *range, | ||
28 | enum nf_nat_manip_type maniptype, | ||
29 | const struct nf_conn *ct) | ||
30 | { | ||
31 | return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, | ||
32 | &dccp_port_rover); | ||
33 | } | ||
34 | |||
35 | static bool | ||
36 | dccp_manip_pkt(struct sk_buff *skb, | ||
37 | unsigned int iphdroff, | ||
38 | const struct nf_conntrack_tuple *tuple, | ||
39 | enum nf_nat_manip_type maniptype) | ||
40 | { | ||
41 | const struct iphdr *iph = (const void *)(skb->data + iphdroff); | ||
42 | struct dccp_hdr *hdr; | ||
43 | unsigned int hdroff = iphdroff + iph->ihl * 4; | ||
44 | __be32 oldip, newip; | ||
45 | __be16 *portptr, oldport, newport; | ||
46 | int hdrsize = 8; /* DCCP connection tracking guarantees this much */ | ||
47 | |||
48 | if (skb->len >= hdroff + sizeof(struct dccp_hdr)) | ||
49 | hdrsize = sizeof(struct dccp_hdr); | ||
50 | |||
51 | if (!skb_make_writable(skb, hdroff + hdrsize)) | ||
52 | return false; | ||
53 | |||
54 | iph = (struct iphdr *)(skb->data + iphdroff); | ||
55 | hdr = (struct dccp_hdr *)(skb->data + hdroff); | ||
56 | |||
57 | if (maniptype == IP_NAT_MANIP_SRC) { | ||
58 | oldip = iph->saddr; | ||
59 | newip = tuple->src.u3.ip; | ||
60 | newport = tuple->src.u.dccp.port; | ||
61 | portptr = &hdr->dccph_sport; | ||
62 | } else { | ||
63 | oldip = iph->daddr; | ||
64 | newip = tuple->dst.u3.ip; | ||
65 | newport = tuple->dst.u.dccp.port; | ||
66 | portptr = &hdr->dccph_dport; | ||
67 | } | ||
68 | |||
69 | oldport = *portptr; | ||
70 | *portptr = newport; | ||
71 | |||
72 | if (hdrsize < sizeof(*hdr)) | ||
73 | return true; | ||
74 | |||
75 | inet_proto_csum_replace4(&hdr->dccph_checksum, skb, oldip, newip, 1); | ||
76 | inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, | ||
77 | 0); | ||
78 | return true; | ||
79 | } | ||
80 | |||
81 | static const struct nf_nat_protocol nf_nat_protocol_dccp = { | ||
82 | .protonum = IPPROTO_DCCP, | ||
83 | .me = THIS_MODULE, | ||
84 | .manip_pkt = dccp_manip_pkt, | ||
85 | .in_range = nf_nat_proto_in_range, | ||
86 | .unique_tuple = dccp_unique_tuple, | ||
87 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | ||
88 | .range_to_nlattr = nf_nat_proto_range_to_nlattr, | ||
89 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | ||
90 | #endif | ||
91 | }; | ||
92 | |||
93 | static int __init nf_nat_proto_dccp_init(void) | ||
94 | { | ||
95 | return nf_nat_protocol_register(&nf_nat_protocol_dccp); | ||
96 | } | ||
97 | |||
98 | static void __exit nf_nat_proto_dccp_fini(void) | ||
99 | { | ||
100 | nf_nat_protocol_unregister(&nf_nat_protocol_dccp); | ||
101 | } | ||
102 | |||
103 | module_init(nf_nat_proto_dccp_init); | ||
104 | module_exit(nf_nat_proto_dccp_fini); | ||
105 | |||
106 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); | ||
107 | MODULE_DESCRIPTION("DCCP NAT protocol helper"); | ||
108 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index a1e4da16da2e..d7e89201351e 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c | |||
@@ -36,26 +36,8 @@ MODULE_LICENSE("GPL"); | |||
36 | MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); | 36 | MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); |
37 | MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); | 37 | MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); |
38 | 38 | ||
39 | /* is key in given range between min and max */ | ||
40 | static int | ||
41 | gre_in_range(const struct nf_conntrack_tuple *tuple, | ||
42 | enum nf_nat_manip_type maniptype, | ||
43 | const union nf_conntrack_man_proto *min, | ||
44 | const union nf_conntrack_man_proto *max) | ||
45 | { | ||
46 | __be16 key; | ||
47 | |||
48 | if (maniptype == IP_NAT_MANIP_SRC) | ||
49 | key = tuple->src.u.gre.key; | ||
50 | else | ||
51 | key = tuple->dst.u.gre.key; | ||
52 | |||
53 | return ntohs(key) >= ntohs(min->gre.key) && | ||
54 | ntohs(key) <= ntohs(max->gre.key); | ||
55 | } | ||
56 | |||
57 | /* generate unique tuple ... */ | 39 | /* generate unique tuple ... */ |
58 | static int | 40 | static bool |
59 | gre_unique_tuple(struct nf_conntrack_tuple *tuple, | 41 | gre_unique_tuple(struct nf_conntrack_tuple *tuple, |
60 | const struct nf_nat_range *range, | 42 | const struct nf_nat_range *range, |
61 | enum nf_nat_manip_type maniptype, | 43 | enum nf_nat_manip_type maniptype, |
@@ -68,7 +50,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
68 | /* If there is no master conntrack we are not PPTP, | 50 | /* If there is no master conntrack we are not PPTP, |
69 | do not change tuples */ | 51 | do not change tuples */ |
70 | if (!ct->master) | 52 | if (!ct->master) |
71 | return 0; | 53 | return false; |
72 | 54 | ||
73 | if (maniptype == IP_NAT_MANIP_SRC) | 55 | if (maniptype == IP_NAT_MANIP_SRC) |
74 | keyptr = &tuple->src.u.gre.key; | 56 | keyptr = &tuple->src.u.gre.key; |
@@ -89,20 +71,20 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
89 | for (i = 0; i < range_size; i++, key++) { | 71 | for (i = 0; i < range_size; i++, key++) { |
90 | *keyptr = htons(min + key % range_size); | 72 | *keyptr = htons(min + key % range_size); |
91 | if (!nf_nat_used_tuple(tuple, ct)) | 73 | if (!nf_nat_used_tuple(tuple, ct)) |
92 | return 1; | 74 | return true; |
93 | } | 75 | } |
94 | 76 | ||
95 | pr_debug("%p: no NAT mapping\n", ct); | 77 | pr_debug("%p: no NAT mapping\n", ct); |
96 | return 0; | 78 | return false; |
97 | } | 79 | } |
98 | 80 | ||
99 | /* manipulate a GRE packet according to maniptype */ | 81 | /* manipulate a GRE packet according to maniptype */ |
100 | static int | 82 | static bool |
101 | gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, | 83 | gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, |
102 | const struct nf_conntrack_tuple *tuple, | 84 | const struct nf_conntrack_tuple *tuple, |
103 | enum nf_nat_manip_type maniptype) | 85 | enum nf_nat_manip_type maniptype) |
104 | { | 86 | { |
105 | struct gre_hdr *greh; | 87 | const struct gre_hdr *greh; |
106 | struct gre_hdr_pptp *pgreh; | 88 | struct gre_hdr_pptp *pgreh; |
107 | const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); | 89 | const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); |
108 | unsigned int hdroff = iphdroff + iph->ihl * 4; | 90 | unsigned int hdroff = iphdroff + iph->ihl * 4; |
@@ -110,7 +92,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, | |||
110 | /* pgreh includes two optional 32bit fields which are not required | 92 | /* pgreh includes two optional 32bit fields which are not required |
111 | * to be there. That's where the magic '8' comes from */ | 93 | * to be there. That's where the magic '8' comes from */ |
112 | if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8)) | 94 | if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8)) |
113 | return 0; | 95 | return false; |
114 | 96 | ||
115 | greh = (void *)skb->data + hdroff; | 97 | greh = (void *)skb->data + hdroff; |
116 | pgreh = (struct gre_hdr_pptp *)greh; | 98 | pgreh = (struct gre_hdr_pptp *)greh; |
@@ -118,7 +100,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, | |||
118 | /* we only have destination manip of a packet, since 'source key' | 100 | /* we only have destination manip of a packet, since 'source key' |
119 | * is not present in the packet itself */ | 101 | * is not present in the packet itself */ |
120 | if (maniptype != IP_NAT_MANIP_DST) | 102 | if (maniptype != IP_NAT_MANIP_DST) |
121 | return 1; | 103 | return true; |
122 | switch (greh->version) { | 104 | switch (greh->version) { |
123 | case GRE_VERSION_1701: | 105 | case GRE_VERSION_1701: |
124 | /* We do not currently NAT any GREv0 packets. | 106 | /* We do not currently NAT any GREv0 packets. |
@@ -130,21 +112,20 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, | |||
130 | break; | 112 | break; |
131 | default: | 113 | default: |
132 | pr_debug("can't nat unknown GRE version\n"); | 114 | pr_debug("can't nat unknown GRE version\n"); |
133 | return 0; | 115 | return false; |
134 | } | 116 | } |
135 | return 1; | 117 | return true; |
136 | } | 118 | } |
137 | 119 | ||
138 | static const struct nf_nat_protocol gre = { | 120 | static const struct nf_nat_protocol gre = { |
139 | .name = "GRE", | ||
140 | .protonum = IPPROTO_GRE, | 121 | .protonum = IPPROTO_GRE, |
141 | .me = THIS_MODULE, | 122 | .me = THIS_MODULE, |
142 | .manip_pkt = gre_manip_pkt, | 123 | .manip_pkt = gre_manip_pkt, |
143 | .in_range = gre_in_range, | 124 | .in_range = nf_nat_proto_in_range, |
144 | .unique_tuple = gre_unique_tuple, | 125 | .unique_tuple = gre_unique_tuple, |
145 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 126 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
146 | .range_to_nlattr = nf_nat_port_range_to_nlattr, | 127 | .range_to_nlattr = nf_nat_proto_range_to_nlattr, |
147 | .nlattr_to_range = nf_nat_port_nlattr_to_range, | 128 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, |
148 | #endif | 129 | #endif |
149 | }; | 130 | }; |
150 | 131 | ||
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index 03a02969aa57..19a8b0b07d8e 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c | |||
@@ -17,7 +17,7 @@ | |||
17 | #include <net/netfilter/nf_nat_rule.h> | 17 | #include <net/netfilter/nf_nat_rule.h> |
18 | #include <net/netfilter/nf_nat_protocol.h> | 18 | #include <net/netfilter/nf_nat_protocol.h> |
19 | 19 | ||
20 | static int | 20 | static bool |
21 | icmp_in_range(const struct nf_conntrack_tuple *tuple, | 21 | icmp_in_range(const struct nf_conntrack_tuple *tuple, |
22 | enum nf_nat_manip_type maniptype, | 22 | enum nf_nat_manip_type maniptype, |
23 | const union nf_conntrack_man_proto *min, | 23 | const union nf_conntrack_man_proto *min, |
@@ -27,7 +27,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple, | |||
27 | ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); | 27 | ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); |
28 | } | 28 | } |
29 | 29 | ||
30 | static int | 30 | static bool |
31 | icmp_unique_tuple(struct nf_conntrack_tuple *tuple, | 31 | icmp_unique_tuple(struct nf_conntrack_tuple *tuple, |
32 | const struct nf_nat_range *range, | 32 | const struct nf_nat_range *range, |
33 | enum nf_nat_manip_type maniptype, | 33 | enum nf_nat_manip_type maniptype, |
@@ -46,12 +46,12 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
46 | tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + | 46 | tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + |
47 | (id % range_size)); | 47 | (id % range_size)); |
48 | if (!nf_nat_used_tuple(tuple, ct)) | 48 | if (!nf_nat_used_tuple(tuple, ct)) |
49 | return 1; | 49 | return true; |
50 | } | 50 | } |
51 | return 0; | 51 | return false; |
52 | } | 52 | } |
53 | 53 | ||
54 | static int | 54 | static bool |
55 | icmp_manip_pkt(struct sk_buff *skb, | 55 | icmp_manip_pkt(struct sk_buff *skb, |
56 | unsigned int iphdroff, | 56 | unsigned int iphdroff, |
57 | const struct nf_conntrack_tuple *tuple, | 57 | const struct nf_conntrack_tuple *tuple, |
@@ -62,24 +62,23 @@ icmp_manip_pkt(struct sk_buff *skb, | |||
62 | unsigned int hdroff = iphdroff + iph->ihl*4; | 62 | unsigned int hdroff = iphdroff + iph->ihl*4; |
63 | 63 | ||
64 | if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) | 64 | if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) |
65 | return 0; | 65 | return false; |
66 | 66 | ||
67 | hdr = (struct icmphdr *)(skb->data + hdroff); | 67 | hdr = (struct icmphdr *)(skb->data + hdroff); |
68 | inet_proto_csum_replace2(&hdr->checksum, skb, | 68 | inet_proto_csum_replace2(&hdr->checksum, skb, |
69 | hdr->un.echo.id, tuple->src.u.icmp.id, 0); | 69 | hdr->un.echo.id, tuple->src.u.icmp.id, 0); |
70 | hdr->un.echo.id = tuple->src.u.icmp.id; | 70 | hdr->un.echo.id = tuple->src.u.icmp.id; |
71 | return 1; | 71 | return true; |
72 | } | 72 | } |
73 | 73 | ||
74 | const struct nf_nat_protocol nf_nat_protocol_icmp = { | 74 | const struct nf_nat_protocol nf_nat_protocol_icmp = { |
75 | .name = "ICMP", | ||
76 | .protonum = IPPROTO_ICMP, | 75 | .protonum = IPPROTO_ICMP, |
77 | .me = THIS_MODULE, | 76 | .me = THIS_MODULE, |
78 | .manip_pkt = icmp_manip_pkt, | 77 | .manip_pkt = icmp_manip_pkt, |
79 | .in_range = icmp_in_range, | 78 | .in_range = icmp_in_range, |
80 | .unique_tuple = icmp_unique_tuple, | 79 | .unique_tuple = icmp_unique_tuple, |
81 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 80 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
82 | .range_to_nlattr = nf_nat_port_range_to_nlattr, | 81 | .range_to_nlattr = nf_nat_proto_range_to_nlattr, |
83 | .nlattr_to_range = nf_nat_port_nlattr_to_range, | 82 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, |
84 | #endif | 83 | #endif |
85 | }; | 84 | }; |
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c new file mode 100644 index 000000000000..82e4c0e286b8 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c | |||
@@ -0,0 +1,96 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/ip.h> | ||
12 | #include <linux/sctp.h> | ||
13 | #include <net/sctp/checksum.h> | ||
14 | |||
15 | #include <net/netfilter/nf_nat_protocol.h> | ||
16 | |||
17 | static u_int16_t nf_sctp_port_rover; | ||
18 | |||
19 | static bool | ||
20 | sctp_unique_tuple(struct nf_conntrack_tuple *tuple, | ||
21 | const struct nf_nat_range *range, | ||
22 | enum nf_nat_manip_type maniptype, | ||
23 | const struct nf_conn *ct) | ||
24 | { | ||
25 | return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, | ||
26 | &nf_sctp_port_rover); | ||
27 | } | ||
28 | |||
29 | static bool | ||
30 | sctp_manip_pkt(struct sk_buff *skb, | ||
31 | unsigned int iphdroff, | ||
32 | const struct nf_conntrack_tuple *tuple, | ||
33 | enum nf_nat_manip_type maniptype) | ||
34 | { | ||
35 | const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); | ||
36 | sctp_sctphdr_t *hdr; | ||
37 | unsigned int hdroff = iphdroff + iph->ihl*4; | ||
38 | __be32 oldip, newip; | ||
39 | u32 crc32; | ||
40 | |||
41 | if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) | ||
42 | return false; | ||
43 | |||
44 | iph = (struct iphdr *)(skb->data + iphdroff); | ||
45 | hdr = (struct sctphdr *)(skb->data + hdroff); | ||
46 | |||
47 | if (maniptype == IP_NAT_MANIP_SRC) { | ||
48 | /* Get rid of src ip and src pt */ | ||
49 | oldip = iph->saddr; | ||
50 | newip = tuple->src.u3.ip; | ||
51 | hdr->source = tuple->src.u.sctp.port; | ||
52 | } else { | ||
53 | /* Get rid of dst ip and dst pt */ | ||
54 | oldip = iph->daddr; | ||
55 | newip = tuple->dst.u3.ip; | ||
56 | hdr->dest = tuple->dst.u.sctp.port; | ||
57 | } | ||
58 | |||
59 | crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff); | ||
60 | for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next) | ||
61 | crc32 = sctp_update_cksum((u8 *)skb->data, skb_headlen(skb), | ||
62 | crc32); | ||
63 | crc32 = sctp_end_cksum(crc32); | ||
64 | hdr->checksum = htonl(crc32); | ||
65 | |||
66 | return true; | ||
67 | } | ||
68 | |||
69 | static const struct nf_nat_protocol nf_nat_protocol_sctp = { | ||
70 | .protonum = IPPROTO_SCTP, | ||
71 | .me = THIS_MODULE, | ||
72 | .manip_pkt = sctp_manip_pkt, | ||
73 | .in_range = nf_nat_proto_in_range, | ||
74 | .unique_tuple = sctp_unique_tuple, | ||
75 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | ||
76 | .range_to_nlattr = nf_nat_proto_range_to_nlattr, | ||
77 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | ||
78 | #endif | ||
79 | }; | ||
80 | |||
81 | static int __init nf_nat_proto_sctp_init(void) | ||
82 | { | ||
83 | return nf_nat_protocol_register(&nf_nat_protocol_sctp); | ||
84 | } | ||
85 | |||
86 | static void __exit nf_nat_proto_sctp_exit(void) | ||
87 | { | ||
88 | nf_nat_protocol_unregister(&nf_nat_protocol_sctp); | ||
89 | } | ||
90 | |||
91 | module_init(nf_nat_proto_sctp_init); | ||
92 | module_exit(nf_nat_proto_sctp_exit); | ||
93 | |||
94 | MODULE_LICENSE("GPL"); | ||
95 | MODULE_DESCRIPTION("SCTP NAT protocol helper"); | ||
96 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); | ||
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index ffd5d1589eca..399e2cfa263b 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c | |||
@@ -8,7 +8,6 @@ | |||
8 | 8 | ||
9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
10 | #include <linux/init.h> | 10 | #include <linux/init.h> |
11 | #include <linux/random.h> | ||
12 | #include <linux/ip.h> | 11 | #include <linux/ip.h> |
13 | #include <linux/tcp.h> | 12 | #include <linux/tcp.h> |
14 | 13 | ||
@@ -19,75 +18,19 @@ | |||
19 | #include <net/netfilter/nf_nat_protocol.h> | 18 | #include <net/netfilter/nf_nat_protocol.h> |
20 | #include <net/netfilter/nf_nat_core.h> | 19 | #include <net/netfilter/nf_nat_core.h> |
21 | 20 | ||
22 | static int | 21 | static u_int16_t tcp_port_rover; |
23 | tcp_in_range(const struct nf_conntrack_tuple *tuple, | ||
24 | enum nf_nat_manip_type maniptype, | ||
25 | const union nf_conntrack_man_proto *min, | ||
26 | const union nf_conntrack_man_proto *max) | ||
27 | { | ||
28 | __be16 port; | ||
29 | |||
30 | if (maniptype == IP_NAT_MANIP_SRC) | ||
31 | port = tuple->src.u.tcp.port; | ||
32 | else | ||
33 | port = tuple->dst.u.tcp.port; | ||
34 | |||
35 | return ntohs(port) >= ntohs(min->tcp.port) && | ||
36 | ntohs(port) <= ntohs(max->tcp.port); | ||
37 | } | ||
38 | 22 | ||
39 | static int | 23 | static bool |
40 | tcp_unique_tuple(struct nf_conntrack_tuple *tuple, | 24 | tcp_unique_tuple(struct nf_conntrack_tuple *tuple, |
41 | const struct nf_nat_range *range, | 25 | const struct nf_nat_range *range, |
42 | enum nf_nat_manip_type maniptype, | 26 | enum nf_nat_manip_type maniptype, |
43 | const struct nf_conn *ct) | 27 | const struct nf_conn *ct) |
44 | { | 28 | { |
45 | static u_int16_t port; | 29 | return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, |
46 | __be16 *portptr; | 30 | &tcp_port_rover); |
47 | unsigned int range_size, min, i; | ||
48 | |||
49 | if (maniptype == IP_NAT_MANIP_SRC) | ||
50 | portptr = &tuple->src.u.tcp.port; | ||
51 | else | ||
52 | portptr = &tuple->dst.u.tcp.port; | ||
53 | |||
54 | /* If no range specified... */ | ||
55 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { | ||
56 | /* If it's dst rewrite, can't change port */ | ||
57 | if (maniptype == IP_NAT_MANIP_DST) | ||
58 | return 0; | ||
59 | |||
60 | /* Map privileged onto privileged. */ | ||
61 | if (ntohs(*portptr) < 1024) { | ||
62 | /* Loose convention: >> 512 is credential passing */ | ||
63 | if (ntohs(*portptr)<512) { | ||
64 | min = 1; | ||
65 | range_size = 511 - min + 1; | ||
66 | } else { | ||
67 | min = 600; | ||
68 | range_size = 1023 - min + 1; | ||
69 | } | ||
70 | } else { | ||
71 | min = 1024; | ||
72 | range_size = 65535 - 1024 + 1; | ||
73 | } | ||
74 | } else { | ||
75 | min = ntohs(range->min.tcp.port); | ||
76 | range_size = ntohs(range->max.tcp.port) - min + 1; | ||
77 | } | ||
78 | |||
79 | if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) | ||
80 | port = net_random(); | ||
81 | |||
82 | for (i = 0; i < range_size; i++, port++) { | ||
83 | *portptr = htons(min + port % range_size); | ||
84 | if (!nf_nat_used_tuple(tuple, ct)) | ||
85 | return 1; | ||
86 | } | ||
87 | return 0; | ||
88 | } | 31 | } |
89 | 32 | ||
90 | static int | 33 | static bool |
91 | tcp_manip_pkt(struct sk_buff *skb, | 34 | tcp_manip_pkt(struct sk_buff *skb, |
92 | unsigned int iphdroff, | 35 | unsigned int iphdroff, |
93 | const struct nf_conntrack_tuple *tuple, | 36 | const struct nf_conntrack_tuple *tuple, |
@@ -107,7 +50,7 @@ tcp_manip_pkt(struct sk_buff *skb, | |||
107 | hdrsize = sizeof(struct tcphdr); | 50 | hdrsize = sizeof(struct tcphdr); |
108 | 51 | ||
109 | if (!skb_make_writable(skb, hdroff + hdrsize)) | 52 | if (!skb_make_writable(skb, hdroff + hdrsize)) |
110 | return 0; | 53 | return false; |
111 | 54 | ||
112 | iph = (struct iphdr *)(skb->data + iphdroff); | 55 | iph = (struct iphdr *)(skb->data + iphdroff); |
113 | hdr = (struct tcphdr *)(skb->data + hdroff); | 56 | hdr = (struct tcphdr *)(skb->data + hdroff); |
@@ -130,22 +73,21 @@ tcp_manip_pkt(struct sk_buff *skb, | |||
130 | *portptr = newport; | 73 | *portptr = newport; |
131 | 74 | ||
132 | if (hdrsize < sizeof(*hdr)) | 75 | if (hdrsize < sizeof(*hdr)) |
133 | return 1; | 76 | return true; |
134 | 77 | ||
135 | inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); | 78 | inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); |
136 | inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); | 79 | inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); |
137 | return 1; | 80 | return true; |
138 | } | 81 | } |
139 | 82 | ||
140 | const struct nf_nat_protocol nf_nat_protocol_tcp = { | 83 | const struct nf_nat_protocol nf_nat_protocol_tcp = { |
141 | .name = "TCP", | ||
142 | .protonum = IPPROTO_TCP, | 84 | .protonum = IPPROTO_TCP, |
143 | .me = THIS_MODULE, | 85 | .me = THIS_MODULE, |
144 | .manip_pkt = tcp_manip_pkt, | 86 | .manip_pkt = tcp_manip_pkt, |
145 | .in_range = tcp_in_range, | 87 | .in_range = nf_nat_proto_in_range, |
146 | .unique_tuple = tcp_unique_tuple, | 88 | .unique_tuple = tcp_unique_tuple, |
147 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 89 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
148 | .range_to_nlattr = nf_nat_port_range_to_nlattr, | 90 | .range_to_nlattr = nf_nat_proto_range_to_nlattr, |
149 | .nlattr_to_range = nf_nat_port_nlattr_to_range, | 91 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, |
150 | #endif | 92 | #endif |
151 | }; | 93 | }; |
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index 4b8f49910ff2..9e61c79492e4 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c | |||
@@ -8,7 +8,6 @@ | |||
8 | 8 | ||
9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
10 | #include <linux/init.h> | 10 | #include <linux/init.h> |
11 | #include <linux/random.h> | ||
12 | #include <linux/ip.h> | 11 | #include <linux/ip.h> |
13 | #include <linux/udp.h> | 12 | #include <linux/udp.h> |
14 | 13 | ||
@@ -18,74 +17,19 @@ | |||
18 | #include <net/netfilter/nf_nat_rule.h> | 17 | #include <net/netfilter/nf_nat_rule.h> |
19 | #include <net/netfilter/nf_nat_protocol.h> | 18 | #include <net/netfilter/nf_nat_protocol.h> |
20 | 19 | ||
21 | static int | 20 | static u_int16_t udp_port_rover; |
22 | udp_in_range(const struct nf_conntrack_tuple *tuple, | ||
23 | enum nf_nat_manip_type maniptype, | ||
24 | const union nf_conntrack_man_proto *min, | ||
25 | const union nf_conntrack_man_proto *max) | ||
26 | { | ||
27 | __be16 port; | ||
28 | |||
29 | if (maniptype == IP_NAT_MANIP_SRC) | ||
30 | port = tuple->src.u.udp.port; | ||
31 | else | ||
32 | port = tuple->dst.u.udp.port; | ||
33 | |||
34 | return ntohs(port) >= ntohs(min->udp.port) && | ||
35 | ntohs(port) <= ntohs(max->udp.port); | ||
36 | } | ||
37 | 21 | ||
38 | static int | 22 | static bool |
39 | udp_unique_tuple(struct nf_conntrack_tuple *tuple, | 23 | udp_unique_tuple(struct nf_conntrack_tuple *tuple, |
40 | const struct nf_nat_range *range, | 24 | const struct nf_nat_range *range, |
41 | enum nf_nat_manip_type maniptype, | 25 | enum nf_nat_manip_type maniptype, |
42 | const struct nf_conn *ct) | 26 | const struct nf_conn *ct) |
43 | { | 27 | { |
44 | static u_int16_t port; | 28 | return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, |
45 | __be16 *portptr; | 29 | &udp_port_rover); |
46 | unsigned int range_size, min, i; | ||
47 | |||
48 | if (maniptype == IP_NAT_MANIP_SRC) | ||
49 | portptr = &tuple->src.u.udp.port; | ||
50 | else | ||
51 | portptr = &tuple->dst.u.udp.port; | ||
52 | |||
53 | /* If no range specified... */ | ||
54 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { | ||
55 | /* If it's dst rewrite, can't change port */ | ||
56 | if (maniptype == IP_NAT_MANIP_DST) | ||
57 | return 0; | ||
58 | |||
59 | if (ntohs(*portptr) < 1024) { | ||
60 | /* Loose convention: >> 512 is credential passing */ | ||
61 | if (ntohs(*portptr)<512) { | ||
62 | min = 1; | ||
63 | range_size = 511 - min + 1; | ||
64 | } else { | ||
65 | min = 600; | ||
66 | range_size = 1023 - min + 1; | ||
67 | } | ||
68 | } else { | ||
69 | min = 1024; | ||
70 | range_size = 65535 - 1024 + 1; | ||
71 | } | ||
72 | } else { | ||
73 | min = ntohs(range->min.udp.port); | ||
74 | range_size = ntohs(range->max.udp.port) - min + 1; | ||
75 | } | ||
76 | |||
77 | if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) | ||
78 | port = net_random(); | ||
79 | |||
80 | for (i = 0; i < range_size; i++, port++) { | ||
81 | *portptr = htons(min + port % range_size); | ||
82 | if (!nf_nat_used_tuple(tuple, ct)) | ||
83 | return 1; | ||
84 | } | ||
85 | return 0; | ||
86 | } | 30 | } |
87 | 31 | ||
88 | static int | 32 | static bool |
89 | udp_manip_pkt(struct sk_buff *skb, | 33 | udp_manip_pkt(struct sk_buff *skb, |
90 | unsigned int iphdroff, | 34 | unsigned int iphdroff, |
91 | const struct nf_conntrack_tuple *tuple, | 35 | const struct nf_conntrack_tuple *tuple, |
@@ -98,7 +42,7 @@ udp_manip_pkt(struct sk_buff *skb, | |||
98 | __be16 *portptr, newport; | 42 | __be16 *portptr, newport; |
99 | 43 | ||
100 | if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) | 44 | if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) |
101 | return 0; | 45 | return false; |
102 | 46 | ||
103 | iph = (struct iphdr *)(skb->data + iphdroff); | 47 | iph = (struct iphdr *)(skb->data + iphdroff); |
104 | hdr = (struct udphdr *)(skb->data + hdroff); | 48 | hdr = (struct udphdr *)(skb->data + hdroff); |
@@ -124,18 +68,17 @@ udp_manip_pkt(struct sk_buff *skb, | |||
124 | hdr->check = CSUM_MANGLED_0; | 68 | hdr->check = CSUM_MANGLED_0; |
125 | } | 69 | } |
126 | *portptr = newport; | 70 | *portptr = newport; |
127 | return 1; | 71 | return true; |
128 | } | 72 | } |
129 | 73 | ||
130 | const struct nf_nat_protocol nf_nat_protocol_udp = { | 74 | const struct nf_nat_protocol nf_nat_protocol_udp = { |
131 | .name = "UDP", | ||
132 | .protonum = IPPROTO_UDP, | 75 | .protonum = IPPROTO_UDP, |
133 | .me = THIS_MODULE, | 76 | .me = THIS_MODULE, |
134 | .manip_pkt = udp_manip_pkt, | 77 | .manip_pkt = udp_manip_pkt, |
135 | .in_range = udp_in_range, | 78 | .in_range = nf_nat_proto_in_range, |
136 | .unique_tuple = udp_unique_tuple, | 79 | .unique_tuple = udp_unique_tuple, |
137 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 80 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
138 | .range_to_nlattr = nf_nat_port_range_to_nlattr, | 81 | .range_to_nlattr = nf_nat_proto_range_to_nlattr, |
139 | .nlattr_to_range = nf_nat_port_nlattr_to_range, | 82 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, |
140 | #endif | 83 | #endif |
141 | }; | 84 | }; |
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c new file mode 100644 index 000000000000..440a229bbd87 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c | |||
@@ -0,0 +1,99 @@ | |||
1 | /* (C) 1999-2001 Paul `Rusty' Russell | ||
2 | * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> | ||
3 | * (C) 2008 Patrick McHardy <kaber@trash.net> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/types.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/ip.h> | ||
13 | #include <linux/udp.h> | ||
14 | |||
15 | #include <linux/netfilter.h> | ||
16 | #include <net/netfilter/nf_nat.h> | ||
17 | #include <net/netfilter/nf_nat_protocol.h> | ||
18 | |||
19 | static u_int16_t udplite_port_rover; | ||
20 | |||
21 | static bool | ||
22 | udplite_unique_tuple(struct nf_conntrack_tuple *tuple, | ||
23 | const struct nf_nat_range *range, | ||
24 | enum nf_nat_manip_type maniptype, | ||
25 | const struct nf_conn *ct) | ||
26 | { | ||
27 | return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, | ||
28 | &udplite_port_rover); | ||
29 | } | ||
30 | |||
31 | static bool | ||
32 | udplite_manip_pkt(struct sk_buff *skb, | ||
33 | unsigned int iphdroff, | ||
34 | const struct nf_conntrack_tuple *tuple, | ||
35 | enum nf_nat_manip_type maniptype) | ||
36 | { | ||
37 | const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); | ||
38 | struct udphdr *hdr; | ||
39 | unsigned int hdroff = iphdroff + iph->ihl*4; | ||
40 | __be32 oldip, newip; | ||
41 | __be16 *portptr, newport; | ||
42 | |||
43 | if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) | ||
44 | return false; | ||
45 | |||
46 | iph = (struct iphdr *)(skb->data + iphdroff); | ||
47 | hdr = (struct udphdr *)(skb->data + hdroff); | ||
48 | |||
49 | if (maniptype == IP_NAT_MANIP_SRC) { | ||
50 | /* Get rid of src ip and src pt */ | ||
51 | oldip = iph->saddr; | ||
52 | newip = tuple->src.u3.ip; | ||
53 | newport = tuple->src.u.udp.port; | ||
54 | portptr = &hdr->source; | ||
55 | } else { | ||
56 | /* Get rid of dst ip and dst pt */ | ||
57 | oldip = iph->daddr; | ||
58 | newip = tuple->dst.u3.ip; | ||
59 | newport = tuple->dst.u.udp.port; | ||
60 | portptr = &hdr->dest; | ||
61 | } | ||
62 | |||
63 | inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); | ||
64 | inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0); | ||
65 | if (!hdr->check) | ||
66 | hdr->check = CSUM_MANGLED_0; | ||
67 | |||
68 | *portptr = newport; | ||
69 | return true; | ||
70 | } | ||
71 | |||
72 | static const struct nf_nat_protocol nf_nat_protocol_udplite = { | ||
73 | .protonum = IPPROTO_UDPLITE, | ||
74 | .me = THIS_MODULE, | ||
75 | .manip_pkt = udplite_manip_pkt, | ||
76 | .in_range = nf_nat_proto_in_range, | ||
77 | .unique_tuple = udplite_unique_tuple, | ||
78 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | ||
79 | .range_to_nlattr = nf_nat_proto_range_to_nlattr, | ||
80 | .nlattr_to_range = nf_nat_proto_nlattr_to_range, | ||
81 | #endif | ||
82 | }; | ||
83 | |||
84 | static int __init nf_nat_proto_udplite_init(void) | ||
85 | { | ||
86 | return nf_nat_protocol_register(&nf_nat_protocol_udplite); | ||
87 | } | ||
88 | |||
89 | static void __exit nf_nat_proto_udplite_fini(void) | ||
90 | { | ||
91 | nf_nat_protocol_unregister(&nf_nat_protocol_udplite); | ||
92 | } | ||
93 | |||
94 | module_init(nf_nat_proto_udplite_init); | ||
95 | module_exit(nf_nat_proto_udplite_fini); | ||
96 | |||
97 | MODULE_LICENSE("GPL"); | ||
98 | MODULE_DESCRIPTION("UDP-Lite NAT protocol helper"); | ||
99 | MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); | ||
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c index a26efeb073cb..14381c62acea 100644 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c | |||
@@ -18,35 +18,34 @@ | |||
18 | #include <net/netfilter/nf_nat_rule.h> | 18 | #include <net/netfilter/nf_nat_rule.h> |
19 | #include <net/netfilter/nf_nat_protocol.h> | 19 | #include <net/netfilter/nf_nat_protocol.h> |
20 | 20 | ||
21 | static int unknown_in_range(const struct nf_conntrack_tuple *tuple, | 21 | static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, |
22 | enum nf_nat_manip_type manip_type, | 22 | enum nf_nat_manip_type manip_type, |
23 | const union nf_conntrack_man_proto *min, | 23 | const union nf_conntrack_man_proto *min, |
24 | const union nf_conntrack_man_proto *max) | 24 | const union nf_conntrack_man_proto *max) |
25 | { | 25 | { |
26 | return 1; | 26 | return true; |
27 | } | 27 | } |
28 | 28 | ||
29 | static int unknown_unique_tuple(struct nf_conntrack_tuple *tuple, | 29 | static bool unknown_unique_tuple(struct nf_conntrack_tuple *tuple, |
30 | const struct nf_nat_range *range, | 30 | const struct nf_nat_range *range, |
31 | enum nf_nat_manip_type maniptype, | 31 | enum nf_nat_manip_type maniptype, |
32 | const struct nf_conn *ct) | 32 | const struct nf_conn *ct) |
33 | { | 33 | { |
34 | /* Sorry: we can't help you; if it's not unique, we can't frob | 34 | /* Sorry: we can't help you; if it's not unique, we can't frob |
35 | anything. */ | 35 | anything. */ |
36 | return 0; | 36 | return false; |
37 | } | 37 | } |
38 | 38 | ||
39 | static int | 39 | static bool |
40 | unknown_manip_pkt(struct sk_buff *skb, | 40 | unknown_manip_pkt(struct sk_buff *skb, |
41 | unsigned int iphdroff, | 41 | unsigned int iphdroff, |
42 | const struct nf_conntrack_tuple *tuple, | 42 | const struct nf_conntrack_tuple *tuple, |
43 | enum nf_nat_manip_type maniptype) | 43 | enum nf_nat_manip_type maniptype) |
44 | { | 44 | { |
45 | return 1; | 45 | return true; |
46 | } | 46 | } |
47 | 47 | ||
48 | const struct nf_nat_protocol nf_nat_unknown_protocol = { | 48 | const struct nf_nat_protocol nf_nat_unknown_protocol = { |
49 | .name = "unknown", | ||
50 | /* .me isn't set: getting a ref to this cannot fail. */ | 49 | /* .me isn't set: getting a ref to this cannot fail. */ |
51 | .manip_pkt = unknown_manip_pkt, | 50 | .manip_pkt = unknown_manip_pkt, |
52 | .in_range = unknown_in_range, | 51 | .in_range = unknown_in_range, |
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index f8fda57ba20b..e8b4d0d4439e 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c | |||
@@ -61,7 +61,7 @@ static struct | |||
61 | static struct xt_table __nat_table = { | 61 | static struct xt_table __nat_table = { |
62 | .name = "nat", | 62 | .name = "nat", |
63 | .valid_hooks = NAT_VALID_HOOKS, | 63 | .valid_hooks = NAT_VALID_HOOKS, |
64 | .lock = RW_LOCK_UNLOCKED, | 64 | .lock = __RW_LOCK_UNLOCKED(__nat_table.lock), |
65 | .me = THIS_MODULE, | 65 | .me = THIS_MODULE, |
66 | .af = AF_INET, | 66 | .af = AF_INET, |
67 | }; | 67 | }; |
@@ -143,7 +143,7 @@ static bool ipt_snat_checkentry(const char *tablename, | |||
143 | void *targinfo, | 143 | void *targinfo, |
144 | unsigned int hook_mask) | 144 | unsigned int hook_mask) |
145 | { | 145 | { |
146 | struct nf_nat_multi_range_compat *mr = targinfo; | 146 | const struct nf_nat_multi_range_compat *mr = targinfo; |
147 | 147 | ||
148 | /* Must be a valid range */ | 148 | /* Must be a valid range */ |
149 | if (mr->rangesize != 1) { | 149 | if (mr->rangesize != 1) { |
@@ -159,7 +159,7 @@ static bool ipt_dnat_checkentry(const char *tablename, | |||
159 | void *targinfo, | 159 | void *targinfo, |
160 | unsigned int hook_mask) | 160 | unsigned int hook_mask) |
161 | { | 161 | { |
162 | struct nf_nat_multi_range_compat *mr = targinfo; | 162 | const struct nf_nat_multi_range_compat *mr = targinfo; |
163 | 163 | ||
164 | /* Must be a valid range */ | 164 | /* Must be a valid range */ |
165 | if (mr->rangesize != 1) { | 165 | if (mr->rangesize != 1) { |
@@ -188,25 +188,6 @@ alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) | |||
188 | return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); | 188 | return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); |
189 | } | 189 | } |
190 | 190 | ||
191 | unsigned int | ||
192 | alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum) | ||
193 | { | ||
194 | __be32 ip | ||
195 | = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC | ||
196 | ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip | ||
197 | : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); | ||
198 | __be16 all | ||
199 | = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC | ||
200 | ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all | ||
201 | : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all); | ||
202 | struct nf_nat_range range | ||
203 | = { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } }; | ||
204 | |||
205 | pr_debug("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n", | ||
206 | ct, NIPQUAD(ip)); | ||
207 | return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); | ||
208 | } | ||
209 | |||
210 | int nf_nat_rule_find(struct sk_buff *skb, | 191 | int nf_nat_rule_find(struct sk_buff *skb, |
211 | unsigned int hooknum, | 192 | unsigned int hooknum, |
212 | const struct net_device *in, | 193 | const struct net_device *in, |
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index b4c8d4968bb2..4334d5cabc5b 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c | |||
@@ -2,6 +2,8 @@ | |||
2 | * | 2 | * |
3 | * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> | 3 | * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar> |
4 | * based on RR's ip_nat_ftp.c and other modules. | 4 | * based on RR's ip_nat_ftp.c and other modules. |
5 | * (C) 2007 United Security Providers | ||
6 | * (C) 2007, 2008 Patrick McHardy <kaber@trash.net> | ||
5 | * | 7 | * |
6 | * This program is free software; you can redistribute it and/or modify | 8 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License version 2 as | 9 | * it under the terms of the GNU General Public License version 2 as |
@@ -26,275 +28,461 @@ MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>"); | |||
26 | MODULE_DESCRIPTION("SIP NAT helper"); | 28 | MODULE_DESCRIPTION("SIP NAT helper"); |
27 | MODULE_ALIAS("ip_nat_sip"); | 29 | MODULE_ALIAS("ip_nat_sip"); |
28 | 30 | ||
29 | struct addr_map { | ||
30 | struct { | ||
31 | char src[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; | ||
32 | char dst[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; | ||
33 | unsigned int srclen, srciplen; | ||
34 | unsigned int dstlen, dstiplen; | ||
35 | } addr[IP_CT_DIR_MAX]; | ||
36 | }; | ||
37 | 31 | ||
38 | static void addr_map_init(const struct nf_conn *ct, struct addr_map *map) | 32 | static unsigned int mangle_packet(struct sk_buff *skb, |
33 | const char **dptr, unsigned int *datalen, | ||
34 | unsigned int matchoff, unsigned int matchlen, | ||
35 | const char *buffer, unsigned int buflen) | ||
39 | { | 36 | { |
40 | const struct nf_conntrack_tuple *t; | 37 | enum ip_conntrack_info ctinfo; |
41 | enum ip_conntrack_dir dir; | 38 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
42 | unsigned int n; | 39 | |
43 | 40 | if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, | |
44 | for (dir = 0; dir < IP_CT_DIR_MAX; dir++) { | 41 | buffer, buflen)) |
45 | t = &ct->tuplehash[dir].tuple; | 42 | return 0; |
46 | 43 | ||
47 | n = sprintf(map->addr[dir].src, "%u.%u.%u.%u", | 44 | /* Reload data pointer and adjust datalen value */ |
48 | NIPQUAD(t->src.u3.ip)); | 45 | *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); |
49 | map->addr[dir].srciplen = n; | 46 | *datalen += buflen - matchlen; |
50 | n += sprintf(map->addr[dir].src + n, ":%u", | 47 | return 1; |
51 | ntohs(t->src.u.udp.port)); | ||
52 | map->addr[dir].srclen = n; | ||
53 | |||
54 | n = sprintf(map->addr[dir].dst, "%u.%u.%u.%u", | ||
55 | NIPQUAD(t->dst.u3.ip)); | ||
56 | map->addr[dir].dstiplen = n; | ||
57 | n += sprintf(map->addr[dir].dst + n, ":%u", | ||
58 | ntohs(t->dst.u.udp.port)); | ||
59 | map->addr[dir].dstlen = n; | ||
60 | } | ||
61 | } | 48 | } |
62 | 49 | ||
63 | static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo, | 50 | static int map_addr(struct sk_buff *skb, |
64 | struct nf_conn *ct, const char **dptr, size_t dlen, | 51 | const char **dptr, unsigned int *datalen, |
65 | enum sip_header_pos pos, struct addr_map *map) | 52 | unsigned int matchoff, unsigned int matchlen, |
53 | union nf_inet_addr *addr, __be16 port) | ||
66 | { | 54 | { |
55 | enum ip_conntrack_info ctinfo; | ||
56 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
67 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | 57 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
68 | unsigned int matchlen, matchoff, addrlen; | 58 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; |
69 | char *addr; | 59 | unsigned int buflen; |
70 | 60 | __be32 newaddr; | |
71 | if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0) | 61 | __be16 newport; |
62 | |||
63 | if (ct->tuplehash[dir].tuple.src.u3.ip == addr->ip && | ||
64 | ct->tuplehash[dir].tuple.src.u.udp.port == port) { | ||
65 | newaddr = ct->tuplehash[!dir].tuple.dst.u3.ip; | ||
66 | newport = ct->tuplehash[!dir].tuple.dst.u.udp.port; | ||
67 | } else if (ct->tuplehash[dir].tuple.dst.u3.ip == addr->ip && | ||
68 | ct->tuplehash[dir].tuple.dst.u.udp.port == port) { | ||
69 | newaddr = ct->tuplehash[!dir].tuple.src.u3.ip; | ||
70 | newport = ct->tuplehash[!dir].tuple.src.u.udp.port; | ||
71 | } else | ||
72 | return 1; | 72 | return 1; |
73 | 73 | ||
74 | if ((matchlen == map->addr[dir].srciplen || | 74 | if (newaddr == addr->ip && newport == port) |
75 | matchlen == map->addr[dir].srclen) && | ||
76 | memcmp(*dptr + matchoff, map->addr[dir].src, matchlen) == 0) { | ||
77 | addr = map->addr[!dir].dst; | ||
78 | addrlen = map->addr[!dir].dstlen; | ||
79 | } else if ((matchlen == map->addr[dir].dstiplen || | ||
80 | matchlen == map->addr[dir].dstlen) && | ||
81 | memcmp(*dptr + matchoff, map->addr[dir].dst, matchlen) == 0) { | ||
82 | addr = map->addr[!dir].src; | ||
83 | addrlen = map->addr[!dir].srclen; | ||
84 | } else | ||
85 | return 1; | 75 | return 1; |
86 | 76 | ||
87 | if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, | 77 | buflen = sprintf(buffer, "%u.%u.%u.%u:%u", |
88 | matchoff, matchlen, addr, addrlen)) | 78 | NIPQUAD(newaddr), ntohs(newport)); |
89 | return 0; | ||
90 | *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); | ||
91 | return 1; | ||
92 | 79 | ||
80 | return mangle_packet(skb, dptr, datalen, matchoff, matchlen, | ||
81 | buffer, buflen); | ||
93 | } | 82 | } |
94 | 83 | ||
95 | static unsigned int ip_nat_sip(struct sk_buff *skb, | 84 | static int map_sip_addr(struct sk_buff *skb, |
96 | enum ip_conntrack_info ctinfo, | 85 | const char **dptr, unsigned int *datalen, |
97 | struct nf_conn *ct, | 86 | enum sip_header_types type) |
98 | const char **dptr) | ||
99 | { | 87 | { |
100 | enum sip_header_pos pos; | 88 | enum ip_conntrack_info ctinfo; |
101 | struct addr_map map; | 89 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
102 | int dataoff, datalen; | 90 | unsigned int matchlen, matchoff; |
91 | union nf_inet_addr addr; | ||
92 | __be16 port; | ||
103 | 93 | ||
104 | dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); | 94 | if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL, |
105 | datalen = skb->len - dataoff; | 95 | &matchoff, &matchlen, &addr, &port) <= 0) |
106 | if (datalen < sizeof("SIP/2.0") - 1) | 96 | return 1; |
107 | return NF_ACCEPT; | 97 | return map_addr(skb, dptr, datalen, matchoff, matchlen, &addr, port); |
98 | } | ||
108 | 99 | ||
109 | addr_map_init(ct, &map); | 100 | static unsigned int ip_nat_sip(struct sk_buff *skb, |
101 | const char **dptr, unsigned int *datalen) | ||
102 | { | ||
103 | enum ip_conntrack_info ctinfo; | ||
104 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
105 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | ||
106 | unsigned int dataoff, matchoff, matchlen; | ||
107 | union nf_inet_addr addr; | ||
108 | __be16 port; | ||
109 | int request, in_header; | ||
110 | 110 | ||
111 | /* Basic rules: requests and responses. */ | 111 | /* Basic rules: requests and responses. */ |
112 | if (strncmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) != 0) { | 112 | if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) { |
113 | /* 10.2: Constructing the REGISTER Request: | 113 | if (ct_sip_parse_request(ct, *dptr, *datalen, |
114 | * | 114 | &matchoff, &matchlen, |
115 | * The "userinfo" and "@" components of the SIP URI MUST NOT | 115 | &addr, &port) > 0 && |
116 | * be present. | 116 | !map_addr(skb, dptr, datalen, matchoff, matchlen, |
117 | */ | 117 | &addr, port)) |
118 | if (datalen >= sizeof("REGISTER") - 1 && | 118 | return NF_DROP; |
119 | strncmp(*dptr, "REGISTER", sizeof("REGISTER") - 1) == 0) | 119 | request = 1; |
120 | pos = POS_REG_REQ_URI; | 120 | } else |
121 | else | 121 | request = 0; |
122 | pos = POS_REQ_URI; | 122 | |
123 | 123 | /* Translate topmost Via header and parameters */ | |
124 | if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, pos, &map)) | 124 | if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, |
125 | SIP_HDR_VIA, NULL, &matchoff, &matchlen, | ||
126 | &addr, &port) > 0) { | ||
127 | unsigned int matchend, poff, plen, buflen, n; | ||
128 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; | ||
129 | |||
130 | /* We're only interested in headers related to this | ||
131 | * connection */ | ||
132 | if (request) { | ||
133 | if (addr.ip != ct->tuplehash[dir].tuple.src.u3.ip || | ||
134 | port != ct->tuplehash[dir].tuple.src.u.udp.port) | ||
135 | goto next; | ||
136 | } else { | ||
137 | if (addr.ip != ct->tuplehash[dir].tuple.dst.u3.ip || | ||
138 | port != ct->tuplehash[dir].tuple.dst.u.udp.port) | ||
139 | goto next; | ||
140 | } | ||
141 | |||
142 | if (!map_addr(skb, dptr, datalen, matchoff, matchlen, | ||
143 | &addr, port)) | ||
125 | return NF_DROP; | 144 | return NF_DROP; |
145 | |||
146 | matchend = matchoff + matchlen; | ||
147 | |||
148 | /* The maddr= parameter (RFC 3261) specifies where to send | ||
149 | * the reply. */ | ||
150 | if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, | ||
151 | "maddr=", &poff, &plen, | ||
152 | &addr) > 0 && | ||
153 | addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && | ||
154 | addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { | ||
155 | __be32 ip = ct->tuplehash[!dir].tuple.dst.u3.ip; | ||
156 | buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); | ||
157 | if (!mangle_packet(skb, dptr, datalen, poff, plen, | ||
158 | buffer, buflen)) | ||
159 | return NF_DROP; | ||
160 | } | ||
161 | |||
162 | /* The received= parameter (RFC 3261) contains the address | ||
163 | * from which the server received the request. */ | ||
164 | if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, | ||
165 | "received=", &poff, &plen, | ||
166 | &addr) > 0 && | ||
167 | addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && | ||
168 | addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { | ||
169 | __be32 ip = ct->tuplehash[!dir].tuple.src.u3.ip; | ||
170 | buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); | ||
171 | if (!mangle_packet(skb, dptr, datalen, poff, plen, | ||
172 | buffer, buflen)) | ||
173 | return NF_DROP; | ||
174 | } | ||
175 | |||
176 | /* The rport= parameter (RFC 3581) contains the port number | ||
177 | * from which the server received the request. */ | ||
178 | if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen, | ||
179 | "rport=", &poff, &plen, | ||
180 | &n) > 0 && | ||
181 | htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port && | ||
182 | htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { | ||
183 | __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; | ||
184 | buflen = sprintf(buffer, "%u", ntohs(p)); | ||
185 | if (!mangle_packet(skb, dptr, datalen, poff, plen, | ||
186 | buffer, buflen)) | ||
187 | return NF_DROP; | ||
188 | } | ||
126 | } | 189 | } |
127 | 190 | ||
128 | if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_FROM, &map) || | 191 | next: |
129 | !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_TO, &map) || | 192 | /* Translate Contact headers */ |
130 | !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_VIA, &map) || | 193 | dataoff = 0; |
131 | !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map)) | 194 | in_header = 0; |
195 | while (ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen, | ||
196 | SIP_HDR_CONTACT, &in_header, | ||
197 | &matchoff, &matchlen, | ||
198 | &addr, &port) > 0) { | ||
199 | if (!map_addr(skb, dptr, datalen, matchoff, matchlen, | ||
200 | &addr, port)) | ||
201 | return NF_DROP; | ||
202 | } | ||
203 | |||
204 | if (!map_sip_addr(skb, dptr, datalen, SIP_HDR_FROM) || | ||
205 | !map_sip_addr(skb, dptr, datalen, SIP_HDR_TO)) | ||
132 | return NF_DROP; | 206 | return NF_DROP; |
133 | return NF_ACCEPT; | 207 | return NF_ACCEPT; |
134 | } | 208 | } |
135 | 209 | ||
136 | static unsigned int mangle_sip_packet(struct sk_buff *skb, | 210 | /* Handles expected signalling connections and media streams */ |
137 | enum ip_conntrack_info ctinfo, | 211 | static void ip_nat_sip_expected(struct nf_conn *ct, |
138 | struct nf_conn *ct, | 212 | struct nf_conntrack_expect *exp) |
139 | const char **dptr, size_t dlen, | ||
140 | char *buffer, int bufflen, | ||
141 | enum sip_header_pos pos) | ||
142 | { | 213 | { |
143 | unsigned int matchlen, matchoff; | 214 | struct nf_nat_range range; |
144 | 215 | ||
145 | if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0) | 216 | /* This must be a fresh one. */ |
146 | return 0; | 217 | BUG_ON(ct->status & IPS_NAT_DONE_MASK); |
147 | 218 | ||
148 | if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, | 219 | /* For DST manip, map port here to where it's expected. */ |
149 | matchoff, matchlen, buffer, bufflen)) | 220 | range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); |
150 | return 0; | 221 | range.min = range.max = exp->saved_proto; |
222 | range.min_ip = range.max_ip = exp->saved_ip; | ||
223 | nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); | ||
151 | 224 | ||
152 | /* We need to reload this. Thanks Patrick. */ | 225 | /* Change src to where master sends to, but only if the connection |
153 | *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); | 226 | * actually came from the same source. */ |
154 | return 1; | 227 | if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == |
228 | ct->master->tuplehash[exp->dir].tuple.src.u3.ip) { | ||
229 | range.flags = IP_NAT_RANGE_MAP_IPS; | ||
230 | range.min_ip = range.max_ip | ||
231 | = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; | ||
232 | nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); | ||
233 | } | ||
155 | } | 234 | } |
156 | 235 | ||
157 | static int mangle_content_len(struct sk_buff *skb, | 236 | static unsigned int ip_nat_sip_expect(struct sk_buff *skb, |
158 | enum ip_conntrack_info ctinfo, | 237 | const char **dptr, unsigned int *datalen, |
159 | struct nf_conn *ct, | 238 | struct nf_conntrack_expect *exp, |
160 | const char *dptr) | 239 | unsigned int matchoff, |
240 | unsigned int matchlen) | ||
161 | { | 241 | { |
162 | unsigned int dataoff, matchoff, matchlen; | 242 | enum ip_conntrack_info ctinfo; |
163 | char buffer[sizeof("65536")]; | 243 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
164 | int bufflen; | 244 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
245 | __be32 newip; | ||
246 | u_int16_t port; | ||
247 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; | ||
248 | unsigned buflen; | ||
165 | 249 | ||
166 | dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); | 250 | /* Connection will come from reply */ |
251 | if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip) | ||
252 | newip = exp->tuple.dst.u3.ip; | ||
253 | else | ||
254 | newip = ct->tuplehash[!dir].tuple.dst.u3.ip; | ||
167 | 255 | ||
168 | /* Get actual SDP length */ | 256 | /* If the signalling port matches the connection's source port in the |
169 | if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, | 257 | * original direction, try to use the destination port in the opposite |
170 | &matchlen, POS_SDP_HEADER) > 0) { | 258 | * direction. */ |
259 | if (exp->tuple.dst.u.udp.port == | ||
260 | ct->tuplehash[dir].tuple.src.u.udp.port) | ||
261 | port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port); | ||
262 | else | ||
263 | port = ntohs(exp->tuple.dst.u.udp.port); | ||
264 | |||
265 | exp->saved_ip = exp->tuple.dst.u3.ip; | ||
266 | exp->tuple.dst.u3.ip = newip; | ||
267 | exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; | ||
268 | exp->dir = !dir; | ||
269 | exp->expectfn = ip_nat_sip_expected; | ||
171 | 270 | ||
172 | /* since ct_sip_get_info() give us a pointer passing 'v=' | 271 | for (; port != 0; port++) { |
173 | we need to add 2 bytes in this count. */ | 272 | exp->tuple.dst.u.udp.port = htons(port); |
174 | int c_len = skb->len - dataoff - matchoff + 2; | 273 | if (nf_ct_expect_related(exp) == 0) |
274 | break; | ||
275 | } | ||
175 | 276 | ||
176 | /* Now, update SDP length */ | 277 | if (port == 0) |
177 | if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, | 278 | return NF_DROP; |
178 | &matchlen, POS_CONTENT) > 0) { | ||
179 | 279 | ||
180 | bufflen = sprintf(buffer, "%u", c_len); | 280 | if (exp->tuple.dst.u3.ip != exp->saved_ip || |
181 | return nf_nat_mangle_udp_packet(skb, ct, ctinfo, | 281 | exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { |
182 | matchoff, matchlen, | 282 | buflen = sprintf(buffer, "%u.%u.%u.%u:%u", |
183 | buffer, bufflen); | 283 | NIPQUAD(newip), port); |
184 | } | 284 | if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, |
285 | buffer, buflen)) | ||
286 | goto err; | ||
185 | } | 287 | } |
186 | return 0; | 288 | return NF_ACCEPT; |
289 | |||
290 | err: | ||
291 | nf_ct_unexpect_related(exp); | ||
292 | return NF_DROP; | ||
187 | } | 293 | } |
188 | 294 | ||
189 | static unsigned int mangle_sdp(struct sk_buff *skb, | 295 | static int mangle_content_len(struct sk_buff *skb, |
190 | enum ip_conntrack_info ctinfo, | 296 | const char **dptr, unsigned int *datalen) |
191 | struct nf_conn *ct, | ||
192 | __be32 newip, u_int16_t port, | ||
193 | const char *dptr) | ||
194 | { | 297 | { |
195 | char buffer[sizeof("nnn.nnn.nnn.nnn")]; | 298 | enum ip_conntrack_info ctinfo; |
196 | unsigned int dataoff, bufflen; | 299 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); |
300 | unsigned int matchoff, matchlen; | ||
301 | char buffer[sizeof("65536")]; | ||
302 | int buflen, c_len; | ||
197 | 303 | ||
198 | dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); | 304 | /* Get actual SDP length */ |
305 | if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen, | ||
306 | SDP_HDR_VERSION, SDP_HDR_UNSPEC, | ||
307 | &matchoff, &matchlen) <= 0) | ||
308 | return 0; | ||
309 | c_len = *datalen - matchoff + strlen("v="); | ||
199 | 310 | ||
200 | /* Mangle owner and contact info. */ | 311 | /* Now, update SDP length */ |
201 | bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); | 312 | if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CONTENT_LENGTH, |
202 | if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, | 313 | &matchoff, &matchlen) <= 0) |
203 | buffer, bufflen, POS_OWNER_IP4)) | ||
204 | return 0; | 314 | return 0; |
205 | 315 | ||
206 | if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, | 316 | buflen = sprintf(buffer, "%u", c_len); |
207 | buffer, bufflen, POS_CONNECTION_IP4)) | 317 | return mangle_packet(skb, dptr, datalen, matchoff, matchlen, |
318 | buffer, buflen); | ||
319 | } | ||
320 | |||
321 | static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr, | ||
322 | unsigned int dataoff, unsigned int *datalen, | ||
323 | enum sdp_header_types type, | ||
324 | enum sdp_header_types term, | ||
325 | char *buffer, int buflen) | ||
326 | { | ||
327 | enum ip_conntrack_info ctinfo; | ||
328 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
329 | unsigned int matchlen, matchoff; | ||
330 | |||
331 | if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term, | ||
332 | &matchoff, &matchlen) <= 0) | ||
208 | return 0; | 333 | return 0; |
334 | return mangle_packet(skb, dptr, datalen, matchoff, matchlen, | ||
335 | buffer, buflen); | ||
336 | } | ||
209 | 337 | ||
210 | /* Mangle media port. */ | 338 | static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, |
211 | bufflen = sprintf(buffer, "%u", port); | 339 | unsigned int dataoff, |
212 | if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, | 340 | unsigned int *datalen, |
213 | buffer, bufflen, POS_MEDIA)) | 341 | enum sdp_header_types type, |
342 | enum sdp_header_types term, | ||
343 | const union nf_inet_addr *addr) | ||
344 | { | ||
345 | char buffer[sizeof("nnn.nnn.nnn.nnn")]; | ||
346 | unsigned int buflen; | ||
347 | |||
348 | buflen = sprintf(buffer, NIPQUAD_FMT, NIPQUAD(addr->ip)); | ||
349 | if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, | ||
350 | buffer, buflen)) | ||
214 | return 0; | 351 | return 0; |
215 | 352 | ||
216 | return mangle_content_len(skb, ctinfo, ct, dptr); | 353 | return mangle_content_len(skb, dptr, datalen); |
217 | } | 354 | } |
218 | 355 | ||
219 | static void ip_nat_sdp_expect(struct nf_conn *ct, | 356 | static unsigned int ip_nat_sdp_port(struct sk_buff *skb, |
220 | struct nf_conntrack_expect *exp) | 357 | const char **dptr, |
358 | unsigned int *datalen, | ||
359 | unsigned int matchoff, | ||
360 | unsigned int matchlen, | ||
361 | u_int16_t port) | ||
221 | { | 362 | { |
222 | struct nf_nat_range range; | 363 | char buffer[sizeof("nnnnn")]; |
364 | unsigned int buflen; | ||
223 | 365 | ||
224 | /* This must be a fresh one. */ | 366 | buflen = sprintf(buffer, "%u", port); |
225 | BUG_ON(ct->status & IPS_NAT_DONE_MASK); | 367 | if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, |
368 | buffer, buflen)) | ||
369 | return 0; | ||
226 | 370 | ||
227 | /* Change src to where master sends to */ | 371 | return mangle_content_len(skb, dptr, datalen); |
228 | range.flags = IP_NAT_RANGE_MAP_IPS; | 372 | } |
229 | range.min_ip = range.max_ip | ||
230 | = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; | ||
231 | nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); | ||
232 | 373 | ||
233 | /* For DST manip, map port here to where it's expected. */ | 374 | static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, |
234 | range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); | 375 | unsigned int dataoff, |
235 | range.min = range.max = exp->saved_proto; | 376 | unsigned int *datalen, |
236 | range.min_ip = range.max_ip = exp->saved_ip; | 377 | const union nf_inet_addr *addr) |
237 | nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); | 378 | { |
379 | char buffer[sizeof("nnn.nnn.nnn.nnn")]; | ||
380 | unsigned int buflen; | ||
381 | |||
382 | /* Mangle session description owner and contact addresses */ | ||
383 | buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(addr->ip)); | ||
384 | if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, | ||
385 | SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, | ||
386 | buffer, buflen)) | ||
387 | return 0; | ||
388 | |||
389 | if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, | ||
390 | SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, | ||
391 | buffer, buflen)) | ||
392 | return 0; | ||
393 | |||
394 | return mangle_content_len(skb, dptr, datalen); | ||
238 | } | 395 | } |
239 | 396 | ||
240 | /* So, this packet has hit the connection tracking matching code. | 397 | /* So, this packet has hit the connection tracking matching code. |
241 | Mangle it, and change the expectation to match the new version. */ | 398 | Mangle it, and change the expectation to match the new version. */ |
242 | static unsigned int ip_nat_sdp(struct sk_buff *skb, | 399 | static unsigned int ip_nat_sdp_media(struct sk_buff *skb, |
243 | enum ip_conntrack_info ctinfo, | 400 | const char **dptr, |
244 | struct nf_conntrack_expect *exp, | 401 | unsigned int *datalen, |
245 | const char *dptr) | 402 | struct nf_conntrack_expect *rtp_exp, |
403 | struct nf_conntrack_expect *rtcp_exp, | ||
404 | unsigned int mediaoff, | ||
405 | unsigned int medialen, | ||
406 | union nf_inet_addr *rtp_addr) | ||
246 | { | 407 | { |
247 | struct nf_conn *ct = exp->master; | 408 | enum ip_conntrack_info ctinfo; |
409 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
248 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); | 410 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
249 | __be32 newip; | ||
250 | u_int16_t port; | 411 | u_int16_t port; |
251 | 412 | ||
252 | /* Connection will come from reply */ | 413 | /* Connection will come from reply */ |
253 | if (ct->tuplehash[dir].tuple.src.u3.ip == | 414 | if (ct->tuplehash[dir].tuple.src.u3.ip == |
254 | ct->tuplehash[!dir].tuple.dst.u3.ip) | 415 | ct->tuplehash[!dir].tuple.dst.u3.ip) |
255 | newip = exp->tuple.dst.u3.ip; | 416 | rtp_addr->ip = rtp_exp->tuple.dst.u3.ip; |
256 | else | 417 | else |
257 | newip = ct->tuplehash[!dir].tuple.dst.u3.ip; | 418 | rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip; |
258 | 419 | ||
259 | exp->saved_ip = exp->tuple.dst.u3.ip; | 420 | rtp_exp->saved_ip = rtp_exp->tuple.dst.u3.ip; |
260 | exp->tuple.dst.u3.ip = newip; | 421 | rtp_exp->tuple.dst.u3.ip = rtp_addr->ip; |
261 | exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; | 422 | rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port; |
262 | exp->dir = !dir; | 423 | rtp_exp->dir = !dir; |
263 | 424 | rtp_exp->expectfn = ip_nat_sip_expected; | |
264 | /* When you see the packet, we need to NAT it the same as the | 425 | |
265 | this one. */ | 426 | rtcp_exp->saved_ip = rtcp_exp->tuple.dst.u3.ip; |
266 | exp->expectfn = ip_nat_sdp_expect; | 427 | rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip; |
267 | 428 | rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port; | |
268 | /* Try to get same port: if not, try to change it. */ | 429 | rtcp_exp->dir = !dir; |
269 | for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) { | 430 | rtcp_exp->expectfn = ip_nat_sip_expected; |
270 | exp->tuple.dst.u.udp.port = htons(port); | 431 | |
271 | if (nf_ct_expect_related(exp) == 0) | 432 | /* Try to get same pair of ports: if not, try to change them. */ |
433 | for (port = ntohs(rtp_exp->tuple.dst.u.udp.port); | ||
434 | port != 0; port += 2) { | ||
435 | rtp_exp->tuple.dst.u.udp.port = htons(port); | ||
436 | if (nf_ct_expect_related(rtp_exp) != 0) | ||
437 | continue; | ||
438 | rtcp_exp->tuple.dst.u.udp.port = htons(port + 1); | ||
439 | if (nf_ct_expect_related(rtcp_exp) == 0) | ||
272 | break; | 440 | break; |
441 | nf_ct_unexpect_related(rtp_exp); | ||
273 | } | 442 | } |
274 | 443 | ||
275 | if (port == 0) | 444 | if (port == 0) |
276 | return NF_DROP; | 445 | goto err1; |
446 | |||
447 | /* Update media port. */ | ||
448 | if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && | ||
449 | !ip_nat_sdp_port(skb, dptr, datalen, mediaoff, medialen, port)) | ||
450 | goto err2; | ||
277 | 451 | ||
278 | if (!mangle_sdp(skb, ctinfo, ct, newip, port, dptr)) { | ||
279 | nf_ct_unexpect_related(exp); | ||
280 | return NF_DROP; | ||
281 | } | ||
282 | return NF_ACCEPT; | 452 | return NF_ACCEPT; |
453 | |||
454 | err2: | ||
455 | nf_ct_unexpect_related(rtp_exp); | ||
456 | nf_ct_unexpect_related(rtcp_exp); | ||
457 | err1: | ||
458 | return NF_DROP; | ||
283 | } | 459 | } |
284 | 460 | ||
285 | static void __exit nf_nat_sip_fini(void) | 461 | static void __exit nf_nat_sip_fini(void) |
286 | { | 462 | { |
287 | rcu_assign_pointer(nf_nat_sip_hook, NULL); | 463 | rcu_assign_pointer(nf_nat_sip_hook, NULL); |
288 | rcu_assign_pointer(nf_nat_sdp_hook, NULL); | 464 | rcu_assign_pointer(nf_nat_sip_expect_hook, NULL); |
465 | rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL); | ||
466 | rcu_assign_pointer(nf_nat_sdp_port_hook, NULL); | ||
467 | rcu_assign_pointer(nf_nat_sdp_session_hook, NULL); | ||
468 | rcu_assign_pointer(nf_nat_sdp_media_hook, NULL); | ||
289 | synchronize_rcu(); | 469 | synchronize_rcu(); |
290 | } | 470 | } |
291 | 471 | ||
292 | static int __init nf_nat_sip_init(void) | 472 | static int __init nf_nat_sip_init(void) |
293 | { | 473 | { |
294 | BUG_ON(nf_nat_sip_hook != NULL); | 474 | BUG_ON(nf_nat_sip_hook != NULL); |
295 | BUG_ON(nf_nat_sdp_hook != NULL); | 475 | BUG_ON(nf_nat_sip_expect_hook != NULL); |
476 | BUG_ON(nf_nat_sdp_addr_hook != NULL); | ||
477 | BUG_ON(nf_nat_sdp_port_hook != NULL); | ||
478 | BUG_ON(nf_nat_sdp_session_hook != NULL); | ||
479 | BUG_ON(nf_nat_sdp_media_hook != NULL); | ||
296 | rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); | 480 | rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); |
297 | rcu_assign_pointer(nf_nat_sdp_hook, ip_nat_sdp); | 481 | rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect); |
482 | rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); | ||
483 | rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port); | ||
484 | rcu_assign_pointer(nf_nat_sdp_session_hook, ip_nat_sdp_session); | ||
485 | rcu_assign_pointer(nf_nat_sdp_media_hook, ip_nat_sdp_media); | ||
298 | return 0; | 486 | return 0; |
299 | } | 487 | } |
300 | 488 | ||
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 540ce6ae887c..5daefad3d193 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <net/udp.h> | 50 | #include <net/udp.h> |
51 | 51 | ||
52 | #include <net/netfilter/nf_nat.h> | 52 | #include <net/netfilter/nf_nat.h> |
53 | #include <net/netfilter/nf_conntrack_expect.h> | ||
53 | #include <net/netfilter/nf_conntrack_helper.h> | 54 | #include <net/netfilter/nf_conntrack_helper.h> |
54 | #include <net/netfilter/nf_nat_helper.h> | 55 | #include <net/netfilter/nf_nat_helper.h> |
55 | 56 | ||
@@ -219,7 +220,7 @@ static unsigned char asn1_length_decode(struct asn1_ctx *ctx, | |||
219 | if (ch < 0x80) | 220 | if (ch < 0x80) |
220 | *len = ch; | 221 | *len = ch; |
221 | else { | 222 | else { |
222 | cnt = (unsigned char) (ch & 0x7F); | 223 | cnt = ch & 0x7F; |
223 | *len = 0; | 224 | *len = 0; |
224 | 225 | ||
225 | while (cnt > 0) { | 226 | while (cnt > 0) { |
@@ -617,8 +618,7 @@ struct snmp_cnv | |||
617 | int syntax; | 618 | int syntax; |
618 | }; | 619 | }; |
619 | 620 | ||
620 | static struct snmp_cnv snmp_conv [] = | 621 | static const struct snmp_cnv snmp_conv[] = { |
621 | { | ||
622 | {ASN1_UNI, ASN1_NUL, SNMP_NULL}, | 622 | {ASN1_UNI, ASN1_NUL, SNMP_NULL}, |
623 | {ASN1_UNI, ASN1_INT, SNMP_INTEGER}, | 623 | {ASN1_UNI, ASN1_INT, SNMP_INTEGER}, |
624 | {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR}, | 624 | {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR}, |
@@ -643,7 +643,7 @@ static unsigned char snmp_tag_cls2syntax(unsigned int tag, | |||
643 | unsigned int cls, | 643 | unsigned int cls, |
644 | unsigned short *syntax) | 644 | unsigned short *syntax) |
645 | { | 645 | { |
646 | struct snmp_cnv *cnv; | 646 | const struct snmp_cnv *cnv; |
647 | 647 | ||
648 | cnv = snmp_conv; | 648 | cnv = snmp_conv; |
649 | 649 | ||
@@ -903,7 +903,7 @@ static inline void mangle_address(unsigned char *begin, | |||
903 | u_int32_t old; | 903 | u_int32_t old; |
904 | 904 | ||
905 | if (debug) | 905 | if (debug) |
906 | memcpy(&old, (unsigned char *)addr, sizeof(old)); | 906 | memcpy(&old, addr, sizeof(old)); |
907 | 907 | ||
908 | *addr = map->to; | 908 | *addr = map->to; |
909 | 909 | ||
@@ -998,7 +998,7 @@ err_id_free: | |||
998 | * | 998 | * |
999 | *****************************************************************************/ | 999 | *****************************************************************************/ |
1000 | 1000 | ||
1001 | static void hex_dump(unsigned char *buf, size_t len) | 1001 | static void hex_dump(const unsigned char *buf, size_t len) |
1002 | { | 1002 | { |
1003 | size_t i; | 1003 | size_t i; |
1004 | 1004 | ||
@@ -1079,7 +1079,7 @@ static int snmp_parse_mangle(unsigned char *msg, | |||
1079 | if (cls != ASN1_CTX || con != ASN1_CON) | 1079 | if (cls != ASN1_CTX || con != ASN1_CON) |
1080 | return 0; | 1080 | return 0; |
1081 | if (debug > 1) { | 1081 | if (debug > 1) { |
1082 | unsigned char *pdus[] = { | 1082 | static const unsigned char *const pdus[] = { |
1083 | [SNMP_PDU_GET] = "get", | 1083 | [SNMP_PDU_GET] = "get", |
1084 | [SNMP_PDU_NEXT] = "get-next", | 1084 | [SNMP_PDU_NEXT] = "get-next", |
1085 | [SNMP_PDU_RESPONSE] = "response", | 1085 | [SNMP_PDU_RESPONSE] = "response", |
@@ -1231,8 +1231,8 @@ static int help(struct sk_buff *skb, unsigned int protoff, | |||
1231 | { | 1231 | { |
1232 | int dir = CTINFO2DIR(ctinfo); | 1232 | int dir = CTINFO2DIR(ctinfo); |
1233 | unsigned int ret; | 1233 | unsigned int ret; |
1234 | struct iphdr *iph = ip_hdr(skb); | 1234 | const struct iphdr *iph = ip_hdr(skb); |
1235 | struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); | 1235 | const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); |
1236 | 1236 | ||
1237 | /* SNMP replies and originating SNMP traps get mangled */ | 1237 | /* SNMP replies and originating SNMP traps get mangled */ |
1238 | if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY) | 1238 | if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY) |
@@ -1267,11 +1267,15 @@ static int help(struct sk_buff *skb, unsigned int protoff, | |||
1267 | return ret; | 1267 | return ret; |
1268 | } | 1268 | } |
1269 | 1269 | ||
1270 | static const struct nf_conntrack_expect_policy snmp_exp_policy = { | ||
1271 | .max_expected = 0, | ||
1272 | .timeout = 180, | ||
1273 | }; | ||
1274 | |||
1270 | static struct nf_conntrack_helper snmp_helper __read_mostly = { | 1275 | static struct nf_conntrack_helper snmp_helper __read_mostly = { |
1271 | .max_expected = 0, | ||
1272 | .timeout = 180, | ||
1273 | .me = THIS_MODULE, | 1276 | .me = THIS_MODULE, |
1274 | .help = help, | 1277 | .help = help, |
1278 | .expect_policy = &snmp_exp_policy, | ||
1275 | .name = "snmp", | 1279 | .name = "snmp", |
1276 | .tuple.src.l3num = AF_INET, | 1280 | .tuple.src.l3num = AF_INET, |
1277 | .tuple.src.u.udp.port = __constant_htons(SNMP_PORT), | 1281 | .tuple.src.u.udp.port = __constant_htons(SNMP_PORT), |
@@ -1279,10 +1283,9 @@ static struct nf_conntrack_helper snmp_helper __read_mostly = { | |||
1279 | }; | 1283 | }; |
1280 | 1284 | ||
1281 | static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { | 1285 | static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { |
1282 | .max_expected = 0, | ||
1283 | .timeout = 180, | ||
1284 | .me = THIS_MODULE, | 1286 | .me = THIS_MODULE, |
1285 | .help = help, | 1287 | .help = help, |
1288 | .expect_policy = &snmp_exp_policy, | ||
1286 | .name = "snmp_trap", | 1289 | .name = "snmp_trap", |
1287 | .tuple.src.l3num = AF_INET, | 1290 | .tuple.src.l3num = AF_INET, |
1288 | .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT), | 1291 | .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT), |
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 99b2c788d5a8..b7dd695691a0 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c | |||
@@ -30,8 +30,8 @@ | |||
30 | #ifdef CONFIG_XFRM | 30 | #ifdef CONFIG_XFRM |
31 | static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) | 31 | static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) |
32 | { | 32 | { |
33 | struct nf_conn *ct; | 33 | const struct nf_conn *ct; |
34 | struct nf_conntrack_tuple *t; | 34 | const struct nf_conntrack_tuple *t; |
35 | enum ip_conntrack_info ctinfo; | 35 | enum ip_conntrack_info ctinfo; |
36 | enum ip_conntrack_dir dir; | 36 | enum ip_conntrack_dir dir; |
37 | unsigned long statusbit; | 37 | unsigned long statusbit; |
@@ -50,7 +50,10 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) | |||
50 | if (ct->status & statusbit) { | 50 | if (ct->status & statusbit) { |
51 | fl->fl4_dst = t->dst.u3.ip; | 51 | fl->fl4_dst = t->dst.u3.ip; |
52 | if (t->dst.protonum == IPPROTO_TCP || | 52 | if (t->dst.protonum == IPPROTO_TCP || |
53 | t->dst.protonum == IPPROTO_UDP) | 53 | t->dst.protonum == IPPROTO_UDP || |
54 | t->dst.protonum == IPPROTO_UDPLITE || | ||
55 | t->dst.protonum == IPPROTO_DCCP || | ||
56 | t->dst.protonum == IPPROTO_SCTP) | ||
54 | fl->fl_ip_dport = t->dst.u.tcp.port; | 57 | fl->fl_ip_dport = t->dst.u.tcp.port; |
55 | } | 58 | } |
56 | 59 | ||
@@ -59,7 +62,10 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) | |||
59 | if (ct->status & statusbit) { | 62 | if (ct->status & statusbit) { |
60 | fl->fl4_src = t->src.u3.ip; | 63 | fl->fl4_src = t->src.u3.ip; |
61 | if (t->dst.protonum == IPPROTO_TCP || | 64 | if (t->dst.protonum == IPPROTO_TCP || |
62 | t->dst.protonum == IPPROTO_UDP) | 65 | t->dst.protonum == IPPROTO_UDP || |
66 | t->dst.protonum == IPPROTO_UDPLITE || | ||
67 | t->dst.protonum == IPPROTO_DCCP || | ||
68 | t->dst.protonum == IPPROTO_SCTP) | ||
63 | fl->fl_ip_sport = t->src.u.tcp.port; | 69 | fl->fl_ip_sport = t->src.u.tcp.port; |
64 | } | 70 | } |
65 | } | 71 | } |
@@ -87,21 +93,8 @@ nf_nat_fn(unsigned int hooknum, | |||
87 | have dropped it. Hence it's the user's responsibilty to | 93 | have dropped it. Hence it's the user's responsibilty to |
88 | packet filter it out, or implement conntrack/NAT for that | 94 | packet filter it out, or implement conntrack/NAT for that |
89 | protocol. 8) --RR */ | 95 | protocol. 8) --RR */ |
90 | if (!ct) { | 96 | if (!ct) |
91 | /* Exception: ICMP redirect to new connection (not in | ||
92 | hash table yet). We must not let this through, in | ||
93 | case we're doing NAT to the same network. */ | ||
94 | if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { | ||
95 | struct icmphdr _hdr, *hp; | ||
96 | |||
97 | hp = skb_header_pointer(skb, ip_hdrlen(skb), | ||
98 | sizeof(_hdr), &_hdr); | ||
99 | if (hp != NULL && | ||
100 | hp->type == ICMP_REDIRECT) | ||
101 | return NF_DROP; | ||
102 | } | ||
103 | return NF_ACCEPT; | 97 | return NF_ACCEPT; |
104 | } | ||
105 | 98 | ||
106 | /* Don't try to NAT if this packet is not conntracked */ | 99 | /* Don't try to NAT if this packet is not conntracked */ |
107 | if (ct == &nf_conntrack_untracked) | 100 | if (ct == &nf_conntrack_untracked) |
@@ -109,6 +102,9 @@ nf_nat_fn(unsigned int hooknum, | |||
109 | 102 | ||
110 | nat = nfct_nat(ct); | 103 | nat = nfct_nat(ct); |
111 | if (!nat) { | 104 | if (!nat) { |
105 | /* NAT module was loaded late. */ | ||
106 | if (nf_ct_is_confirmed(ct)) | ||
107 | return NF_ACCEPT; | ||
112 | nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); | 108 | nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); |
113 | if (nat == NULL) { | 109 | if (nat == NULL) { |
114 | pr_debug("failed to add NAT extension\n"); | 110 | pr_debug("failed to add NAT extension\n"); |
@@ -134,10 +130,7 @@ nf_nat_fn(unsigned int hooknum, | |||
134 | if (!nf_nat_initialized(ct, maniptype)) { | 130 | if (!nf_nat_initialized(ct, maniptype)) { |
135 | unsigned int ret; | 131 | unsigned int ret; |
136 | 132 | ||
137 | if (unlikely(nf_ct_is_confirmed(ct))) | 133 | if (hooknum == NF_INET_LOCAL_IN) |
138 | /* NAT module was loaded late */ | ||
139 | ret = alloc_null_binding_confirmed(ct, hooknum); | ||
140 | else if (hooknum == NF_INET_LOCAL_IN) | ||
141 | /* LOCAL_IN hook doesn't have a chain! */ | 134 | /* LOCAL_IN hook doesn't have a chain! */ |
142 | ret = alloc_null_binding(ct, hooknum); | 135 | ret = alloc_null_binding(ct, hooknum); |
143 | else | 136 | else |
@@ -189,7 +182,7 @@ nf_nat_out(unsigned int hooknum, | |||
189 | int (*okfn)(struct sk_buff *)) | 182 | int (*okfn)(struct sk_buff *)) |
190 | { | 183 | { |
191 | #ifdef CONFIG_XFRM | 184 | #ifdef CONFIG_XFRM |
192 | struct nf_conn *ct; | 185 | const struct nf_conn *ct; |
193 | enum ip_conntrack_info ctinfo; | 186 | enum ip_conntrack_info ctinfo; |
194 | #endif | 187 | #endif |
195 | unsigned int ret; | 188 | unsigned int ret; |
@@ -223,7 +216,7 @@ nf_nat_local_fn(unsigned int hooknum, | |||
223 | const struct net_device *out, | 216 | const struct net_device *out, |
224 | int (*okfn)(struct sk_buff *)) | 217 | int (*okfn)(struct sk_buff *)) |
225 | { | 218 | { |
226 | struct nf_conn *ct; | 219 | const struct nf_conn *ct; |
227 | enum ip_conntrack_info ctinfo; | 220 | enum ip_conntrack_info ctinfo; |
228 | unsigned int ret; | 221 | unsigned int ret; |
229 | 222 | ||
@@ -252,25 +245,6 @@ nf_nat_local_fn(unsigned int hooknum, | |||
252 | return ret; | 245 | return ret; |
253 | } | 246 | } |
254 | 247 | ||
255 | static unsigned int | ||
256 | nf_nat_adjust(unsigned int hooknum, | ||
257 | struct sk_buff *skb, | ||
258 | const struct net_device *in, | ||
259 | const struct net_device *out, | ||
260 | int (*okfn)(struct sk_buff *)) | ||
261 | { | ||
262 | struct nf_conn *ct; | ||
263 | enum ip_conntrack_info ctinfo; | ||
264 | |||
265 | ct = nf_ct_get(skb, &ctinfo); | ||
266 | if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { | ||
267 | pr_debug("nf_nat_standalone: adjusting sequence number\n"); | ||
268 | if (!nf_nat_seq_adjust(skb, ct, ctinfo)) | ||
269 | return NF_DROP; | ||
270 | } | ||
271 | return NF_ACCEPT; | ||
272 | } | ||
273 | |||
274 | /* We must be after connection tracking and before packet filtering. */ | 248 | /* We must be after connection tracking and before packet filtering. */ |
275 | 249 | ||
276 | static struct nf_hook_ops nf_nat_ops[] __read_mostly = { | 250 | static struct nf_hook_ops nf_nat_ops[] __read_mostly = { |
@@ -290,14 +264,6 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { | |||
290 | .hooknum = NF_INET_POST_ROUTING, | 264 | .hooknum = NF_INET_POST_ROUTING, |
291 | .priority = NF_IP_PRI_NAT_SRC, | 265 | .priority = NF_IP_PRI_NAT_SRC, |
292 | }, | 266 | }, |
293 | /* After conntrack, adjust sequence number */ | ||
294 | { | ||
295 | .hook = nf_nat_adjust, | ||
296 | .owner = THIS_MODULE, | ||
297 | .pf = PF_INET, | ||
298 | .hooknum = NF_INET_POST_ROUTING, | ||
299 | .priority = NF_IP_PRI_NAT_SEQ_ADJUST, | ||
300 | }, | ||
301 | /* Before packet filtering, change destination */ | 267 | /* Before packet filtering, change destination */ |
302 | { | 268 | { |
303 | .hook = nf_nat_local_fn, | 269 | .hook = nf_nat_local_fn, |
@@ -314,14 +280,6 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { | |||
314 | .hooknum = NF_INET_LOCAL_IN, | 280 | .hooknum = NF_INET_LOCAL_IN, |
315 | .priority = NF_IP_PRI_NAT_SRC, | 281 | .priority = NF_IP_PRI_NAT_SRC, |
316 | }, | 282 | }, |
317 | /* After conntrack, adjust sequence number */ | ||
318 | { | ||
319 | .hook = nf_nat_adjust, | ||
320 | .owner = THIS_MODULE, | ||
321 | .pf = PF_INET, | ||
322 | .hooknum = NF_INET_LOCAL_IN, | ||
323 | .priority = NF_IP_PRI_NAT_SEQ_ADJUST, | ||
324 | }, | ||
325 | }; | 283 | }; |
326 | 284 | ||
327 | static int __init nf_nat_standalone_init(void) | 285 | static int __init nf_nat_standalone_init(void) |
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index d63474c6b400..552169b41b16 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -51,24 +51,54 @@ | |||
51 | */ | 51 | */ |
52 | static int sockstat_seq_show(struct seq_file *seq, void *v) | 52 | static int sockstat_seq_show(struct seq_file *seq, void *v) |
53 | { | 53 | { |
54 | struct net *net = seq->private; | ||
55 | |||
54 | socket_seq_show(seq); | 56 | socket_seq_show(seq); |
55 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", | 57 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", |
56 | sock_prot_inuse_get(&tcp_prot), | 58 | sock_prot_inuse_get(net, &tcp_prot), |
57 | atomic_read(&tcp_orphan_count), | 59 | atomic_read(&tcp_orphan_count), |
58 | tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), | 60 | tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), |
59 | atomic_read(&tcp_memory_allocated)); | 61 | atomic_read(&tcp_memory_allocated)); |
60 | seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot), | 62 | seq_printf(seq, "UDP: inuse %d mem %d\n", |
63 | sock_prot_inuse_get(net, &udp_prot), | ||
61 | atomic_read(&udp_memory_allocated)); | 64 | atomic_read(&udp_memory_allocated)); |
62 | seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot)); | 65 | seq_printf(seq, "UDPLITE: inuse %d\n", |
63 | seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot)); | 66 | sock_prot_inuse_get(net, &udplite_prot)); |
67 | seq_printf(seq, "RAW: inuse %d\n", | ||
68 | sock_prot_inuse_get(net, &raw_prot)); | ||
64 | seq_printf(seq, "FRAG: inuse %d memory %d\n", | 69 | seq_printf(seq, "FRAG: inuse %d memory %d\n", |
65 | ip_frag_nqueues(&init_net), ip_frag_mem(&init_net)); | 70 | ip_frag_nqueues(net), ip_frag_mem(net)); |
66 | return 0; | 71 | return 0; |
67 | } | 72 | } |
68 | 73 | ||
69 | static int sockstat_seq_open(struct inode *inode, struct file *file) | 74 | static int sockstat_seq_open(struct inode *inode, struct file *file) |
70 | { | 75 | { |
71 | return single_open(file, sockstat_seq_show, NULL); | 76 | int err; |
77 | struct net *net; | ||
78 | |||
79 | err = -ENXIO; | ||
80 | net = get_proc_net(inode); | ||
81 | if (net == NULL) | ||
82 | goto err_net; | ||
83 | |||
84 | err = single_open(file, sockstat_seq_show, net); | ||
85 | if (err < 0) | ||
86 | goto err_open; | ||
87 | |||
88 | return 0; | ||
89 | |||
90 | err_open: | ||
91 | put_net(net); | ||
92 | err_net: | ||
93 | return err; | ||
94 | } | ||
95 | |||
96 | static int sockstat_seq_release(struct inode *inode, struct file *file) | ||
97 | { | ||
98 | struct net *net = ((struct seq_file *)file->private_data)->private; | ||
99 | |||
100 | put_net(net); | ||
101 | return single_release(inode, file); | ||
72 | } | 102 | } |
73 | 103 | ||
74 | static const struct file_operations sockstat_seq_fops = { | 104 | static const struct file_operations sockstat_seq_fops = { |
@@ -76,7 +106,7 @@ static const struct file_operations sockstat_seq_fops = { | |||
76 | .open = sockstat_seq_open, | 106 | .open = sockstat_seq_open, |
77 | .read = seq_read, | 107 | .read = seq_read, |
78 | .llseek = seq_lseek, | 108 | .llseek = seq_lseek, |
79 | .release = single_release, | 109 | .release = sockstat_seq_release, |
80 | }; | 110 | }; |
81 | 111 | ||
82 | /* snmp items */ | 112 | /* snmp items */ |
@@ -423,25 +453,42 @@ static const struct file_operations netstat_seq_fops = { | |||
423 | .release = single_release, | 453 | .release = single_release, |
424 | }; | 454 | }; |
425 | 455 | ||
456 | static __net_init int ip_proc_init_net(struct net *net) | ||
457 | { | ||
458 | if (!proc_net_fops_create(net, "sockstat", S_IRUGO, &sockstat_seq_fops)) | ||
459 | return -ENOMEM; | ||
460 | return 0; | ||
461 | } | ||
462 | |||
463 | static __net_exit void ip_proc_exit_net(struct net *net) | ||
464 | { | ||
465 | proc_net_remove(net, "sockstat"); | ||
466 | } | ||
467 | |||
468 | static __net_initdata struct pernet_operations ip_proc_ops = { | ||
469 | .init = ip_proc_init_net, | ||
470 | .exit = ip_proc_exit_net, | ||
471 | }; | ||
472 | |||
426 | int __init ip_misc_proc_init(void) | 473 | int __init ip_misc_proc_init(void) |
427 | { | 474 | { |
428 | int rc = 0; | 475 | int rc = 0; |
429 | 476 | ||
477 | if (register_pernet_subsys(&ip_proc_ops)) | ||
478 | goto out_pernet; | ||
479 | |||
430 | if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops)) | 480 | if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops)) |
431 | goto out_netstat; | 481 | goto out_netstat; |
432 | 482 | ||
433 | if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops)) | 483 | if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops)) |
434 | goto out_snmp; | 484 | goto out_snmp; |
435 | |||
436 | if (!proc_net_fops_create(&init_net, "sockstat", S_IRUGO, &sockstat_seq_fops)) | ||
437 | goto out_sockstat; | ||
438 | out: | 485 | out: |
439 | return rc; | 486 | return rc; |
440 | out_sockstat: | ||
441 | proc_net_remove(&init_net, "snmp"); | ||
442 | out_snmp: | 487 | out_snmp: |
443 | proc_net_remove(&init_net, "netstat"); | 488 | proc_net_remove(&init_net, "netstat"); |
444 | out_netstat: | 489 | out_netstat: |
490 | unregister_pernet_subsys(&ip_proc_ops); | ||
491 | out_pernet: | ||
445 | rc = -ENOMEM; | 492 | rc = -ENOMEM; |
446 | goto out; | 493 | goto out; |
447 | } | 494 | } |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a3002fe65b7f..11d7f753a820 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -81,41 +81,34 @@ | |||
81 | #include <linux/netfilter_ipv4.h> | 81 | #include <linux/netfilter_ipv4.h> |
82 | 82 | ||
83 | static struct raw_hashinfo raw_v4_hashinfo = { | 83 | static struct raw_hashinfo raw_v4_hashinfo = { |
84 | .lock = __RW_LOCK_UNLOCKED(), | 84 | .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), |
85 | }; | 85 | }; |
86 | 86 | ||
87 | void raw_hash_sk(struct sock *sk, struct raw_hashinfo *h) | 87 | void raw_hash_sk(struct sock *sk) |
88 | { | 88 | { |
89 | struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; | ||
89 | struct hlist_head *head; | 90 | struct hlist_head *head; |
90 | 91 | ||
91 | head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)]; | 92 | head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)]; |
92 | 93 | ||
93 | write_lock_bh(&h->lock); | 94 | write_lock_bh(&h->lock); |
94 | sk_add_node(sk, head); | 95 | sk_add_node(sk, head); |
95 | sock_prot_inuse_add(sk->sk_prot, 1); | 96 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
96 | write_unlock_bh(&h->lock); | 97 | write_unlock_bh(&h->lock); |
97 | } | 98 | } |
98 | EXPORT_SYMBOL_GPL(raw_hash_sk); | 99 | EXPORT_SYMBOL_GPL(raw_hash_sk); |
99 | 100 | ||
100 | void raw_unhash_sk(struct sock *sk, struct raw_hashinfo *h) | 101 | void raw_unhash_sk(struct sock *sk) |
101 | { | 102 | { |
103 | struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; | ||
104 | |||
102 | write_lock_bh(&h->lock); | 105 | write_lock_bh(&h->lock); |
103 | if (sk_del_node_init(sk)) | 106 | if (sk_del_node_init(sk)) |
104 | sock_prot_inuse_add(sk->sk_prot, -1); | 107 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); |
105 | write_unlock_bh(&h->lock); | 108 | write_unlock_bh(&h->lock); |
106 | } | 109 | } |
107 | EXPORT_SYMBOL_GPL(raw_unhash_sk); | 110 | EXPORT_SYMBOL_GPL(raw_unhash_sk); |
108 | 111 | ||
109 | static void raw_v4_hash(struct sock *sk) | ||
110 | { | ||
111 | raw_hash_sk(sk, &raw_v4_hashinfo); | ||
112 | } | ||
113 | |||
114 | static void raw_v4_unhash(struct sock *sk) | ||
115 | { | ||
116 | raw_unhash_sk(sk, &raw_v4_hashinfo); | ||
117 | } | ||
118 | |||
119 | static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, | 112 | static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, |
120 | unsigned short num, __be32 raddr, __be32 laddr, int dif) | 113 | unsigned short num, __be32 raddr, __be32 laddr, int dif) |
121 | { | 114 | { |
@@ -124,7 +117,7 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, | |||
124 | sk_for_each_from(sk, node) { | 117 | sk_for_each_from(sk, node) { |
125 | struct inet_sock *inet = inet_sk(sk); | 118 | struct inet_sock *inet = inet_sk(sk); |
126 | 119 | ||
127 | if (sk->sk_net == net && inet->num == num && | 120 | if (net_eq(sock_net(sk), net) && inet->num == num && |
128 | !(inet->daddr && inet->daddr != raddr) && | 121 | !(inet->daddr && inet->daddr != raddr) && |
129 | !(inet->rcv_saddr && inet->rcv_saddr != laddr) && | 122 | !(inet->rcv_saddr && inet->rcv_saddr != laddr) && |
130 | !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) | 123 | !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) |
@@ -175,7 +168,7 @@ static int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) | |||
175 | if (hlist_empty(head)) | 168 | if (hlist_empty(head)) |
176 | goto out; | 169 | goto out; |
177 | 170 | ||
178 | net = skb->dev->nd_net; | 171 | net = dev_net(skb->dev); |
179 | sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, | 172 | sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, |
180 | iph->saddr, iph->daddr, | 173 | iph->saddr, iph->daddr, |
181 | skb->dev->ifindex); | 174 | skb->dev->ifindex); |
@@ -283,7 +276,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) | |||
283 | raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); | 276 | raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); |
284 | if (raw_sk != NULL) { | 277 | if (raw_sk != NULL) { |
285 | iph = (struct iphdr *)skb->data; | 278 | iph = (struct iphdr *)skb->data; |
286 | net = skb->dev->nd_net; | 279 | net = dev_net(skb->dev); |
287 | 280 | ||
288 | while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, | 281 | while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, |
289 | iph->daddr, iph->saddr, | 282 | iph->daddr, iph->saddr, |
@@ -506,7 +499,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
506 | ipc.oif = sk->sk_bound_dev_if; | 499 | ipc.oif = sk->sk_bound_dev_if; |
507 | 500 | ||
508 | if (msg->msg_controllen) { | 501 | if (msg->msg_controllen) { |
509 | err = ip_cmsg_send(msg, &ipc); | 502 | err = ip_cmsg_send(sock_net(sk), msg, &ipc); |
510 | if (err) | 503 | if (err) |
511 | goto out; | 504 | goto out; |
512 | if (ipc.opt) | 505 | if (ipc.opt) |
@@ -560,7 +553,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
560 | } | 553 | } |
561 | 554 | ||
562 | security_sk_classify_flow(sk, &fl); | 555 | security_sk_classify_flow(sk, &fl); |
563 | err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); | 556 | err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1); |
564 | } | 557 | } |
565 | if (err) | 558 | if (err) |
566 | goto done; | 559 | goto done; |
@@ -627,7 +620,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
627 | 620 | ||
628 | if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) | 621 | if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) |
629 | goto out; | 622 | goto out; |
630 | chk_addr_ret = inet_addr_type(sk->sk_net, addr->sin_addr.s_addr); | 623 | chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); |
631 | ret = -EADDRNOTAVAIL; | 624 | ret = -EADDRNOTAVAIL; |
632 | if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && | 625 | if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && |
633 | chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) | 626 | chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) |
@@ -825,8 +818,6 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
825 | } | 818 | } |
826 | } | 819 | } |
827 | 820 | ||
828 | DEFINE_PROTO_INUSE(raw) | ||
829 | |||
830 | struct proto raw_prot = { | 821 | struct proto raw_prot = { |
831 | .name = "RAW", | 822 | .name = "RAW", |
832 | .owner = THIS_MODULE, | 823 | .owner = THIS_MODULE, |
@@ -841,14 +832,14 @@ struct proto raw_prot = { | |||
841 | .recvmsg = raw_recvmsg, | 832 | .recvmsg = raw_recvmsg, |
842 | .bind = raw_bind, | 833 | .bind = raw_bind, |
843 | .backlog_rcv = raw_rcv_skb, | 834 | .backlog_rcv = raw_rcv_skb, |
844 | .hash = raw_v4_hash, | 835 | .hash = raw_hash_sk, |
845 | .unhash = raw_v4_unhash, | 836 | .unhash = raw_unhash_sk, |
846 | .obj_size = sizeof(struct raw_sock), | 837 | .obj_size = sizeof(struct raw_sock), |
838 | .h.raw_hash = &raw_v4_hashinfo, | ||
847 | #ifdef CONFIG_COMPAT | 839 | #ifdef CONFIG_COMPAT |
848 | .compat_setsockopt = compat_raw_setsockopt, | 840 | .compat_setsockopt = compat_raw_setsockopt, |
849 | .compat_getsockopt = compat_raw_getsockopt, | 841 | .compat_getsockopt = compat_raw_getsockopt, |
850 | #endif | 842 | #endif |
851 | REF_PROTO_INUSE(raw) | ||
852 | }; | 843 | }; |
853 | 844 | ||
854 | #ifdef CONFIG_PROC_FS | 845 | #ifdef CONFIG_PROC_FS |
@@ -862,7 +853,7 @@ static struct sock *raw_get_first(struct seq_file *seq) | |||
862 | struct hlist_node *node; | 853 | struct hlist_node *node; |
863 | 854 | ||
864 | sk_for_each(sk, node, &state->h->ht[state->bucket]) | 855 | sk_for_each(sk, node, &state->h->ht[state->bucket]) |
865 | if (sk->sk_net == state->p.net) | 856 | if (sock_net(sk) == seq_file_net(seq)) |
866 | goto found; | 857 | goto found; |
867 | } | 858 | } |
868 | sk = NULL; | 859 | sk = NULL; |
@@ -878,7 +869,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) | |||
878 | sk = sk_next(sk); | 869 | sk = sk_next(sk); |
879 | try_again: | 870 | try_again: |
880 | ; | 871 | ; |
881 | } while (sk && sk->sk_net != state->p.net); | 872 | } while (sk && sock_net(sk) != seq_file_net(seq)); |
882 | 873 | ||
883 | if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { | 874 | if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { |
884 | sk = sk_head(&state->h->ht[state->bucket]); | 875 | sk = sk_head(&state->h->ht[state->bucket]); |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7b5e8e1d94be..780e9484c825 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -118,21 +118,19 @@ | |||
118 | #define RT_GC_TIMEOUT (300*HZ) | 118 | #define RT_GC_TIMEOUT (300*HZ) |
119 | 119 | ||
120 | static int ip_rt_max_size; | 120 | static int ip_rt_max_size; |
121 | static int ip_rt_gc_timeout = RT_GC_TIMEOUT; | 121 | static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; |
122 | static int ip_rt_gc_interval = 60 * HZ; | 122 | static int ip_rt_gc_interval __read_mostly = 60 * HZ; |
123 | static int ip_rt_gc_min_interval = HZ / 2; | 123 | static int ip_rt_gc_min_interval __read_mostly = HZ / 2; |
124 | static int ip_rt_redirect_number = 9; | 124 | static int ip_rt_redirect_number __read_mostly = 9; |
125 | static int ip_rt_redirect_load = HZ / 50; | 125 | static int ip_rt_redirect_load __read_mostly = HZ / 50; |
126 | static int ip_rt_redirect_silence = ((HZ / 50) << (9 + 1)); | 126 | static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1)); |
127 | static int ip_rt_error_cost = HZ; | 127 | static int ip_rt_error_cost __read_mostly = HZ; |
128 | static int ip_rt_error_burst = 5 * HZ; | 128 | static int ip_rt_error_burst __read_mostly = 5 * HZ; |
129 | static int ip_rt_gc_elasticity = 8; | 129 | static int ip_rt_gc_elasticity __read_mostly = 8; |
130 | static int ip_rt_mtu_expires = 10 * 60 * HZ; | 130 | static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; |
131 | static int ip_rt_min_pmtu = 512 + 20 + 20; | 131 | static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; |
132 | static int ip_rt_min_advmss = 256; | 132 | static int ip_rt_min_advmss __read_mostly = 256; |
133 | static int ip_rt_secret_interval = 10 * 60 * HZ; | 133 | static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; |
134 | |||
135 | #define RTprint(a...) printk(KERN_DEBUG a) | ||
136 | 134 | ||
137 | static void rt_worker_func(struct work_struct *work); | 135 | static void rt_worker_func(struct work_struct *work); |
138 | static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); | 136 | static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); |
@@ -252,40 +250,41 @@ static inline void rt_hash_lock_init(void) | |||
252 | } | 250 | } |
253 | #endif | 251 | #endif |
254 | 252 | ||
255 | static struct rt_hash_bucket *rt_hash_table; | 253 | static struct rt_hash_bucket *rt_hash_table __read_mostly; |
256 | static unsigned rt_hash_mask; | 254 | static unsigned rt_hash_mask __read_mostly; |
257 | static unsigned int rt_hash_log; | 255 | static unsigned int rt_hash_log __read_mostly; |
258 | static atomic_t rt_genid; | 256 | static atomic_t rt_genid __read_mostly; |
259 | 257 | ||
260 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); | 258 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); |
261 | #define RT_CACHE_STAT_INC(field) \ | 259 | #define RT_CACHE_STAT_INC(field) \ |
262 | (__raw_get_cpu_var(rt_cache_stat).field++) | 260 | (__raw_get_cpu_var(rt_cache_stat).field++) |
263 | 261 | ||
264 | static unsigned int rt_hash_code(u32 daddr, u32 saddr) | 262 | static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx) |
265 | { | 263 | { |
266 | return jhash_2words(daddr, saddr, atomic_read(&rt_genid)) | 264 | return jhash_3words((__force u32)(__be32)(daddr), |
265 | (__force u32)(__be32)(saddr), | ||
266 | idx, atomic_read(&rt_genid)) | ||
267 | & rt_hash_mask; | 267 | & rt_hash_mask; |
268 | } | 268 | } |
269 | 269 | ||
270 | #define rt_hash(daddr, saddr, idx) \ | ||
271 | rt_hash_code((__force u32)(__be32)(daddr),\ | ||
272 | (__force u32)(__be32)(saddr) ^ ((idx) << 5)) | ||
273 | |||
274 | #ifdef CONFIG_PROC_FS | 270 | #ifdef CONFIG_PROC_FS |
275 | struct rt_cache_iter_state { | 271 | struct rt_cache_iter_state { |
272 | struct seq_net_private p; | ||
276 | int bucket; | 273 | int bucket; |
277 | int genid; | 274 | int genid; |
278 | }; | 275 | }; |
279 | 276 | ||
280 | static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) | 277 | static struct rtable *rt_cache_get_first(struct seq_file *seq) |
281 | { | 278 | { |
279 | struct rt_cache_iter_state *st = seq->private; | ||
282 | struct rtable *r = NULL; | 280 | struct rtable *r = NULL; |
283 | 281 | ||
284 | for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { | 282 | for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { |
285 | rcu_read_lock_bh(); | 283 | rcu_read_lock_bh(); |
286 | r = rcu_dereference(rt_hash_table[st->bucket].chain); | 284 | r = rcu_dereference(rt_hash_table[st->bucket].chain); |
287 | while (r) { | 285 | while (r) { |
288 | if (r->rt_genid == st->genid) | 286 | if (dev_net(r->u.dst.dev) == seq_file_net(seq) && |
287 | r->rt_genid == st->genid) | ||
289 | return r; | 288 | return r; |
290 | r = rcu_dereference(r->u.dst.rt_next); | 289 | r = rcu_dereference(r->u.dst.rt_next); |
291 | } | 290 | } |
@@ -294,8 +293,10 @@ static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) | |||
294 | return r; | 293 | return r; |
295 | } | 294 | } |
296 | 295 | ||
297 | static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r) | 296 | static struct rtable *__rt_cache_get_next(struct seq_file *seq, |
297 | struct rtable *r) | ||
298 | { | 298 | { |
299 | struct rt_cache_iter_state *st = seq->private; | ||
299 | r = r->u.dst.rt_next; | 300 | r = r->u.dst.rt_next; |
300 | while (!r) { | 301 | while (!r) { |
301 | rcu_read_unlock_bh(); | 302 | rcu_read_unlock_bh(); |
@@ -307,25 +308,34 @@ static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct r | |||
307 | return rcu_dereference(r); | 308 | return rcu_dereference(r); |
308 | } | 309 | } |
309 | 310 | ||
310 | static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos) | 311 | static struct rtable *rt_cache_get_next(struct seq_file *seq, |
312 | struct rtable *r) | ||
313 | { | ||
314 | struct rt_cache_iter_state *st = seq->private; | ||
315 | while ((r = __rt_cache_get_next(seq, r)) != NULL) { | ||
316 | if (dev_net(r->u.dst.dev) != seq_file_net(seq)) | ||
317 | continue; | ||
318 | if (r->rt_genid == st->genid) | ||
319 | break; | ||
320 | } | ||
321 | return r; | ||
322 | } | ||
323 | |||
324 | static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos) | ||
311 | { | 325 | { |
312 | struct rtable *r = rt_cache_get_first(st); | 326 | struct rtable *r = rt_cache_get_first(seq); |
313 | 327 | ||
314 | if (r) | 328 | if (r) |
315 | while (pos && (r = rt_cache_get_next(st, r))) { | 329 | while (pos && (r = rt_cache_get_next(seq, r))) |
316 | if (r->rt_genid != st->genid) | ||
317 | continue; | ||
318 | --pos; | 330 | --pos; |
319 | } | ||
320 | return pos ? NULL : r; | 331 | return pos ? NULL : r; |
321 | } | 332 | } |
322 | 333 | ||
323 | static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) | 334 | static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) |
324 | { | 335 | { |
325 | struct rt_cache_iter_state *st = seq->private; | 336 | struct rt_cache_iter_state *st = seq->private; |
326 | |||
327 | if (*pos) | 337 | if (*pos) |
328 | return rt_cache_get_idx(st, *pos - 1); | 338 | return rt_cache_get_idx(seq, *pos - 1); |
329 | st->genid = atomic_read(&rt_genid); | 339 | st->genid = atomic_read(&rt_genid); |
330 | return SEQ_START_TOKEN; | 340 | return SEQ_START_TOKEN; |
331 | } | 341 | } |
@@ -333,12 +343,11 @@ static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) | |||
333 | static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 343 | static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
334 | { | 344 | { |
335 | struct rtable *r; | 345 | struct rtable *r; |
336 | struct rt_cache_iter_state *st = seq->private; | ||
337 | 346 | ||
338 | if (v == SEQ_START_TOKEN) | 347 | if (v == SEQ_START_TOKEN) |
339 | r = rt_cache_get_first(st); | 348 | r = rt_cache_get_first(seq); |
340 | else | 349 | else |
341 | r = rt_cache_get_next(st, v); | 350 | r = rt_cache_get_next(seq, v); |
342 | ++*pos; | 351 | ++*pos; |
343 | return r; | 352 | return r; |
344 | } | 353 | } |
@@ -390,7 +399,7 @@ static const struct seq_operations rt_cache_seq_ops = { | |||
390 | 399 | ||
391 | static int rt_cache_seq_open(struct inode *inode, struct file *file) | 400 | static int rt_cache_seq_open(struct inode *inode, struct file *file) |
392 | { | 401 | { |
393 | return seq_open_private(file, &rt_cache_seq_ops, | 402 | return seq_open_net(inode, file, &rt_cache_seq_ops, |
394 | sizeof(struct rt_cache_iter_state)); | 403 | sizeof(struct rt_cache_iter_state)); |
395 | } | 404 | } |
396 | 405 | ||
@@ -399,7 +408,7 @@ static const struct file_operations rt_cache_seq_fops = { | |||
399 | .open = rt_cache_seq_open, | 408 | .open = rt_cache_seq_open, |
400 | .read = seq_read, | 409 | .read = seq_read, |
401 | .llseek = seq_lseek, | 410 | .llseek = seq_lseek, |
402 | .release = seq_release_private, | 411 | .release = seq_release_net, |
403 | }; | 412 | }; |
404 | 413 | ||
405 | 414 | ||
@@ -533,7 +542,7 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset, | |||
533 | } | 542 | } |
534 | #endif | 543 | #endif |
535 | 544 | ||
536 | static __init int ip_rt_proc_init(struct net *net) | 545 | static int __net_init ip_rt_do_proc_init(struct net *net) |
537 | { | 546 | { |
538 | struct proc_dir_entry *pde; | 547 | struct proc_dir_entry *pde; |
539 | 548 | ||
@@ -564,25 +573,43 @@ err2: | |||
564 | err1: | 573 | err1: |
565 | return -ENOMEM; | 574 | return -ENOMEM; |
566 | } | 575 | } |
576 | |||
577 | static void __net_exit ip_rt_do_proc_exit(struct net *net) | ||
578 | { | ||
579 | remove_proc_entry("rt_cache", net->proc_net_stat); | ||
580 | remove_proc_entry("rt_cache", net->proc_net); | ||
581 | remove_proc_entry("rt_acct", net->proc_net); | ||
582 | } | ||
583 | |||
584 | static struct pernet_operations ip_rt_proc_ops __net_initdata = { | ||
585 | .init = ip_rt_do_proc_init, | ||
586 | .exit = ip_rt_do_proc_exit, | ||
587 | }; | ||
588 | |||
589 | static int __init ip_rt_proc_init(void) | ||
590 | { | ||
591 | return register_pernet_subsys(&ip_rt_proc_ops); | ||
592 | } | ||
593 | |||
567 | #else | 594 | #else |
568 | static inline int ip_rt_proc_init(struct net *net) | 595 | static inline int ip_rt_proc_init(void) |
569 | { | 596 | { |
570 | return 0; | 597 | return 0; |
571 | } | 598 | } |
572 | #endif /* CONFIG_PROC_FS */ | 599 | #endif /* CONFIG_PROC_FS */ |
573 | 600 | ||
574 | static __inline__ void rt_free(struct rtable *rt) | 601 | static inline void rt_free(struct rtable *rt) |
575 | { | 602 | { |
576 | call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); | 603 | call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); |
577 | } | 604 | } |
578 | 605 | ||
579 | static __inline__ void rt_drop(struct rtable *rt) | 606 | static inline void rt_drop(struct rtable *rt) |
580 | { | 607 | { |
581 | ip_rt_put(rt); | 608 | ip_rt_put(rt); |
582 | call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); | 609 | call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); |
583 | } | 610 | } |
584 | 611 | ||
585 | static __inline__ int rt_fast_clean(struct rtable *rth) | 612 | static inline int rt_fast_clean(struct rtable *rth) |
586 | { | 613 | { |
587 | /* Kill broadcast/multicast entries very aggresively, if they | 614 | /* Kill broadcast/multicast entries very aggresively, if they |
588 | collide in hash table with more useful entries */ | 615 | collide in hash table with more useful entries */ |
@@ -590,7 +617,7 @@ static __inline__ int rt_fast_clean(struct rtable *rth) | |||
590 | rth->fl.iif && rth->u.dst.rt_next; | 617 | rth->fl.iif && rth->u.dst.rt_next; |
591 | } | 618 | } |
592 | 619 | ||
593 | static __inline__ int rt_valuable(struct rtable *rth) | 620 | static inline int rt_valuable(struct rtable *rth) |
594 | { | 621 | { |
595 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || | 622 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || |
596 | rth->u.dst.expires; | 623 | rth->u.dst.expires; |
@@ -652,7 +679,7 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) | |||
652 | 679 | ||
653 | static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) | 680 | static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) |
654 | { | 681 | { |
655 | return rt1->u.dst.dev->nd_net == rt2->u.dst.dev->nd_net; | 682 | return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev); |
656 | } | 683 | } |
657 | 684 | ||
658 | /* | 685 | /* |
@@ -1032,10 +1059,10 @@ restart: | |||
1032 | #if RT_CACHE_DEBUG >= 2 | 1059 | #if RT_CACHE_DEBUG >= 2 |
1033 | if (rt->u.dst.rt_next) { | 1060 | if (rt->u.dst.rt_next) { |
1034 | struct rtable *trt; | 1061 | struct rtable *trt; |
1035 | printk(KERN_DEBUG "rt_cache @%02x: %u.%u.%u.%u", hash, | 1062 | printk(KERN_DEBUG "rt_cache @%02x: " NIPQUAD_FMT, hash, |
1036 | NIPQUAD(rt->rt_dst)); | 1063 | NIPQUAD(rt->rt_dst)); |
1037 | for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) | 1064 | for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) |
1038 | printk(" . %u.%u.%u.%u", NIPQUAD(trt->rt_dst)); | 1065 | printk(" . " NIPQUAD_FMT, NIPQUAD(trt->rt_dst)); |
1039 | printk("\n"); | 1066 | printk("\n"); |
1040 | } | 1067 | } |
1041 | #endif | 1068 | #endif |
@@ -1131,10 +1158,12 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1131 | __be32 skeys[2] = { saddr, 0 }; | 1158 | __be32 skeys[2] = { saddr, 0 }; |
1132 | int ikeys[2] = { dev->ifindex, 0 }; | 1159 | int ikeys[2] = { dev->ifindex, 0 }; |
1133 | struct netevent_redirect netevent; | 1160 | struct netevent_redirect netevent; |
1161 | struct net *net; | ||
1134 | 1162 | ||
1135 | if (!in_dev) | 1163 | if (!in_dev) |
1136 | return; | 1164 | return; |
1137 | 1165 | ||
1166 | net = dev_net(dev); | ||
1138 | if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) | 1167 | if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) |
1139 | || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) | 1168 | || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) |
1140 | || ipv4_is_zeronet(new_gw)) | 1169 | || ipv4_is_zeronet(new_gw)) |
@@ -1146,7 +1175,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1146 | if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) | 1175 | if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) |
1147 | goto reject_redirect; | 1176 | goto reject_redirect; |
1148 | } else { | 1177 | } else { |
1149 | if (inet_addr_type(&init_net, new_gw) != RTN_UNICAST) | 1178 | if (inet_addr_type(net, new_gw) != RTN_UNICAST) |
1150 | goto reject_redirect; | 1179 | goto reject_redirect; |
1151 | } | 1180 | } |
1152 | 1181 | ||
@@ -1164,7 +1193,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1164 | rth->fl.fl4_src != skeys[i] || | 1193 | rth->fl.fl4_src != skeys[i] || |
1165 | rth->fl.oif != ikeys[k] || | 1194 | rth->fl.oif != ikeys[k] || |
1166 | rth->fl.iif != 0 || | 1195 | rth->fl.iif != 0 || |
1167 | rth->rt_genid != atomic_read(&rt_genid)) { | 1196 | rth->rt_genid != atomic_read(&rt_genid) || |
1197 | !net_eq(dev_net(rth->u.dst.dev), net)) { | ||
1168 | rthp = &rth->u.dst.rt_next; | 1198 | rthp = &rth->u.dst.rt_next; |
1169 | continue; | 1199 | continue; |
1170 | } | 1200 | } |
@@ -1245,9 +1275,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1245 | reject_redirect: | 1275 | reject_redirect: |
1246 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1276 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
1247 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) | 1277 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) |
1248 | printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about " | 1278 | printk(KERN_INFO "Redirect from " NIPQUAD_FMT " on %s about " |
1249 | "%u.%u.%u.%u ignored.\n" | 1279 | NIPQUAD_FMT " ignored.\n" |
1250 | " Advised path = %u.%u.%u.%u -> %u.%u.%u.%u\n", | 1280 | " Advised path = " NIPQUAD_FMT " -> " NIPQUAD_FMT "\n", |
1251 | NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw), | 1281 | NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw), |
1252 | NIPQUAD(saddr), NIPQUAD(daddr)); | 1282 | NIPQUAD(saddr), NIPQUAD(daddr)); |
1253 | #endif | 1283 | #endif |
@@ -1256,7 +1286,7 @@ reject_redirect: | |||
1256 | 1286 | ||
1257 | static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | 1287 | static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) |
1258 | { | 1288 | { |
1259 | struct rtable *rt = (struct rtable*)dst; | 1289 | struct rtable *rt = (struct rtable *)dst; |
1260 | struct dst_entry *ret = dst; | 1290 | struct dst_entry *ret = dst; |
1261 | 1291 | ||
1262 | if (rt) { | 1292 | if (rt) { |
@@ -1269,7 +1299,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1269 | rt->fl.oif); | 1299 | rt->fl.oif); |
1270 | #if RT_CACHE_DEBUG >= 1 | 1300 | #if RT_CACHE_DEBUG >= 1 |
1271 | printk(KERN_DEBUG "ipv4_negative_advice: redirect to " | 1301 | printk(KERN_DEBUG "ipv4_negative_advice: redirect to " |
1272 | "%u.%u.%u.%u/%02x dropped\n", | 1302 | NIPQUAD_FMT "/%02x dropped\n", |
1273 | NIPQUAD(rt->rt_dst), rt->fl.fl4_tos); | 1303 | NIPQUAD(rt->rt_dst), rt->fl.fl4_tos); |
1274 | #endif | 1304 | #endif |
1275 | rt_del(hash, rt); | 1305 | rt_del(hash, rt); |
@@ -1297,7 +1327,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1297 | 1327 | ||
1298 | void ip_rt_send_redirect(struct sk_buff *skb) | 1328 | void ip_rt_send_redirect(struct sk_buff *skb) |
1299 | { | 1329 | { |
1300 | struct rtable *rt = (struct rtable*)skb->dst; | 1330 | struct rtable *rt = skb->rtable; |
1301 | struct in_device *in_dev = in_dev_get(rt->u.dst.dev); | 1331 | struct in_device *in_dev = in_dev_get(rt->u.dst.dev); |
1302 | 1332 | ||
1303 | if (!in_dev) | 1333 | if (!in_dev) |
@@ -1334,8 +1364,8 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1334 | if (IN_DEV_LOG_MARTIANS(in_dev) && | 1364 | if (IN_DEV_LOG_MARTIANS(in_dev) && |
1335 | rt->u.dst.rate_tokens == ip_rt_redirect_number && | 1365 | rt->u.dst.rate_tokens == ip_rt_redirect_number && |
1336 | net_ratelimit()) | 1366 | net_ratelimit()) |
1337 | printk(KERN_WARNING "host %u.%u.%u.%u/if%d ignores " | 1367 | printk(KERN_WARNING "host " NIPQUAD_FMT "/if%d ignores " |
1338 | "redirects for %u.%u.%u.%u to %u.%u.%u.%u.\n", | 1368 | "redirects for " NIPQUAD_FMT " to " NIPQUAD_FMT ".\n", |
1339 | NIPQUAD(rt->rt_src), rt->rt_iif, | 1369 | NIPQUAD(rt->rt_src), rt->rt_iif, |
1340 | NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway)); | 1370 | NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway)); |
1341 | #endif | 1371 | #endif |
@@ -1346,7 +1376,7 @@ out: | |||
1346 | 1376 | ||
1347 | static int ip_error(struct sk_buff *skb) | 1377 | static int ip_error(struct sk_buff *skb) |
1348 | { | 1378 | { |
1349 | struct rtable *rt = (struct rtable*)skb->dst; | 1379 | struct rtable *rt = skb->rtable; |
1350 | unsigned long now; | 1380 | unsigned long now; |
1351 | int code; | 1381 | int code; |
1352 | 1382 | ||
@@ -1388,7 +1418,7 @@ out: kfree_skb(skb); | |||
1388 | static const unsigned short mtu_plateau[] = | 1418 | static const unsigned short mtu_plateau[] = |
1389 | {32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 }; | 1419 | {32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 }; |
1390 | 1420 | ||
1391 | static __inline__ unsigned short guess_mtu(unsigned short old_mtu) | 1421 | static inline unsigned short guess_mtu(unsigned short old_mtu) |
1392 | { | 1422 | { |
1393 | int i; | 1423 | int i; |
1394 | 1424 | ||
@@ -1423,7 +1453,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | |||
1423 | rth->rt_src == iph->saddr && | 1453 | rth->rt_src == iph->saddr && |
1424 | rth->fl.iif == 0 && | 1454 | rth->fl.iif == 0 && |
1425 | !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) && | 1455 | !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) && |
1426 | rth->u.dst.dev->nd_net == net && | 1456 | net_eq(dev_net(rth->u.dst.dev), net) && |
1427 | rth->rt_genid == atomic_read(&rt_genid)) { | 1457 | rth->rt_genid == atomic_read(&rt_genid)) { |
1428 | unsigned short mtu = new_mtu; | 1458 | unsigned short mtu = new_mtu; |
1429 | 1459 | ||
@@ -1499,9 +1529,9 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, | |||
1499 | { | 1529 | { |
1500 | struct rtable *rt = (struct rtable *) dst; | 1530 | struct rtable *rt = (struct rtable *) dst; |
1501 | struct in_device *idev = rt->idev; | 1531 | struct in_device *idev = rt->idev; |
1502 | if (dev != dev->nd_net->loopback_dev && idev && idev->dev == dev) { | 1532 | if (dev != dev_net(dev)->loopback_dev && idev && idev->dev == dev) { |
1503 | struct in_device *loopback_idev = | 1533 | struct in_device *loopback_idev = |
1504 | in_dev_get(dev->nd_net->loopback_dev); | 1534 | in_dev_get(dev_net(dev)->loopback_dev); |
1505 | if (loopback_idev) { | 1535 | if (loopback_idev) { |
1506 | rt->idev = loopback_idev; | 1536 | rt->idev = loopback_idev; |
1507 | in_dev_put(idev); | 1537 | in_dev_put(idev); |
@@ -1515,14 +1545,14 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
1515 | 1545 | ||
1516 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); | 1546 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); |
1517 | 1547 | ||
1518 | rt = (struct rtable *) skb->dst; | 1548 | rt = skb->rtable; |
1519 | if (rt) | 1549 | if (rt) |
1520 | dst_set_expires(&rt->u.dst, 0); | 1550 | dst_set_expires(&rt->u.dst, 0); |
1521 | } | 1551 | } |
1522 | 1552 | ||
1523 | static int ip_rt_bug(struct sk_buff *skb) | 1553 | static int ip_rt_bug(struct sk_buff *skb) |
1524 | { | 1554 | { |
1525 | printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n", | 1555 | printk(KERN_DEBUG "ip_rt_bug: " NIPQUAD_FMT " -> " NIPQUAD_FMT ", %s\n", |
1526 | NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr), | 1556 | NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr), |
1527 | skb->dev ? skb->dev->name : "?"); | 1557 | skb->dev ? skb->dev->name : "?"); |
1528 | kfree_skb(skb); | 1558 | kfree_skb(skb); |
@@ -1545,7 +1575,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
1545 | 1575 | ||
1546 | if (rt->fl.iif == 0) | 1576 | if (rt->fl.iif == 0) |
1547 | src = rt->rt_src; | 1577 | src = rt->rt_src; |
1548 | else if (fib_lookup(rt->u.dst.dev->nd_net, &rt->fl, &res) == 0) { | 1578 | else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) { |
1549 | src = FIB_RES_PREFSRC(res); | 1579 | src = FIB_RES_PREFSRC(res); |
1550 | fib_res_put(&res); | 1580 | fib_res_put(&res); |
1551 | } else | 1581 | } else |
@@ -1675,7 +1705,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1675 | 1705 | ||
1676 | in_dev_put(in_dev); | 1706 | in_dev_put(in_dev); |
1677 | hash = rt_hash(daddr, saddr, dev->ifindex); | 1707 | hash = rt_hash(daddr, saddr, dev->ifindex); |
1678 | return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst); | 1708 | return rt_intern_hash(hash, rth, &skb->rtable); |
1679 | 1709 | ||
1680 | e_nobufs: | 1710 | e_nobufs: |
1681 | in_dev_put(in_dev); | 1711 | in_dev_put(in_dev); |
@@ -1700,8 +1730,8 @@ static void ip_handle_martian_source(struct net_device *dev, | |||
1700 | * RFC1812 recommendation, if source is martian, | 1730 | * RFC1812 recommendation, if source is martian, |
1701 | * the only hint is MAC header. | 1731 | * the only hint is MAC header. |
1702 | */ | 1732 | */ |
1703 | printk(KERN_WARNING "martian source %u.%u.%u.%u from " | 1733 | printk(KERN_WARNING "martian source " NIPQUAD_FMT " from " |
1704 | "%u.%u.%u.%u, on dev %s\n", | 1734 | NIPQUAD_FMT", on dev %s\n", |
1705 | NIPQUAD(daddr), NIPQUAD(saddr), dev->name); | 1735 | NIPQUAD(daddr), NIPQUAD(saddr), dev->name); |
1706 | if (dev->hard_header_len && skb_mac_header_was_set(skb)) { | 1736 | if (dev->hard_header_len && skb_mac_header_was_set(skb)) { |
1707 | int i; | 1737 | int i; |
@@ -1718,11 +1748,11 @@ static void ip_handle_martian_source(struct net_device *dev, | |||
1718 | #endif | 1748 | #endif |
1719 | } | 1749 | } |
1720 | 1750 | ||
1721 | static inline int __mkroute_input(struct sk_buff *skb, | 1751 | static int __mkroute_input(struct sk_buff *skb, |
1722 | struct fib_result* res, | 1752 | struct fib_result *res, |
1723 | struct in_device *in_dev, | 1753 | struct in_device *in_dev, |
1724 | __be32 daddr, __be32 saddr, u32 tos, | 1754 | __be32 daddr, __be32 saddr, u32 tos, |
1725 | struct rtable **result) | 1755 | struct rtable **result) |
1726 | { | 1756 | { |
1727 | 1757 | ||
1728 | struct rtable *rth; | 1758 | struct rtable *rth; |
@@ -1814,11 +1844,11 @@ static inline int __mkroute_input(struct sk_buff *skb, | |||
1814 | return err; | 1844 | return err; |
1815 | } | 1845 | } |
1816 | 1846 | ||
1817 | static inline int ip_mkroute_input(struct sk_buff *skb, | 1847 | static int ip_mkroute_input(struct sk_buff *skb, |
1818 | struct fib_result* res, | 1848 | struct fib_result *res, |
1819 | const struct flowi *fl, | 1849 | const struct flowi *fl, |
1820 | struct in_device *in_dev, | 1850 | struct in_device *in_dev, |
1821 | __be32 daddr, __be32 saddr, u32 tos) | 1851 | __be32 daddr, __be32 saddr, u32 tos) |
1822 | { | 1852 | { |
1823 | struct rtable* rth = NULL; | 1853 | struct rtable* rth = NULL; |
1824 | int err; | 1854 | int err; |
@@ -1836,7 +1866,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, | |||
1836 | 1866 | ||
1837 | /* put it into the cache */ | 1867 | /* put it into the cache */ |
1838 | hash = rt_hash(daddr, saddr, fl->iif); | 1868 | hash = rt_hash(daddr, saddr, fl->iif); |
1839 | return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); | 1869 | return rt_intern_hash(hash, rth, &skb->rtable); |
1840 | } | 1870 | } |
1841 | 1871 | ||
1842 | /* | 1872 | /* |
@@ -1869,7 +1899,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1869 | __be32 spec_dst; | 1899 | __be32 spec_dst; |
1870 | int err = -EINVAL; | 1900 | int err = -EINVAL; |
1871 | int free_res = 0; | 1901 | int free_res = 0; |
1872 | struct net * net = dev->nd_net; | 1902 | struct net * net = dev_net(dev); |
1873 | 1903 | ||
1874 | /* IP on this device is disabled. */ | 1904 | /* IP on this device is disabled. */ |
1875 | 1905 | ||
@@ -1992,7 +2022,7 @@ local_input: | |||
1992 | } | 2022 | } |
1993 | rth->rt_type = res.type; | 2023 | rth->rt_type = res.type; |
1994 | hash = rt_hash(daddr, saddr, fl.iif); | 2024 | hash = rt_hash(daddr, saddr, fl.iif); |
1995 | err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); | 2025 | err = rt_intern_hash(hash, rth, &skb->rtable); |
1996 | goto done; | 2026 | goto done; |
1997 | 2027 | ||
1998 | no_route: | 2028 | no_route: |
@@ -2010,8 +2040,8 @@ martian_destination: | |||
2010 | RT_CACHE_STAT_INC(in_martian_dst); | 2040 | RT_CACHE_STAT_INC(in_martian_dst); |
2011 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 2041 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
2012 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) | 2042 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) |
2013 | printk(KERN_WARNING "martian destination %u.%u.%u.%u from " | 2043 | printk(KERN_WARNING "martian destination " NIPQUAD_FMT " from " |
2014 | "%u.%u.%u.%u, dev %s\n", | 2044 | NIPQUAD_FMT ", dev %s\n", |
2015 | NIPQUAD(daddr), NIPQUAD(saddr), dev->name); | 2045 | NIPQUAD(daddr), NIPQUAD(saddr), dev->name); |
2016 | #endif | 2046 | #endif |
2017 | 2047 | ||
@@ -2040,25 +2070,25 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2040 | int iif = dev->ifindex; | 2070 | int iif = dev->ifindex; |
2041 | struct net *net; | 2071 | struct net *net; |
2042 | 2072 | ||
2043 | net = dev->nd_net; | 2073 | net = dev_net(dev); |
2044 | tos &= IPTOS_RT_MASK; | 2074 | tos &= IPTOS_RT_MASK; |
2045 | hash = rt_hash(daddr, saddr, iif); | 2075 | hash = rt_hash(daddr, saddr, iif); |
2046 | 2076 | ||
2047 | rcu_read_lock(); | 2077 | rcu_read_lock(); |
2048 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 2078 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; |
2049 | rth = rcu_dereference(rth->u.dst.rt_next)) { | 2079 | rth = rcu_dereference(rth->u.dst.rt_next)) { |
2050 | if (rth->fl.fl4_dst == daddr && | 2080 | if (((rth->fl.fl4_dst ^ daddr) | |
2051 | rth->fl.fl4_src == saddr && | 2081 | (rth->fl.fl4_src ^ saddr) | |
2052 | rth->fl.iif == iif && | 2082 | (rth->fl.iif ^ iif) | |
2053 | rth->fl.oif == 0 && | 2083 | rth->fl.oif | |
2084 | (rth->fl.fl4_tos ^ tos)) == 0 && | ||
2054 | rth->fl.mark == skb->mark && | 2085 | rth->fl.mark == skb->mark && |
2055 | rth->fl.fl4_tos == tos && | 2086 | net_eq(dev_net(rth->u.dst.dev), net) && |
2056 | rth->u.dst.dev->nd_net == net && | ||
2057 | rth->rt_genid == atomic_read(&rt_genid)) { | 2087 | rth->rt_genid == atomic_read(&rt_genid)) { |
2058 | dst_use(&rth->u.dst, jiffies); | 2088 | dst_use(&rth->u.dst, jiffies); |
2059 | RT_CACHE_STAT_INC(in_hit); | 2089 | RT_CACHE_STAT_INC(in_hit); |
2060 | rcu_read_unlock(); | 2090 | rcu_read_unlock(); |
2061 | skb->dst = (struct dst_entry*)rth; | 2091 | skb->rtable = rth; |
2062 | return 0; | 2092 | return 0; |
2063 | } | 2093 | } |
2064 | RT_CACHE_STAT_INC(in_hlist_search); | 2094 | RT_CACHE_STAT_INC(in_hlist_search); |
@@ -2100,12 +2130,12 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2100 | return ip_route_input_slow(skb, daddr, saddr, tos, dev); | 2130 | return ip_route_input_slow(skb, daddr, saddr, tos, dev); |
2101 | } | 2131 | } |
2102 | 2132 | ||
2103 | static inline int __mkroute_output(struct rtable **result, | 2133 | static int __mkroute_output(struct rtable **result, |
2104 | struct fib_result* res, | 2134 | struct fib_result *res, |
2105 | const struct flowi *fl, | 2135 | const struct flowi *fl, |
2106 | const struct flowi *oldflp, | 2136 | const struct flowi *oldflp, |
2107 | struct net_device *dev_out, | 2137 | struct net_device *dev_out, |
2108 | unsigned flags) | 2138 | unsigned flags) |
2109 | { | 2139 | { |
2110 | struct rtable *rth; | 2140 | struct rtable *rth; |
2111 | struct in_device *in_dev; | 2141 | struct in_device *in_dev; |
@@ -2220,12 +2250,12 @@ static inline int __mkroute_output(struct rtable **result, | |||
2220 | return err; | 2250 | return err; |
2221 | } | 2251 | } |
2222 | 2252 | ||
2223 | static inline int ip_mkroute_output(struct rtable **rp, | 2253 | static int ip_mkroute_output(struct rtable **rp, |
2224 | struct fib_result* res, | 2254 | struct fib_result *res, |
2225 | const struct flowi *fl, | 2255 | const struct flowi *fl, |
2226 | const struct flowi *oldflp, | 2256 | const struct flowi *oldflp, |
2227 | struct net_device *dev_out, | 2257 | struct net_device *dev_out, |
2228 | unsigned flags) | 2258 | unsigned flags) |
2229 | { | 2259 | { |
2230 | struct rtable *rth = NULL; | 2260 | struct rtable *rth = NULL; |
2231 | int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); | 2261 | int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); |
@@ -2455,7 +2485,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
2455 | rth->fl.mark == flp->mark && | 2485 | rth->fl.mark == flp->mark && |
2456 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & | 2486 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & |
2457 | (IPTOS_RT_MASK | RTO_ONLINK)) && | 2487 | (IPTOS_RT_MASK | RTO_ONLINK)) && |
2458 | rth->u.dst.dev->nd_net == net && | 2488 | net_eq(dev_net(rth->u.dst.dev), net) && |
2459 | rth->rt_genid == atomic_read(&rt_genid)) { | 2489 | rth->rt_genid == atomic_read(&rt_genid)) { |
2460 | dst_use(&rth->u.dst, jiffies); | 2490 | dst_use(&rth->u.dst, jiffies); |
2461 | RT_CACHE_STAT_INC(out_hit); | 2491 | RT_CACHE_STAT_INC(out_hit); |
@@ -2487,7 +2517,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { | |||
2487 | }; | 2517 | }; |
2488 | 2518 | ||
2489 | 2519 | ||
2490 | static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk) | 2520 | static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp) |
2491 | { | 2521 | { |
2492 | struct rtable *ort = *rp; | 2522 | struct rtable *ort = *rp; |
2493 | struct rtable *rt = (struct rtable *) | 2523 | struct rtable *rt = (struct rtable *) |
@@ -2547,7 +2577,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, | |||
2547 | err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, | 2577 | err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, |
2548 | flags ? XFRM_LOOKUP_WAIT : 0); | 2578 | flags ? XFRM_LOOKUP_WAIT : 0); |
2549 | if (err == -EREMOTE) | 2579 | if (err == -EREMOTE) |
2550 | err = ipv4_dst_blackhole(rp, flp, sk); | 2580 | err = ipv4_dst_blackhole(rp, flp); |
2551 | 2581 | ||
2552 | return err; | 2582 | return err; |
2553 | } | 2583 | } |
@@ -2565,7 +2595,7 @@ int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) | |||
2565 | static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | 2595 | static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, |
2566 | int nowait, unsigned int flags) | 2596 | int nowait, unsigned int flags) |
2567 | { | 2597 | { |
2568 | struct rtable *rt = (struct rtable*)skb->dst; | 2598 | struct rtable *rt = skb->rtable; |
2569 | struct rtmsg *r; | 2599 | struct rtmsg *r; |
2570 | struct nlmsghdr *nlh; | 2600 | struct nlmsghdr *nlh; |
2571 | long expires; | 2601 | long expires; |
@@ -2658,7 +2688,7 @@ nla_put_failure: | |||
2658 | 2688 | ||
2659 | static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) | 2689 | static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) |
2660 | { | 2690 | { |
2661 | struct net *net = in_skb->sk->sk_net; | 2691 | struct net *net = sock_net(in_skb->sk); |
2662 | struct rtmsg *rtm; | 2692 | struct rtmsg *rtm; |
2663 | struct nlattr *tb[RTA_MAX+1]; | 2693 | struct nlattr *tb[RTA_MAX+1]; |
2664 | struct rtable *rt = NULL; | 2694 | struct rtable *rt = NULL; |
@@ -2668,9 +2698,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2668 | int err; | 2698 | int err; |
2669 | struct sk_buff *skb; | 2699 | struct sk_buff *skb; |
2670 | 2700 | ||
2671 | if (net != &init_net) | ||
2672 | return -EINVAL; | ||
2673 | |||
2674 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); | 2701 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); |
2675 | if (err < 0) | 2702 | if (err < 0) |
2676 | goto errout; | 2703 | goto errout; |
@@ -2700,7 +2727,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2700 | if (iif) { | 2727 | if (iif) { |
2701 | struct net_device *dev; | 2728 | struct net_device *dev; |
2702 | 2729 | ||
2703 | dev = __dev_get_by_index(&init_net, iif); | 2730 | dev = __dev_get_by_index(net, iif); |
2704 | if (dev == NULL) { | 2731 | if (dev == NULL) { |
2705 | err = -ENODEV; | 2732 | err = -ENODEV; |
2706 | goto errout_free; | 2733 | goto errout_free; |
@@ -2712,7 +2739,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2712 | err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); | 2739 | err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); |
2713 | local_bh_enable(); | 2740 | local_bh_enable(); |
2714 | 2741 | ||
2715 | rt = (struct rtable*) skb->dst; | 2742 | rt = skb->rtable; |
2716 | if (err == 0 && rt->u.dst.error) | 2743 | if (err == 0 && rt->u.dst.error) |
2717 | err = -rt->u.dst.error; | 2744 | err = -rt->u.dst.error; |
2718 | } else { | 2745 | } else { |
@@ -2726,22 +2753,22 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2726 | }, | 2753 | }, |
2727 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, | 2754 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, |
2728 | }; | 2755 | }; |
2729 | err = ip_route_output_key(&init_net, &rt, &fl); | 2756 | err = ip_route_output_key(net, &rt, &fl); |
2730 | } | 2757 | } |
2731 | 2758 | ||
2732 | if (err) | 2759 | if (err) |
2733 | goto errout_free; | 2760 | goto errout_free; |
2734 | 2761 | ||
2735 | skb->dst = &rt->u.dst; | 2762 | skb->rtable = rt; |
2736 | if (rtm->rtm_flags & RTM_F_NOTIFY) | 2763 | if (rtm->rtm_flags & RTM_F_NOTIFY) |
2737 | rt->rt_flags |= RTCF_NOTIFY; | 2764 | rt->rt_flags |= RTCF_NOTIFY; |
2738 | 2765 | ||
2739 | err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, | 2766 | err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, |
2740 | RTM_NEWROUTE, 0, 0); | 2767 | RTM_NEWROUTE, 0, 0); |
2741 | if (err <= 0) | 2768 | if (err <= 0) |
2742 | goto errout_free; | 2769 | goto errout_free; |
2743 | 2770 | ||
2744 | err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); | 2771 | err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); |
2745 | errout: | 2772 | errout: |
2746 | return err; | 2773 | return err; |
2747 | 2774 | ||
@@ -2755,6 +2782,9 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
2755 | struct rtable *rt; | 2782 | struct rtable *rt; |
2756 | int h, s_h; | 2783 | int h, s_h; |
2757 | int idx, s_idx; | 2784 | int idx, s_idx; |
2785 | struct net *net; | ||
2786 | |||
2787 | net = sock_net(skb->sk); | ||
2758 | 2788 | ||
2759 | s_h = cb->args[0]; | 2789 | s_h = cb->args[0]; |
2760 | if (s_h < 0) | 2790 | if (s_h < 0) |
@@ -2764,7 +2794,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
2764 | rcu_read_lock_bh(); | 2794 | rcu_read_lock_bh(); |
2765 | for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; | 2795 | for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; |
2766 | rt = rcu_dereference(rt->u.dst.rt_next), idx++) { | 2796 | rt = rcu_dereference(rt->u.dst.rt_next), idx++) { |
2767 | if (idx < s_idx) | 2797 | if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) |
2768 | continue; | 2798 | continue; |
2769 | if (rt->rt_genid != atomic_read(&rt_genid)) | 2799 | if (rt->rt_genid != atomic_read(&rt_genid)) |
2770 | continue; | 2800 | continue; |
@@ -3028,7 +3058,9 @@ int __init ip_rt_init(void) | |||
3028 | devinet_init(); | 3058 | devinet_init(); |
3029 | ip_fib_init(); | 3059 | ip_fib_init(); |
3030 | 3060 | ||
3031 | setup_timer(&rt_secret_timer, rt_secret_rebuild, 0); | 3061 | rt_secret_timer.function = rt_secret_rebuild; |
3062 | rt_secret_timer.data = 0; | ||
3063 | init_timer_deferrable(&rt_secret_timer); | ||
3032 | 3064 | ||
3033 | /* All the timers, started at system startup tend | 3065 | /* All the timers, started at system startup tend |
3034 | to synchronize. Perturb it a bit. | 3066 | to synchronize. Perturb it a bit. |
@@ -3040,7 +3072,7 @@ int __init ip_rt_init(void) | |||
3040 | ip_rt_secret_interval; | 3072 | ip_rt_secret_interval; |
3041 | add_timer(&rt_secret_timer); | 3073 | add_timer(&rt_secret_timer); |
3042 | 3074 | ||
3043 | if (ip_rt_proc_init(&init_net)) | 3075 | if (ip_rt_proc_init()) |
3044 | printk(KERN_ERR "Unable to create route proc files\n"); | 3076 | printk(KERN_ERR "Unable to create route proc files\n"); |
3045 | #ifdef CONFIG_XFRM | 3077 | #ifdef CONFIG_XFRM |
3046 | xfrm_init(); | 3078 | xfrm_init(); |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f470fe4511db..73ba98921d64 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -10,8 +10,6 @@ | |||
10 | * 2 of the License, or (at your option) any later version. | 10 | * 2 of the License, or (at your option) any later version. |
11 | * | 11 | * |
12 | * $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $ | 12 | * $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $ |
13 | * | ||
14 | * Missing: IPv6 support. | ||
15 | */ | 13 | */ |
16 | 14 | ||
17 | #include <linux/tcp.h> | 15 | #include <linux/tcp.h> |
@@ -21,26 +19,33 @@ | |||
21 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
22 | #include <net/tcp.h> | 20 | #include <net/tcp.h> |
23 | 21 | ||
22 | /* Timestamps: lowest 9 bits store TCP options */ | ||
23 | #define TSBITS 9 | ||
24 | #define TSMASK (((__u32)1 << TSBITS) - 1) | ||
25 | |||
24 | extern int sysctl_tcp_syncookies; | 26 | extern int sysctl_tcp_syncookies; |
25 | 27 | ||
26 | static __u32 syncookie_secret[2][16-3+SHA_DIGEST_WORDS]; | 28 | __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; |
29 | EXPORT_SYMBOL(syncookie_secret); | ||
27 | 30 | ||
28 | static __init int init_syncookies(void) | 31 | static __init int init_syncookies(void) |
29 | { | 32 | { |
30 | get_random_bytes(syncookie_secret, sizeof(syncookie_secret)); | 33 | get_random_bytes(syncookie_secret, sizeof(syncookie_secret)); |
31 | return 0; | 34 | return 0; |
32 | } | 35 | } |
33 | module_init(init_syncookies); | 36 | __initcall(init_syncookies); |
34 | 37 | ||
35 | #define COOKIEBITS 24 /* Upper bits store count */ | 38 | #define COOKIEBITS 24 /* Upper bits store count */ |
36 | #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) | 39 | #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) |
37 | 40 | ||
41 | static DEFINE_PER_CPU(__u32, cookie_scratch)[16 + 5 + SHA_WORKSPACE_WORDS]; | ||
42 | |||
38 | static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, | 43 | static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, |
39 | u32 count, int c) | 44 | u32 count, int c) |
40 | { | 45 | { |
41 | __u32 tmp[16 + 5 + SHA_WORKSPACE_WORDS]; | 46 | __u32 *tmp = __get_cpu_var(cookie_scratch); |
42 | 47 | ||
43 | memcpy(tmp + 3, syncookie_secret[c], sizeof(syncookie_secret[c])); | 48 | memcpy(tmp + 4, syncookie_secret[c], sizeof(syncookie_secret[c])); |
44 | tmp[0] = (__force u32)saddr; | 49 | tmp[0] = (__force u32)saddr; |
45 | tmp[1] = (__force u32)daddr; | 50 | tmp[1] = (__force u32)daddr; |
46 | tmp[2] = ((__force u32)sport << 16) + (__force u32)dport; | 51 | tmp[2] = ((__force u32)sport << 16) + (__force u32)dport; |
@@ -50,6 +55,39 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, | |||
50 | return tmp[17]; | 55 | return tmp[17]; |
51 | } | 56 | } |
52 | 57 | ||
58 | |||
59 | /* | ||
60 | * when syncookies are in effect and tcp timestamps are enabled we encode | ||
61 | * tcp options in the lowest 9 bits of the timestamp value that will be | ||
62 | * sent in the syn-ack. | ||
63 | * Since subsequent timestamps use the normal tcp_time_stamp value, we | ||
64 | * must make sure that the resulting initial timestamp is <= tcp_time_stamp. | ||
65 | */ | ||
66 | __u32 cookie_init_timestamp(struct request_sock *req) | ||
67 | { | ||
68 | struct inet_request_sock *ireq; | ||
69 | u32 ts, ts_now = tcp_time_stamp; | ||
70 | u32 options = 0; | ||
71 | |||
72 | ireq = inet_rsk(req); | ||
73 | if (ireq->wscale_ok) { | ||
74 | options = ireq->snd_wscale; | ||
75 | options |= ireq->rcv_wscale << 4; | ||
76 | } | ||
77 | options |= ireq->sack_ok << 8; | ||
78 | |||
79 | ts = ts_now & ~TSMASK; | ||
80 | ts |= options; | ||
81 | if (ts > ts_now) { | ||
82 | ts >>= TSBITS; | ||
83 | ts--; | ||
84 | ts <<= TSBITS; | ||
85 | ts |= options; | ||
86 | } | ||
87 | return ts; | ||
88 | } | ||
89 | |||
90 | |||
53 | static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport, | 91 | static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport, |
54 | __be16 dport, __u32 sseq, __u32 count, | 92 | __be16 dport, __u32 sseq, __u32 count, |
55 | __u32 data) | 93 | __u32 data) |
@@ -184,6 +222,35 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, | |||
184 | return child; | 222 | return child; |
185 | } | 223 | } |
186 | 224 | ||
225 | |||
226 | /* | ||
227 | * when syncookies are in effect and tcp timestamps are enabled we stored | ||
228 | * additional tcp options in the timestamp. | ||
229 | * This extracts these options from the timestamp echo. | ||
230 | * | ||
231 | * The lowest 4 bits are for snd_wscale | ||
232 | * The next 4 lsb are for rcv_wscale | ||
233 | * The next lsb is for sack_ok | ||
234 | */ | ||
235 | void cookie_check_timestamp(struct tcp_options_received *tcp_opt) | ||
236 | { | ||
237 | /* echoed timestamp, 9 lowest bits contain options */ | ||
238 | u32 options = tcp_opt->rcv_tsecr & TSMASK; | ||
239 | |||
240 | tcp_opt->snd_wscale = options & 0xf; | ||
241 | options >>= 4; | ||
242 | tcp_opt->rcv_wscale = options & 0xf; | ||
243 | |||
244 | tcp_opt->sack_ok = (options >> 4) & 0x1; | ||
245 | |||
246 | if (tcp_opt->sack_ok) | ||
247 | tcp_sack_reset(tcp_opt); | ||
248 | |||
249 | if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale) | ||
250 | tcp_opt->wscale_ok = 1; | ||
251 | } | ||
252 | EXPORT_SYMBOL(cookie_check_timestamp); | ||
253 | |||
187 | struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | 254 | struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, |
188 | struct ip_options *opt) | 255 | struct ip_options *opt) |
189 | { | 256 | { |
@@ -197,6 +264,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
197 | int mss; | 264 | int mss; |
198 | struct rtable *rt; | 265 | struct rtable *rt; |
199 | __u8 rcv_wscale; | 266 | __u8 rcv_wscale; |
267 | struct tcp_options_received tcp_opt; | ||
200 | 268 | ||
201 | if (!sysctl_tcp_syncookies || !th->ack) | 269 | if (!sysctl_tcp_syncookies || !th->ack) |
202 | goto out; | 270 | goto out; |
@@ -209,6 +277,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
209 | 277 | ||
210 | NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV); | 278 | NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV); |
211 | 279 | ||
280 | /* check for timestamp cookie support */ | ||
281 | memset(&tcp_opt, 0, sizeof(tcp_opt)); | ||
282 | tcp_parse_options(skb, &tcp_opt, 0); | ||
283 | |||
284 | if (tcp_opt.saw_tstamp) | ||
285 | cookie_check_timestamp(&tcp_opt); | ||
286 | |||
212 | ret = NULL; | 287 | ret = NULL; |
213 | req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */ | 288 | req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */ |
214 | if (!req) | 289 | if (!req) |
@@ -227,6 +302,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
227 | ireq->loc_addr = ip_hdr(skb)->daddr; | 302 | ireq->loc_addr = ip_hdr(skb)->daddr; |
228 | ireq->rmt_addr = ip_hdr(skb)->saddr; | 303 | ireq->rmt_addr = ip_hdr(skb)->saddr; |
229 | ireq->opt = NULL; | 304 | ireq->opt = NULL; |
305 | ireq->snd_wscale = tcp_opt.snd_wscale; | ||
306 | ireq->rcv_wscale = tcp_opt.rcv_wscale; | ||
307 | ireq->sack_ok = tcp_opt.sack_ok; | ||
308 | ireq->wscale_ok = tcp_opt.wscale_ok; | ||
309 | ireq->tstamp_ok = tcp_opt.saw_tstamp; | ||
310 | req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; | ||
230 | 311 | ||
231 | /* We throwed the options of the initial SYN away, so we hope | 312 | /* We throwed the options of the initial SYN away, so we hope |
232 | * the ACK carries the same options again (see RFC1122 4.2.3.8) | 313 | * the ACK carries the same options again (see RFC1122 4.2.3.8) |
@@ -241,8 +322,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
241 | } | 322 | } |
242 | } | 323 | } |
243 | 324 | ||
244 | ireq->snd_wscale = ireq->rcv_wscale = ireq->tstamp_ok = 0; | ||
245 | ireq->wscale_ok = ireq->sack_ok = 0; | ||
246 | req->expires = 0UL; | 325 | req->expires = 0UL; |
247 | req->retrans = 0; | 326 | req->retrans = 0; |
248 | 327 | ||
@@ -271,11 +350,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
271 | } | 350 | } |
272 | 351 | ||
273 | /* Try to redo what tcp_v4_send_synack did. */ | 352 | /* Try to redo what tcp_v4_send_synack did. */ |
274 | req->window_clamp = dst_metric(&rt->u.dst, RTAX_WINDOW); | 353 | req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); |
354 | |||
275 | tcp_select_initial_window(tcp_full_space(sk), req->mss, | 355 | tcp_select_initial_window(tcp_full_space(sk), req->mss, |
276 | &req->rcv_wnd, &req->window_clamp, | 356 | &req->rcv_wnd, &req->window_clamp, |
277 | 0, &rcv_wscale); | 357 | ireq->wscale_ok, &rcv_wscale); |
278 | /* BTW win scale with syncookies is 0 by definition */ | 358 | |
279 | ireq->rcv_wscale = rcv_wscale; | 359 | ireq->rcv_wscale = rcv_wscale; |
280 | 360 | ||
281 | ret = get_cookie_sock(sk, skb, req, &rt->u.dst); | 361 | ret = get_cookie_sock(sk, skb, req, &rt->u.dst); |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 88286f35d1e2..c437f804ee38 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -404,38 +404,6 @@ static struct ctl_table ipv4_table[] = { | |||
404 | .strategy = &ipv4_sysctl_local_port_range, | 404 | .strategy = &ipv4_sysctl_local_port_range, |
405 | }, | 405 | }, |
406 | { | 406 | { |
407 | .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL, | ||
408 | .procname = "icmp_echo_ignore_all", | ||
409 | .data = &sysctl_icmp_echo_ignore_all, | ||
410 | .maxlen = sizeof(int), | ||
411 | .mode = 0644, | ||
412 | .proc_handler = &proc_dointvec | ||
413 | }, | ||
414 | { | ||
415 | .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, | ||
416 | .procname = "icmp_echo_ignore_broadcasts", | ||
417 | .data = &sysctl_icmp_echo_ignore_broadcasts, | ||
418 | .maxlen = sizeof(int), | ||
419 | .mode = 0644, | ||
420 | .proc_handler = &proc_dointvec | ||
421 | }, | ||
422 | { | ||
423 | .ctl_name = NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, | ||
424 | .procname = "icmp_ignore_bogus_error_responses", | ||
425 | .data = &sysctl_icmp_ignore_bogus_error_responses, | ||
426 | .maxlen = sizeof(int), | ||
427 | .mode = 0644, | ||
428 | .proc_handler = &proc_dointvec | ||
429 | }, | ||
430 | { | ||
431 | .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR, | ||
432 | .procname = "icmp_errors_use_inbound_ifaddr", | ||
433 | .data = &sysctl_icmp_errors_use_inbound_ifaddr, | ||
434 | .maxlen = sizeof(int), | ||
435 | .mode = 0644, | ||
436 | .proc_handler = &proc_dointvec | ||
437 | }, | ||
438 | { | ||
439 | .ctl_name = NET_IPV4_ROUTE, | 407 | .ctl_name = NET_IPV4_ROUTE, |
440 | .procname = "route", | 408 | .procname = "route", |
441 | .maxlen = 0, | 409 | .maxlen = 0, |
@@ -586,22 +554,6 @@ static struct ctl_table ipv4_table[] = { | |||
586 | .proc_handler = &proc_dointvec | 554 | .proc_handler = &proc_dointvec |
587 | }, | 555 | }, |
588 | { | 556 | { |
589 | .ctl_name = NET_IPV4_ICMP_RATELIMIT, | ||
590 | .procname = "icmp_ratelimit", | ||
591 | .data = &sysctl_icmp_ratelimit, | ||
592 | .maxlen = sizeof(int), | ||
593 | .mode = 0644, | ||
594 | .proc_handler = &proc_dointvec | ||
595 | }, | ||
596 | { | ||
597 | .ctl_name = NET_IPV4_ICMP_RATEMASK, | ||
598 | .procname = "icmp_ratemask", | ||
599 | .data = &sysctl_icmp_ratemask, | ||
600 | .maxlen = sizeof(int), | ||
601 | .mode = 0644, | ||
602 | .proc_handler = &proc_dointvec | ||
603 | }, | ||
604 | { | ||
605 | .ctl_name = NET_TCP_TW_REUSE, | 557 | .ctl_name = NET_TCP_TW_REUSE, |
606 | .procname = "tcp_tw_reuse", | 558 | .procname = "tcp_tw_reuse", |
607 | .data = &sysctl_tcp_tw_reuse, | 559 | .data = &sysctl_tcp_tw_reuse, |
@@ -804,6 +756,58 @@ static struct ctl_table ipv4_table[] = { | |||
804 | { .ctl_name = 0 } | 756 | { .ctl_name = 0 } |
805 | }; | 757 | }; |
806 | 758 | ||
759 | static struct ctl_table ipv4_net_table[] = { | ||
760 | { | ||
761 | .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL, | ||
762 | .procname = "icmp_echo_ignore_all", | ||
763 | .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all, | ||
764 | .maxlen = sizeof(int), | ||
765 | .mode = 0644, | ||
766 | .proc_handler = &proc_dointvec | ||
767 | }, | ||
768 | { | ||
769 | .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, | ||
770 | .procname = "icmp_echo_ignore_broadcasts", | ||
771 | .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts, | ||
772 | .maxlen = sizeof(int), | ||
773 | .mode = 0644, | ||
774 | .proc_handler = &proc_dointvec | ||
775 | }, | ||
776 | { | ||
777 | .ctl_name = NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, | ||
778 | .procname = "icmp_ignore_bogus_error_responses", | ||
779 | .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, | ||
780 | .maxlen = sizeof(int), | ||
781 | .mode = 0644, | ||
782 | .proc_handler = &proc_dointvec | ||
783 | }, | ||
784 | { | ||
785 | .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR, | ||
786 | .procname = "icmp_errors_use_inbound_ifaddr", | ||
787 | .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr, | ||
788 | .maxlen = sizeof(int), | ||
789 | .mode = 0644, | ||
790 | .proc_handler = &proc_dointvec | ||
791 | }, | ||
792 | { | ||
793 | .ctl_name = NET_IPV4_ICMP_RATELIMIT, | ||
794 | .procname = "icmp_ratelimit", | ||
795 | .data = &init_net.ipv4.sysctl_icmp_ratelimit, | ||
796 | .maxlen = sizeof(int), | ||
797 | .mode = 0644, | ||
798 | .proc_handler = &proc_dointvec | ||
799 | }, | ||
800 | { | ||
801 | .ctl_name = NET_IPV4_ICMP_RATEMASK, | ||
802 | .procname = "icmp_ratemask", | ||
803 | .data = &init_net.ipv4.sysctl_icmp_ratemask, | ||
804 | .maxlen = sizeof(int), | ||
805 | .mode = 0644, | ||
806 | .proc_handler = &proc_dointvec | ||
807 | }, | ||
808 | { } | ||
809 | }; | ||
810 | |||
807 | struct ctl_path net_ipv4_ctl_path[] = { | 811 | struct ctl_path net_ipv4_ctl_path[] = { |
808 | { .procname = "net", .ctl_name = CTL_NET, }, | 812 | { .procname = "net", .ctl_name = CTL_NET, }, |
809 | { .procname = "ipv4", .ctl_name = NET_IPV4, }, | 813 | { .procname = "ipv4", .ctl_name = NET_IPV4, }, |
@@ -811,12 +815,72 @@ struct ctl_path net_ipv4_ctl_path[] = { | |||
811 | }; | 815 | }; |
812 | EXPORT_SYMBOL_GPL(net_ipv4_ctl_path); | 816 | EXPORT_SYMBOL_GPL(net_ipv4_ctl_path); |
813 | 817 | ||
818 | static __net_init int ipv4_sysctl_init_net(struct net *net) | ||
819 | { | ||
820 | struct ctl_table *table; | ||
821 | |||
822 | table = ipv4_net_table; | ||
823 | if (net != &init_net) { | ||
824 | table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL); | ||
825 | if (table == NULL) | ||
826 | goto err_alloc; | ||
827 | |||
828 | table[0].data = | ||
829 | &net->ipv4.sysctl_icmp_echo_ignore_all; | ||
830 | table[1].data = | ||
831 | &net->ipv4.sysctl_icmp_echo_ignore_broadcasts; | ||
832 | table[2].data = | ||
833 | &net->ipv4.sysctl_icmp_ignore_bogus_error_responses; | ||
834 | table[3].data = | ||
835 | &net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr; | ||
836 | table[4].data = | ||
837 | &net->ipv4.sysctl_icmp_ratelimit; | ||
838 | table[5].data = | ||
839 | &net->ipv4.sysctl_icmp_ratemask; | ||
840 | } | ||
841 | |||
842 | net->ipv4.ipv4_hdr = register_net_sysctl_table(net, | ||
843 | net_ipv4_ctl_path, table); | ||
844 | if (net->ipv4.ipv4_hdr == NULL) | ||
845 | goto err_reg; | ||
846 | |||
847 | return 0; | ||
848 | |||
849 | err_reg: | ||
850 | if (net != &init_net) | ||
851 | kfree(table); | ||
852 | err_alloc: | ||
853 | return -ENOMEM; | ||
854 | } | ||
855 | |||
856 | static __net_exit void ipv4_sysctl_exit_net(struct net *net) | ||
857 | { | ||
858 | struct ctl_table *table; | ||
859 | |||
860 | table = net->ipv4.ipv4_hdr->ctl_table_arg; | ||
861 | unregister_net_sysctl_table(net->ipv4.ipv4_hdr); | ||
862 | kfree(table); | ||
863 | } | ||
864 | |||
865 | static __net_initdata struct pernet_operations ipv4_sysctl_ops = { | ||
866 | .init = ipv4_sysctl_init_net, | ||
867 | .exit = ipv4_sysctl_exit_net, | ||
868 | }; | ||
869 | |||
814 | static __init int sysctl_ipv4_init(void) | 870 | static __init int sysctl_ipv4_init(void) |
815 | { | 871 | { |
816 | struct ctl_table_header *hdr; | 872 | struct ctl_table_header *hdr; |
817 | 873 | ||
818 | hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table); | 874 | hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table); |
819 | return hdr == NULL ? -ENOMEM : 0; | 875 | if (hdr == NULL) |
876 | return -ENOMEM; | ||
877 | |||
878 | if (register_pernet_subsys(&ipv4_sysctl_ops)) { | ||
879 | unregister_sysctl_table(hdr); | ||
880 | return -ENOMEM; | ||
881 | } | ||
882 | |||
883 | return 0; | ||
820 | } | 884 | } |
821 | 885 | ||
822 | __initcall(sysctl_ipv4_init); | 886 | __initcall(sysctl_ipv4_init); |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 39b629ac2404..58ac838bf460 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -2105,15 +2105,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2105 | break; | 2105 | break; |
2106 | 2106 | ||
2107 | case TCP_DEFER_ACCEPT: | 2107 | case TCP_DEFER_ACCEPT: |
2108 | icsk->icsk_accept_queue.rskq_defer_accept = 0; | 2108 | if (val < 0) { |
2109 | if (val > 0) { | 2109 | err = -EINVAL; |
2110 | /* Translate value in seconds to number of | 2110 | } else { |
2111 | * retransmits */ | 2111 | if (val > MAX_TCP_ACCEPT_DEFERRED) |
2112 | while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && | 2112 | val = MAX_TCP_ACCEPT_DEFERRED; |
2113 | val > ((TCP_TIMEOUT_INIT / HZ) << | 2113 | icsk->icsk_accept_queue.rskq_defer_accept = val; |
2114 | icsk->icsk_accept_queue.rskq_defer_accept)) | ||
2115 | icsk->icsk_accept_queue.rskq_defer_accept++; | ||
2116 | icsk->icsk_accept_queue.rskq_defer_accept++; | ||
2117 | } | 2114 | } |
2118 | break; | 2115 | break; |
2119 | 2116 | ||
@@ -2295,8 +2292,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2295 | val = (val ? : sysctl_tcp_fin_timeout) / HZ; | 2292 | val = (val ? : sysctl_tcp_fin_timeout) / HZ; |
2296 | break; | 2293 | break; |
2297 | case TCP_DEFER_ACCEPT: | 2294 | case TCP_DEFER_ACCEPT: |
2298 | val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 : | 2295 | val = icsk->icsk_accept_queue.rskq_defer_accept; |
2299 | ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1)); | ||
2300 | break; | 2296 | break; |
2301 | case TCP_WINDOW_CLAMP: | 2297 | case TCP_WINDOW_CLAMP: |
2302 | val = tp->window_clamp; | 2298 | val = tp->window_clamp; |
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 3aa0b23c1ea0..eb5b9854c8c7 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
@@ -1,12 +1,13 @@ | |||
1 | /* | 1 | /* |
2 | * TCP CUBIC: Binary Increase Congestion control for TCP v2.1 | 2 | * TCP CUBIC: Binary Increase Congestion control for TCP v2.2 |
3 | * | 3 | * Home page: |
4 | * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC | ||
4 | * This is from the implementation of CUBIC TCP in | 5 | * This is from the implementation of CUBIC TCP in |
5 | * Injong Rhee, Lisong Xu. | 6 | * Injong Rhee, Lisong Xu. |
6 | * "CUBIC: A New TCP-Friendly High-Speed TCP Variant | 7 | * "CUBIC: A New TCP-Friendly High-Speed TCP Variant |
7 | * in PFLDnet 2005 | 8 | * in PFLDnet 2005 |
8 | * Available from: | 9 | * Available from: |
9 | * http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf | 10 | * http://netsrv.csc.ncsu.edu/export/cubic-paper.pdf |
10 | * | 11 | * |
11 | * Unless CUBIC is enabled and congestion window is large | 12 | * Unless CUBIC is enabled and congestion window is large |
12 | * this behaves the same as the original Reno. | 13 | * this behaves the same as the original Reno. |
@@ -20,15 +21,10 @@ | |||
20 | #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation | 21 | #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation |
21 | * max_cwnd = snd_cwnd * beta | 22 | * max_cwnd = snd_cwnd * beta |
22 | */ | 23 | */ |
23 | #define BICTCP_B 4 /* | ||
24 | * In binary search, | ||
25 | * go to point (max+min)/N | ||
26 | */ | ||
27 | #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ | 24 | #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ |
28 | 25 | ||
29 | static int fast_convergence __read_mostly = 1; | 26 | static int fast_convergence __read_mostly = 1; |
30 | static int max_increment __read_mostly = 16; | 27 | static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ |
31 | static int beta __read_mostly = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */ | ||
32 | static int initial_ssthresh __read_mostly; | 28 | static int initial_ssthresh __read_mostly; |
33 | static int bic_scale __read_mostly = 41; | 29 | static int bic_scale __read_mostly = 41; |
34 | static int tcp_friendliness __read_mostly = 1; | 30 | static int tcp_friendliness __read_mostly = 1; |
@@ -40,9 +36,7 @@ static u64 cube_factor __read_mostly; | |||
40 | /* Note parameters that are used for precomputing scale factors are read-only */ | 36 | /* Note parameters that are used for precomputing scale factors are read-only */ |
41 | module_param(fast_convergence, int, 0644); | 37 | module_param(fast_convergence, int, 0644); |
42 | MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence"); | 38 | MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence"); |
43 | module_param(max_increment, int, 0644); | 39 | module_param(beta, int, 0644); |
44 | MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search"); | ||
45 | module_param(beta, int, 0444); | ||
46 | MODULE_PARM_DESC(beta, "beta for multiplicative increase"); | 40 | MODULE_PARM_DESC(beta, "beta for multiplicative increase"); |
47 | module_param(initial_ssthresh, int, 0644); | 41 | module_param(initial_ssthresh, int, 0644); |
48 | MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); | 42 | MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); |
@@ -145,7 +139,7 @@ static u32 cubic_root(u64 a) | |||
145 | static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | 139 | static inline void bictcp_update(struct bictcp *ca, u32 cwnd) |
146 | { | 140 | { |
147 | u64 offs; | 141 | u64 offs; |
148 | u32 delta, t, bic_target, min_cnt, max_cnt; | 142 | u32 delta, t, bic_target, max_cnt; |
149 | 143 | ||
150 | ca->ack_cnt++; /* count the number of ACKs */ | 144 | ca->ack_cnt++; /* count the number of ACKs */ |
151 | 145 | ||
@@ -211,19 +205,6 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
211 | ca->cnt = 100 * cwnd; /* very small increment*/ | 205 | ca->cnt = 100 * cwnd; /* very small increment*/ |
212 | } | 206 | } |
213 | 207 | ||
214 | if (ca->delay_min > 0) { | ||
215 | /* max increment = Smax * rtt / 0.1 */ | ||
216 | min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min); | ||
217 | |||
218 | /* use concave growth when the target is above the origin */ | ||
219 | if (ca->cnt < min_cnt && t >= ca->bic_K) | ||
220 | ca->cnt = min_cnt; | ||
221 | } | ||
222 | |||
223 | /* slow start and low utilization */ | ||
224 | if (ca->loss_cwnd == 0) /* could be aggressive in slow start */ | ||
225 | ca->cnt = 50; | ||
226 | |||
227 | /* TCP Friendly */ | 208 | /* TCP Friendly */ |
228 | if (tcp_friendliness) { | 209 | if (tcp_friendliness) { |
229 | u32 scale = beta_scale; | 210 | u32 scale = beta_scale; |
@@ -391,4 +372,4 @@ module_exit(cubictcp_unregister); | |||
391 | MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); | 372 | MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); |
392 | MODULE_LICENSE("GPL"); | 373 | MODULE_LICENSE("GPL"); |
393 | MODULE_DESCRIPTION("CUBIC TCP"); | 374 | MODULE_DESCRIPTION("CUBIC TCP"); |
394 | MODULE_VERSION("2.1"); | 375 | MODULE_VERSION("2.2"); |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bbb7d88a16b4..cdc051bfdb4d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -2309,12 +2309,25 @@ static void DBGUNDO(struct sock *sk, const char *msg) | |||
2309 | struct tcp_sock *tp = tcp_sk(sk); | 2309 | struct tcp_sock *tp = tcp_sk(sk); |
2310 | struct inet_sock *inet = inet_sk(sk); | 2310 | struct inet_sock *inet = inet_sk(sk); |
2311 | 2311 | ||
2312 | printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n", | 2312 | if (sk->sk_family == AF_INET) { |
2313 | msg, | 2313 | printk(KERN_DEBUG "Undo %s " NIPQUAD_FMT "/%u c%u l%u ss%u/%u p%u\n", |
2314 | NIPQUAD(inet->daddr), ntohs(inet->dport), | 2314 | msg, |
2315 | tp->snd_cwnd, tcp_left_out(tp), | 2315 | NIPQUAD(inet->daddr), ntohs(inet->dport), |
2316 | tp->snd_ssthresh, tp->prior_ssthresh, | 2316 | tp->snd_cwnd, tcp_left_out(tp), |
2317 | tp->packets_out); | 2317 | tp->snd_ssthresh, tp->prior_ssthresh, |
2318 | tp->packets_out); | ||
2319 | } | ||
2320 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
2321 | else if (sk->sk_family == AF_INET6) { | ||
2322 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
2323 | printk(KERN_DEBUG "Undo %s " NIP6_FMT "/%u c%u l%u ss%u/%u p%u\n", | ||
2324 | msg, | ||
2325 | NIP6(np->daddr), ntohs(inet->dport), | ||
2326 | tp->snd_cwnd, tcp_left_out(tp), | ||
2327 | tp->snd_ssthresh, tp->prior_ssthresh, | ||
2328 | tp->packets_out); | ||
2329 | } | ||
2330 | #endif | ||
2318 | } | 2331 | } |
2319 | #else | 2332 | #else |
2320 | #define DBGUNDO(x...) do { } while (0) | 2333 | #define DBGUNDO(x...) do { } while (0) |
@@ -3592,7 +3605,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) | |||
3592 | * cases we should never reach this piece of code. | 3605 | * cases we should never reach this piece of code. |
3593 | */ | 3606 | */ |
3594 | printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n", | 3607 | printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n", |
3595 | __FUNCTION__, sk->sk_state); | 3608 | __func__, sk->sk_state); |
3596 | break; | 3609 | break; |
3597 | } | 3610 | } |
3598 | 3611 | ||
@@ -4012,7 +4025,7 @@ drop: | |||
4012 | u32 end_seq = TCP_SKB_CB(skb)->end_seq; | 4025 | u32 end_seq = TCP_SKB_CB(skb)->end_seq; |
4013 | 4026 | ||
4014 | if (seq == TCP_SKB_CB(skb1)->end_seq) { | 4027 | if (seq == TCP_SKB_CB(skb1)->end_seq) { |
4015 | __skb_append(skb1, skb, &tp->out_of_order_queue); | 4028 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); |
4016 | 4029 | ||
4017 | if (!tp->rx_opt.num_sacks || | 4030 | if (!tp->rx_opt.num_sacks || |
4018 | tp->selective_acks[0].end_seq != seq) | 4031 | tp->selective_acks[0].end_seq != seq) |
@@ -4508,6 +4521,49 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th) | |||
4508 | } | 4521 | } |
4509 | } | 4522 | } |
4510 | 4523 | ||
4524 | static int tcp_defer_accept_check(struct sock *sk) | ||
4525 | { | ||
4526 | struct tcp_sock *tp = tcp_sk(sk); | ||
4527 | |||
4528 | if (tp->defer_tcp_accept.request) { | ||
4529 | int queued_data = tp->rcv_nxt - tp->copied_seq; | ||
4530 | int hasfin = !skb_queue_empty(&sk->sk_receive_queue) ? | ||
4531 | tcp_hdr((struct sk_buff *) | ||
4532 | sk->sk_receive_queue.prev)->fin : 0; | ||
4533 | |||
4534 | if (queued_data && hasfin) | ||
4535 | queued_data--; | ||
4536 | |||
4537 | if (queued_data && | ||
4538 | tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) { | ||
4539 | if (sock_flag(sk, SOCK_KEEPOPEN)) { | ||
4540 | inet_csk_reset_keepalive_timer(sk, | ||
4541 | keepalive_time_when(tp)); | ||
4542 | } else { | ||
4543 | inet_csk_delete_keepalive_timer(sk); | ||
4544 | } | ||
4545 | |||
4546 | inet_csk_reqsk_queue_add( | ||
4547 | tp->defer_tcp_accept.listen_sk, | ||
4548 | tp->defer_tcp_accept.request, | ||
4549 | sk); | ||
4550 | |||
4551 | tp->defer_tcp_accept.listen_sk->sk_data_ready( | ||
4552 | tp->defer_tcp_accept.listen_sk, 0); | ||
4553 | |||
4554 | sock_put(tp->defer_tcp_accept.listen_sk); | ||
4555 | sock_put(sk); | ||
4556 | tp->defer_tcp_accept.listen_sk = NULL; | ||
4557 | tp->defer_tcp_accept.request = NULL; | ||
4558 | } else if (hasfin || | ||
4559 | tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) { | ||
4560 | tcp_reset(sk); | ||
4561 | return -1; | ||
4562 | } | ||
4563 | } | ||
4564 | return 0; | ||
4565 | } | ||
4566 | |||
4511 | static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) | 4567 | static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) |
4512 | { | 4568 | { |
4513 | struct tcp_sock *tp = tcp_sk(sk); | 4569 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -4868,6 +4924,9 @@ step5: | |||
4868 | 4924 | ||
4869 | tcp_data_snd_check(sk); | 4925 | tcp_data_snd_check(sk); |
4870 | tcp_ack_snd_check(sk); | 4926 | tcp_ack_snd_check(sk); |
4927 | |||
4928 | if (tcp_defer_accept_check(sk)) | ||
4929 | return -1; | ||
4871 | return 0; | 4930 | return 0; |
4872 | 4931 | ||
4873 | csum_error: | 4932 | csum_error: |
@@ -5387,6 +5446,7 @@ discard: | |||
5387 | 5446 | ||
5388 | EXPORT_SYMBOL(sysctl_tcp_ecn); | 5447 | EXPORT_SYMBOL(sysctl_tcp_ecn); |
5389 | EXPORT_SYMBOL(sysctl_tcp_reordering); | 5448 | EXPORT_SYMBOL(sysctl_tcp_reordering); |
5449 | EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); | ||
5390 | EXPORT_SYMBOL(tcp_parse_options); | 5450 | EXPORT_SYMBOL(tcp_parse_options); |
5391 | EXPORT_SYMBOL(tcp_rcv_established); | 5451 | EXPORT_SYMBOL(tcp_rcv_established); |
5392 | EXPORT_SYMBOL(tcp_rcv_state_process); | 5452 | EXPORT_SYMBOL(tcp_rcv_state_process); |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 00156bf421ca..776615180b93 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -88,9 +88,6 @@ int sysctl_tcp_low_latency __read_mostly; | |||
88 | /* Check TCP sequence numbers in ICMP packets. */ | 88 | /* Check TCP sequence numbers in ICMP packets. */ |
89 | #define ICMP_MIN_LENGTH 8 | 89 | #define ICMP_MIN_LENGTH 8 |
90 | 90 | ||
91 | /* Socket used for sending RSTs */ | ||
92 | static struct socket *tcp_socket __read_mostly; | ||
93 | |||
94 | void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); | 91 | void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); |
95 | 92 | ||
96 | #ifdef CONFIG_TCP_MD5SIG | 93 | #ifdef CONFIG_TCP_MD5SIG |
@@ -353,7 +350,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) | |||
353 | return; | 350 | return; |
354 | } | 351 | } |
355 | 352 | ||
356 | sk = inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->daddr, th->dest, | 353 | sk = inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->daddr, th->dest, |
357 | iph->saddr, th->source, inet_iif(skb)); | 354 | iph->saddr, th->source, inet_iif(skb)); |
358 | if (!sk) { | 355 | if (!sk) { |
359 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | 356 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); |
@@ -552,7 +549,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) | |||
552 | if (th->rst) | 549 | if (th->rst) |
553 | return; | 550 | return; |
554 | 551 | ||
555 | if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL) | 552 | if (skb->rtable->rt_type != RTN_LOCAL) |
556 | return; | 553 | return; |
557 | 554 | ||
558 | /* Swap the send and the receive. */ | 555 | /* Swap the send and the receive. */ |
@@ -598,7 +595,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) | |||
598 | sizeof(struct tcphdr), IPPROTO_TCP, 0); | 595 | sizeof(struct tcphdr), IPPROTO_TCP, 0); |
599 | arg.csumoffset = offsetof(struct tcphdr, check) / 2; | 596 | arg.csumoffset = offsetof(struct tcphdr, check) / 2; |
600 | 597 | ||
601 | ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); | 598 | ip_send_reply(dev_net(skb->dst->dev)->ipv4.tcp_sock, skb, |
599 | &arg, arg.iov[0].iov_len); | ||
602 | 600 | ||
603 | TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); | 601 | TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); |
604 | TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); | 602 | TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); |
@@ -693,7 +691,8 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, | |||
693 | if (twsk) | 691 | if (twsk) |
694 | arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if; | 692 | arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if; |
695 | 693 | ||
696 | ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); | 694 | ip_send_reply(dev_net(skb->dev)->ipv4.tcp_sock, skb, |
695 | &arg, arg.iov[0].iov_len); | ||
697 | 696 | ||
698 | TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); | 697 | TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); |
699 | } | 698 | } |
@@ -723,8 +722,8 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, | |||
723 | * This still operates on a request_sock only, not on a big | 722 | * This still operates on a request_sock only, not on a big |
724 | * socket. | 723 | * socket. |
725 | */ | 724 | */ |
726 | static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, | 725 | static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, |
727 | struct dst_entry *dst) | 726 | struct dst_entry *dst) |
728 | { | 727 | { |
729 | const struct inet_request_sock *ireq = inet_rsk(req); | 728 | const struct inet_request_sock *ireq = inet_rsk(req); |
730 | int err = -1; | 729 | int err = -1; |
@@ -732,7 +731,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, | |||
732 | 731 | ||
733 | /* First, grab a route. */ | 732 | /* First, grab a route. */ |
734 | if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) | 733 | if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) |
735 | goto out; | 734 | return -1; |
736 | 735 | ||
737 | skb = tcp_make_synack(sk, dst, req); | 736 | skb = tcp_make_synack(sk, dst, req); |
738 | 737 | ||
@@ -751,11 +750,15 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, | |||
751 | err = net_xmit_eval(err); | 750 | err = net_xmit_eval(err); |
752 | } | 751 | } |
753 | 752 | ||
754 | out: | ||
755 | dst_release(dst); | 753 | dst_release(dst); |
756 | return err; | 754 | return err; |
757 | } | 755 | } |
758 | 756 | ||
757 | static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req) | ||
758 | { | ||
759 | return __tcp_v4_send_synack(sk, req, NULL); | ||
760 | } | ||
761 | |||
759 | /* | 762 | /* |
760 | * IPv4 request_sock destructor. | 763 | * IPv4 request_sock destructor. |
761 | */ | 764 | */ |
@@ -1258,8 +1261,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1258 | #endif | 1261 | #endif |
1259 | 1262 | ||
1260 | /* Never answer to SYNs send to broadcast or multicast */ | 1263 | /* Never answer to SYNs send to broadcast or multicast */ |
1261 | if (((struct rtable *)skb->dst)->rt_flags & | 1264 | if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) |
1262 | (RTCF_BROADCAST | RTCF_MULTICAST)) | ||
1263 | goto drop; | 1265 | goto drop; |
1264 | 1266 | ||
1265 | /* TW buckets are converted to open requests without | 1267 | /* TW buckets are converted to open requests without |
@@ -1297,10 +1299,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1297 | 1299 | ||
1298 | tcp_parse_options(skb, &tmp_opt, 0); | 1300 | tcp_parse_options(skb, &tmp_opt, 0); |
1299 | 1301 | ||
1300 | if (want_cookie) { | 1302 | if (want_cookie && !tmp_opt.saw_tstamp) |
1301 | tcp_clear_options(&tmp_opt); | 1303 | tcp_clear_options(&tmp_opt); |
1302 | tmp_opt.saw_tstamp = 0; | ||
1303 | } | ||
1304 | 1304 | ||
1305 | if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) { | 1305 | if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) { |
1306 | /* Some OSes (unknown ones, but I see them on web server, which | 1306 | /* Some OSes (unknown ones, but I see them on web server, which |
@@ -1328,6 +1328,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1328 | if (want_cookie) { | 1328 | if (want_cookie) { |
1329 | #ifdef CONFIG_SYN_COOKIES | 1329 | #ifdef CONFIG_SYN_COOKIES |
1330 | syn_flood_warning(skb); | 1330 | syn_flood_warning(skb); |
1331 | req->cookie_ts = tmp_opt.tstamp_ok; | ||
1331 | #endif | 1332 | #endif |
1332 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); | 1333 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); |
1333 | } else if (!isn) { | 1334 | } else if (!isn) { |
@@ -1351,8 +1352,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1351 | (s32)(peer->tcp_ts - req->ts_recent) > | 1352 | (s32)(peer->tcp_ts - req->ts_recent) > |
1352 | TCP_PAWS_WINDOW) { | 1353 | TCP_PAWS_WINDOW) { |
1353 | NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); | 1354 | NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); |
1354 | dst_release(dst); | 1355 | goto drop_and_release; |
1355 | goto drop_and_free; | ||
1356 | } | 1356 | } |
1357 | } | 1357 | } |
1358 | /* Kill the following clause, if you dislike this way. */ | 1358 | /* Kill the following clause, if you dislike this way. */ |
@@ -1369,27 +1369,24 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1369 | * to the moment of synflood. | 1369 | * to the moment of synflood. |
1370 | */ | 1370 | */ |
1371 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " | 1371 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " |
1372 | "request from %u.%u.%u.%u/%u\n", | 1372 | "request from " NIPQUAD_FMT "/%u\n", |
1373 | NIPQUAD(saddr), | 1373 | NIPQUAD(saddr), |
1374 | ntohs(tcp_hdr(skb)->source)); | 1374 | ntohs(tcp_hdr(skb)->source)); |
1375 | dst_release(dst); | 1375 | goto drop_and_release; |
1376 | goto drop_and_free; | ||
1377 | } | 1376 | } |
1378 | 1377 | ||
1379 | isn = tcp_v4_init_sequence(skb); | 1378 | isn = tcp_v4_init_sequence(skb); |
1380 | } | 1379 | } |
1381 | tcp_rsk(req)->snt_isn = isn; | 1380 | tcp_rsk(req)->snt_isn = isn; |
1382 | 1381 | ||
1383 | if (tcp_v4_send_synack(sk, req, dst)) | 1382 | if (__tcp_v4_send_synack(sk, req, dst) || want_cookie) |
1384 | goto drop_and_free; | 1383 | goto drop_and_free; |
1385 | 1384 | ||
1386 | if (want_cookie) { | 1385 | inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); |
1387 | reqsk_free(req); | ||
1388 | } else { | ||
1389 | inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); | ||
1390 | } | ||
1391 | return 0; | 1386 | return 0; |
1392 | 1387 | ||
1388 | drop_and_release: | ||
1389 | dst_release(dst); | ||
1393 | drop_and_free: | 1390 | drop_and_free: |
1394 | reqsk_free(req); | 1391 | reqsk_free(req); |
1395 | drop: | 1392 | drop: |
@@ -1487,7 +1484,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
1487 | if (req) | 1484 | if (req) |
1488 | return tcp_check_req(sk, skb, req, prev); | 1485 | return tcp_check_req(sk, skb, req, prev); |
1489 | 1486 | ||
1490 | nsk = inet_lookup_established(sk->sk_net, &tcp_hashinfo, iph->saddr, | 1487 | nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, |
1491 | th->source, iph->daddr, th->dest, inet_iif(skb)); | 1488 | th->source, iph->daddr, th->dest, inet_iif(skb)); |
1492 | 1489 | ||
1493 | if (nsk) { | 1490 | if (nsk) { |
@@ -1645,7 +1642,7 @@ int tcp_v4_rcv(struct sk_buff *skb) | |||
1645 | TCP_SKB_CB(skb)->flags = iph->tos; | 1642 | TCP_SKB_CB(skb)->flags = iph->tos; |
1646 | TCP_SKB_CB(skb)->sacked = 0; | 1643 | TCP_SKB_CB(skb)->sacked = 0; |
1647 | 1644 | ||
1648 | sk = __inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->saddr, | 1645 | sk = __inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->saddr, |
1649 | th->source, iph->daddr, th->dest, inet_iif(skb)); | 1646 | th->source, iph->daddr, th->dest, inet_iif(skb)); |
1650 | if (!sk) | 1647 | if (!sk) |
1651 | goto no_tcp_socket; | 1648 | goto no_tcp_socket; |
@@ -1719,7 +1716,7 @@ do_time_wait: | |||
1719 | } | 1716 | } |
1720 | switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { | 1717 | switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { |
1721 | case TCP_TW_SYN: { | 1718 | case TCP_TW_SYN: { |
1722 | struct sock *sk2 = inet_lookup_listener(skb->dev->nd_net, | 1719 | struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), |
1723 | &tcp_hashinfo, | 1720 | &tcp_hashinfo, |
1724 | iph->daddr, th->dest, | 1721 | iph->daddr, th->dest, |
1725 | inet_iif(skb)); | 1722 | inet_iif(skb)); |
@@ -1921,6 +1918,14 @@ int tcp_v4_destroy_sock(struct sock *sk) | |||
1921 | sk->sk_sndmsg_page = NULL; | 1918 | sk->sk_sndmsg_page = NULL; |
1922 | } | 1919 | } |
1923 | 1920 | ||
1921 | if (tp->defer_tcp_accept.request) { | ||
1922 | reqsk_free(tp->defer_tcp_accept.request); | ||
1923 | sock_put(tp->defer_tcp_accept.listen_sk); | ||
1924 | sock_put(sk); | ||
1925 | tp->defer_tcp_accept.listen_sk = NULL; | ||
1926 | tp->defer_tcp_accept.request = NULL; | ||
1927 | } | ||
1928 | |||
1924 | atomic_dec(&tcp_sockets_allocated); | 1929 | atomic_dec(&tcp_sockets_allocated); |
1925 | 1930 | ||
1926 | return 0; | 1931 | return 0; |
@@ -1949,6 +1954,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) | |||
1949 | struct hlist_node *node; | 1954 | struct hlist_node *node; |
1950 | struct sock *sk = cur; | 1955 | struct sock *sk = cur; |
1951 | struct tcp_iter_state* st = seq->private; | 1956 | struct tcp_iter_state* st = seq->private; |
1957 | struct net *net = seq_file_net(seq); | ||
1952 | 1958 | ||
1953 | if (!sk) { | 1959 | if (!sk) { |
1954 | st->bucket = 0; | 1960 | st->bucket = 0; |
@@ -1965,7 +1971,8 @@ static void *listening_get_next(struct seq_file *seq, void *cur) | |||
1965 | req = req->dl_next; | 1971 | req = req->dl_next; |
1966 | while (1) { | 1972 | while (1) { |
1967 | while (req) { | 1973 | while (req) { |
1968 | if (req->rsk_ops->family == st->family) { | 1974 | if (req->rsk_ops->family == st->family && |
1975 | net_eq(sock_net(req->sk), net)) { | ||
1969 | cur = req; | 1976 | cur = req; |
1970 | goto out; | 1977 | goto out; |
1971 | } | 1978 | } |
@@ -1989,7 +1996,7 @@ get_req: | |||
1989 | } | 1996 | } |
1990 | get_sk: | 1997 | get_sk: |
1991 | sk_for_each_from(sk, node) { | 1998 | sk_for_each_from(sk, node) { |
1992 | if (sk->sk_family == st->family) { | 1999 | if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) { |
1993 | cur = sk; | 2000 | cur = sk; |
1994 | goto out; | 2001 | goto out; |
1995 | } | 2002 | } |
@@ -2028,6 +2035,7 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) | |||
2028 | static void *established_get_first(struct seq_file *seq) | 2035 | static void *established_get_first(struct seq_file *seq) |
2029 | { | 2036 | { |
2030 | struct tcp_iter_state* st = seq->private; | 2037 | struct tcp_iter_state* st = seq->private; |
2038 | struct net *net = seq_file_net(seq); | ||
2031 | void *rc = NULL; | 2039 | void *rc = NULL; |
2032 | 2040 | ||
2033 | for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { | 2041 | for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { |
@@ -2038,7 +2046,8 @@ static void *established_get_first(struct seq_file *seq) | |||
2038 | 2046 | ||
2039 | read_lock_bh(lock); | 2047 | read_lock_bh(lock); |
2040 | sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { | 2048 | sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { |
2041 | if (sk->sk_family != st->family) { | 2049 | if (sk->sk_family != st->family || |
2050 | !net_eq(sock_net(sk), net)) { | ||
2042 | continue; | 2051 | continue; |
2043 | } | 2052 | } |
2044 | rc = sk; | 2053 | rc = sk; |
@@ -2047,7 +2056,8 @@ static void *established_get_first(struct seq_file *seq) | |||
2047 | st->state = TCP_SEQ_STATE_TIME_WAIT; | 2056 | st->state = TCP_SEQ_STATE_TIME_WAIT; |
2048 | inet_twsk_for_each(tw, node, | 2057 | inet_twsk_for_each(tw, node, |
2049 | &tcp_hashinfo.ehash[st->bucket].twchain) { | 2058 | &tcp_hashinfo.ehash[st->bucket].twchain) { |
2050 | if (tw->tw_family != st->family) { | 2059 | if (tw->tw_family != st->family || |
2060 | !net_eq(twsk_net(tw), net)) { | ||
2051 | continue; | 2061 | continue; |
2052 | } | 2062 | } |
2053 | rc = tw; | 2063 | rc = tw; |
@@ -2066,6 +2076,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) | |||
2066 | struct inet_timewait_sock *tw; | 2076 | struct inet_timewait_sock *tw; |
2067 | struct hlist_node *node; | 2077 | struct hlist_node *node; |
2068 | struct tcp_iter_state* st = seq->private; | 2078 | struct tcp_iter_state* st = seq->private; |
2079 | struct net *net = seq_file_net(seq); | ||
2069 | 2080 | ||
2070 | ++st->num; | 2081 | ++st->num; |
2071 | 2082 | ||
@@ -2073,7 +2084,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) | |||
2073 | tw = cur; | 2084 | tw = cur; |
2074 | tw = tw_next(tw); | 2085 | tw = tw_next(tw); |
2075 | get_tw: | 2086 | get_tw: |
2076 | while (tw && tw->tw_family != st->family) { | 2087 | while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) { |
2077 | tw = tw_next(tw); | 2088 | tw = tw_next(tw); |
2078 | } | 2089 | } |
2079 | if (tw) { | 2090 | if (tw) { |
@@ -2094,7 +2105,7 @@ get_tw: | |||
2094 | sk = sk_next(sk); | 2105 | sk = sk_next(sk); |
2095 | 2106 | ||
2096 | sk_for_each_from(sk, node) { | 2107 | sk_for_each_from(sk, node) { |
2097 | if (sk->sk_family == st->family) | 2108 | if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) |
2098 | goto found; | 2109 | goto found; |
2099 | } | 2110 | } |
2100 | 2111 | ||
@@ -2200,48 +2211,37 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) | |||
2200 | static int tcp_seq_open(struct inode *inode, struct file *file) | 2211 | static int tcp_seq_open(struct inode *inode, struct file *file) |
2201 | { | 2212 | { |
2202 | struct tcp_seq_afinfo *afinfo = PDE(inode)->data; | 2213 | struct tcp_seq_afinfo *afinfo = PDE(inode)->data; |
2203 | struct seq_file *seq; | ||
2204 | struct tcp_iter_state *s; | 2214 | struct tcp_iter_state *s; |
2205 | int rc; | 2215 | int err; |
2206 | 2216 | ||
2207 | if (unlikely(afinfo == NULL)) | 2217 | if (unlikely(afinfo == NULL)) |
2208 | return -EINVAL; | 2218 | return -EINVAL; |
2209 | 2219 | ||
2210 | s = kzalloc(sizeof(*s), GFP_KERNEL); | 2220 | err = seq_open_net(inode, file, &afinfo->seq_ops, |
2211 | if (!s) | 2221 | sizeof(struct tcp_iter_state)); |
2212 | return -ENOMEM; | 2222 | if (err < 0) |
2223 | return err; | ||
2224 | |||
2225 | s = ((struct seq_file *)file->private_data)->private; | ||
2213 | s->family = afinfo->family; | 2226 | s->family = afinfo->family; |
2214 | s->seq_ops.start = tcp_seq_start; | 2227 | return 0; |
2215 | s->seq_ops.next = tcp_seq_next; | ||
2216 | s->seq_ops.show = afinfo->seq_show; | ||
2217 | s->seq_ops.stop = tcp_seq_stop; | ||
2218 | |||
2219 | rc = seq_open(file, &s->seq_ops); | ||
2220 | if (rc) | ||
2221 | goto out_kfree; | ||
2222 | seq = file->private_data; | ||
2223 | seq->private = s; | ||
2224 | out: | ||
2225 | return rc; | ||
2226 | out_kfree: | ||
2227 | kfree(s); | ||
2228 | goto out; | ||
2229 | } | 2228 | } |
2230 | 2229 | ||
2231 | int tcp_proc_register(struct tcp_seq_afinfo *afinfo) | 2230 | int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) |
2232 | { | 2231 | { |
2233 | int rc = 0; | 2232 | int rc = 0; |
2234 | struct proc_dir_entry *p; | 2233 | struct proc_dir_entry *p; |
2235 | 2234 | ||
2236 | if (!afinfo) | 2235 | afinfo->seq_fops.open = tcp_seq_open; |
2237 | return -EINVAL; | 2236 | afinfo->seq_fops.read = seq_read; |
2238 | afinfo->seq_fops->owner = afinfo->owner; | 2237 | afinfo->seq_fops.llseek = seq_lseek; |
2239 | afinfo->seq_fops->open = tcp_seq_open; | 2238 | afinfo->seq_fops.release = seq_release_net; |
2240 | afinfo->seq_fops->read = seq_read; | 2239 | |
2241 | afinfo->seq_fops->llseek = seq_lseek; | 2240 | afinfo->seq_ops.start = tcp_seq_start; |
2242 | afinfo->seq_fops->release = seq_release_private; | 2241 | afinfo->seq_ops.next = tcp_seq_next; |
2242 | afinfo->seq_ops.stop = tcp_seq_stop; | ||
2243 | 2243 | ||
2244 | p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops); | 2244 | p = proc_net_fops_create(net, afinfo->name, S_IRUGO, &afinfo->seq_fops); |
2245 | if (p) | 2245 | if (p) |
2246 | p->data = afinfo; | 2246 | p->data = afinfo; |
2247 | else | 2247 | else |
@@ -2249,12 +2249,9 @@ int tcp_proc_register(struct tcp_seq_afinfo *afinfo) | |||
2249 | return rc; | 2249 | return rc; |
2250 | } | 2250 | } |
2251 | 2251 | ||
2252 | void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo) | 2252 | void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) |
2253 | { | 2253 | { |
2254 | if (!afinfo) | 2254 | proc_net_remove(net, afinfo->name); |
2255 | return; | ||
2256 | proc_net_remove(&init_net, afinfo->name); | ||
2257 | memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); | ||
2258 | } | 2255 | } |
2259 | 2256 | ||
2260 | static void get_openreq4(struct sock *sk, struct request_sock *req, | 2257 | static void get_openreq4(struct sock *sk, struct request_sock *req, |
@@ -2383,28 +2380,43 @@ out: | |||
2383 | return 0; | 2380 | return 0; |
2384 | } | 2381 | } |
2385 | 2382 | ||
2386 | static struct file_operations tcp4_seq_fops; | ||
2387 | static struct tcp_seq_afinfo tcp4_seq_afinfo = { | 2383 | static struct tcp_seq_afinfo tcp4_seq_afinfo = { |
2388 | .owner = THIS_MODULE, | ||
2389 | .name = "tcp", | 2384 | .name = "tcp", |
2390 | .family = AF_INET, | 2385 | .family = AF_INET, |
2391 | .seq_show = tcp4_seq_show, | 2386 | .seq_fops = { |
2392 | .seq_fops = &tcp4_seq_fops, | 2387 | .owner = THIS_MODULE, |
2388 | }, | ||
2389 | .seq_ops = { | ||
2390 | .show = tcp4_seq_show, | ||
2391 | }, | ||
2392 | }; | ||
2393 | |||
2394 | static int tcp4_proc_init_net(struct net *net) | ||
2395 | { | ||
2396 | return tcp_proc_register(net, &tcp4_seq_afinfo); | ||
2397 | } | ||
2398 | |||
2399 | static void tcp4_proc_exit_net(struct net *net) | ||
2400 | { | ||
2401 | tcp_proc_unregister(net, &tcp4_seq_afinfo); | ||
2402 | } | ||
2403 | |||
2404 | static struct pernet_operations tcp4_net_ops = { | ||
2405 | .init = tcp4_proc_init_net, | ||
2406 | .exit = tcp4_proc_exit_net, | ||
2393 | }; | 2407 | }; |
2394 | 2408 | ||
2395 | int __init tcp4_proc_init(void) | 2409 | int __init tcp4_proc_init(void) |
2396 | { | 2410 | { |
2397 | return tcp_proc_register(&tcp4_seq_afinfo); | 2411 | return register_pernet_subsys(&tcp4_net_ops); |
2398 | } | 2412 | } |
2399 | 2413 | ||
2400 | void tcp4_proc_exit(void) | 2414 | void tcp4_proc_exit(void) |
2401 | { | 2415 | { |
2402 | tcp_proc_unregister(&tcp4_seq_afinfo); | 2416 | unregister_pernet_subsys(&tcp4_net_ops); |
2403 | } | 2417 | } |
2404 | #endif /* CONFIG_PROC_FS */ | 2418 | #endif /* CONFIG_PROC_FS */ |
2405 | 2419 | ||
2406 | DEFINE_PROTO_INUSE(tcp) | ||
2407 | |||
2408 | struct proto tcp_prot = { | 2420 | struct proto tcp_prot = { |
2409 | .name = "TCP", | 2421 | .name = "TCP", |
2410 | .owner = THIS_MODULE, | 2422 | .owner = THIS_MODULE, |
@@ -2435,18 +2447,33 @@ struct proto tcp_prot = { | |||
2435 | .obj_size = sizeof(struct tcp_sock), | 2447 | .obj_size = sizeof(struct tcp_sock), |
2436 | .twsk_prot = &tcp_timewait_sock_ops, | 2448 | .twsk_prot = &tcp_timewait_sock_ops, |
2437 | .rsk_prot = &tcp_request_sock_ops, | 2449 | .rsk_prot = &tcp_request_sock_ops, |
2438 | .hashinfo = &tcp_hashinfo, | 2450 | .h.hashinfo = &tcp_hashinfo, |
2439 | #ifdef CONFIG_COMPAT | 2451 | #ifdef CONFIG_COMPAT |
2440 | .compat_setsockopt = compat_tcp_setsockopt, | 2452 | .compat_setsockopt = compat_tcp_setsockopt, |
2441 | .compat_getsockopt = compat_tcp_getsockopt, | 2453 | .compat_getsockopt = compat_tcp_getsockopt, |
2442 | #endif | 2454 | #endif |
2443 | REF_PROTO_INUSE(tcp) | ||
2444 | }; | 2455 | }; |
2445 | 2456 | ||
2446 | void __init tcp_v4_init(struct net_proto_family *ops) | 2457 | |
2458 | static int __net_init tcp_sk_init(struct net *net) | ||
2459 | { | ||
2460 | return inet_ctl_sock_create(&net->ipv4.tcp_sock, | ||
2461 | PF_INET, SOCK_RAW, IPPROTO_TCP, net); | ||
2462 | } | ||
2463 | |||
2464 | static void __net_exit tcp_sk_exit(struct net *net) | ||
2465 | { | ||
2466 | inet_ctl_sock_destroy(net->ipv4.tcp_sock); | ||
2467 | } | ||
2468 | |||
2469 | static struct pernet_operations __net_initdata tcp_sk_ops = { | ||
2470 | .init = tcp_sk_init, | ||
2471 | .exit = tcp_sk_exit, | ||
2472 | }; | ||
2473 | |||
2474 | void __init tcp_v4_init(void) | ||
2447 | { | 2475 | { |
2448 | if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, | 2476 | if (register_pernet_device(&tcp_sk_ops)) |
2449 | IPPROTO_TCP) < 0) | ||
2450 | panic("Failed to create the TCP control socket.\n"); | 2477 | panic("Failed to create the TCP control socket.\n"); |
2451 | } | 2478 | } |
2452 | 2479 | ||
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b61b76847ad9..019c8c16e5cc 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -35,6 +35,8 @@ | |||
35 | #endif | 35 | #endif |
36 | 36 | ||
37 | int sysctl_tcp_syncookies __read_mostly = SYNC_INIT; | 37 | int sysctl_tcp_syncookies __read_mostly = SYNC_INIT; |
38 | EXPORT_SYMBOL(sysctl_tcp_syncookies); | ||
39 | |||
38 | int sysctl_tcp_abort_on_overflow __read_mostly; | 40 | int sysctl_tcp_abort_on_overflow __read_mostly; |
39 | 41 | ||
40 | struct inet_timewait_death_row tcp_death_row = { | 42 | struct inet_timewait_death_row tcp_death_row = { |
@@ -536,7 +538,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, | |||
536 | * Enforce "SYN-ACK" according to figure 8, figure 6 | 538 | * Enforce "SYN-ACK" according to figure 8, figure 6 |
537 | * of RFC793, fixed by RFC1122. | 539 | * of RFC793, fixed by RFC1122. |
538 | */ | 540 | */ |
539 | req->rsk_ops->rtx_syn_ack(sk, req, NULL); | 541 | req->rsk_ops->rtx_syn_ack(sk, req); |
540 | return NULL; | 542 | return NULL; |
541 | } | 543 | } |
542 | 544 | ||
@@ -569,10 +571,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, | |||
569 | does sequence test, SYN is truncated, and thus we consider | 571 | does sequence test, SYN is truncated, and thus we consider |
570 | it a bare ACK. | 572 | it a bare ACK. |
571 | 573 | ||
572 | If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this | 574 | Both ends (listening sockets) accept the new incoming |
573 | bare ACK. Otherwise, we create an established connection. Both | 575 | connection and try to talk to each other. 8-) |
574 | ends (listening sockets) accept the new incoming connection and try | ||
575 | to talk to each other. 8-) | ||
576 | 576 | ||
577 | Note: This case is both harmless, and rare. Possibility is about the | 577 | Note: This case is both harmless, and rare. Possibility is about the |
578 | same as us discovering intelligent life on another plant tomorrow. | 578 | same as us discovering intelligent life on another plant tomorrow. |
@@ -640,13 +640,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, | |||
640 | if (!(flg & TCP_FLAG_ACK)) | 640 | if (!(flg & TCP_FLAG_ACK)) |
641 | return NULL; | 641 | return NULL; |
642 | 642 | ||
643 | /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ | ||
644 | if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && | ||
645 | TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { | ||
646 | inet_rsk(req)->acked = 1; | ||
647 | return NULL; | ||
648 | } | ||
649 | |||
650 | /* OK, ACK is valid, create big socket and | 643 | /* OK, ACK is valid, create big socket and |
651 | * feed this segment to it. It will repeat all | 644 | * feed this segment to it. It will repeat all |
652 | * the tests. THIS SEGMENT MUST MOVE SOCKET TO | 645 | * the tests. THIS SEGMENT MUST MOVE SOCKET TO |
@@ -685,7 +678,24 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, | |||
685 | inet_csk_reqsk_queue_unlink(sk, req, prev); | 678 | inet_csk_reqsk_queue_unlink(sk, req, prev); |
686 | inet_csk_reqsk_queue_removed(sk, req); | 679 | inet_csk_reqsk_queue_removed(sk, req); |
687 | 680 | ||
688 | inet_csk_reqsk_queue_add(sk, req, child); | 681 | if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && |
682 | TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { | ||
683 | |||
684 | /* the accept queue handling is done is est recv slow | ||
685 | * path so lets make sure to start there | ||
686 | */ | ||
687 | tcp_sk(child)->pred_flags = 0; | ||
688 | sock_hold(sk); | ||
689 | sock_hold(child); | ||
690 | tcp_sk(child)->defer_tcp_accept.listen_sk = sk; | ||
691 | tcp_sk(child)->defer_tcp_accept.request = req; | ||
692 | |||
693 | inet_csk_reset_keepalive_timer(child, | ||
694 | inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ); | ||
695 | } else { | ||
696 | inet_csk_reqsk_queue_add(sk, req, child); | ||
697 | } | ||
698 | |||
689 | return child; | 699 | return child; |
690 | 700 | ||
691 | listen_overflow: | 701 | listen_overflow: |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d29ef79c00ca..debf23581606 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -998,7 +998,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | |||
998 | xmit_size_goal = mss_now; | 998 | xmit_size_goal = mss_now; |
999 | 999 | ||
1000 | if (doing_tso) { | 1000 | if (doing_tso) { |
1001 | xmit_size_goal = (65535 - | 1001 | xmit_size_goal = ((sk->sk_gso_max_size - 1) - |
1002 | inet_csk(sk)->icsk_af_ops->net_header_len - | 1002 | inet_csk(sk)->icsk_af_ops->net_header_len - |
1003 | inet_csk(sk)->icsk_ext_hdr_len - | 1003 | inet_csk(sk)->icsk_ext_hdr_len - |
1004 | tp->tcp_header_len); | 1004 | tp->tcp_header_len); |
@@ -1057,7 +1057,7 @@ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb, | |||
1057 | 1057 | ||
1058 | needed = min(skb->len, window); | 1058 | needed = min(skb->len, window); |
1059 | 1059 | ||
1060 | if (skb == tcp_write_queue_tail(sk) && cwnd_len <= needed) | 1060 | if (cwnd_len <= needed) |
1061 | return cwnd_len; | 1061 | return cwnd_len; |
1062 | 1062 | ||
1063 | return needed - needed % mss_now; | 1063 | return needed - needed % mss_now; |
@@ -1282,7 +1282,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | |||
1282 | limit = min(send_win, cong_win); | 1282 | limit = min(send_win, cong_win); |
1283 | 1283 | ||
1284 | /* If a full-sized TSO skb can be sent, do it. */ | 1284 | /* If a full-sized TSO skb can be sent, do it. */ |
1285 | if (limit >= 65536) | 1285 | if (limit >= sk->sk_gso_max_size) |
1286 | goto send_now; | 1286 | goto send_now; |
1287 | 1287 | ||
1288 | if (sysctl_tcp_tso_win_divisor) { | 1288 | if (sysctl_tcp_tso_win_divisor) { |
@@ -2236,7 +2236,11 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2236 | 2236 | ||
2237 | /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ | 2237 | /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ |
2238 | th->window = htons(min(req->rcv_wnd, 65535U)); | 2238 | th->window = htons(min(req->rcv_wnd, 65535U)); |
2239 | 2239 | #ifdef CONFIG_SYN_COOKIES | |
2240 | if (unlikely(req->cookie_ts)) | ||
2241 | TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); | ||
2242 | else | ||
2243 | #endif | ||
2240 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2244 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
2241 | tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok, | 2245 | tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok, |
2242 | ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale, | 2246 | ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale, |
@@ -2571,6 +2575,7 @@ void tcp_send_probe0(struct sock *sk) | |||
2571 | } | 2575 | } |
2572 | } | 2576 | } |
2573 | 2577 | ||
2578 | EXPORT_SYMBOL(tcp_select_initial_window); | ||
2574 | EXPORT_SYMBOL(tcp_connect); | 2579 | EXPORT_SYMBOL(tcp_connect); |
2575 | EXPORT_SYMBOL(tcp_make_synack); | 2580 | EXPORT_SYMBOL(tcp_make_synack); |
2576 | EXPORT_SYMBOL(tcp_simple_retransmit); | 2581 | EXPORT_SYMBOL(tcp_simple_retransmit); |
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 87dd5bff315f..1c509592574a 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c | |||
@@ -153,7 +153,7 @@ static int tcpprobe_sprint(char *tbuf, int n) | |||
153 | = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); | 153 | = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); |
154 | 154 | ||
155 | return snprintf(tbuf, n, | 155 | return snprintf(tbuf, n, |
156 | "%lu.%09lu %d.%d.%d.%d:%u %d.%d.%d.%d:%u" | 156 | "%lu.%09lu " NIPQUAD_FMT ":%u " NIPQUAD_FMT ":%u" |
157 | " %d %#x %#x %u %u %u %u\n", | 157 | " %d %#x %#x %u %u %u %u\n", |
158 | (unsigned long) tv.tv_sec, | 158 | (unsigned long) tv.tv_sec, |
159 | (unsigned long) tv.tv_nsec, | 159 | (unsigned long) tv.tv_nsec, |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 803d758a2b12..4de68cf5f2aa 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -299,12 +299,20 @@ static void tcp_retransmit_timer(struct sock *sk) | |||
299 | * we cannot allow such beasts to hang infinitely. | 299 | * we cannot allow such beasts to hang infinitely. |
300 | */ | 300 | */ |
301 | #ifdef TCP_DEBUG | 301 | #ifdef TCP_DEBUG |
302 | if (1) { | 302 | struct inet_sock *inet = inet_sk(sk); |
303 | struct inet_sock *inet = inet_sk(sk); | 303 | if (sk->sk_family == AF_INET) { |
304 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n", | 304 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer " NIPQUAD_FMT ":%u/%u shrinks window %u:%u. Repaired.\n", |
305 | NIPQUAD(inet->daddr), ntohs(inet->dport), | 305 | NIPQUAD(inet->daddr), ntohs(inet->dport), |
306 | inet->num, tp->snd_una, tp->snd_nxt); | 306 | inet->num, tp->snd_una, tp->snd_nxt); |
307 | } | 307 | } |
308 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
309 | else if (sk->sk_family == AF_INET6) { | ||
310 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
311 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer " NIP6_FMT ":%u/%u shrinks window %u:%u. Repaired.\n", | ||
312 | NIP6(np->daddr), ntohs(inet->dport), | ||
313 | inet->num, tp->snd_una, tp->snd_nxt); | ||
314 | } | ||
315 | #endif | ||
308 | #endif | 316 | #endif |
309 | if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) { | 317 | if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) { |
310 | tcp_write_err(sk); | 318 | tcp_write_err(sk); |
@@ -481,6 +489,11 @@ static void tcp_keepalive_timer (unsigned long data) | |||
481 | goto death; | 489 | goto death; |
482 | } | 490 | } |
483 | 491 | ||
492 | if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) { | ||
493 | tcp_send_active_reset(sk, GFP_ATOMIC); | ||
494 | goto death; | ||
495 | } | ||
496 | |||
484 | if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) | 497 | if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) |
485 | goto out; | 498 | goto out; |
486 | 499 | ||
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index 978b3fd61e65..d3b709a6f264 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c | |||
@@ -136,6 +136,7 @@ static struct net_protocol tunnel4_protocol = { | |||
136 | .handler = tunnel4_rcv, | 136 | .handler = tunnel4_rcv, |
137 | .err_handler = tunnel4_err, | 137 | .err_handler = tunnel4_err, |
138 | .no_policy = 1, | 138 | .no_policy = 1, |
139 | .netns_ok = 1, | ||
139 | }; | 140 | }; |
140 | 141 | ||
141 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 142 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
@@ -143,6 +144,7 @@ static struct net_protocol tunnel64_protocol = { | |||
143 | .handler = tunnel64_rcv, | 144 | .handler = tunnel64_rcv, |
144 | .err_handler = tunnel64_err, | 145 | .err_handler = tunnel64_err, |
145 | .no_policy = 1, | 146 | .no_policy = 1, |
147 | .netns_ok = 1, | ||
146 | }; | 148 | }; |
147 | #endif | 149 | #endif |
148 | 150 | ||
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1704c1474ea1..b053ac795275 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -137,29 +137,28 @@ static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, | |||
137 | struct hlist_node *node; | 137 | struct hlist_node *node; |
138 | 138 | ||
139 | sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) | 139 | sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) |
140 | if (sk->sk_net == net && sk->sk_hash == num) | 140 | if (net_eq(sock_net(sk), net) && sk->sk_hash == num) |
141 | return 1; | 141 | return 1; |
142 | return 0; | 142 | return 0; |
143 | } | 143 | } |
144 | 144 | ||
145 | /** | 145 | /** |
146 | * __udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 | 146 | * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 |
147 | * | 147 | * |
148 | * @sk: socket struct in question | 148 | * @sk: socket struct in question |
149 | * @snum: port number to look up | 149 | * @snum: port number to look up |
150 | * @udptable: hash list table, must be of UDP_HTABLE_SIZE | ||
151 | * @saddr_comp: AF-dependent comparison of bound local IP addresses | 150 | * @saddr_comp: AF-dependent comparison of bound local IP addresses |
152 | */ | 151 | */ |
153 | int __udp_lib_get_port(struct sock *sk, unsigned short snum, | 152 | int udp_lib_get_port(struct sock *sk, unsigned short snum, |
154 | struct hlist_head udptable[], | ||
155 | int (*saddr_comp)(const struct sock *sk1, | 153 | int (*saddr_comp)(const struct sock *sk1, |
156 | const struct sock *sk2 ) ) | 154 | const struct sock *sk2 ) ) |
157 | { | 155 | { |
156 | struct hlist_head *udptable = sk->sk_prot->h.udp_hash; | ||
158 | struct hlist_node *node; | 157 | struct hlist_node *node; |
159 | struct hlist_head *head; | 158 | struct hlist_head *head; |
160 | struct sock *sk2; | 159 | struct sock *sk2; |
161 | int error = 1; | 160 | int error = 1; |
162 | struct net *net = sk->sk_net; | 161 | struct net *net = sock_net(sk); |
163 | 162 | ||
164 | write_lock_bh(&udp_hash_lock); | 163 | write_lock_bh(&udp_hash_lock); |
165 | 164 | ||
@@ -219,7 +218,7 @@ gotit: | |||
219 | sk_for_each(sk2, node, head) | 218 | sk_for_each(sk2, node, head) |
220 | if (sk2->sk_hash == snum && | 219 | if (sk2->sk_hash == snum && |
221 | sk2 != sk && | 220 | sk2 != sk && |
222 | sk2->sk_net == net && | 221 | net_eq(sock_net(sk2), net) && |
223 | (!sk2->sk_reuse || !sk->sk_reuse) && | 222 | (!sk2->sk_reuse || !sk->sk_reuse) && |
224 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if | 223 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if |
225 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && | 224 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && |
@@ -232,7 +231,7 @@ gotit: | |||
232 | if (sk_unhashed(sk)) { | 231 | if (sk_unhashed(sk)) { |
233 | head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; | 232 | head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; |
234 | sk_add_node(sk, head); | 233 | sk_add_node(sk, head); |
235 | sock_prot_inuse_add(sk->sk_prot, 1); | 234 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
236 | } | 235 | } |
237 | error = 0; | 236 | error = 0; |
238 | fail: | 237 | fail: |
@@ -240,13 +239,7 @@ fail: | |||
240 | return error; | 239 | return error; |
241 | } | 240 | } |
242 | 241 | ||
243 | int udp_get_port(struct sock *sk, unsigned short snum, | 242 | static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) |
244 | int (*scmp)(const struct sock *, const struct sock *)) | ||
245 | { | ||
246 | return __udp_lib_get_port(sk, snum, udp_hash, scmp); | ||
247 | } | ||
248 | |||
249 | int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | ||
250 | { | 243 | { |
251 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); | 244 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); |
252 | 245 | ||
@@ -255,9 +248,9 @@ int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | |||
255 | inet1->rcv_saddr == inet2->rcv_saddr )); | 248 | inet1->rcv_saddr == inet2->rcv_saddr )); |
256 | } | 249 | } |
257 | 250 | ||
258 | static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) | 251 | int udp_v4_get_port(struct sock *sk, unsigned short snum) |
259 | { | 252 | { |
260 | return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); | 253 | return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); |
261 | } | 254 | } |
262 | 255 | ||
263 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try | 256 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try |
@@ -276,7 +269,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | |||
276 | sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { | 269 | sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { |
277 | struct inet_sock *inet = inet_sk(sk); | 270 | struct inet_sock *inet = inet_sk(sk); |
278 | 271 | ||
279 | if (sk->sk_net == net && sk->sk_hash == hnum && | 272 | if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && |
280 | !ipv6_only_sock(sk)) { | 273 | !ipv6_only_sock(sk)) { |
281 | int score = (sk->sk_family == PF_INET ? 1 : 0); | 274 | int score = (sk->sk_family == PF_INET ? 1 : 0); |
282 | if (inet->rcv_saddr) { | 275 | if (inet->rcv_saddr) { |
@@ -364,7 +357,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) | |||
364 | int harderr; | 357 | int harderr; |
365 | int err; | 358 | int err; |
366 | 359 | ||
367 | sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, | 360 | sk = __udp4_lib_lookup(dev_net(skb->dev), iph->daddr, uh->dest, |
368 | iph->saddr, uh->source, skb->dev->ifindex, udptable); | 361 | iph->saddr, uh->source, skb->dev->ifindex, udptable); |
369 | if (sk == NULL) { | 362 | if (sk == NULL) { |
370 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | 363 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); |
@@ -614,7 +607,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
614 | 607 | ||
615 | ipc.oif = sk->sk_bound_dev_if; | 608 | ipc.oif = sk->sk_bound_dev_if; |
616 | if (msg->msg_controllen) { | 609 | if (msg->msg_controllen) { |
617 | err = ip_cmsg_send(msg, &ipc); | 610 | err = ip_cmsg_send(sock_net(sk), msg, &ipc); |
618 | if (err) | 611 | if (err) |
619 | return err; | 612 | return err; |
620 | if (ipc.opt) | 613 | if (ipc.opt) |
@@ -663,7 +656,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
663 | { .sport = inet->sport, | 656 | { .sport = inet->sport, |
664 | .dport = dport } } }; | 657 | .dport = dport } } }; |
665 | security_sk_classify_flow(sk, &fl); | 658 | security_sk_classify_flow(sk, &fl); |
666 | err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); | 659 | err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1); |
667 | if (err) { | 660 | if (err) { |
668 | if (err == -ENETUNREACH) | 661 | if (err == -ENETUNREACH) |
669 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); | 662 | IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); |
@@ -1188,7 +1181,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | |||
1188 | if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) | 1181 | if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) |
1189 | return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); | 1182 | return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); |
1190 | 1183 | ||
1191 | sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, | 1184 | sk = __udp4_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr, |
1192 | uh->dest, inet_iif(skb), udptable); | 1185 | uh->dest, inet_iif(skb), udptable); |
1193 | 1186 | ||
1194 | if (sk != NULL) { | 1187 | if (sk != NULL) { |
@@ -1228,7 +1221,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | |||
1228 | return 0; | 1221 | return 0; |
1229 | 1222 | ||
1230 | short_packet: | 1223 | short_packet: |
1231 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", | 1224 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From " NIPQUAD_FMT ":%u %d/%d to " NIPQUAD_FMT ":%u\n", |
1232 | proto == IPPROTO_UDPLITE ? "-Lite" : "", | 1225 | proto == IPPROTO_UDPLITE ? "-Lite" : "", |
1233 | NIPQUAD(saddr), | 1226 | NIPQUAD(saddr), |
1234 | ntohs(uh->source), | 1227 | ntohs(uh->source), |
@@ -1243,7 +1236,7 @@ csum_error: | |||
1243 | * RFC1122: OK. Discards the bad packet silently (as far as | 1236 | * RFC1122: OK. Discards the bad packet silently (as far as |
1244 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). | 1237 | * the network is concerned, anyway) as per 4.1.3.4 (MUST). |
1245 | */ | 1238 | */ |
1246 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", | 1239 | LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From " NIPQUAD_FMT ":%u to " NIPQUAD_FMT ":%u ulen %d\n", |
1247 | proto == IPPROTO_UDPLITE ? "-Lite" : "", | 1240 | proto == IPPROTO_UDPLITE ? "-Lite" : "", |
1248 | NIPQUAD(saddr), | 1241 | NIPQUAD(saddr), |
1249 | ntohs(uh->source), | 1242 | ntohs(uh->source), |
@@ -1474,8 +1467,6 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
1474 | 1467 | ||
1475 | } | 1468 | } |
1476 | 1469 | ||
1477 | DEFINE_PROTO_INUSE(udp) | ||
1478 | |||
1479 | struct proto udp_prot = { | 1470 | struct proto udp_prot = { |
1480 | .name = "UDP", | 1471 | .name = "UDP", |
1481 | .owner = THIS_MODULE, | 1472 | .owner = THIS_MODULE, |
@@ -1498,11 +1489,11 @@ struct proto udp_prot = { | |||
1498 | .sysctl_wmem = &sysctl_udp_wmem_min, | 1489 | .sysctl_wmem = &sysctl_udp_wmem_min, |
1499 | .sysctl_rmem = &sysctl_udp_rmem_min, | 1490 | .sysctl_rmem = &sysctl_udp_rmem_min, |
1500 | .obj_size = sizeof(struct udp_sock), | 1491 | .obj_size = sizeof(struct udp_sock), |
1492 | .h.udp_hash = udp_hash, | ||
1501 | #ifdef CONFIG_COMPAT | 1493 | #ifdef CONFIG_COMPAT |
1502 | .compat_setsockopt = compat_udp_setsockopt, | 1494 | .compat_setsockopt = compat_udp_setsockopt, |
1503 | .compat_getsockopt = compat_udp_getsockopt, | 1495 | .compat_getsockopt = compat_udp_getsockopt, |
1504 | #endif | 1496 | #endif |
1505 | REF_PROTO_INUSE(udp) | ||
1506 | }; | 1497 | }; |
1507 | 1498 | ||
1508 | /* ------------------------------------------------------------------------ */ | 1499 | /* ------------------------------------------------------------------------ */ |
@@ -1512,10 +1503,13 @@ static struct sock *udp_get_first(struct seq_file *seq) | |||
1512 | { | 1503 | { |
1513 | struct sock *sk; | 1504 | struct sock *sk; |
1514 | struct udp_iter_state *state = seq->private; | 1505 | struct udp_iter_state *state = seq->private; |
1506 | struct net *net = seq_file_net(seq); | ||
1515 | 1507 | ||
1516 | for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { | 1508 | for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { |
1517 | struct hlist_node *node; | 1509 | struct hlist_node *node; |
1518 | sk_for_each(sk, node, state->hashtable + state->bucket) { | 1510 | sk_for_each(sk, node, state->hashtable + state->bucket) { |
1511 | if (!net_eq(sock_net(sk), net)) | ||
1512 | continue; | ||
1519 | if (sk->sk_family == state->family) | 1513 | if (sk->sk_family == state->family) |
1520 | goto found; | 1514 | goto found; |
1521 | } | 1515 | } |
@@ -1528,12 +1522,13 @@ found: | |||
1528 | static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) | 1522 | static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) |
1529 | { | 1523 | { |
1530 | struct udp_iter_state *state = seq->private; | 1524 | struct udp_iter_state *state = seq->private; |
1525 | struct net *net = seq_file_net(seq); | ||
1531 | 1526 | ||
1532 | do { | 1527 | do { |
1533 | sk = sk_next(sk); | 1528 | sk = sk_next(sk); |
1534 | try_again: | 1529 | try_again: |
1535 | ; | 1530 | ; |
1536 | } while (sk && sk->sk_family != state->family); | 1531 | } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); |
1537 | 1532 | ||
1538 | if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { | 1533 | if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { |
1539 | sk = sk_head(state->hashtable + state->bucket); | 1534 | sk = sk_head(state->hashtable + state->bucket); |
@@ -1581,47 +1576,36 @@ static void udp_seq_stop(struct seq_file *seq, void *v) | |||
1581 | static int udp_seq_open(struct inode *inode, struct file *file) | 1576 | static int udp_seq_open(struct inode *inode, struct file *file) |
1582 | { | 1577 | { |
1583 | struct udp_seq_afinfo *afinfo = PDE(inode)->data; | 1578 | struct udp_seq_afinfo *afinfo = PDE(inode)->data; |
1584 | struct seq_file *seq; | 1579 | struct udp_iter_state *s; |
1585 | int rc = -ENOMEM; | 1580 | int err; |
1586 | struct udp_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); | ||
1587 | 1581 | ||
1588 | if (!s) | 1582 | err = seq_open_net(inode, file, &afinfo->seq_ops, |
1589 | goto out; | 1583 | sizeof(struct udp_iter_state)); |
1584 | if (err < 0) | ||
1585 | return err; | ||
1586 | |||
1587 | s = ((struct seq_file *)file->private_data)->private; | ||
1590 | s->family = afinfo->family; | 1588 | s->family = afinfo->family; |
1591 | s->hashtable = afinfo->hashtable; | 1589 | s->hashtable = afinfo->hashtable; |
1592 | s->seq_ops.start = udp_seq_start; | 1590 | return err; |
1593 | s->seq_ops.next = udp_seq_next; | ||
1594 | s->seq_ops.show = afinfo->seq_show; | ||
1595 | s->seq_ops.stop = udp_seq_stop; | ||
1596 | |||
1597 | rc = seq_open(file, &s->seq_ops); | ||
1598 | if (rc) | ||
1599 | goto out_kfree; | ||
1600 | |||
1601 | seq = file->private_data; | ||
1602 | seq->private = s; | ||
1603 | out: | ||
1604 | return rc; | ||
1605 | out_kfree: | ||
1606 | kfree(s); | ||
1607 | goto out; | ||
1608 | } | 1591 | } |
1609 | 1592 | ||
1610 | /* ------------------------------------------------------------------------ */ | 1593 | /* ------------------------------------------------------------------------ */ |
1611 | int udp_proc_register(struct udp_seq_afinfo *afinfo) | 1594 | int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) |
1612 | { | 1595 | { |
1613 | struct proc_dir_entry *p; | 1596 | struct proc_dir_entry *p; |
1614 | int rc = 0; | 1597 | int rc = 0; |
1615 | 1598 | ||
1616 | if (!afinfo) | 1599 | afinfo->seq_fops.open = udp_seq_open; |
1617 | return -EINVAL; | 1600 | afinfo->seq_fops.read = seq_read; |
1618 | afinfo->seq_fops->owner = afinfo->owner; | 1601 | afinfo->seq_fops.llseek = seq_lseek; |
1619 | afinfo->seq_fops->open = udp_seq_open; | 1602 | afinfo->seq_fops.release = seq_release_net; |
1620 | afinfo->seq_fops->read = seq_read; | 1603 | |
1621 | afinfo->seq_fops->llseek = seq_lseek; | 1604 | afinfo->seq_ops.start = udp_seq_start; |
1622 | afinfo->seq_fops->release = seq_release_private; | 1605 | afinfo->seq_ops.next = udp_seq_next; |
1606 | afinfo->seq_ops.stop = udp_seq_stop; | ||
1623 | 1607 | ||
1624 | p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops); | 1608 | p = proc_net_fops_create(net, afinfo->name, S_IRUGO, &afinfo->seq_fops); |
1625 | if (p) | 1609 | if (p) |
1626 | p->data = afinfo; | 1610 | p->data = afinfo; |
1627 | else | 1611 | else |
@@ -1629,12 +1613,9 @@ int udp_proc_register(struct udp_seq_afinfo *afinfo) | |||
1629 | return rc; | 1613 | return rc; |
1630 | } | 1614 | } |
1631 | 1615 | ||
1632 | void udp_proc_unregister(struct udp_seq_afinfo *afinfo) | 1616 | void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) |
1633 | { | 1617 | { |
1634 | if (!afinfo) | 1618 | proc_net_remove(net, afinfo->name); |
1635 | return; | ||
1636 | proc_net_remove(&init_net, afinfo->name); | ||
1637 | memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); | ||
1638 | } | 1619 | } |
1639 | 1620 | ||
1640 | /* ------------------------------------------------------------------------ */ | 1621 | /* ------------------------------------------------------------------------ */ |
@@ -1673,24 +1654,41 @@ int udp4_seq_show(struct seq_file *seq, void *v) | |||
1673 | } | 1654 | } |
1674 | 1655 | ||
1675 | /* ------------------------------------------------------------------------ */ | 1656 | /* ------------------------------------------------------------------------ */ |
1676 | static struct file_operations udp4_seq_fops; | ||
1677 | static struct udp_seq_afinfo udp4_seq_afinfo = { | 1657 | static struct udp_seq_afinfo udp4_seq_afinfo = { |
1678 | .owner = THIS_MODULE, | ||
1679 | .name = "udp", | 1658 | .name = "udp", |
1680 | .family = AF_INET, | 1659 | .family = AF_INET, |
1681 | .hashtable = udp_hash, | 1660 | .hashtable = udp_hash, |
1682 | .seq_show = udp4_seq_show, | 1661 | .seq_fops = { |
1683 | .seq_fops = &udp4_seq_fops, | 1662 | .owner = THIS_MODULE, |
1663 | }, | ||
1664 | .seq_ops = { | ||
1665 | .show = udp4_seq_show, | ||
1666 | }, | ||
1667 | }; | ||
1668 | |||
1669 | static int udp4_proc_init_net(struct net *net) | ||
1670 | { | ||
1671 | return udp_proc_register(net, &udp4_seq_afinfo); | ||
1672 | } | ||
1673 | |||
1674 | static void udp4_proc_exit_net(struct net *net) | ||
1675 | { | ||
1676 | udp_proc_unregister(net, &udp4_seq_afinfo); | ||
1677 | } | ||
1678 | |||
1679 | static struct pernet_operations udp4_net_ops = { | ||
1680 | .init = udp4_proc_init_net, | ||
1681 | .exit = udp4_proc_exit_net, | ||
1684 | }; | 1682 | }; |
1685 | 1683 | ||
1686 | int __init udp4_proc_init(void) | 1684 | int __init udp4_proc_init(void) |
1687 | { | 1685 | { |
1688 | return udp_proc_register(&udp4_seq_afinfo); | 1686 | return register_pernet_subsys(&udp4_net_ops); |
1689 | } | 1687 | } |
1690 | 1688 | ||
1691 | void udp4_proc_exit(void) | 1689 | void udp4_proc_exit(void) |
1692 | { | 1690 | { |
1693 | udp_proc_unregister(&udp4_seq_afinfo); | 1691 | unregister_pernet_subsys(&udp4_net_ops); |
1694 | } | 1692 | } |
1695 | #endif /* CONFIG_PROC_FS */ | 1693 | #endif /* CONFIG_PROC_FS */ |
1696 | 1694 | ||
@@ -1717,12 +1715,12 @@ EXPORT_SYMBOL(udp_disconnect); | |||
1717 | EXPORT_SYMBOL(udp_hash); | 1715 | EXPORT_SYMBOL(udp_hash); |
1718 | EXPORT_SYMBOL(udp_hash_lock); | 1716 | EXPORT_SYMBOL(udp_hash_lock); |
1719 | EXPORT_SYMBOL(udp_ioctl); | 1717 | EXPORT_SYMBOL(udp_ioctl); |
1720 | EXPORT_SYMBOL(udp_get_port); | ||
1721 | EXPORT_SYMBOL(udp_prot); | 1718 | EXPORT_SYMBOL(udp_prot); |
1722 | EXPORT_SYMBOL(udp_sendmsg); | 1719 | EXPORT_SYMBOL(udp_sendmsg); |
1723 | EXPORT_SYMBOL(udp_lib_getsockopt); | 1720 | EXPORT_SYMBOL(udp_lib_getsockopt); |
1724 | EXPORT_SYMBOL(udp_lib_setsockopt); | 1721 | EXPORT_SYMBOL(udp_lib_setsockopt); |
1725 | EXPORT_SYMBOL(udp_poll); | 1722 | EXPORT_SYMBOL(udp_poll); |
1723 | EXPORT_SYMBOL(udp_lib_get_port); | ||
1726 | 1724 | ||
1727 | #ifdef CONFIG_PROC_FS | 1725 | #ifdef CONFIG_PROC_FS |
1728 | EXPORT_SYMBOL(udp_proc_register); | 1726 | EXPORT_SYMBOL(udp_proc_register); |
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 6c55828e41ba..7288bf7977fb 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h | |||
@@ -8,11 +8,7 @@ | |||
8 | extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int ); | 8 | extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int ); |
9 | extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []); | 9 | extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []); |
10 | 10 | ||
11 | extern int __udp_lib_get_port(struct sock *sk, unsigned short snum, | 11 | extern int udp_v4_get_port(struct sock *sk, unsigned short snum); |
12 | struct hlist_head udptable[], | ||
13 | int (*)(const struct sock*,const struct sock*)); | ||
14 | extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *); | ||
15 | |||
16 | 12 | ||
17 | extern int udp_setsockopt(struct sock *sk, int level, int optname, | 13 | extern int udp_setsockopt(struct sock *sk, int level, int optname, |
18 | char __user *optval, int optlen); | 14 | char __user *optval, int optlen); |
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 001b881ca36f..72ce26b6c4d3 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c | |||
@@ -17,17 +17,6 @@ DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly; | |||
17 | 17 | ||
18 | struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; | 18 | struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; |
19 | 19 | ||
20 | int udplite_get_port(struct sock *sk, unsigned short p, | ||
21 | int (*c)(const struct sock *, const struct sock *)) | ||
22 | { | ||
23 | return __udp_lib_get_port(sk, p, udplite_hash, c); | ||
24 | } | ||
25 | |||
26 | static int udplite_v4_get_port(struct sock *sk, unsigned short snum) | ||
27 | { | ||
28 | return udplite_get_port(sk, snum, ipv4_rcv_saddr_equal); | ||
29 | } | ||
30 | |||
31 | static int udplite_rcv(struct sk_buff *skb) | 20 | static int udplite_rcv(struct sk_buff *skb) |
32 | { | 21 | { |
33 | return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); | 22 | return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); |
@@ -42,10 +31,9 @@ static struct net_protocol udplite_protocol = { | |||
42 | .handler = udplite_rcv, | 31 | .handler = udplite_rcv, |
43 | .err_handler = udplite_err, | 32 | .err_handler = udplite_err, |
44 | .no_policy = 1, | 33 | .no_policy = 1, |
34 | .netns_ok = 1, | ||
45 | }; | 35 | }; |
46 | 36 | ||
47 | DEFINE_PROTO_INUSE(udplite) | ||
48 | |||
49 | struct proto udplite_prot = { | 37 | struct proto udplite_prot = { |
50 | .name = "UDP-Lite", | 38 | .name = "UDP-Lite", |
51 | .owner = THIS_MODULE, | 39 | .owner = THIS_MODULE, |
@@ -63,13 +51,13 @@ struct proto udplite_prot = { | |||
63 | .backlog_rcv = udp_queue_rcv_skb, | 51 | .backlog_rcv = udp_queue_rcv_skb, |
64 | .hash = udp_lib_hash, | 52 | .hash = udp_lib_hash, |
65 | .unhash = udp_lib_unhash, | 53 | .unhash = udp_lib_unhash, |
66 | .get_port = udplite_v4_get_port, | 54 | .get_port = udp_v4_get_port, |
67 | .obj_size = sizeof(struct udp_sock), | 55 | .obj_size = sizeof(struct udp_sock), |
56 | .h.udp_hash = udplite_hash, | ||
68 | #ifdef CONFIG_COMPAT | 57 | #ifdef CONFIG_COMPAT |
69 | .compat_setsockopt = compat_udp_setsockopt, | 58 | .compat_setsockopt = compat_udp_setsockopt, |
70 | .compat_getsockopt = compat_udp_getsockopt, | 59 | .compat_getsockopt = compat_udp_getsockopt, |
71 | #endif | 60 | #endif |
72 | REF_PROTO_INUSE(udplite) | ||
73 | }; | 61 | }; |
74 | 62 | ||
75 | static struct inet_protosw udplite4_protosw = { | 63 | static struct inet_protosw udplite4_protosw = { |
@@ -83,15 +71,42 @@ static struct inet_protosw udplite4_protosw = { | |||
83 | }; | 71 | }; |
84 | 72 | ||
85 | #ifdef CONFIG_PROC_FS | 73 | #ifdef CONFIG_PROC_FS |
86 | static struct file_operations udplite4_seq_fops; | ||
87 | static struct udp_seq_afinfo udplite4_seq_afinfo = { | 74 | static struct udp_seq_afinfo udplite4_seq_afinfo = { |
88 | .owner = THIS_MODULE, | ||
89 | .name = "udplite", | 75 | .name = "udplite", |
90 | .family = AF_INET, | 76 | .family = AF_INET, |
91 | .hashtable = udplite_hash, | 77 | .hashtable = udplite_hash, |
92 | .seq_show = udp4_seq_show, | 78 | .seq_fops = { |
93 | .seq_fops = &udplite4_seq_fops, | 79 | .owner = THIS_MODULE, |
80 | }, | ||
81 | .seq_ops = { | ||
82 | .show = udp4_seq_show, | ||
83 | }, | ||
84 | }; | ||
85 | |||
86 | static int udplite4_proc_init_net(struct net *net) | ||
87 | { | ||
88 | return udp_proc_register(net, &udplite4_seq_afinfo); | ||
89 | } | ||
90 | |||
91 | static void udplite4_proc_exit_net(struct net *net) | ||
92 | { | ||
93 | udp_proc_unregister(net, &udplite4_seq_afinfo); | ||
94 | } | ||
95 | |||
96 | static struct pernet_operations udplite4_net_ops = { | ||
97 | .init = udplite4_proc_init_net, | ||
98 | .exit = udplite4_proc_exit_net, | ||
94 | }; | 99 | }; |
100 | |||
101 | static __init int udplite4_proc_init(void) | ||
102 | { | ||
103 | return register_pernet_subsys(&udplite4_net_ops); | ||
104 | } | ||
105 | #else | ||
106 | static inline int udplite4_proc_init(void) | ||
107 | { | ||
108 | return 0; | ||
109 | } | ||
95 | #endif | 110 | #endif |
96 | 111 | ||
97 | void __init udplite4_register(void) | 112 | void __init udplite4_register(void) |
@@ -104,18 +119,15 @@ void __init udplite4_register(void) | |||
104 | 119 | ||
105 | inet_register_protosw(&udplite4_protosw); | 120 | inet_register_protosw(&udplite4_protosw); |
106 | 121 | ||
107 | #ifdef CONFIG_PROC_FS | 122 | if (udplite4_proc_init()) |
108 | if (udp_proc_register(&udplite4_seq_afinfo)) /* udplite4_proc_init() */ | 123 | printk(KERN_ERR "%s: Cannot register /proc!\n", __func__); |
109 | printk(KERN_ERR "%s: Cannot register /proc!\n", __FUNCTION__); | ||
110 | #endif | ||
111 | return; | 124 | return; |
112 | 125 | ||
113 | out_unregister_proto: | 126 | out_unregister_proto: |
114 | proto_unregister(&udplite_prot); | 127 | proto_unregister(&udplite_prot); |
115 | out_register_err: | 128 | out_register_err: |
116 | printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __FUNCTION__); | 129 | printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); |
117 | } | 130 | } |
118 | 131 | ||
119 | EXPORT_SYMBOL(udplite_hash); | 132 | EXPORT_SYMBOL(udplite_hash); |
120 | EXPORT_SYMBOL(udplite_prot); | 133 | EXPORT_SYMBOL(udplite_prot); |
121 | EXPORT_SYMBOL(udplite_get_port); | ||
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 10ed70491434..c63de0a72aba 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -221,7 +221,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, | |||
221 | xdst = (struct xfrm_dst *)dst; | 221 | xdst = (struct xfrm_dst *)dst; |
222 | if (xdst->u.rt.idev->dev == dev) { | 222 | if (xdst->u.rt.idev->dev == dev) { |
223 | struct in_device *loopback_idev = | 223 | struct in_device *loopback_idev = |
224 | in_dev_get(dev->nd_net->loopback_dev); | 224 | in_dev_get(dev_net(dev)->loopback_dev); |
225 | BUG_ON(!loopback_idev); | 225 | BUG_ON(!loopback_idev); |
226 | 226 | ||
227 | do { | 227 | do { |