Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/af_inet.c | 87
-rw-r--r--  net/ipv4/arp.c | 9
-rw-r--r--  net/ipv4/datagram.c | 2
-rw-r--r--  net/ipv4/devinet.c | 35
-rw-r--r--  net/ipv4/fib_frontend.c | 19
-rw-r--r--  net/ipv4/fib_hash.c | 8
-rw-r--r--  net/ipv4/fib_rules.c | 4
-rw-r--r--  net/ipv4/fib_semantics.c | 2
-rw-r--r--  net/ipv4/fib_trie.c | 30
-rw-r--r--  net/ipv4/icmp.c | 31
-rw-r--r--  net/ipv4/igmp.c | 28
-rw-r--r--  net/ipv4/inet_connection_sock.c | 13
-rw-r--r--  net/ipv4/inet_diag.c | 2
-rw-r--r--  net/ipv4/inet_hashtables.c | 23
-rw-r--r--  net/ipv4/inet_timewait_sock.c | 21
-rw-r--r--  net/ipv4/inetpeer.c | 2
-rw-r--r--  net/ipv4/ip_forward.c | 11
-rw-r--r--  net/ipv4/ip_fragment.c | 61
-rw-r--r--  net/ipv4/ip_gre.c | 32
-rw-r--r--  net/ipv4/ip_input.c | 40
-rw-r--r--  net/ipv4/ip_options.c | 2
-rw-r--r--  net/ipv4/ip_output.c | 35
-rw-r--r--  net/ipv4/ip_sockglue.c | 2
-rw-r--r--  net/ipv4/ipconfig.c | 6
-rw-r--r--  net/ipv4/ipip.c | 24
-rw-r--r--  net/ipv4/ipmr.c | 125
-rw-r--r--  net/ipv4/ipvs/ip_vs_app.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_conn.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_core.c | 5
-rw-r--r--  net/ipv4/ipvs/ip_vs_ctl.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_dh.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_est.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_ftp.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_lblc.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_lblcr.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_lc.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_nq.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_proto.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_proto_ah.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_proto_esp.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_proto_tcp.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_proto_udp.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_rr.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_sched.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_sed.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_sh.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_sync.c | 433
-rw-r--r--  net/ipv4/ipvs/ip_vs_wlc.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_wrr.c | 2
-rw-r--r--  net/ipv4/ipvs/ip_vs_xmit.c | 2
-rw-r--r--  net/ipv4/netfilter/Kconfig | 15
-rw-r--r--  net/ipv4/netfilter/Makefile | 1
-rw-r--r--  net/ipv4/netfilter/ip_queue.c | 5
-rw-r--r--  net/ipv4/netfilter/ipt_MASQUERADE.c | 2
-rw-r--r--  net/ipv4/netfilter/iptable_security.c | 180
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 5
-rw-r--r--  net/ipv4/netfilter/nf_nat_proto_sctp.c | 4
-rw-r--r--  net/ipv4/proc.c | 113
-rw-r--r--  net/ipv4/protocol.c | 2
-rw-r--r--  net/ipv4/raw.c | 12
-rw-r--r--  net/ipv4/route.c | 262
-rw-r--r--  net/ipv4/syncookies.c | 8
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c | 5
-rw-r--r--  net/ipv4/tcp.c | 102
-rw-r--r--  net/ipv4/tcp_diag.c | 2
-rw-r--r--  net/ipv4/tcp_input.c | 236
-rw-r--r--  net/ipv4/tcp_ipv4.c | 324
-rw-r--r--  net/ipv4/tcp_minisocks.c | 12
-rw-r--r--  net/ipv4/tcp_output.c | 466
-rw-r--r--  net/ipv4/tcp_timer.c | 27
-rw-r--r--  net/ipv4/udp.c | 76
-rw-r--r--  net/ipv4/udp_impl.h | 2
-rw-r--r--  net/ipv4/udplite.c | 3
73 files changed, 1639 insertions, 1361 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 24eca23c2db3..dd919d84285f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,8 +5,6 @@
  *
  *		PF_INET protocol family socket handler.
  *
- *		Version: $Id: af_inet.c,v 1.137 2002/02/01 22:01:03 davem Exp $
- *
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *		Florian La Roche, <flla@stud.uni-sb.de>
@@ -112,12 +110,11 @@
 #include <net/ipip.h>
 #include <net/inet_common.h>
 #include <net/xfrm.h>
+#include <net/net_namespace.h>
 #ifdef CONFIG_IP_MROUTE
 #include <linux/mroute.h>
 #endif
 
-DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly;
-
 extern void ip_mc_drop_socket(struct sock *sk);
 
 /* The inetsw table contains everything that inet_create needs to
@@ -1341,50 +1338,70 @@ static struct net_protocol icmp_protocol = {
 	.netns_ok =	1,
 };
 
-static int __init init_ipv4_mibs(void)
+static __net_init int ipv4_mib_init_net(struct net *net)
 {
-	if (snmp_mib_init((void **)net_statistics,
-			  sizeof(struct linux_mib)) < 0)
-		goto err_net_mib;
-	if (snmp_mib_init((void **)ip_statistics,
-			  sizeof(struct ipstats_mib)) < 0)
-		goto err_ip_mib;
-	if (snmp_mib_init((void **)icmp_statistics,
-			  sizeof(struct icmp_mib)) < 0)
-		goto err_icmp_mib;
-	if (snmp_mib_init((void **)icmpmsg_statistics,
-			  sizeof(struct icmpmsg_mib)) < 0)
-		goto err_icmpmsg_mib;
-	if (snmp_mib_init((void **)tcp_statistics,
+	if (snmp_mib_init((void **)net->mib.tcp_statistics,
 			  sizeof(struct tcp_mib)) < 0)
 		goto err_tcp_mib;
-	if (snmp_mib_init((void **)udp_statistics,
+	if (snmp_mib_init((void **)net->mib.ip_statistics,
+			  sizeof(struct ipstats_mib)) < 0)
+		goto err_ip_mib;
+	if (snmp_mib_init((void **)net->mib.net_statistics,
+			  sizeof(struct linux_mib)) < 0)
+		goto err_net_mib;
+	if (snmp_mib_init((void **)net->mib.udp_statistics,
 			  sizeof(struct udp_mib)) < 0)
 		goto err_udp_mib;
-	if (snmp_mib_init((void **)udplite_statistics,
+	if (snmp_mib_init((void **)net->mib.udplite_statistics,
 			  sizeof(struct udp_mib)) < 0)
 		goto err_udplite_mib;
+	if (snmp_mib_init((void **)net->mib.icmp_statistics,
+			  sizeof(struct icmp_mib)) < 0)
+		goto err_icmp_mib;
+	if (snmp_mib_init((void **)net->mib.icmpmsg_statistics,
+			  sizeof(struct icmpmsg_mib)) < 0)
+		goto err_icmpmsg_mib;
 
-	tcp_mib_init();
-
+	tcp_mib_init(net);
 	return 0;
 
-err_udplite_mib:
-	snmp_mib_free((void **)udp_statistics);
-err_udp_mib:
-	snmp_mib_free((void **)tcp_statistics);
-err_tcp_mib:
-	snmp_mib_free((void **)icmpmsg_statistics);
 err_icmpmsg_mib:
-	snmp_mib_free((void **)icmp_statistics);
+	snmp_mib_free((void **)net->mib.icmp_statistics);
 err_icmp_mib:
-	snmp_mib_free((void **)ip_statistics);
-err_ip_mib:
-	snmp_mib_free((void **)net_statistics);
+	snmp_mib_free((void **)net->mib.udplite_statistics);
+err_udplite_mib:
+	snmp_mib_free((void **)net->mib.udp_statistics);
+err_udp_mib:
+	snmp_mib_free((void **)net->mib.net_statistics);
 err_net_mib:
+	snmp_mib_free((void **)net->mib.ip_statistics);
+err_ip_mib:
+	snmp_mib_free((void **)net->mib.tcp_statistics);
+err_tcp_mib:
 	return -ENOMEM;
 }
 
+static __net_exit void ipv4_mib_exit_net(struct net *net)
+{
+	snmp_mib_free((void **)net->mib.icmpmsg_statistics);
+	snmp_mib_free((void **)net->mib.icmp_statistics);
+	snmp_mib_free((void **)net->mib.udplite_statistics);
+	snmp_mib_free((void **)net->mib.udp_statistics);
+	snmp_mib_free((void **)net->mib.net_statistics);
+	snmp_mib_free((void **)net->mib.ip_statistics);
+	snmp_mib_free((void **)net->mib.tcp_statistics);
+}
+
+static __net_initdata struct pernet_operations ipv4_mib_ops = {
+	.init = ipv4_mib_init_net,
+	.exit = ipv4_mib_exit_net,
+};
+
+static int __init init_ipv4_mibs(void)
+{
+	return register_pernet_subsys(&ipv4_mib_ops);
+}
+
 static int ipv4_proc_init(void);
 
 /*
@@ -1481,14 +1498,15 @@ static int __init inet_init(void)
 	 *	Initialise the multicast router
 	 */
 #if defined(CONFIG_IP_MROUTE)
-	ip_mr_init();
+	if (ip_mr_init())
+		printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n");
 #endif
 	/*
 	 *	Initialise per-cpu ipv4 mibs
 	 */
 
 	if (init_ipv4_mibs())
-		printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ;
+		printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n");
 
 	ipv4_proc_init();
 
@@ -1560,5 +1578,4 @@ EXPORT_SYMBOL(inet_sock_destruct);
 EXPORT_SYMBOL(inet_stream_connect);
 EXPORT_SYMBOL(inet_stream_ops);
 EXPORT_SYMBOL(inet_unregister_protosw);
-EXPORT_SYMBOL(net_statistics);
 EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 9b539fa9fe18..b043eda60b04 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1,7 +1,5 @@
 /* linux/net/ipv4/arp.c
  *
- * Version:	$Id: arp.c,v 1.99 2001/08/30 22:55:42 davem Exp $
- *
  * Copyright (C) 1994 by Florian La Roche
  *
  * This module implements the Address Resolution Protocol ARP (RFC 826),
@@ -423,11 +421,12 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 	struct rtable *rt;
 	int flag = 0;
 	/*unsigned long now; */
+	struct net *net = dev_net(dev);
 
-	if (ip_route_output_key(dev_net(dev), &rt, &fl) < 0)
+	if (ip_route_output_key(net, &rt, &fl) < 0)
 		return 1;
 	if (rt->u.dst.dev != dev) {
-		NET_INC_STATS_BH(LINUX_MIB_ARPFILTER);
+		NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
 		flag = 1;
 	}
 	ip_rt_put(rt);
@@ -1199,7 +1198,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo
 	switch (event) {
 	case NETDEV_CHANGEADDR:
 		neigh_changeaddr(&arp_tbl, dev);
-		rt_cache_flush(0);
+		rt_cache_flush(dev_net(dev), 0);
 		break;
 	default:
 		break;
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 0c0c73f368ce..5e6c5a0f3fde 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -52,7 +52,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 			       inet->sport, usin->sin_port, sk, 1);
 	if (err) {
 		if (err == -ENETUNREACH)
-			IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 		return err;
 	}
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 79a7ef6209ff..2e667e2f90df 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1,8 +1,6 @@
 /*
  *	NET3	IP device support routines.
  *
- *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
@@ -170,6 +168,8 @@ static struct in_device *inetdev_init(struct net_device *dev)
 	in_dev->dev = dev;
 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
 		goto out_kfree;
+	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
+		dev_disable_lro(dev);
 	/* Reference in_dev->dev */
 	dev_hold(dev);
 	/* Account for reference dev->ip_ptr (below) */
@@ -1013,7 +1013,7 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 		if (named++ == 0)
-			continue;
+			goto skip;
 		dot = strchr(old, ':');
 		if (dot == NULL) {
 			sprintf(old, ":%d", named);
@@ -1024,6 +1024,8 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
 		} else {
 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
 		}
+skip:
+		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
 	}
 }
 
@@ -1241,6 +1243,8 @@ static void inet_forward_change(struct net *net)
 	read_lock(&dev_base_lock);
 	for_each_netdev(net, dev) {
 		struct in_device *in_dev;
+		if (on)
+			dev_disable_lro(dev);
 		rcu_read_lock();
 		in_dev = __in_dev_get_rcu(dev);
 		if (in_dev)
@@ -1248,8 +1252,6 @@ static void inet_forward_change(struct net *net)
 		rcu_read_unlock();
 	}
 	read_unlock(&dev_base_lock);
-
-	rt_cache_flush(0);
 }
 
 static int devinet_conf_proc(ctl_table *ctl, int write,
@@ -1335,10 +1337,19 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 	if (write && *valp != val) {
 		struct net *net = ctl->extra2;
 
-		if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
-			inet_forward_change(net);
-		else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
-			rt_cache_flush(0);
+		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
+			rtnl_lock();
+			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
+				inet_forward_change(net);
+			} else if (*valp) {
+				struct ipv4_devconf *cnf = ctl->extra1;
+				struct in_device *idev =
+					container_of(cnf, struct in_device, cnf);
+				dev_disable_lro(idev->dev);
+			}
+			rtnl_unlock();
+			rt_cache_flush(net, 0);
+		}
 	}
 
 	return ret;
@@ -1351,9 +1362,10 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
 	int *valp = ctl->data;
 	int val = *valp;
 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+	struct net *net = ctl->extra2;
 
 	if (write && *valp != val)
-		rt_cache_flush(0);
+		rt_cache_flush(net, 0);
 
 	return ret;
 }
@@ -1364,9 +1376,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
 {
 	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
 				      newval, newlen);
+	struct net *net = table->extra2;
 
 	if (ret == 1)
-		rt_cache_flush(0);
+		rt_cache_flush(net, 0);
 
 	return ret;
 }
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 0b2ac6a3d903..65c1503f8cc8 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -5,8 +5,6 @@
  *
  *		IPv4 Forwarding Information Base: FIB frontend.
  *
- * Version:	$Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  * This program is free software; you can redistribute it and/or
@@ -146,7 +144,7 @@ static void fib_flush(struct net *net)
 	}
 
 	if (flushed)
-		rt_cache_flush(-1);
+		rt_cache_flush(net, -1);
 }
 
 /*
@@ -899,21 +897,22 @@ static void fib_disable_ip(struct net_device *dev, int force)
 {
 	if (fib_sync_down_dev(dev, force))
 		fib_flush(dev_net(dev));
-	rt_cache_flush(0);
+	rt_cache_flush(dev_net(dev), 0);
 	arp_ifdown(dev);
 }
 
 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
 	struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
+	struct net_device *dev = ifa->ifa_dev->dev;
 
 	switch (event) {
 	case NETDEV_UP:
 		fib_add_ifaddr(ifa);
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-		fib_sync_up(ifa->ifa_dev->dev);
+		fib_sync_up(dev);
 #endif
-		rt_cache_flush(-1);
+		rt_cache_flush(dev_net(dev), -1);
 		break;
 	case NETDEV_DOWN:
 		fib_del_ifaddr(ifa);
@@ -921,9 +920,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 			/* Last address was deleted from this interface.
 			   Disable IP.
 			 */
-			fib_disable_ip(ifa->ifa_dev->dev, 1);
+			fib_disable_ip(dev, 1);
 		} else {
-			rt_cache_flush(-1);
+			rt_cache_flush(dev_net(dev), -1);
 		}
 		break;
 	}
@@ -951,14 +950,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		fib_sync_up(dev);
 #endif
-		rt_cache_flush(-1);
+		rt_cache_flush(dev_net(dev), -1);
 		break;
 	case NETDEV_DOWN:
 		fib_disable_ip(dev, 0);
 		break;
 	case NETDEV_CHANGEMTU:
 	case NETDEV_CHANGE:
-		rt_cache_flush(0);
+		rt_cache_flush(dev_net(dev), 0);
 		break;
 	}
 	return NOTIFY_DONE;
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 2e2fc3376ac9..c8cac6c7f881 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -5,8 +5,6 @@
  *
  *		IPv4 FIB: lookup engine and maintenance routines.
  *
- * Version:	$Id: fib_hash.c,v 1.13 2001/10/31 21:55:54 davem Exp $
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  * This program is free software; you can redistribute it and/or
@@ -474,7 +472,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
 
 			fib_release_info(fi_drop);
 			if (state & FA_S_ACCESSED)
-				rt_cache_flush(-1);
+				rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
 			rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id,
 				  &cfg->fc_nlinfo, NLM_F_REPLACE);
 			return 0;
@@ -534,7 +532,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
 
 	if (new_f)
 		fz->fz_nent++;
-	rt_cache_flush(-1);
+	rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
 
 	rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
 		  &cfg->fc_nlinfo, 0);
@@ -616,7 +614,7 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
 		write_unlock_bh(&fib_hash_lock);
 
 		if (fa->fa_state & FA_S_ACCESSED)
-			rt_cache_flush(-1);
+			rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
 		fn_free_alias(fa, f);
 		if (kill_fn) {
 			fn_free_node(f);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 1fb56876be54..6080d7120821 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -258,9 +258,9 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
 	       + nla_total_size(4); /* flow */
 }
 
-static void fib4_rule_flush_cache(void)
+static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
 {
-	rt_cache_flush(-1);
+	rt_cache_flush(ops->fro_net, -1);
 }
 
 static struct fib_rules_ops fib4_rules_ops_template = {
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0d4d72827e4b..ded2ae34eab1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -5,8 +5,6 @@
  *
  *		IPv4 Forwarding Information Base: semantics.
  *
- * Version:	$Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  * This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e1600ad8fb0e..5cb72786a8af 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -22,8 +22,6 @@
  * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
  * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
  *
- * Version:	$Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
- *
  *
  * Code from fib_hash has been reused which includes the following header:
  *
@@ -1273,7 +1271,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 
 			fib_release_info(fi_drop);
 			if (state & FA_S_ACCESSED)
-				rt_cache_flush(-1);
+				rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
 			rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
 				  tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
 
@@ -1318,7 +1316,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 	list_add_tail_rcu(&new_fa->fa_list,
 			  (fa ? &fa->fa_list : fa_head));
 
-	rt_cache_flush(-1);
+	rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
 		  &cfg->fc_nlinfo, 0);
 succeeded:
@@ -1661,7 +1659,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
 		trie_leaf_remove(t, l);
 
 	if (fa->fa_state & FA_S_ACCESSED)
-		rt_cache_flush(-1);
+		rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
 
 	fib_release_info(fa->fa_info);
 	alias_free_mem_rcu(fa);
@@ -2253,25 +2251,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
 
 static int fib_triestat_seq_open(struct inode *inode, struct file *file)
 {
-	int err;
-	struct net *net;
-
-	net = get_proc_net(inode);
-	if (net == NULL)
-		return -ENXIO;
-	err = single_open(file, fib_triestat_seq_show, net);
-	if (err < 0) {
-		put_net(net);
-		return err;
-	}
-	return 0;
-}
-
-static int fib_triestat_seq_release(struct inode *ino, struct file *f)
-{
-	struct seq_file *seq = f->private_data;
-	put_net(seq->private);
-	return single_release(ino, f);
+	return single_open_net(inode, file, fib_triestat_seq_show);
 }
 
 static const struct file_operations fib_triestat_fops = {
@@ -2279,7 +2259,7 @@ static const struct file_operations fib_triestat_fops = {
 	.open	= fib_triestat_seq_open,
 	.read	= seq_read,
 	.llseek	= seq_lseek,
-	.release = fib_triestat_seq_release,
+	.release = single_release_net,
 };
 
 static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 87397351ddac..860558633b2c 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -3,8 +3,6 @@
  *
  *	Alan Cox, <alan@redhat.com>
  *
- *	Version: $Id: icmp.c,v 1.85 2002/02/01 22:01:03 davem Exp $
- *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
  *	as published by the Free Software Foundation; either version
@@ -113,12 +111,6 @@ struct icmp_bxm {
 	unsigned char  optbuf[40];
 };
 
-/*
- *	Statistics
- */
-DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly;
-DEFINE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics) __read_mostly;
-
 /* An array of errno for error messages from dest unreach. */
 /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */
 
@@ -298,10 +290,10 @@ out:
 /*
  *	Maintain the counters used in the SNMP statistics for outgoing ICMP
  */
-void icmp_out_count(unsigned char type)
+void icmp_out_count(struct net *net, unsigned char type)
 {
-	ICMPMSGOUT_INC_STATS(type);
-	ICMP_INC_STATS(ICMP_MIB_OUTMSGS);
+	ICMPMSGOUT_INC_STATS(net, type);
+	ICMP_INC_STATS(net, ICMP_MIB_OUTMSGS);
 }
 
 /*
@@ -765,7 +757,7 @@ static void icmp_unreach(struct sk_buff *skb)
 out:
 	return;
 out_err:
-	ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+	ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 	goto out;
 }
 
@@ -805,7 +797,7 @@ static void icmp_redirect(struct sk_buff *skb)
 out:
 	return;
 out_err:
-	ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+	ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
 	goto out;
 }
 
@@ -876,7 +868,7 @@ static void icmp_timestamp(struct sk_buff *skb)
 out:
 	return;
 out_err:
-	ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+	ICMP_INC_STATS_BH(dev_net(skb->dst->dev), ICMP_MIB_INERRORS);
 	goto out;
 }
 
@@ -975,6 +967,7 @@ int icmp_rcv(struct sk_buff *skb)
 {
 	struct icmphdr *icmph;
 	struct rtable *rt = skb->rtable;
+	struct net *net = dev_net(rt->u.dst.dev);
 
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 		int nh;
@@ -995,7 +988,7 @@ int icmp_rcv(struct sk_buff *skb)
 		skb_set_network_header(skb, nh);
 	}
 
-	ICMP_INC_STATS_BH(ICMP_MIB_INMSGS);
+	ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS);
 
 	switch (skb->ip_summed) {
 	case CHECKSUM_COMPLETE:
@@ -1013,7 +1006,7 @@ int icmp_rcv(struct sk_buff *skb)
 
 	icmph = icmp_hdr(skb);
 
-	ICMPMSGIN_INC_STATS_BH(icmph->type);
+	ICMPMSGIN_INC_STATS_BH(net, icmph->type);
 	/*
 	 *	18 is the highest 'known' ICMP type. Anything else is a mystery
 	 *
@@ -1029,9 +1022,6 @@ int icmp_rcv(struct sk_buff *skb)
 	 */
 
 	if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
-		struct net *net;
-
-		net = dev_net(rt->u.dst.dev);
 		/*
 		 *	RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be
 		 *	silently ignored (we let user decide with a sysctl).
@@ -1057,7 +1047,7 @@ drop:
 	kfree_skb(skb);
 	return 0;
 error:
-	ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+	ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 	goto drop;
 }
 
@@ -1217,5 +1207,4 @@ int __init icmp_init(void)
 
 EXPORT_SYMBOL(icmp_err_convert);
 EXPORT_SYMBOL(icmp_send);
-EXPORT_SYMBOL(icmp_statistics);
 EXPORT_SYMBOL(xrlim_allow);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 2769dc4a4c84..6203ece53606 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -8,8 +8,6 @@
  *	the older version didn't come out right using gcc 2.5.8, the newer one
  *	seems to fall out with gcc 2.6.2.
  *
- *	Version: $Id: igmp.c,v 1.47 2002/02/01 22:01:03 davem Exp $
- *
  *	Authors:
  *		Alan Cox <Alan.Cox@linux.org>
  *
@@ -1198,7 +1196,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
+	if (!net_eq(dev_net(in_dev->dev), &init_net))
 		return;
 
 	for (im=in_dev->mc_list; im; im=im->next) {
@@ -1280,7 +1278,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
 
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
+	if (!net_eq(dev_net(in_dev->dev), &init_net))
 		return;
 
 	for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
@@ -1310,7 +1308,7 @@ void ip_mc_down(struct in_device *in_dev)
 
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
+	if (!net_eq(dev_net(in_dev->dev), &init_net))
 		return;
 
 	for (i=in_dev->mc_list; i; i=i->next)
@@ -1333,7 +1331,7 @@ void ip_mc_init_dev(struct in_device *in_dev)
 {
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
+	if (!net_eq(dev_net(in_dev->dev), &init_net))
 		return;
 
 	in_dev->mc_tomb = NULL;
@@ -1359,7 +1357,7 @@ void ip_mc_up(struct in_device *in_dev)
 
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
+	if (!net_eq(dev_net(in_dev->dev), &init_net))
 		return;
 
 	ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
@@ -1378,7 +1376,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
 
 	ASSERT_RTNL();
 
-	if (dev_net(in_dev->dev) != &init_net)
+	if (!net_eq(dev_net(in_dev->dev), &init_net))
 		return;
 
 	/* Deactivate timers */
@@ -1762,7 +1760,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	if (sock_net(sk) != &init_net)
+	if (!net_eq(sock_net(sk), &init_net))
 		return -EPROTONOSUPPORT;
 
 	rtnl_lock();
@@ -1833,7 +1831,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 	u32 ifindex;
 	int ret = -EADDRNOTAVAIL;
 
-	if (sock_net(sk) != &init_net)
+	if (!net_eq(sock_net(sk), &init_net))
 		return -EPROTONOSUPPORT;
 
 	rtnl_lock();
@@ -1881,7 +1879,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	if (sock_net(sk) != &init_net)
+	if (!net_eq(sock_net(sk), &init_net))
 		return -EPROTONOSUPPORT;
 
 	rtnl_lock();
@@ -2017,7 +2015,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 	    msf->imsf_fmode != MCAST_EXCLUDE)
 		return -EINVAL;
 
-	if (sock_net(sk) != &init_net)
+	if (!net_eq(sock_net(sk), &init_net))
 		return -EPROTONOSUPPORT;
 
 	rtnl_lock();
@@ -2100,7 +2098,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	if (sock_net(sk) != &init_net)
+	if (!net_eq(sock_net(sk), &init_net))
 		return -EPROTONOSUPPORT;
 
 	rtnl_lock();
@@ -2165,7 +2163,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	if (sock_net(sk) != &init_net)
+	if (!net_eq(sock_net(sk), &init_net))
 		return -EPROTONOSUPPORT;
 
 	rtnl_lock();
@@ -2252,7 +2250,7 @@ void ip_mc_drop_socket(struct sock *sk)
 	if (inet->mc_list == NULL)
 		return;
 
-	if (sock_net(sk) != &init_net)
+	if (!net_eq(sock_net(sk), &init_net))
 		return;
 
 	rtnl_lock();
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index ec834480abe7..bb81c958b744 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -103,7 +103,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 		rover = net_random() % remaining + low;
 
 		do {
-			head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
+			head = &hashinfo->bhash[inet_bhashfn(net, rover,
+					hashinfo->bhash_size)];
 			spin_lock(&head->lock);
 			inet_bind_bucket_for_each(tb, node, &head->chain)
 				if (tb->ib_net == net && tb->port == rover)
@@ -130,7 +131,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 		 */
 		snum = rover;
 	} else {
-		head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
+		head = &hashinfo->bhash[inet_bhashfn(net, snum,
+				hashinfo->bhash_size)];
 		spin_lock(&head->lock);
 		inet_bind_bucket_for_each(tb, node, &head->chain)
 			if (tb->ib_net == net && tb->port == snum)
@@ -336,15 +338,16 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
 			    .uli_u = { .ports =
 				       { .sport = inet_sk(sk)->sport,
 					 .dport = ireq->rmt_port } } };
+	struct net *net = sock_net(sk);
 
 	security_req_classify_flow(req, &fl);
-	if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) {
-		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+	if (ip_route_output_flow(net, &rt, &fl, sk, 0)) {
+		IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
 	}
 	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
 		ip_rt_put(rt);
-		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+		IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
 	}
 	return &rt->u.dst;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index da97695e7096..c10036e7a463 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1,8 +1,6 @@
 /*
  * inet_diag.c	Module for monitoring INET transport protocols sockets.
  *
- * Version:	$Id: inet_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
- *
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
  * This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2023d37b2708..115f53722d20 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -70,7 +70,8 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
 static void __inet_put_port(struct sock *sk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-	const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
+	const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->num,
+			hashinfo->bhash_size);
 	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
 	struct inet_bind_bucket *tb;
 
@@ -95,7 +96,8 @@ EXPORT_SYMBOL(inet_put_port);
 void __inet_inherit_port(struct sock *sk, struct sock *child)
 {
 	struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
-	const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
+	const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->num,
+			table->bhash_size);
 	struct inet_bind_hashbucket *head = &table->bhash[bhash];
 	struct inet_bind_bucket *tb;
 
@@ -192,7 +194,7 @@ struct sock *__inet_lookup_listener(struct net *net,
 	const struct hlist_head *head;
 
 	read_lock(&hashinfo->lhash_lock);
-	head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
+	head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)];
 	if (!hlist_empty(head)) {
 		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
 
@@ -225,7 +227,7 @@ struct sock * __inet_lookup_established(struct net *net,
 	/* Optimize here for direct hit, only listening connections can
 	 * have wildcards anyways.
 	 */
-	unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
+	unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
 	rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
 
@@ -265,13 +267,13 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 	int dif = sk->sk_bound_dev_if;
 	INET_ADDR_COOKIE(acookie, saddr, daddr)
 	const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
-	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
+	struct net *net = sock_net(sk);
+	unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
 	rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
 	struct sock *sk2;
 	const struct hlist_node *node;
 	struct inet_timewait_sock *tw;
-	struct net *net = sock_net(sk);
 
 	prefetch(head->chain.first);
 	write_lock(lock);
@@ -310,11 +312,11 @@ unique:
 
 	if (twp) {
 		*twp = tw;
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
 	} else if (tw) {
 		/* Silly. Should hash-dance instead... */
 		inet_twsk_deschedule(tw, death_row);
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
 
 		inet_twsk_put(tw);
 	}
@@ -438,7 +440,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		local_bh_disable();
 		for (i = 1; i <= remaining; i++) {
 			port = low + (i + offset) % remaining;
-			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+			head = &hinfo->bhash[inet_bhashfn(net, port,
+					hinfo->bhash_size)];
 			spin_lock(&head->lock);
 
 			/* Does not bother with rcv_saddr checks,
@@ -493,7 +496,7 @@ ok:
 		goto out;
 	}
 
-	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+	head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
 	tb  = inet_csk(sk)->icsk_bind_hash;
 	spin_lock_bh(&head->lock);
 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index ce16e9ac24c1..75c2def8f9a0 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -32,7 +32,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
 	write_unlock(lock);
 
 	/* Disassociate with bind bucket. */
-	bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
+	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
+			hashinfo->bhash_size)];
 	spin_lock(&bhead->lock);
 	tb = tw->tw_tb;
 	__hlist_del(&tw->tw_bind_node);
@@ -81,7 +82,8 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	   Note, that any socket with inet->num != 0 MUST be bound in
 	   binding cache, even if it is closed.
 	 */
-	bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)];
+	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num,
+			hashinfo->bhash_size)];
 	spin_lock(&bhead->lock);
 	tw->tw_tb = icsk->icsk_bind_hash;
 	BUG_TRAP(icsk->icsk_bind_hash);
@@ -158,6 +160,9 @@ rescan:
 		__inet_twsk_del_dead_node(tw);
 		spin_unlock(&twdr->death_lock);
 		__inet_twsk_kill(tw, twdr->hashinfo);
+#ifdef CONFIG_NET_NS
+		NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED);
+#endif
 		inet_twsk_put(tw);
 		killed++;
 		spin_lock(&twdr->death_lock);
@@ -176,8 +181,9 @@ rescan:
 	}
 
 	twdr->tw_count -= killed;
-	NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
-
+#ifndef CONFIG_NET_NS
+	NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed);
+#endif
 	return ret;
 }
183 189
@@ -370,6 +376,9 @@ void inet_twdr_twcal_tick(unsigned long data)
 				       &twdr->twcal_row[slot]) {
 			__inet_twsk_del_dead_node(tw);
 			__inet_twsk_kill(tw, twdr->hashinfo);
+#ifdef CONFIG_NET_NS
+			NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
+#endif
 			inet_twsk_put(tw);
 			killed++;
 		}
@@ -393,7 +402,9 @@ void inet_twdr_twcal_tick(unsigned long data)
 out:
 	if ((twdr->tw_count -= killed) == 0)
 		del_timer(&twdr->tw_timer);
-	NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
+#ifndef CONFIG_NET_NS
+	NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITKILLED, killed);
+#endif
 	spin_unlock(&twdr->death_lock);
 }
 
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index af995198f643..a456ceeac3f2 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -3,8 +3,6 @@
  *
  *  This source is covered by the GNU GPL, the same as all kernel sources.
  *
- *  Version:	$Id: inetpeer.c,v 1.7 2001/09/20 21:22:50 davem Exp $
- *
  *  Authors:	Andrey V. Savochkin <saw@msu.ru>
  */
 
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 4813c39b438b..450016b89a18 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -5,8 +5,6 @@
  *
  *		The IP forwarding functionality.
  *
- * Version:	$Id: ip_forward.c,v 1.48 2000/12/13 18:31:48 davem Exp $
- *
  * Authors:	see ip.c
  *
  * Fixes:
@@ -44,7 +42,7 @@ static int ip_forward_finish(struct sk_buff *skb)
 {
 	struct ip_options * opt	= &(IPCB(skb)->opt);
 
-	IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
 
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
@@ -58,6 +56,9 @@ int ip_forward(struct sk_buff *skb)
 	struct rtable *rt;	/* Route we use */
 	struct ip_options * opt	= &(IPCB(skb)->opt);
 
+	if (skb_warn_if_lro(skb))
+		goto drop;
+
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb))
 		goto drop;
 
@@ -87,7 +88,7 @@ int ip_forward(struct sk_buff *skb)
 
 	if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) &&
 		     (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
-		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+		IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 			  htonl(dst_mtu(&rt->u.dst)));
 		goto drop;
@@ -122,7 +123,7 @@ sr_failed:
 
 too_many_hops:
 	/* Tell the sender its packet died... */
-	IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+	IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_INHDRERRORS);
 	icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
 drop:
 	kfree_skb(skb);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 37221f659159..38d38f058018 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -5,8 +5,6 @@
  *
  *		The IP fragmentation functionality.
  *
- * Version:	$Id: ip_fragment.c,v 1.59 2002/01/12 07:54:56 davem Exp $
- *
  * Authors:	Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
  *		Alan Cox <Alan.Cox@linux.org>
  *
@@ -180,7 +178,7 @@ static void ip_evictor(struct net *net)
 
 	evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags);
 	if (evicted)
-		IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted);
+		IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
 }
 
 /*
@@ -189,8 +187,10 @@ static void ip_evictor(struct net *net)
 static void ip_expire(unsigned long arg)
 {
 	struct ipq *qp;
+	struct net *net;
 
 	qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
+	net = container_of(qp->q.net, struct net, ipv4.frags);
 
 	spin_lock(&qp->q.lock);
 
@@ -199,14 +199,12 @@ static void ip_expire(unsigned long arg)
 
 	ipq_kill(qp);
 
-	IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
-	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
 
 	if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
 		struct sk_buff *head = qp->q.fragments;
-		struct net *net;
 
-		net = container_of(qp->q.net, struct net, ipv4.frags);
 		/* Send an ICMP "Fragment Reassembly Timeout" message. */
 		if ((head->dev = dev_get_by_index(net, qp->iif)) != NULL) {
 			icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
@@ -263,7 +261,10 @@ static inline int ip_frag_too_far(struct ipq *qp)
 	rc = qp->q.fragments && (end - start) > max;
 
 	if (rc) {
-		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+		struct net *net;
+
+		net = container_of(qp->q.net, struct net, ipv4.frags);
+		IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
 	}
 
 	return rc;
@@ -547,7 +548,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	iph = ip_hdr(head);
 	iph->frag_off = 0;
 	iph->tot_len = htons(len);
-	IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
+	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMOKS);
 	qp->q.fragments = NULL;
 	return 0;
 
@@ -562,7 +563,7 @@ out_oversize:
562 "Oversized IP packet from " NIPQUAD_FMT ".\n", 563 "Oversized IP packet from " NIPQUAD_FMT ".\n",
563 NIPQUAD(qp->saddr)); 564 NIPQUAD(qp->saddr));
564out_fail: 565out_fail:
565 IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); 566 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMFAILS);
566 return err; 567 return err;
567} 568}
568 569
@@ -572,9 +573,9 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 	struct ipq *qp;
 	struct net *net;
 
-	IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
-
 	net = skb->dev ? dev_net(skb->dev) : dev_net(skb->dst->dev);
+	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
+
 	/* Start by cleaning up the memory. */
 	if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
 		ip_evictor(net);
@@ -592,7 +593,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 		return ret;
 	}
 
-	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);
 	return -ENOMEM;
 }
@@ -600,7 +601,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 #ifdef CONFIG_SYSCTL
 static int zero;
 
-static struct ctl_table ip4_frags_ctl_table[] = {
+static struct ctl_table ip4_frags_ns_ctl_table[] = {
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_HIGH_THRESH,
 		.procname	= "ipfrag_high_thresh",
@@ -626,6 +627,10 @@ static struct ctl_table ip4_frags_ctl_table[] = {
 		.proc_handler	= &proc_dointvec_jiffies,
 		.strategy	= &sysctl_jiffies
 	},
+	{ }
+};
+
+static struct ctl_table ip4_frags_ctl_table[] = {
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_SECRET_INTERVAL,
 		.procname	= "ipfrag_secret_interval",
@@ -646,22 +651,20 @@ static struct ctl_table ip4_frags_ctl_table[] = {
 	{ }
 };
 
-static int ip4_frags_ctl_register(struct net *net)
+static int ip4_frags_ns_ctl_register(struct net *net)
 {
 	struct ctl_table *table;
 	struct ctl_table_header *hdr;
 
-	table = ip4_frags_ctl_table;
+	table = ip4_frags_ns_ctl_table;
 	if (net != &init_net) {
-		table = kmemdup(table, sizeof(ip4_frags_ctl_table), GFP_KERNEL);
+		table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL);
 		if (table == NULL)
 			goto err_alloc;
 
 		table[0].data = &net->ipv4.frags.high_thresh;
 		table[1].data = &net->ipv4.frags.low_thresh;
 		table[2].data = &net->ipv4.frags.timeout;
-		table[3].mode &= ~0222;
-		table[4].mode &= ~0222;
 	}
 
 	hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table);
@@ -678,7 +681,7 @@ err_alloc:
678 return -ENOMEM; 681 return -ENOMEM;
679} 682}
680 683
681static void ip4_frags_ctl_unregister(struct net *net) 684static void ip4_frags_ns_ctl_unregister(struct net *net)
682{ 685{
683 struct ctl_table *table; 686 struct ctl_table *table;
684 687
@@ -686,13 +689,22 @@ static void ip4_frags_ctl_unregister(struct net *net)
686 unregister_net_sysctl_table(net->ipv4.frags_hdr); 689 unregister_net_sysctl_table(net->ipv4.frags_hdr);
687 kfree(table); 690 kfree(table);
688} 691}
692
693static void ip4_frags_ctl_register(void)
694{
695 register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table);
696}
689#else 697#else
690static inline int ip4_frags_ctl_register(struct net *net) 698static inline int ip4_frags_ns_ctl_register(struct net *net)
691{ 699{
692 return 0; 700 return 0;
693} 701}
694 702
695static inline void ip4_frags_ctl_unregister(struct net *net) 703static inline void ip4_frags_ns_ctl_unregister(struct net *net)
704{
705}
706
707static inline void ip4_frags_ctl_register(void)
696{ 708{
697} 709}
698#endif 710#endif
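
The sysctl rework above is worth calling out: the single ip4_frags_ctl_table is split into a per-namespace table (ipfrag_high_thresh, ipfrag_low_thresh, ipfrag_time) and a read-only global one (ipfrag_secret_interval onward), which is why the old trick of stripping write bits with `mode &= ~0222` for non-init namespaces disappears. A minimal sketch of the duplication pattern, reusing only symbols visible in these hunks (the function name is illustrative and the error path is condensed relative to the real code):

static int ip4_frags_ns_ctl_register_sketch(struct net *net)
{
        struct ctl_table *table = ip4_frags_ns_ctl_table;
        struct ctl_table_header *hdr;

        if (net != &init_net) {
                /* every non-init namespace gets a private copy ... */
                table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table),
                                GFP_KERNEL);
                if (table == NULL)
                        return -ENOMEM;

                /* ... whose .data pointers are re-aimed at this net's
                 * own fields instead of the init_net globals */
                table[0].data = &net->ipv4.frags.high_thresh;
                table[1].data = &net->ipv4.frags.low_thresh;
                table[2].data = &net->ipv4.frags.timeout;
        }

        hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table);
        if (hdr == NULL) {
                if (net != &init_net)
                        kfree(table);
                return -ENOMEM;
        }
        net->ipv4.frags_hdr = hdr;
        return 0;
}

The truly global knobs go the other way: they are registered once at boot through register_net_sysctl_rotable(), which is why ipfrag_init() below gains an unconditional ip4_frags_ctl_register() call.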
@@ -716,12 +728,12 @@ static int ipv4_frags_init_net(struct net *net)
716 728
717 inet_frags_init_net(&net->ipv4.frags); 729 inet_frags_init_net(&net->ipv4.frags);
718 730
719 return ip4_frags_ctl_register(net); 731 return ip4_frags_ns_ctl_register(net);
720} 732}
721 733
722static void ipv4_frags_exit_net(struct net *net) 734static void ipv4_frags_exit_net(struct net *net)
723{ 735{
724 ip4_frags_ctl_unregister(net); 736 ip4_frags_ns_ctl_unregister(net);
725 inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); 737 inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
726} 738}
727 739
@@ -732,6 +744,7 @@ static struct pernet_operations ip4_frags_ops = {
732 744
733void __init ipfrag_init(void) 745void __init ipfrag_init(void)
734{ 746{
747 ip4_frags_ctl_register();
735 register_pernet_subsys(&ip4_frags_ops); 748 register_pernet_subsys(&ip4_frags_ops);
736 ip4_frags.hashfn = ip4_hashfn; 749 ip4_frags.hashfn = ip4_hashfn;
737 ip4_frags.constructor = ip4_frag_init; 750 ip4_frags.constructor = ip4_frag_init;
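
Throughout ip_fragment.c the statistics macros gain a struct net argument, so each call site must recover the namespace from whatever context it has at hand. A hedged sketch of the two recovery routes the hunks above use (the function name is illustrative, not kernel code):

static void reasm_fail_count_sketch(struct ipq *qp, struct sk_buff *skb)
{
        struct net *net;

        if (skb && skb->dev) {
                /* input path: the namespace hangs off the device */
                net = dev_net(skb->dev);
        } else {
                /* timer path: qp->q.net points at the netns_frags that
                 * is embedded in struct net, so container_of() walks
                 * back to the enclosing namespace */
                net = container_of(qp->q.net, struct net, ipv4.frags);
        }

        IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
}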
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 4342cba4ff82..2a61158ea722 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -473,6 +473,8 @@ static int ipgre_rcv(struct sk_buff *skb)
473 read_lock(&ipgre_lock); 473 read_lock(&ipgre_lock);
474 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev), 474 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
475 iph->saddr, iph->daddr, key)) != NULL) { 475 iph->saddr, iph->daddr, key)) != NULL) {
476 struct net_device_stats *stats = &tunnel->dev->stats;
477
476 secpath_reset(skb); 478 secpath_reset(skb);
477 479
478 skb->protocol = *(__be16*)(h + 2); 480 skb->protocol = *(__be16*)(h + 2);
@@ -497,28 +499,28 @@ static int ipgre_rcv(struct sk_buff *skb)
497 /* Looped back packet, drop it! */ 499 /* Looped back packet, drop it! */
498 if (skb->rtable->fl.iif == 0) 500 if (skb->rtable->fl.iif == 0)
499 goto drop; 501 goto drop;
500 tunnel->stat.multicast++; 502 stats->multicast++;
501 skb->pkt_type = PACKET_BROADCAST; 503 skb->pkt_type = PACKET_BROADCAST;
502 } 504 }
503#endif 505#endif
504 506
505 if (((flags&GRE_CSUM) && csum) || 507 if (((flags&GRE_CSUM) && csum) ||
506 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { 508 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
507 tunnel->stat.rx_crc_errors++; 509 stats->rx_crc_errors++;
508 tunnel->stat.rx_errors++; 510 stats->rx_errors++;
509 goto drop; 511 goto drop;
510 } 512 }
511 if (tunnel->parms.i_flags&GRE_SEQ) { 513 if (tunnel->parms.i_flags&GRE_SEQ) {
512 if (!(flags&GRE_SEQ) || 514 if (!(flags&GRE_SEQ) ||
513 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { 515 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
514 tunnel->stat.rx_fifo_errors++; 516 stats->rx_fifo_errors++;
515 tunnel->stat.rx_errors++; 517 stats->rx_errors++;
516 goto drop; 518 goto drop;
517 } 519 }
518 tunnel->i_seqno = seqno + 1; 520 tunnel->i_seqno = seqno + 1;
519 } 521 }
520 tunnel->stat.rx_packets++; 522 stats->rx_packets++;
521 tunnel->stat.rx_bytes += skb->len; 523 stats->rx_bytes += skb->len;
522 skb->dev = tunnel->dev; 524 skb->dev = tunnel->dev;
523 dst_release(skb->dst); 525 dst_release(skb->dst);
524 skb->dst = NULL; 526 skb->dst = NULL;
@@ -540,7 +542,7 @@ drop_nolock:
540static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 542static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
541{ 543{
542 struct ip_tunnel *tunnel = netdev_priv(dev); 544 struct ip_tunnel *tunnel = netdev_priv(dev);
543 struct net_device_stats *stats = &tunnel->stat; 545 struct net_device_stats *stats = &tunnel->dev->stats;
544 struct iphdr *old_iph = ip_hdr(skb); 546 struct iphdr *old_iph = ip_hdr(skb);
545 struct iphdr *tiph; 547 struct iphdr *tiph;
546 u8 tos; 548 u8 tos;
@@ -554,7 +556,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
554 int mtu; 556 int mtu;
555 557
556 if (tunnel->recursion++) { 558 if (tunnel->recursion++) {
557 tunnel->stat.collisions++; 559 stats->collisions++;
558 goto tx_error; 560 goto tx_error;
559 } 561 }
560 562
@@ -570,7 +572,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
570 /* NBMA tunnel */ 572 /* NBMA tunnel */
571 573
572 if (skb->dst == NULL) { 574 if (skb->dst == NULL) {
573 tunnel->stat.tx_fifo_errors++; 575 stats->tx_fifo_errors++;
574 goto tx_error; 576 goto tx_error;
575 } 577 }
576 578
@@ -621,7 +623,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
621 .tos = RT_TOS(tos) } }, 623 .tos = RT_TOS(tos) } },
622 .proto = IPPROTO_GRE }; 624 .proto = IPPROTO_GRE };
623 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 625 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
624 tunnel->stat.tx_carrier_errors++; 626 stats->tx_carrier_errors++;
625 goto tx_error; 627 goto tx_error;
626 } 628 }
627 } 629 }
@@ -629,7 +631,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
629 631
630 if (tdev == dev) { 632 if (tdev == dev) {
631 ip_rt_put(rt); 633 ip_rt_put(rt);
632 tunnel->stat.collisions++; 634 stats->collisions++;
633 goto tx_error; 635 goto tx_error;
634 } 636 }
635 637
@@ -954,11 +956,6 @@ done:
954 return err; 956 return err;
955} 957}
956 958
957static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
958{
959 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
960}
961
962static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) 959static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
963{ 960{
964 struct ip_tunnel *tunnel = netdev_priv(dev); 961 struct ip_tunnel *tunnel = netdev_priv(dev);
@@ -1084,7 +1081,6 @@ static void ipgre_tunnel_setup(struct net_device *dev)
1084 dev->uninit = ipgre_tunnel_uninit; 1081 dev->uninit = ipgre_tunnel_uninit;
1085 dev->destructor = free_netdev; 1082 dev->destructor = free_netdev;
1086 dev->hard_start_xmit = ipgre_tunnel_xmit; 1083 dev->hard_start_xmit = ipgre_tunnel_xmit;
1087 dev->get_stats = ipgre_tunnel_get_stats;
1088 dev->do_ioctl = ipgre_tunnel_ioctl; 1084 dev->do_ioctl = ipgre_tunnel_ioctl;
1089 dev->change_mtu = ipgre_tunnel_change_mtu; 1085 dev->change_mtu = ipgre_tunnel_change_mtu;
1090 1086
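
The GRE changes are mostly mechanical: the driver stops maintaining its own struct net_device_stats copy in tunnel->stat and counts straight into dev->stats, which every net_device already carries. That makes the ipgre_tunnel_get_stats() hook redundant, since the core reports dev->stats whenever a driver installs no ->get_stats method. The shape of the conversion, as a sketch with an illustrative function name:

/* before: tunnel->stat.rx_packets++; plus a ->get_stats hook
 * after:  one canonical counter block, no hook needed */
static void tunnel_rx_account_sketch(struct net_device *dev,
                                     struct sk_buff *skb)
{
        struct net_device_stats *stats = &dev->stats;

        stats->rx_packets++;
        stats->rx_bytes += skb->len;
}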
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index ff77a4a7f9ec..e0bed56c51f1 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * The Internet Protocol (IP) module. 6 * The Internet Protocol (IP) module.
7 * 7 *
8 * Version: $Id: ip_input.c,v 1.55 2002/01/12 07:39:45 davem Exp $
9 *
10 * Authors: Ross Biro 8 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Donald Becker, <becker@super.org> 10 * Donald Becker, <becker@super.org>
@@ -147,12 +145,6 @@
147#include <linux/netlink.h> 145#include <linux/netlink.h>
148 146
149/* 147/*
150 * SNMP management statistics
151 */
152
153DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly;
154
155/*
156 * Process Router Attention IP option 148 * Process Router Attention IP option
157 */ 149 */
158int ip_call_ra_chain(struct sk_buff *skb) 150int ip_call_ra_chain(struct sk_buff *skb)
@@ -232,16 +224,16 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
232 protocol = -ret; 224 protocol = -ret;
233 goto resubmit; 225 goto resubmit;
234 } 226 }
235 IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); 227 IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
236 } else { 228 } else {
237 if (!raw) { 229 if (!raw) {
238 if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 230 if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
239 IP_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS); 231 IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS);
240 icmp_send(skb, ICMP_DEST_UNREACH, 232 icmp_send(skb, ICMP_DEST_UNREACH,
241 ICMP_PROT_UNREACH, 0); 233 ICMP_PROT_UNREACH, 0);
242 } 234 }
243 } else 235 } else
244 IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); 236 IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
245 kfree_skb(skb); 237 kfree_skb(skb);
246 } 238 }
247 } 239 }
@@ -283,7 +275,7 @@ static inline int ip_rcv_options(struct sk_buff *skb)
283 --ANK (980813) 275 --ANK (980813)
284 */ 276 */
285 if (skb_cow(skb, skb_headroom(skb))) { 277 if (skb_cow(skb, skb_headroom(skb))) {
286 IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); 278 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
287 goto drop; 279 goto drop;
288 } 280 }
289 281
@@ -292,7 +284,7 @@ static inline int ip_rcv_options(struct sk_buff *skb)
292 opt->optlen = iph->ihl*4 - sizeof(struct iphdr); 284 opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
293 285
294 if (ip_options_compile(dev_net(dev), opt, skb)) { 286 if (ip_options_compile(dev_net(dev), opt, skb)) {
295 IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); 287 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
296 goto drop; 288 goto drop;
297 } 289 }
298 290
@@ -336,9 +328,11 @@ static int ip_rcv_finish(struct sk_buff *skb)
336 skb->dev); 328 skb->dev);
337 if (unlikely(err)) { 329 if (unlikely(err)) {
338 if (err == -EHOSTUNREACH) 330 if (err == -EHOSTUNREACH)
339 IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); 331 IP_INC_STATS_BH(dev_net(skb->dev),
332 IPSTATS_MIB_INADDRERRORS);
340 else if (err == -ENETUNREACH) 333 else if (err == -ENETUNREACH)
341 IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES); 334 IP_INC_STATS_BH(dev_net(skb->dev),
335 IPSTATS_MIB_INNOROUTES);
342 goto drop; 336 goto drop;
343 } 337 }
344 } 338 }
@@ -359,9 +353,9 @@ static int ip_rcv_finish(struct sk_buff *skb)
359 353
360 rt = skb->rtable; 354 rt = skb->rtable;
361 if (rt->rt_type == RTN_MULTICAST) 355 if (rt->rt_type == RTN_MULTICAST)
362 IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS); 356 IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCASTPKTS);
363 else if (rt->rt_type == RTN_BROADCAST) 357 else if (rt->rt_type == RTN_BROADCAST)
364 IP_INC_STATS_BH(IPSTATS_MIB_INBCASTPKTS); 358 IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCASTPKTS);
365 359
366 return dst_input(skb); 360 return dst_input(skb);
367 361
@@ -384,10 +378,10 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
384 if (skb->pkt_type == PACKET_OTHERHOST) 378 if (skb->pkt_type == PACKET_OTHERHOST)
385 goto drop; 379 goto drop;
386 380
387 IP_INC_STATS_BH(IPSTATS_MIB_INRECEIVES); 381 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INRECEIVES);
388 382
389 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { 383 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
390 IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); 384 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
391 goto out; 385 goto out;
392 } 386 }
393 387
@@ -420,7 +414,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
420 414
421 len = ntohs(iph->tot_len); 415 len = ntohs(iph->tot_len);
422 if (skb->len < len) { 416 if (skb->len < len) {
423 IP_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS); 417 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
424 goto drop; 418 goto drop;
425 } else if (len < (iph->ihl*4)) 419 } else if (len < (iph->ihl*4))
426 goto inhdr_error; 420 goto inhdr_error;
@@ -430,7 +424,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
430 * Note this now means skb->len holds ntohs(iph->tot_len). 424 * Note this now means skb->len holds ntohs(iph->tot_len).
431 */ 425 */
432 if (pskb_trim_rcsum(skb, len)) { 426 if (pskb_trim_rcsum(skb, len)) {
433 IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); 427 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
434 goto drop; 428 goto drop;
435 } 429 }
436 430
@@ -441,11 +435,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
441 ip_rcv_finish); 435 ip_rcv_finish);
442 436
443inhdr_error: 437inhdr_error:
444 IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); 438 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
445drop: 439drop:
446 kfree_skb(skb); 440 kfree_skb(skb);
447out: 441out:
448 return NET_RX_DROP; 442 return NET_RX_DROP;
449} 443}
450
451EXPORT_SYMBOL(ip_statistics);
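
With the global ip_statistics definition and its EXPORT_SYMBOL removed above, IP_INC_STATS_BH() has to become a two-argument macro that resolves the MIB through the namespace. The header change is not part of this hunk; a hedged guess at its shape, where the net->mib field name is an assumption:

/* assumed definition, for orientation only */
#define IP_INC_STATS_BH(net, field) \
        SNMP_INC_STATS_BH((net)->mib.ip_statistics, field)

Call sites that only hold an skb derive the namespace with dev_net(skb->dev) or dev_net(rt->u.dst.dev), as the hunks above show.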
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 33126ad2cfdc..be3f18a7a40e 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * The options processing module for ip.c 6 * The options processing module for ip.c
7 * 7 *
8 * Version: $Id: ip_options.c,v 1.21 2001/09/01 00:31:50 davem Exp $
9 *
10 * Authors: A.N.Kuznetsov 8 * Authors: A.N.Kuznetsov
11 * 9 *
12 */ 10 */
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e527628f56cf..465544f6281a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * The Internet Protocol (IP) output module. 6 * The Internet Protocol (IP) output module.
7 * 7 *
8 * Version: $Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $
9 *
10 * Authors: Ross Biro 8 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Donald Becker, <becker@super.org> 10 * Donald Becker, <becker@super.org>
@@ -184,9 +182,9 @@ static inline int ip_finish_output2(struct sk_buff *skb)
184 unsigned int hh_len = LL_RESERVED_SPACE(dev); 182 unsigned int hh_len = LL_RESERVED_SPACE(dev);
185 183
186 if (rt->rt_type == RTN_MULTICAST) 184 if (rt->rt_type == RTN_MULTICAST)
187 IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS); 185 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTMCASTPKTS);
188 else if (rt->rt_type == RTN_BROADCAST) 186 else if (rt->rt_type == RTN_BROADCAST)
189 IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS); 187 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTBCASTPKTS);
190 188
191 /* Be paranoid, rather than too clever. */ 189 /* Be paranoid, rather than too clever. */
192 if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { 190 if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
@@ -246,7 +244,7 @@ int ip_mc_output(struct sk_buff *skb)
246 /* 244 /*
247 * If the indicated interface is up and running, send the packet. 245 * If the indicated interface is up and running, send the packet.
248 */ 246 */
249 IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); 247 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS);
250 248
251 skb->dev = dev; 249 skb->dev = dev;
252 skb->protocol = htons(ETH_P_IP); 250 skb->protocol = htons(ETH_P_IP);
@@ -300,7 +298,7 @@ int ip_output(struct sk_buff *skb)
300{ 298{
301 struct net_device *dev = skb->dst->dev; 299 struct net_device *dev = skb->dst->dev;
302 300
303 IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS); 301 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS);
304 302
305 skb->dev = dev; 303 skb->dev = dev;
306 skb->protocol = htons(ETH_P_IP); 304 skb->protocol = htons(ETH_P_IP);
@@ -391,7 +389,7 @@ packet_routed:
391 return ip_local_out(skb); 389 return ip_local_out(skb);
392 390
393no_route: 391no_route:
394 IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES); 392 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
395 kfree_skb(skb); 393 kfree_skb(skb);
396 return -EHOSTUNREACH; 394 return -EHOSTUNREACH;
397} 395}
@@ -453,7 +451,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
453 iph = ip_hdr(skb); 451 iph = ip_hdr(skb);
454 452
455 if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) { 453 if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
456 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); 454 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
457 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 455 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
458 htonl(ip_skb_dst_mtu(skb))); 456 htonl(ip_skb_dst_mtu(skb)));
459 kfree_skb(skb); 457 kfree_skb(skb);
@@ -544,7 +542,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
544 err = output(skb); 542 err = output(skb);
545 543
546 if (!err) 544 if (!err)
547 IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); 545 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
548 if (err || !frag) 546 if (err || !frag)
549 break; 547 break;
550 548
@@ -554,7 +552,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
554 } 552 }
555 553
556 if (err == 0) { 554 if (err == 0) {
557 IP_INC_STATS(IPSTATS_MIB_FRAGOKS); 555 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
558 return 0; 556 return 0;
559 } 557 }
560 558
@@ -563,7 +561,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
563 kfree_skb(frag); 561 kfree_skb(frag);
564 frag = skb; 562 frag = skb;
565 } 563 }
566 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); 564 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
567 return err; 565 return err;
568 } 566 }
569 567
@@ -675,15 +673,15 @@ slow_path:
675 if (err) 673 if (err)
676 goto fail; 674 goto fail;
677 675
678 IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); 676 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
679 } 677 }
680 kfree_skb(skb); 678 kfree_skb(skb);
681 IP_INC_STATS(IPSTATS_MIB_FRAGOKS); 679 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
682 return err; 680 return err;
683 681
684fail: 682fail:
685 kfree_skb(skb); 683 kfree_skb(skb);
686 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS); 684 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
687 return err; 685 return err;
688} 686}
689 687
@@ -1049,7 +1047,7 @@ alloc_new_skb:
1049 1047
1050error: 1048error:
1051 inet->cork.length -= length; 1049 inet->cork.length -= length;
1052 IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); 1050 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
1053 return err; 1051 return err;
1054} 1052}
1055 1053
@@ -1191,7 +1189,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1191 1189
1192error: 1190error:
1193 inet->cork.length -= size; 1191 inet->cork.length -= size;
1194 IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); 1192 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
1195 return err; 1193 return err;
1196} 1194}
1197 1195
@@ -1213,6 +1211,7 @@ int ip_push_pending_frames(struct sock *sk)
1213 struct sk_buff *skb, *tmp_skb; 1211 struct sk_buff *skb, *tmp_skb;
1214 struct sk_buff **tail_skb; 1212 struct sk_buff **tail_skb;
1215 struct inet_sock *inet = inet_sk(sk); 1213 struct inet_sock *inet = inet_sk(sk);
1214 struct net *net = sock_net(sk);
1216 struct ip_options *opt = NULL; 1215 struct ip_options *opt = NULL;
1217 struct rtable *rt = (struct rtable *)inet->cork.dst; 1216 struct rtable *rt = (struct rtable *)inet->cork.dst;
1218 struct iphdr *iph; 1217 struct iphdr *iph;
@@ -1282,7 +1281,7 @@ int ip_push_pending_frames(struct sock *sk)
1282 skb->dst = dst_clone(&rt->u.dst); 1281 skb->dst = dst_clone(&rt->u.dst);
1283 1282
1284 if (iph->protocol == IPPROTO_ICMP) 1283 if (iph->protocol == IPPROTO_ICMP)
1285 icmp_out_count(((struct icmphdr *) 1284 icmp_out_count(net, ((struct icmphdr *)
1286 skb_transport_header(skb))->type); 1285 skb_transport_header(skb))->type);
1287 1286
1288 /* Netfilter gets the whole (not yet fragmented) skb. */ 1287 /* Netfilter gets the whole (not yet fragmented) skb. */
@@ -1299,7 +1298,7 @@ out:
1299 return err; 1298 return err;
1300 1299
1301error: 1300error:
1302 IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); 1301 IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
1303 goto out; 1302 goto out;
1304} 1303}
1305 1304
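
ip_push_pending_frames() is the one function here that needs the namespace more than once, so it resolves it a single time up front and reuses it for both icmp_out_count() and the OUTDISCARDS bump on the error path, rather than re-deriving it at each site. A condensed sketch of that shape (stripped down to the two uses; not the full function):

static int push_pending_sketch(struct sock *sk, struct sk_buff *skb)
{
        struct net *net = sock_net(sk);         /* resolve once */
        int err;

        if (ip_hdr(skb)->protocol == IPPROTO_ICMP)
                icmp_out_count(net, ((struct icmphdr *)
                               skb_transport_header(skb))->type);

        err = ip_local_out(skb);
        if (err < 0)
                IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
        return err;
}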
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index e0514e82308e..105d92a039b9 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * The IP to API glue. 6 * The IP to API glue.
7 * 7 *
8 * Version: $Id: ip_sockglue.c,v 1.62 2002/02/01 22:01:04 davem Exp $
9 *
10 * Authors: see ip.c 8 * Authors: see ip.c
11 * 9 *
12 * Fixes: 10 * Fixes:
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index ed45037ce9be..42065fff46c4 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1,6 +1,4 @@
1/* 1/*
2 * $Id: ipconfig.c,v 1.46 2002/02/01 22:01:04 davem Exp $
3 *
4 * Automatic Configuration of IP -- use DHCP, BOOTP, RARP, or 2 * Automatic Configuration of IP -- use DHCP, BOOTP, RARP, or
5 * user-supplied information to configure own IP address and routes. 3 * user-supplied information to configure own IP address and routes.
6 * 4 *
@@ -434,7 +432,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
434 unsigned char *sha, *tha; /* s for "source", t for "target" */ 432 unsigned char *sha, *tha; /* s for "source", t for "target" */
435 struct ic_device *d; 433 struct ic_device *d;
436 434
437 if (dev_net(dev) != &init_net) 435 if (!net_eq(dev_net(dev), &init_net))
438 goto drop; 436 goto drop;
439 437
440 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) 438 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
@@ -854,7 +852,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
854 struct ic_device *d; 852 struct ic_device *d;
855 int len, ext_len; 853 int len, ext_len;
856 854
857 if (dev_net(dev) != &init_net) 855 if (!net_eq(dev_net(dev), &init_net))
858 goto drop; 856 goto drop;
859 857
860 /* Perform verifications before taking the lock. */ 858 /* Perform verifications before taking the lock. */
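
The pointer comparisons against &init_net become net_eq(); behaviour is identical here, but net_eq() is defined to return 1 when namespaces are compiled out, so the whole guard can be optimized away on CONFIG_NET_NS=n builds. A sketch of the guard as the boot-time receive handlers above use it (the function name is illustrative):

static int bootp_guard_sketch(struct sk_buff *skb, struct net_device *dev)
{
        if (!net_eq(dev_net(dev), &init_net)) {
                kfree_skb(skb);         /* foreign namespace: ignore */
                return NET_RX_DROP;
        }
        return NET_RX_SUCCESS;          /* real handler would parse on */
}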
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index af5cb53da5cc..4c6d2caf9203 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * Linux NET3: IP/IP protocol decoder. 2 * Linux NET3: IP/IP protocol decoder.
3 * 3 *
4 * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5 *
6 * Authors: 4 * Authors:
7 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95 5 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
8 * 6 *
@@ -368,8 +366,8 @@ static int ipip_rcv(struct sk_buff *skb)
368 skb->protocol = htons(ETH_P_IP); 366 skb->protocol = htons(ETH_P_IP);
369 skb->pkt_type = PACKET_HOST; 367 skb->pkt_type = PACKET_HOST;
370 368
371 tunnel->stat.rx_packets++; 369 tunnel->dev->stats.rx_packets++;
372 tunnel->stat.rx_bytes += skb->len; 370 tunnel->dev->stats.rx_bytes += skb->len;
373 skb->dev = tunnel->dev; 371 skb->dev = tunnel->dev;
374 dst_release(skb->dst); 372 dst_release(skb->dst);
375 skb->dst = NULL; 373 skb->dst = NULL;
@@ -392,7 +390,7 @@ static int ipip_rcv(struct sk_buff *skb)
392static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 390static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
393{ 391{
394 struct ip_tunnel *tunnel = netdev_priv(dev); 392 struct ip_tunnel *tunnel = netdev_priv(dev);
395 struct net_device_stats *stats = &tunnel->stat; 393 struct net_device_stats *stats = &tunnel->dev->stats;
396 struct iphdr *tiph = &tunnel->parms.iph; 394 struct iphdr *tiph = &tunnel->parms.iph;
397 u8 tos = tunnel->parms.iph.tos; 395 u8 tos = tunnel->parms.iph.tos;
398 __be16 df = tiph->frag_off; 396 __be16 df = tiph->frag_off;
@@ -405,7 +403,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
405 int mtu; 403 int mtu;
406 404
407 if (tunnel->recursion++) { 405 if (tunnel->recursion++) {
408 tunnel->stat.collisions++; 406 stats->collisions++;
409 goto tx_error; 407 goto tx_error;
410 } 408 }
411 409
@@ -418,7 +416,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
418 if (!dst) { 416 if (!dst) {
419 /* NBMA tunnel */ 417 /* NBMA tunnel */
420 if ((rt = skb->rtable) == NULL) { 418 if ((rt = skb->rtable) == NULL) {
421 tunnel->stat.tx_fifo_errors++; 419 stats->tx_fifo_errors++;
422 goto tx_error; 420 goto tx_error;
423 } 421 }
424 if ((dst = rt->rt_gateway) == 0) 422 if ((dst = rt->rt_gateway) == 0)
@@ -433,7 +431,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
433 .tos = RT_TOS(tos) } }, 431 .tos = RT_TOS(tos) } },
434 .proto = IPPROTO_IPIP }; 432 .proto = IPPROTO_IPIP };
435 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 433 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
436 tunnel->stat.tx_carrier_errors++; 434 stats->tx_carrier_errors++;
437 goto tx_error_icmp; 435 goto tx_error_icmp;
438 } 436 }
439 } 437 }
@@ -441,7 +439,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
441 439
442 if (tdev == dev) { 440 if (tdev == dev) {
443 ip_rt_put(rt); 441 ip_rt_put(rt);
444 tunnel->stat.collisions++; 442 stats->collisions++;
445 goto tx_error; 443 goto tx_error;
446 } 444 }
447 445
@@ -451,7 +449,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
451 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; 449 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
452 450
453 if (mtu < 68) { 451 if (mtu < 68) {
454 tunnel->stat.collisions++; 452 stats->collisions++;
455 ip_rt_put(rt); 453 ip_rt_put(rt);
456 goto tx_error; 454 goto tx_error;
457 } 455 }
@@ -685,11 +683,6 @@ done:
685 return err; 683 return err;
686} 684}
687 685
688static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
689{
690 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
691}
692
693static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) 686static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
694{ 687{
695 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) 688 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
@@ -702,7 +695,6 @@ static void ipip_tunnel_setup(struct net_device *dev)
702{ 695{
703 dev->uninit = ipip_tunnel_uninit; 696 dev->uninit = ipip_tunnel_uninit;
704 dev->hard_start_xmit = ipip_tunnel_xmit; 697 dev->hard_start_xmit = ipip_tunnel_xmit;
705 dev->get_stats = ipip_tunnel_get_stats;
706 dev->do_ioctl = ipip_tunnel_ioctl; 698 dev->do_ioctl = ipip_tunnel_ioctl;
707 dev->change_mtu = ipip_tunnel_change_mtu; 699 dev->change_mtu = ipip_tunnel_change_mtu;
708 dev->destructor = free_netdev; 700 dev->destructor = free_netdev;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 11700a4dcd95..c519b8d30eee 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -9,8 +9,6 @@
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 * 11 *
12 * Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
13 *
14 * Fixes: 12 * Fixes:
15 * Michael Chastain : Incorrect size of copying. 13 * Michael Chastain : Incorrect size of copying.
16 * Alan Cox : Added the cache manager code 14 * Alan Cox : Added the cache manager code
@@ -120,6 +118,31 @@ static struct timer_list ipmr_expire_timer;
120 118
121/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 119/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
122 120
121static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
122{
123 dev_close(dev);
124
125 dev = __dev_get_by_name(&init_net, "tunl0");
126 if (dev) {
127 struct ifreq ifr;
128 mm_segment_t oldfs;
129 struct ip_tunnel_parm p;
130
131 memset(&p, 0, sizeof(p));
132 p.iph.daddr = v->vifc_rmt_addr.s_addr;
133 p.iph.saddr = v->vifc_lcl_addr.s_addr;
134 p.iph.version = 4;
135 p.iph.ihl = 5;
136 p.iph.protocol = IPPROTO_IPIP;
137 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
138 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
139
140 oldfs = get_fs(); set_fs(KERNEL_DS);
141 dev->do_ioctl(dev, &ifr, SIOCDELTUNNEL);
142 set_fs(oldfs);
143 }
144}
145
123static 146static
124struct net_device *ipmr_new_tunnel(struct vifctl *v) 147struct net_device *ipmr_new_tunnel(struct vifctl *v)
125{ 148{
@@ -161,6 +184,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
161 184
162 if (dev_open(dev)) 185 if (dev_open(dev))
163 goto failure; 186 goto failure;
187 dev_hold(dev);
164 } 188 }
165 } 189 }
166 return dev; 190 return dev;
@@ -181,26 +205,20 @@ static int reg_vif_num = -1;
181static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 205static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
182{ 206{
183 read_lock(&mrt_lock); 207 read_lock(&mrt_lock);
184 ((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len; 208 dev->stats.tx_bytes += skb->len;
185 ((struct net_device_stats*)netdev_priv(dev))->tx_packets++; 209 dev->stats.tx_packets++;
186 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); 210 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
187 read_unlock(&mrt_lock); 211 read_unlock(&mrt_lock);
188 kfree_skb(skb); 212 kfree_skb(skb);
189 return 0; 213 return 0;
190} 214}
191 215
192static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
193{
194 return (struct net_device_stats*)netdev_priv(dev);
195}
196
197static void reg_vif_setup(struct net_device *dev) 216static void reg_vif_setup(struct net_device *dev)
198{ 217{
199 dev->type = ARPHRD_PIMREG; 218 dev->type = ARPHRD_PIMREG;
200 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; 219 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
201 dev->flags = IFF_NOARP; 220 dev->flags = IFF_NOARP;
202 dev->hard_start_xmit = reg_vif_xmit; 221 dev->hard_start_xmit = reg_vif_xmit;
203 dev->get_stats = reg_vif_get_stats;
204 dev->destructor = free_netdev; 222 dev->destructor = free_netdev;
205} 223}
206 224
@@ -209,8 +227,7 @@ static struct net_device *ipmr_reg_vif(void)
209 struct net_device *dev; 227 struct net_device *dev;
210 struct in_device *in_dev; 228 struct in_device *in_dev;
211 229
212 dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg", 230 dev = alloc_netdev(0, "pimreg", reg_vif_setup);
213 reg_vif_setup);
214 231
215 if (dev == NULL) 232 if (dev == NULL)
216 return NULL; 233 return NULL;
@@ -234,6 +251,8 @@ static struct net_device *ipmr_reg_vif(void)
234 if (dev_open(dev)) 251 if (dev_open(dev))
235 goto failure; 252 goto failure;
236 253
254 dev_hold(dev);
255
237 return dev; 256 return dev;
238 257
239failure: 258failure:
@@ -248,9 +267,10 @@ failure:
248 267
249/* 268/*
250 * Delete a VIF entry 269 * Delete a VIF entry
270 * @notify: Set to 1 if the caller is a notifier_call
251 */ 271 */
252 272
253static int vif_delete(int vifi) 273static int vif_delete(int vifi, int notify)
254{ 274{
255 struct vif_device *v; 275 struct vif_device *v;
256 struct net_device *dev; 276 struct net_device *dev;
@@ -293,7 +313,7 @@ static int vif_delete(int vifi)
293 ip_rt_multicast_event(in_dev); 313 ip_rt_multicast_event(in_dev);
294 } 314 }
295 315
296 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) 316 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
297 unregister_netdevice(dev); 317 unregister_netdevice(dev);
298 318
299 dev_put(dev); 319 dev_put(dev);
@@ -398,6 +418,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
398 struct vif_device *v = &vif_table[vifi]; 418 struct vif_device *v = &vif_table[vifi];
399 struct net_device *dev; 419 struct net_device *dev;
400 struct in_device *in_dev; 420 struct in_device *in_dev;
421 int err;
401 422
402 /* Is vif busy ? */ 423 /* Is vif busy ? */
403 if (VIF_EXISTS(vifi)) 424 if (VIF_EXISTS(vifi))
@@ -415,18 +436,34 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
415 dev = ipmr_reg_vif(); 436 dev = ipmr_reg_vif();
416 if (!dev) 437 if (!dev)
417 return -ENOBUFS; 438 return -ENOBUFS;
439 err = dev_set_allmulti(dev, 1);
440 if (err) {
441 unregister_netdevice(dev);
442 dev_put(dev);
443 return err;
444 }
418 break; 445 break;
419#endif 446#endif
420 case VIFF_TUNNEL: 447 case VIFF_TUNNEL:
421 dev = ipmr_new_tunnel(vifc); 448 dev = ipmr_new_tunnel(vifc);
422 if (!dev) 449 if (!dev)
423 return -ENOBUFS; 450 return -ENOBUFS;
451 err = dev_set_allmulti(dev, 1);
452 if (err) {
453 ipmr_del_tunnel(dev, vifc);
454 dev_put(dev);
455 return err;
456 }
424 break; 457 break;
425 case 0: 458 case 0:
426 dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr); 459 dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
427 if (!dev) 460 if (!dev)
428 return -EADDRNOTAVAIL; 461 return -EADDRNOTAVAIL;
429 dev_put(dev); 462 err = dev_set_allmulti(dev, 1);
463 if (err) {
464 dev_put(dev);
465 return err;
466 }
430 break; 467 break;
431 default: 468 default:
432 return -EINVAL; 469 return -EINVAL;
@@ -435,7 +472,6 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
435 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) 472 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
436 return -EADDRNOTAVAIL; 473 return -EADDRNOTAVAIL;
437 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; 474 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
438 dev_set_allmulti(dev, +1);
439 ip_rt_multicast_event(in_dev); 475 ip_rt_multicast_event(in_dev);
440 476
441 /* 477 /*
@@ -458,7 +494,6 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
458 494
459 /* And finish update writing critical data */ 495 /* And finish update writing critical data */
460 write_lock_bh(&mrt_lock); 496 write_lock_bh(&mrt_lock);
461 dev_hold(dev);
462 v->dev=dev; 497 v->dev=dev;
463#ifdef CONFIG_IP_PIMSM 498#ifdef CONFIG_IP_PIMSM
464 if (v->flags&VIFF_REGISTER) 499 if (v->flags&VIFF_REGISTER)
@@ -805,7 +840,7 @@ static void mroute_clean_tables(struct sock *sk)
805 */ 840 */
806 for (i=0; i<maxvif; i++) { 841 for (i=0; i<maxvif; i++) {
807 if (!(vif_table[i].flags&VIFF_STATIC)) 842 if (!(vif_table[i].flags&VIFF_STATIC))
808 vif_delete(i); 843 vif_delete(i, 0);
809 } 844 }
810 845
811 /* 846 /*
@@ -918,7 +953,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
918 if (optname==MRT_ADD_VIF) { 953 if (optname==MRT_ADD_VIF) {
919 ret = vif_add(&vif, sk==mroute_socket); 954 ret = vif_add(&vif, sk==mroute_socket);
920 } else { 955 } else {
921 ret = vif_delete(vif.vifc_vifi); 956 ret = vif_delete(vif.vifc_vifi, 0);
922 } 957 }
923 rtnl_unlock(); 958 rtnl_unlock();
924 return ret; 959 return ret;
@@ -1089,7 +1124,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
1089 struct vif_device *v; 1124 struct vif_device *v;
1090 int ct; 1125 int ct;
1091 1126
1092 if (dev_net(dev) != &init_net) 1127 if (!net_eq(dev_net(dev), &init_net))
1093 return NOTIFY_DONE; 1128 return NOTIFY_DONE;
1094 1129
1095 if (event != NETDEV_UNREGISTER) 1130 if (event != NETDEV_UNREGISTER)
@@ -1097,7 +1132,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
1097 v=&vif_table[0]; 1132 v=&vif_table[0];
1098 for (ct=0;ct<maxvif;ct++,v++) { 1133 for (ct=0;ct<maxvif;ct++,v++) {
1099 if (v->dev==dev) 1134 if (v->dev==dev)
1100 vif_delete(ct); 1135 vif_delete(ct, 1);
1101 } 1136 }
1102 return NOTIFY_DONE; 1137 return NOTIFY_DONE;
1103} 1138}
@@ -1143,7 +1178,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
1143{ 1178{
1144 struct ip_options * opt = &(IPCB(skb)->opt); 1179 struct ip_options * opt = &(IPCB(skb)->opt);
1145 1180
1146 IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS); 1181 IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1147 1182
1148 if (unlikely(opt->optlen)) 1183 if (unlikely(opt->optlen))
1149 ip_forward_options(skb); 1184 ip_forward_options(skb);
@@ -1170,8 +1205,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1170 if (vif->flags & VIFF_REGISTER) { 1205 if (vif->flags & VIFF_REGISTER) {
1171 vif->pkt_out++; 1206 vif->pkt_out++;
1172 vif->bytes_out+=skb->len; 1207 vif->bytes_out+=skb->len;
1173 ((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len; 1208 vif->dev->stats.tx_bytes += skb->len;
1174 ((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++; 1209 vif->dev->stats.tx_packets++;
1175 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); 1210 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1176 kfree_skb(skb); 1211 kfree_skb(skb);
1177 return; 1212 return;
@@ -1206,7 +1241,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1206 to blackhole. 1241 to blackhole.
1207 */ 1242 */
1208 1243
1209 IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS); 1244 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1210 ip_rt_put(rt); 1245 ip_rt_put(rt);
1211 goto out_free; 1246 goto out_free;
1212 } 1247 }
@@ -1230,8 +1265,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1230 if (vif->flags & VIFF_TUNNEL) { 1265 if (vif->flags & VIFF_TUNNEL) {
1231 ip_encap(skb, vif->local, vif->remote); 1266 ip_encap(skb, vif->local, vif->remote);
1232 /* FIXME: extra output firewall step used to be here. --RR */ 1267 /* FIXME: extra output firewall step used to be here. --RR */
1233 ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++; 1268 vif->dev->stats.tx_packets++;
1234 ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len; 1269 vif->dev->stats.tx_bytes += skb->len;
1235 } 1270 }
1236 1271
1237 IPCB(skb)->flags |= IPSKB_FORWARDED; 1272 IPCB(skb)->flags |= IPSKB_FORWARDED;
@@ -1487,8 +1522,8 @@ int pim_rcv_v1(struct sk_buff * skb)
1487 skb->pkt_type = PACKET_HOST; 1522 skb->pkt_type = PACKET_HOST;
1488 dst_release(skb->dst); 1523 dst_release(skb->dst);
1489 skb->dst = NULL; 1524 skb->dst = NULL;
1490 ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; 1525 reg_dev->stats.rx_bytes += skb->len;
1491 ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; 1526 reg_dev->stats.rx_packets++;
1492 nf_reset(skb); 1527 nf_reset(skb);
1493 netif_rx(skb); 1528 netif_rx(skb);
1494 dev_put(reg_dev); 1529 dev_put(reg_dev);
@@ -1542,8 +1577,8 @@ static int pim_rcv(struct sk_buff * skb)
1542 skb->ip_summed = 0; 1577 skb->ip_summed = 0;
1543 skb->pkt_type = PACKET_HOST; 1578 skb->pkt_type = PACKET_HOST;
1544 dst_release(skb->dst); 1579 dst_release(skb->dst);
1545 ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; 1580 reg_dev->stats.rx_bytes += skb->len;
1546 ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; 1581 reg_dev->stats.rx_packets++;
1547 skb->dst = NULL; 1582 skb->dst = NULL;
1548 nf_reset(skb); 1583 nf_reset(skb);
1549 netif_rx(skb); 1584 netif_rx(skb);
@@ -1887,16 +1922,36 @@ static struct net_protocol pim_protocol = {
1887 * Setup for IP multicast routing 1922 * Setup for IP multicast routing
1888 */ 1923 */
1889 1924
1890void __init ip_mr_init(void) 1925int __init ip_mr_init(void)
1891{ 1926{
1927 int err;
1928
1892 mrt_cachep = kmem_cache_create("ip_mrt_cache", 1929 mrt_cachep = kmem_cache_create("ip_mrt_cache",
1893 sizeof(struct mfc_cache), 1930 sizeof(struct mfc_cache),
1894 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 1931 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1895 NULL); 1932 NULL);
1933 if (!mrt_cachep)
1934 return -ENOMEM;
1935
1896 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); 1936 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
1897 register_netdevice_notifier(&ip_mr_notifier); 1937 err = register_netdevice_notifier(&ip_mr_notifier);
1938 if (err)
1939 goto reg_notif_fail;
1898#ifdef CONFIG_PROC_FS 1940#ifdef CONFIG_PROC_FS
1899 proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops); 1941 err = -ENOMEM;
1900 proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops); 1942 if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
1943 goto proc_vif_fail;
1944 if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1945 goto proc_cache_fail;
1901#endif 1946#endif
1947 return 0;
1948reg_notif_fail:
1949 kmem_cache_destroy(mrt_cachep);
1950#ifdef CONFIG_PROC_FS
1951proc_vif_fail:
1952 unregister_netdevice_notifier(&ip_mr_notifier);
1953proc_cache_fail:
1954 proc_net_remove(&init_net, "ip_mr_vif");
1955#endif
1956 return err;
1902} 1957}
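
ip_mr_init() now reports failure instead of assuming success, using the kernel's usual labelled-unwind idiom. Two details stand out for review: the !mrt_cachep check is dead code while SLAB_PANIC is still passed, and the fall-through label order looks inverted (reg_notif_fail runs into proc_vif_fail, which unregisters a notifier that never registered, while a proc_cache_fail jump skips the notifier cleanup entirely). The strict reverse-order form, for comparison (cache creation and the CONFIG_PROC_FS ifdefs omitted for brevity):

static int __init ip_mr_init_sketch(void)
{
        int err = register_netdevice_notifier(&ip_mr_notifier);

        if (err)
                return err;
        if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0,
                                  &ipmr_vif_fops)) {
                err = -ENOMEM;
                goto out_notifier;
        }
        if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0,
                                  &ipmr_mfc_fops)) {
                err = -ENOMEM;
                goto out_vif;
        }
        return 0;

out_vif:                                /* undo in reverse order */
        proc_net_remove(&init_net, "ip_mr_vif");
out_notifier:
        unregister_netdevice_notifier(&ip_mr_notifier);
        return err;
}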
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 535abe0c45e7..1f1897a1a702 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * ip_vs_app.c: Application module support for IPVS 2 * ip_vs_app.c: Application module support for IPVS
3 * 3 *
4 * Version: $Id: ip_vs_app.c,v 1.17 2003/03/22 06:31:21 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
7 * 5 *
8 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 65f1ba112752..f8bdae47a77f 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -5,8 +5,6 @@
5 * high-performance and highly available server based on a 5 * high-performance and highly available server based on a
6 * cluster of servers. 6 * cluster of servers.
7 * 7 *
8 * Version: $Id: ip_vs_conn.c,v 1.31 2003/04/18 09:03:16 wensong Exp $
9 *
10 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
11 * Peter Kese <peter.kese@ijs.si> 9 * Peter Kese <peter.kese@ijs.si>
12 * Julian Anastasov <ja@ssi.bg> 10 * Julian Anastasov <ja@ssi.bg>
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 963981a9d501..a7879eafc3b5 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -5,8 +5,6 @@
5 * high-performance and highly available server based on a 5 * high-performance and highly available server based on a
6 * cluster of servers. 6 * cluster of servers.
7 * 7 *
8 * Version: $Id: ip_vs_core.c,v 1.34 2003/05/10 03:05:23 wensong Exp $
9 *
10 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
11 * Peter Kese <peter.kese@ijs.si> 9 * Peter Kese <peter.kese@ijs.si>
12 * Julian Anastasov <ja@ssi.bg> 10 * Julian Anastasov <ja@ssi.bg>
@@ -993,7 +991,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
993 == sysctl_ip_vs_sync_threshold[0])) || 991 == sysctl_ip_vs_sync_threshold[0])) ||
994 ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && 992 ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
995 ((cp->state == IP_VS_TCP_S_FIN_WAIT) || 993 ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
996 (cp->state == IP_VS_TCP_S_CLOSE))))) 994 (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
995 (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
997 ip_vs_sync_conn(cp); 996 ip_vs_sync_conn(cp);
998 cp->old_state = cp->state; 997 cp->old_state = cp->state;
999 998
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 94c5767c8e01..9a5ace0b4dd6 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -5,8 +5,6 @@
5 * high-performance and highly available server based on a 5 * high-performance and highly available server based on a
6 * cluster of servers. 6 * cluster of servers.
7 * 7 *
8 * Version: $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
9 *
10 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
11 * Peter Kese <peter.kese@ijs.si> 9 * Peter Kese <peter.kese@ijs.si>
12 * Julian Anastasov <ja@ssi.bg> 10 * Julian Anastasov <ja@ssi.bg>
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index dcf5d46aaa5e..8afc1503ed20 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * IPVS: Destination Hashing scheduling module 2 * IPVS: Destination Hashing scheduling module
3 * 3 *
4 * Version: $Id: ip_vs_dh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@gnuchina.org> 4 * Authors: Wensong Zhang <wensong@gnuchina.org>
7 * 5 *
8 * Inspired by the consistent hashing scheduler patch from 6 * Inspired by the consistent hashing scheduler patch from
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index dfa0d713c801..bc04eedd6dbb 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * ip_vs_est.c: simple rate estimator for IPVS 2 * ip_vs_est.c: simple rate estimator for IPVS
3 * 3 *
4 * Version: $Id: ip_vs_est.c,v 1.4 2002/11/30 01:50:35 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
7 * 5 *
8 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index 59aa166b7678..c1c758e4f733 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * ip_vs_ftp.c: IPVS ftp application module 2 * ip_vs_ftp.c: IPVS ftp application module
3 * 3 *
4 * Version: $Id: ip_vs_ftp.c,v 1.13 2002/09/15 08:14:08 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
7 * 5 *
8 * Changes: 6 * Changes:
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 3888642706ad..0efa3db4b180 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * IPVS: Locality-Based Least-Connection scheduling module 2 * IPVS: Locality-Based Least-Connection scheduling module
3 * 3 *
4 * Version: $Id: ip_vs_lblc.c,v 1.10 2002/09/15 08:14:08 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@gnuchina.org> 4 * Authors: Wensong Zhang <wensong@gnuchina.org>
7 * 5 *
8 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index daa260eb21cf..8e3bbeb45138 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * IPVS: Locality-Based Least-Connection with Replication scheduler 2 * IPVS: Locality-Based Least-Connection with Replication scheduler
3 * 3 *
4 * Version: $Id: ip_vs_lblcr.c,v 1.11 2002/09/15 08:14:08 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@gnuchina.org> 4 * Authors: Wensong Zhang <wensong@gnuchina.org>
7 * 5 *
8 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c
index d88fef90a641..ac9f08e065d5 100644
--- a/net/ipv4/ipvs/ip_vs_lc.c
+++ b/net/ipv4/ipvs/ip_vs_lc.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * IPVS: Least-Connection Scheduling module 2 * IPVS: Least-Connection Scheduling module
3 * 3 *
4 * Version: $Id: ip_vs_lc.c,v 1.10 2003/04/18 09:03:16 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
7 * 5 *
8 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c
index bc2a9e5f2a7b..a46bf258d420 100644
--- a/net/ipv4/ipvs/ip_vs_nq.c
+++ b/net/ipv4/ipvs/ip_vs_nq.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * IPVS: Never Queue scheduling module 2 * IPVS: Never Queue scheduling module
3 * 3 *
4 * Version: $Id: ip_vs_nq.c,v 1.2 2003/06/08 09:31:19 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
7 * 5 *
8 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index 4b1c16cbb16b..876714f23d65 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * ip_vs_proto.c: transport protocol load balancing support for IPVS 2 * ip_vs_proto.c: transport protocol load balancing support for IPVS
3 * 3 *
4 * Version: $Id: ip_vs_proto.c,v 1.2 2003/04/18 09:03:16 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
7 * Julian Anastasov <ja@ssi.bg> 5 * Julian Anastasov <ja@ssi.bg>
8 * 6 *
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 4bf835e1d86d..73e0ea87c1f5 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS 2 * ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS
3 * 3 *
4 * Version: $Id: ip_vs_proto_ah.c,v 1.1 2003/07/04 15:04:37 wensong Exp $
5 *
6 * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 4 * Authors: Julian Anastasov <ja@ssi.bg>, February 2002
7 * Wensong Zhang <wensong@linuxvirtualserver.org> 5 * Wensong Zhang <wensong@linuxvirtualserver.org>
8 * 6 *
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
index db6a6b7b1a0b..21d70c8ffa54 100644
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS 2 * ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS
3 * 3 *
4 * Version: $Id: ip_vs_proto_esp.c,v 1.1 2003/07/04 15:04:37 wensong Exp $
5 *
6 * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 4 * Authors: Julian Anastasov <ja@ssi.bg>, February 2002
7 * Wensong Zhang <wensong@linuxvirtualserver.org> 5 * Wensong Zhang <wensong@linuxvirtualserver.org>
8 * 6 *
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index b83dc14b0a4d..d0ea467986a0 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * ip_vs_proto_tcp.c: TCP load balancing support for IPVS 2 * ip_vs_proto_tcp.c: TCP load balancing support for IPVS
3 * 3 *
4 * Version: $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
7 * Julian Anastasov <ja@ssi.bg> 5 * Julian Anastasov <ja@ssi.bg>
8 * 6 *
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 75771cb3cd6f..c6be5d56823f 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * ip_vs_proto_udp.c: UDP load balancing support for IPVS 2 * ip_vs_proto_udp.c: UDP load balancing support for IPVS
3 * 3 *
4 * Version: $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
7 * Julian Anastasov <ja@ssi.bg> 5 * Julian Anastasov <ja@ssi.bg>
8 * 6 *
diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c
index 433f8a947924..c8db12d39e61 100644
--- a/net/ipv4/ipvs/ip_vs_rr.c
+++ b/net/ipv4/ipvs/ip_vs_rr.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * IPVS: Round-Robin Scheduling module 2 * IPVS: Round-Robin Scheduling module
3 * 3 *
4 * Version: $Id: ip_vs_rr.c,v 1.9 2002/09/15 08:14:08 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
7 * Peter Kese <peter.kese@ijs.si> 5 * Peter Kese <peter.kese@ijs.si>
8 * 6 *
diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c
index 121a32b1b756..b64767309855 100644
--- a/net/ipv4/ipvs/ip_vs_sched.c
+++ b/net/ipv4/ipvs/ip_vs_sched.c
@@ -5,8 +5,6 @@
5 * high-performance and highly available server based on a 5 * high-performance and highly available server based on a
6 * cluster of servers. 6 * cluster of servers.
7 * 7 *
8 * Version: $Id: ip_vs_sched.c,v 1.13 2003/05/10 03:05:23 wensong Exp $
9 *
10 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
11 * Peter Kese <peter.kese@ijs.si> 9 * Peter Kese <peter.kese@ijs.si>
12 * 10 *
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c
index dd7c128f9db3..2a7d31358181 100644
--- a/net/ipv4/ipvs/ip_vs_sed.c
+++ b/net/ipv4/ipvs/ip_vs_sed.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * IPVS: Shortest Expected Delay scheduling module 2 * IPVS: Shortest Expected Delay scheduling module
3 * 3 *
4 * Version: $Id: ip_vs_sed.c,v 1.1 2003/05/10 03:06:08 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
7 * 5 *
8 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 1b25b00ef1e1..b8fdfac65001 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * IPVS: Source Hashing scheduling module 2 * IPVS: Source Hashing scheduling module
3 * 3 *
4 * Version: $Id: ip_vs_sh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@gnuchina.org> 4 * Authors: Wensong Zhang <wensong@gnuchina.org>
7 * 5 *
8 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index eff54efe0351..45e9bd96c286 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -5,8 +5,6 @@
5 * high-performance and highly available server based on a 5 * high-performance and highly available server based on a
6 * cluster of servers. 6 * cluster of servers.
7 * 7 *
8 * Version: $Id: ip_vs_sync.c,v 1.13 2003/06/08 09:31:19 wensong Exp $
9 *
10 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
11 * 9 *
12 * ip_vs_sync: sync connection info from master load balancer to backups 10 * ip_vs_sync: sync connection info from master load balancer to backups
@@ -29,10 +27,12 @@
29#include <linux/in.h> 27#include <linux/in.h>
30#include <linux/igmp.h> /* for ip_mc_join_group */ 28#include <linux/igmp.h> /* for ip_mc_join_group */
31#include <linux/udp.h> 29#include <linux/udp.h>
30#include <linux/err.h>
31#include <linux/kthread.h>
32#include <linux/wait.h>
32 33
33#include <net/ip.h> 34#include <net/ip.h>
34#include <net/sock.h> 35#include <net/sock.h>
35#include <asm/uaccess.h> /* for get_fs and set_fs */
36 36
37#include <net/ip_vs.h> 37#include <net/ip_vs.h>
38 38
@@ -68,8 +68,8 @@ struct ip_vs_sync_conn_options {
68}; 68};
69 69
70struct ip_vs_sync_thread_data { 70struct ip_vs_sync_thread_data {
71 struct completion *startup; 71 struct socket *sock;
72 int state; 72 char *buf;
73}; 73};
74 74
75#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) 75#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn))
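
The reshaped ip_vs_sync_thread_data is the tell that the sync daemons move to the kthread API: instead of a completion used to signal startup, the cookie now carries everything the thread needs (its socket and receive buffer), and the sync_master_thread/sync_backup_thread task pointers added just below give the stop path a handle for kthread_stop(). A hedged sketch of the consumer side, since the actual thread bodies fall outside this excerpt:

static int sync_recv_loop_sketch(void *data)
{
        struct ip_vs_sync_thread_data *tinfo = data;

        while (!kthread_should_stop()) {
                /* a real loop would pull datagrams from tinfo->sock
                 * into tinfo->buf and feed the connection table */
                schedule_timeout_interruptible(HZ);
        }
        /* who releases tinfo->sock and tinfo->buf depends on the
         * start/stop protocol, which is outside this excerpt */
        return 0;
}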
@@ -140,18 +140,19 @@ volatile int ip_vs_backup_syncid = 0;
140char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; 140char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
141char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; 141char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
142 142
143/* sync daemon tasks */
144static struct task_struct *sync_master_thread;
145static struct task_struct *sync_backup_thread;
146
143/* multicast addr */ 147/* multicast addr */
144static struct sockaddr_in mcast_addr; 148static struct sockaddr_in mcast_addr = {
149 .sin_family = AF_INET,
150 .sin_port = __constant_htons(IP_VS_SYNC_PORT),
151 .sin_addr.s_addr = __constant_htonl(IP_VS_SYNC_GROUP),
152};
145 153
146 154
147static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) 155static inline struct ip_vs_sync_buff *sb_dequeue(void)
148{
149 spin_lock(&ip_vs_sync_lock);
150 list_add_tail(&sb->list, &ip_vs_sync_queue);
151 spin_unlock(&ip_vs_sync_lock);
152}
153
154static inline struct ip_vs_sync_buff * sb_dequeue(void)
155{ 156{
156 struct ip_vs_sync_buff *sb; 157 struct ip_vs_sync_buff *sb;
157 158
@@ -195,6 +196,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
195 kfree(sb); 196 kfree(sb);
196} 197}
197 198
199static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
200{
201 spin_lock(&ip_vs_sync_lock);
202 if (ip_vs_sync_state & IP_VS_STATE_MASTER)
203 list_add_tail(&sb->list, &ip_vs_sync_queue);
204 else
205 ip_vs_sync_buff_release(sb);
206 spin_unlock(&ip_vs_sync_lock);
207}
208
198/* 209/*
199 * Get the current sync buffer if it has been created for more 210 * Get the current sync buffer if it has been created for more
200 * than the specified time or the specified time is zero. 211 * than the specified time or the specified time is zero.
@@ -574,14 +585,17 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
574static struct socket * make_send_sock(void) 585static struct socket * make_send_sock(void)
575{ 586{
576 struct socket *sock; 587 struct socket *sock;
588 int result;
577 589
578 /* First create a socket */ 590 /* First create a socket */
579 if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) { 591 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
592 if (result < 0) {
580 IP_VS_ERR("Error during creation of socket; terminating\n"); 593 IP_VS_ERR("Error during creation of socket; terminating\n");
581 return NULL; 594 return ERR_PTR(result);
582 } 595 }
583 596
584 if (set_mcast_if(sock->sk, ip_vs_master_mcast_ifn) < 0) { 597 result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
598 if (result < 0) {
585 IP_VS_ERR("Error setting outbound mcast interface\n"); 599 IP_VS_ERR("Error setting outbound mcast interface\n");
586 goto error; 600 goto error;
587 } 601 }
@@ -589,14 +603,15 @@ static struct socket * make_send_sock(void)
589 set_mcast_loop(sock->sk, 0); 603 set_mcast_loop(sock->sk, 0);
590 set_mcast_ttl(sock->sk, 1); 604 set_mcast_ttl(sock->sk, 1);
591 605
592 if (bind_mcastif_addr(sock, ip_vs_master_mcast_ifn) < 0) { 606 result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
607 if (result < 0) {
593 IP_VS_ERR("Error binding address of the mcast interface\n"); 608 IP_VS_ERR("Error binding address of the mcast interface\n");
594 goto error; 609 goto error;
595 } 610 }
596 611
597 if (sock->ops->connect(sock, 612 result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
598 (struct sockaddr*)&mcast_addr, 613 sizeof(struct sockaddr), 0);
599 sizeof(struct sockaddr), 0) < 0) { 614 if (result < 0) {
600 IP_VS_ERR("Error connecting to the multicast addr\n"); 615 IP_VS_ERR("Error connecting to the multicast addr\n");
601 goto error; 616 goto error;
602 } 617 }
@@ -605,7 +620,7 @@ static struct socket * make_send_sock(void)
605 620
606 error: 621 error:
607 sock_release(sock); 622 sock_release(sock);
608 return NULL; 623 return ERR_PTR(result);
609} 624}
610 625
611 626
@@ -615,27 +630,30 @@ static struct socket * make_send_sock(void)
615static struct socket * make_receive_sock(void) 630static struct socket * make_receive_sock(void)
616{ 631{
617 struct socket *sock; 632 struct socket *sock;
633 int result;
618 634
619 /* First create a socket */ 635 /* First create a socket */
620 if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) { 636 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
637 if (result < 0) {
621 IP_VS_ERR("Error during creation of socket; terminating\n"); 638 IP_VS_ERR("Error during creation of socket; terminating\n");
622 return NULL; 639 return ERR_PTR(result);
623 } 640 }
624 641
625 /* it is equivalent to the REUSEADDR option in user-space */ 642 /* it is equivalent to the REUSEADDR option in user-space */
626 sock->sk->sk_reuse = 1; 643 sock->sk->sk_reuse = 1;
627 644
628 if (sock->ops->bind(sock, 645 result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
629 (struct sockaddr*)&mcast_addr, 646 sizeof(struct sockaddr));
630 sizeof(struct sockaddr)) < 0) { 647 if (result < 0) {
631 IP_VS_ERR("Error binding to the multicast addr\n"); 648 IP_VS_ERR("Error binding to the multicast addr\n");
632 goto error; 649 goto error;
633 } 650 }
634 651
635 /* join the multicast group */ 652 /* join the multicast group */
636 if (join_mcast_group(sock->sk, 653 result = join_mcast_group(sock->sk,
637 (struct in_addr*)&mcast_addr.sin_addr, 654 (struct in_addr *) &mcast_addr.sin_addr,
638 ip_vs_backup_mcast_ifn) < 0) { 655 ip_vs_backup_mcast_ifn);
656 if (result < 0) {
639 IP_VS_ERR("Error joining to the multicast group\n"); 657 IP_VS_ERR("Error joining to the multicast group\n");
640 goto error; 658 goto error;
641 } 659 }
@@ -644,7 +662,7 @@ static struct socket * make_receive_sock(void)
644 662
645 error: 663 error:
646 sock_release(sock); 664 sock_release(sock);
647 return NULL; 665 return ERR_PTR(result);
648} 666}
649 667
650 668
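[Editor's note] The two constructor hunks above change the failure convention from returning a bare NULL to encoding the errno in the returned pointer, which the rewritten start_sync_thread() below unpacks with IS_ERR()/PTR_ERR(). A minimal sketch of that idiom, with hypothetical function names (not taken from the patch):

#include <linux/err.h>

/* Hypothetical constructor following the same convention. */
static struct socket *make_example_sock(void)
{
	struct socket *sock;
	int result;

	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
	if (result < 0)
		return ERR_PTR(result);	/* encode the negative errno in the pointer */
	return sock;
}

static int example_caller(void)
{
	struct socket *sock = make_example_sock();

	if (IS_ERR(sock))
		return PTR_ERR(sock);	/* recover the negative errno */
	/* ... use sock ... */
	sock_release(sock);
	return 0;
}

One pointer thus carries both the object and the precise error code, so the caller can propagate -ENOMEM, -EINVAL etc. instead of a generic failure.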
@@ -702,44 +720,29 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
702} 720}
703 721
704 722
705static DECLARE_WAIT_QUEUE_HEAD(sync_wait); 723static int sync_thread_master(void *data)
706static pid_t sync_master_pid = 0;
707static pid_t sync_backup_pid = 0;
708
709static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait);
710static int stop_master_sync = 0;
711static int stop_backup_sync = 0;
712
713static void sync_master_loop(void)
714{ 724{
715 struct socket *sock; 725 struct ip_vs_sync_thread_data *tinfo = data;
716 struct ip_vs_sync_buff *sb; 726 struct ip_vs_sync_buff *sb;
717 727
718 /* create the sending multicast socket */
719 sock = make_send_sock();
720 if (!sock)
721 return;
722
723 IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, " 728 IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, "
724 "syncid = %d\n", 729 "syncid = %d\n",
725 ip_vs_master_mcast_ifn, ip_vs_master_syncid); 730 ip_vs_master_mcast_ifn, ip_vs_master_syncid);
726 731
727 for (;;) { 732 while (!kthread_should_stop()) {
728 while ((sb=sb_dequeue())) { 733 while ((sb = sb_dequeue())) {
729 ip_vs_send_sync_msg(sock, sb->mesg); 734 ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
730 ip_vs_sync_buff_release(sb); 735 ip_vs_sync_buff_release(sb);
731 } 736 }
732 737
733 /* check if entries stay in curr_sb for 2 seconds */ 738 /* check if entries stay in curr_sb for 2 seconds */
734 if ((sb = get_curr_sync_buff(2*HZ))) { 739 sb = get_curr_sync_buff(2 * HZ);
735 ip_vs_send_sync_msg(sock, sb->mesg); 740 if (sb) {
741 ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
736 ip_vs_sync_buff_release(sb); 742 ip_vs_sync_buff_release(sb);
737 } 743 }
738 744
739 if (stop_master_sync) 745 schedule_timeout_interruptible(HZ);
740 break;
741
742 msleep_interruptible(1000);
743 } 746 }
744 747
745 /* clean up the sync_buff queue */ 748 /* clean up the sync_buff queue */
@@ -753,267 +756,175 @@ static void sync_master_loop(void)
753 } 756 }
754 757
755 /* release the sending multicast socket */ 758 /* release the sending multicast socket */
756 sock_release(sock); 759 sock_release(tinfo->sock);
760 kfree(tinfo);
761
762 return 0;
757} 763}
758 764
759 765
760static void sync_backup_loop(void) 766static int sync_thread_backup(void *data)
761{ 767{
762 struct socket *sock; 768 struct ip_vs_sync_thread_data *tinfo = data;
763 char *buf;
764 int len; 769 int len;
765 770
766 if (!(buf = kmalloc(sync_recv_mesg_maxlen, GFP_ATOMIC))) {
767 IP_VS_ERR("sync_backup_loop: kmalloc error\n");
768 return;
769 }
770
771 /* create the receiving multicast socket */
772 sock = make_receive_sock();
773 if (!sock)
774 goto out;
775
776 IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, " 771 IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, "
777 "syncid = %d\n", 772 "syncid = %d\n",
778 ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); 773 ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
779 774
780 for (;;) { 775 while (!kthread_should_stop()) {
781 /* do you have data now? */ 776 wait_event_interruptible(*tinfo->sock->sk->sk_sleep,
782 while (!skb_queue_empty(&(sock->sk->sk_receive_queue))) { 777 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
783 if ((len = 778 || kthread_should_stop());
784 ip_vs_receive(sock, buf, 779
785 sync_recv_mesg_maxlen)) <= 0) { 780 /* do we have data now? */
781 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
782 len = ip_vs_receive(tinfo->sock, tinfo->buf,
783 sync_recv_mesg_maxlen);
784 if (len <= 0) {
786 IP_VS_ERR("receiving message error\n"); 785 IP_VS_ERR("receiving message error\n");
787 break; 786 break;
788 } 787 }
789 /* disable bottom half, because it accessed the data 788
789 /* disable bottom half, because it accesses the data
790 shared by softirq while getting/creating conns */ 790 shared by softirq while getting/creating conns */
791 local_bh_disable(); 791 local_bh_disable();
792 ip_vs_process_message(buf, len); 792 ip_vs_process_message(tinfo->buf, len);
793 local_bh_enable(); 793 local_bh_enable();
794 } 794 }
795
796 if (stop_backup_sync)
797 break;
798
799 msleep_interruptible(1000);
800 } 795 }
801 796
802 /* release the sending multicast socket */ 797 /* release the sending multicast socket */
803 sock_release(sock); 798 sock_release(tinfo->sock);
799 kfree(tinfo->buf);
800 kfree(tinfo);
804 801
805 out: 802 return 0;
806 kfree(buf);
807} 803}
808 804
809 805
810static void set_sync_pid(int sync_state, pid_t sync_pid) 806int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
811{
812 if (sync_state == IP_VS_STATE_MASTER)
813 sync_master_pid = sync_pid;
814 else if (sync_state == IP_VS_STATE_BACKUP)
815 sync_backup_pid = sync_pid;
816}
817
818static void set_stop_sync(int sync_state, int set)
819{ 807{
820 if (sync_state == IP_VS_STATE_MASTER) 808 struct ip_vs_sync_thread_data *tinfo;
821 stop_master_sync = set; 809 struct task_struct **realtask, *task;
822 else if (sync_state == IP_VS_STATE_BACKUP) 810 struct socket *sock;
823 stop_backup_sync = set; 811 char *name, *buf = NULL;
824 else { 812 int (*threadfn)(void *data);
825 stop_master_sync = set; 813 int result = -ENOMEM;
826 stop_backup_sync = set;
827 }
828}
829 814
830static int sync_thread(void *startup) 815 IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
831{ 816 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
832 DECLARE_WAITQUEUE(wait, current); 817 sizeof(struct ip_vs_sync_conn));
833 mm_segment_t oldmm;
834 int state;
835 const char *name;
836 struct ip_vs_sync_thread_data *tinfo = startup;
837 818
838 /* increase the module use count */ 819 if (state == IP_VS_STATE_MASTER) {
839 ip_vs_use_count_inc(); 820 if (sync_master_thread)
821 return -EEXIST;
840 822
841 if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) { 823 strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
842 state = IP_VS_STATE_MASTER; 824 sizeof(ip_vs_master_mcast_ifn));
825 ip_vs_master_syncid = syncid;
826 realtask = &sync_master_thread;
843 name = "ipvs_syncmaster"; 827 name = "ipvs_syncmaster";
844 } else if (ip_vs_sync_state & IP_VS_STATE_BACKUP && !sync_backup_pid) { 828 threadfn = sync_thread_master;
845 state = IP_VS_STATE_BACKUP; 829 sock = make_send_sock();
830 } else if (state == IP_VS_STATE_BACKUP) {
831 if (sync_backup_thread)
832 return -EEXIST;
833
834 strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
835 sizeof(ip_vs_backup_mcast_ifn));
836 ip_vs_backup_syncid = syncid;
837 realtask = &sync_backup_thread;
846 name = "ipvs_syncbackup"; 838 name = "ipvs_syncbackup";
839 threadfn = sync_thread_backup;
840 sock = make_receive_sock();
847 } else { 841 } else {
848 IP_VS_BUG();
849 ip_vs_use_count_dec();
850 return -EINVAL; 842 return -EINVAL;
851 } 843 }
852 844
853 daemonize(name); 845 if (IS_ERR(sock)) {
854 846 result = PTR_ERR(sock);
855 oldmm = get_fs(); 847 goto out;
856 set_fs(KERNEL_DS); 848 }
857
858 /* Block all signals */
859 spin_lock_irq(&current->sighand->siglock);
860 siginitsetinv(&current->blocked, 0);
861 recalc_sigpending();
862 spin_unlock_irq(&current->sighand->siglock);
863 849
864 /* set the maximum length of sync message */
865 set_sync_mesg_maxlen(state); 850 set_sync_mesg_maxlen(state);
851 if (state == IP_VS_STATE_BACKUP) {
852 buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
853 if (!buf)
854 goto outsocket;
855 }
866 856
867 /* set up multicast address */ 857 tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
868 mcast_addr.sin_family = AF_INET; 858 if (!tinfo)
869 mcast_addr.sin_port = htons(IP_VS_SYNC_PORT); 859 goto outbuf;
870 mcast_addr.sin_addr.s_addr = htonl(IP_VS_SYNC_GROUP);
871
872 add_wait_queue(&sync_wait, &wait);
873
874 set_sync_pid(state, task_pid_nr(current));
875 complete(tinfo->startup);
876
877 /*
878 * once we call the completion queue above, we should
879 * null out that reference, since its allocated on the
880 * stack of the creating kernel thread
881 */
882 tinfo->startup = NULL;
883
884 /* processing master/backup loop here */
885 if (state == IP_VS_STATE_MASTER)
886 sync_master_loop();
887 else if (state == IP_VS_STATE_BACKUP)
888 sync_backup_loop();
889 else IP_VS_BUG();
890
891 remove_wait_queue(&sync_wait, &wait);
892
893 /* thread exits */
894
895 /*
896 * If we weren't explicitly stopped, then we
897 * exited in error, and should undo our state
898 */
899 if ((!stop_master_sync) && (!stop_backup_sync))
900 ip_vs_sync_state -= tinfo->state;
901 860
902 set_sync_pid(state, 0); 861 tinfo->sock = sock;
903 IP_VS_INFO("sync thread stopped!\n"); 862 tinfo->buf = buf;
904 863
905 set_fs(oldmm); 864 task = kthread_run(threadfn, tinfo, name);
865 if (IS_ERR(task)) {
866 result = PTR_ERR(task);
867 goto outtinfo;
868 }
906 869
907 /* decrease the module use count */ 870 /* mark as active */
908 ip_vs_use_count_dec(); 871 *realtask = task;
872 ip_vs_sync_state |= state;
909 873
910 set_stop_sync(state, 0); 874 /* increase the module use count */
911 wake_up(&stop_sync_wait); 875 ip_vs_use_count_inc();
912 876
913 /*
914 * we need to free the structure that was allocated
915 * for us in start_sync_thread
916 */
917 kfree(tinfo);
918 return 0; 877 return 0;
919}
920
921
922static int fork_sync_thread(void *startup)
923{
924 pid_t pid;
925
926 /* fork the sync thread here, then the parent process of the
927 sync thread is the init process after this thread exits. */
928 repeat:
929 if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) {
930 IP_VS_ERR("could not create sync_thread due to %d... "
931 "retrying.\n", pid);
932 msleep_interruptible(1000);
933 goto repeat;
934 }
935 878
936 return 0; 879outtinfo:
880 kfree(tinfo);
881outbuf:
882 kfree(buf);
883outsocket:
884 sock_release(sock);
885out:
886 return result;
937} 887}
938 888
939 889
940int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) 890int stop_sync_thread(int state)
941{ 891{
942 DECLARE_COMPLETION_ONSTACK(startup);
943 pid_t pid;
944 struct ip_vs_sync_thread_data *tinfo;
945
946 if ((state == IP_VS_STATE_MASTER && sync_master_pid) ||
947 (state == IP_VS_STATE_BACKUP && sync_backup_pid))
948 return -EEXIST;
949
950 /*
951 * Note that tinfo will be freed in sync_thread on exit
952 */
953 tinfo = kmalloc(sizeof(struct ip_vs_sync_thread_data), GFP_KERNEL);
954 if (!tinfo)
955 return -ENOMEM;
956
957 IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); 892 IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
958 IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
959 sizeof(struct ip_vs_sync_conn));
960 893
961 ip_vs_sync_state |= state;
962 if (state == IP_VS_STATE_MASTER) { 894 if (state == IP_VS_STATE_MASTER) {
963 strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, 895 if (!sync_master_thread)
964 sizeof(ip_vs_master_mcast_ifn)); 896 return -ESRCH;
965 ip_vs_master_syncid = syncid;
966 } else {
967 strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
968 sizeof(ip_vs_backup_mcast_ifn));
969 ip_vs_backup_syncid = syncid;
970 }
971
972 tinfo->state = state;
973 tinfo->startup = &startup;
974
975 repeat:
976 if ((pid = kernel_thread(fork_sync_thread, tinfo, 0)) < 0) {
977 IP_VS_ERR("could not create fork_sync_thread due to %d... "
978 "retrying.\n", pid);
979 msleep_interruptible(1000);
980 goto repeat;
981 }
982
983 wait_for_completion(&startup);
984
985 return 0;
986}
987 897
898 IP_VS_INFO("stopping master sync thread %d ...\n",
899 task_pid_nr(sync_master_thread));
988 900
989int stop_sync_thread(int state) 901 /*
990{ 902 * The lock synchronizes with sb_queue_tail(), so that we don't
991 DECLARE_WAITQUEUE(wait, current); 903 * add sync buffers to the queue, when we are already in
904 * progress of stopping the master sync daemon.
905 */
992 906
993 if ((state == IP_VS_STATE_MASTER && !sync_master_pid) || 907 spin_lock(&ip_vs_sync_lock);
994 (state == IP_VS_STATE_BACKUP && !sync_backup_pid)) 908 ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
995 return -ESRCH; 909 spin_unlock(&ip_vs_sync_lock);
910 kthread_stop(sync_master_thread);
911 sync_master_thread = NULL;
912 } else if (state == IP_VS_STATE_BACKUP) {
913 if (!sync_backup_thread)
914 return -ESRCH;
915
916 IP_VS_INFO("stopping backup sync thread %d ...\n",
917 task_pid_nr(sync_backup_thread));
918
919 ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
920 kthread_stop(sync_backup_thread);
921 sync_backup_thread = NULL;
922 } else {
923 return -EINVAL;
924 }
996 925
997 IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); 926 /* decrease the module use count */
998 IP_VS_INFO("stopping sync thread %d ...\n", 927 ip_vs_use_count_dec();
999 (state == IP_VS_STATE_MASTER) ?
1000 sync_master_pid : sync_backup_pid);
1001
1002 __set_current_state(TASK_UNINTERRUPTIBLE);
1003 add_wait_queue(&stop_sync_wait, &wait);
1004 set_stop_sync(state, 1);
1005 ip_vs_sync_state -= state;
1006 wake_up(&sync_wait);
1007 schedule();
1008 __set_current_state(TASK_RUNNING);
1009 remove_wait_queue(&stop_sync_wait, &wait);
1010
1011 /* Note: no need to reap the sync thread, because its parent
1012 process is the init process */
1013
1014 if ((state == IP_VS_STATE_MASTER && stop_master_sync) ||
1015 (state == IP_VS_STATE_BACKUP && stop_backup_sync))
1016 IP_VS_BUG();
1017 928
1018 return 0; 929 return 0;
1019} 930}
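[Editor's note] The ip_vs_sync.c rewrite above retires the old kernel_thread()/daemonize() machinery, the startup completion, and the stop_*_sync flag/waitqueue pairs in favor of the kthread API, which provides all of that: kthread_run() returns a task_struct (or an ERR_PTR), the thread polls kthread_should_stop() in its loop, and kthread_stop() wakes the thread and blocks until it exits. A minimal sketch of the pattern, with hypothetical names:

#include <linux/err.h>
#include <linux/kthread.h>

static struct task_struct *example_task;

static int example_threadfn(void *data)
{
	while (!kthread_should_stop()) {
		/* ... do one unit of work ... */
		schedule_timeout_interruptible(HZ);
	}
	return 0;	/* becomes kthread_stop()'s return value */
}

static int example_start(void)
{
	struct task_struct *task;

	task = kthread_run(example_threadfn, NULL, "example");
	if (IS_ERR(task))
		return PTR_ERR(task);
	example_task = task;
	return 0;
}

static void example_stop(void)
{
	kthread_stop(example_task);	/* wakes the thread, waits for exit */
	example_task = NULL;
}

Note how stop_sync_thread() above clears IP_VS_STATE_MASTER under ip_vs_sync_lock before calling kthread_stop(): the reworked sb_queue_tail() checks that flag under the same lock, so no new sync buffers can be queued while the master thread is being torn down.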
diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c
index 8a9d913261d8..772c3cb4eca1 100644
--- a/net/ipv4/ipvs/ip_vs_wlc.c
+++ b/net/ipv4/ipvs/ip_vs_wlc.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * IPVS: Weighted Least-Connection Scheduling module 2 * IPVS: Weighted Least-Connection Scheduling module
3 * 3 *
4 * Version: $Id: ip_vs_wlc.c,v 1.13 2003/04/18 09:03:16 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
7 * Peter Kese <peter.kese@ijs.si> 5 * Peter Kese <peter.kese@ijs.si>
8 * 6 *
diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c
index 85c680add6df..1d6932d7dc97 100644
--- a/net/ipv4/ipvs/ip_vs_wrr.c
+++ b/net/ipv4/ipvs/ip_vs_wrr.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * IPVS: Weighted Round-Robin Scheduling module 2 * IPVS: Weighted Round-Robin Scheduling module
3 * 3 *
4 * Version: $Id: ip_vs_wrr.c,v 1.12 2002/09/15 08:14:08 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
7 * 5 *
8 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index f63006caea03..9892d4aca42e 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * ip_vs_xmit.c: various packet transmitters for IPVS 2 * ip_vs_xmit.c: various packet transmitters for IPVS
3 * 3 *
4 * Version: $Id: ip_vs_xmit.c,v 1.2 2002/11/30 01:50:35 wensong Exp $
5 *
6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
7 * Julian Anastasov <ja@ssi.bg> 5 * Julian Anastasov <ja@ssi.bg>
8 * 6 *
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 2767841a8cef..f23e60c93ef9 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -213,8 +213,7 @@ config IP_NF_TARGET_NETMAP
213 help 213 help
214 NETMAP is an implementation of static 1:1 NAT mapping of network 214 NETMAP is an implementation of static 1:1 NAT mapping of network
215 addresses. It maps the network address part, while keeping the host 215 addresses. It maps the network address part, while keeping the host
216 address part intact. It is similar to Fast NAT, except that 216 address part intact.
217 Netfilter's connection tracking doesn't work well with Fast NAT.
218 217
219 To compile it as a module, choose M here. If unsure, say N. 218 To compile it as a module, choose M here. If unsure, say N.
220 219
@@ -365,6 +364,18 @@ config IP_NF_RAW
365 If you want to compile it as a module, say M here and read 364 If you want to compile it as a module, say M here and read
366 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. 365 <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
367 366
367# security table for MAC policy
368config IP_NF_SECURITY
369 tristate "Security table"
370 depends on IP_NF_IPTABLES
371 depends on SECURITY
372 default m if NETFILTER_ADVANCED=n
373 help
374 This option adds a `security' table to iptables, for use
375 with Mandatory Access Control (MAC) policy.
376
377 If unsure, say N.
378
368# ARP tables 379# ARP tables
369config IP_NF_ARPTABLES 380config IP_NF_ARPTABLES
370 tristate "ARP tables support" 381 tristate "ARP tables support"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index d9b92fbf5579..3f31291f37ce 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
42obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o 42obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
43obj-$(CONFIG_NF_NAT) += iptable_nat.o 43obj-$(CONFIG_NF_NAT) += iptable_nat.o
44obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o 44obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
45obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
45 46
46# matches 47# matches
47obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o 48obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 26a37cedcf2e..432ce9d1c11c 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -156,7 +156,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
156 case IPQ_COPY_META: 156 case IPQ_COPY_META:
157 case IPQ_COPY_NONE: 157 case IPQ_COPY_NONE:
158 size = NLMSG_SPACE(sizeof(*pmsg)); 158 size = NLMSG_SPACE(sizeof(*pmsg));
159 data_len = 0;
160 break; 159 break;
161 160
162 case IPQ_COPY_PACKET: 161 case IPQ_COPY_PACKET:
@@ -224,8 +223,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
224 return skb; 223 return skb;
225 224
226nlmsg_failure: 225nlmsg_failure:
227 if (skb)
228 kfree_skb(skb);
229 *errp = -EINVAL; 226 *errp = -EINVAL;
230 printk(KERN_ERR "ip_queue: error creating packet message\n"); 227 printk(KERN_ERR "ip_queue: error creating packet message\n");
231 return NULL; 228 return NULL;
@@ -480,7 +477,7 @@ ipq_rcv_dev_event(struct notifier_block *this,
480{ 477{
481 struct net_device *dev = ptr; 478 struct net_device *dev = ptr;
482 479
483 if (dev_net(dev) != &init_net) 480 if (!net_eq(dev_net(dev), &init_net))
484 return NOTIFY_DONE; 481 return NOTIFY_DONE;
485 482
486 /* Drop any packets associated with the downed device */ 483 /* Drop any packets associated with the downed device */
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 84c26dd27d81..0841aefaa503 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -120,7 +120,7 @@ static int masq_device_event(struct notifier_block *this,
120{ 120{
121 const struct net_device *dev = ptr; 121 const struct net_device *dev = ptr;
122 122
123 if (dev_net(dev) != &init_net) 123 if (!net_eq(dev_net(dev), &init_net))
124 return NOTIFY_DONE; 124 return NOTIFY_DONE;
125 125
126 if (event == NETDEV_DOWN) { 126 if (event == NETDEV_DOWN) {
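[Editor's note] Both notifier fixes above (ip_queue and MASQUERADE) replace a direct pointer comparison of network namespaces with net_eq(). The two are equivalent when namespaces are enabled, but with CONFIG_NET_NS=n net_eq() is a static inline returning a constant 1, so the compiler can drop the dead branch entirely. The intended shape of such a notifier, sketched with a made-up handler name:

static int example_device_event(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	/* Only care about devices in the initial namespace. */
	if (!net_eq(dev_net(dev), &init_net))
		return NOTIFY_DONE;

	if (event == NETDEV_DOWN) {
		/* ... clean up per-device state ... */
	}
	return NOTIFY_DONE;
}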
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
new file mode 100644
index 000000000000..2b472ac2263a
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -0,0 +1,180 @@
1/*
2 * "security" table
3 *
4 * This is for use by Mandatory Access Control (MAC) security models,
5 * which need to be able to manage security policy in a separate
6 * context from DAC.
7 *
8 * Based on iptable_mangle.c
9 *
10 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
11 * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org>
12 * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com>
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License version 2 as
16 * published by the Free Software Foundation.
17 */
18#include <linux/module.h>
19#include <linux/netfilter_ipv4/ip_tables.h>
20#include <net/ip.h>
21
22MODULE_LICENSE("GPL");
23MODULE_AUTHOR("James Morris <jmorris@redhat.com>");
24MODULE_DESCRIPTION("iptables security table, for MAC rules");
25
26#define SECURITY_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
27 (1 << NF_INET_FORWARD) | \
28 (1 << NF_INET_LOCAL_OUT))
29
30static struct
31{
32 struct ipt_replace repl;
33 struct ipt_standard entries[3];
34 struct ipt_error term;
35} initial_table __initdata = {
36 .repl = {
37 .name = "security",
38 .valid_hooks = SECURITY_VALID_HOOKS,
39 .num_entries = 4,
40 .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
41 .hook_entry = {
42 [NF_INET_LOCAL_IN] = 0,
43 [NF_INET_FORWARD] = sizeof(struct ipt_standard),
44 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
45 },
46 .underflow = {
47 [NF_INET_LOCAL_IN] = 0,
48 [NF_INET_FORWARD] = sizeof(struct ipt_standard),
49 [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
50 },
51 },
52 .entries = {
53 IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */
54 IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */
55 IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
56 },
57 .term = IPT_ERROR_INIT, /* ERROR */
58};
59
60static struct xt_table security_table = {
61 .name = "security",
62 .valid_hooks = SECURITY_VALID_HOOKS,
63 .lock = __RW_LOCK_UNLOCKED(security_table.lock),
64 .me = THIS_MODULE,
65 .af = AF_INET,
66};
67
68static unsigned int
69ipt_local_in_hook(unsigned int hook,
70 struct sk_buff *skb,
71 const struct net_device *in,
72 const struct net_device *out,
73 int (*okfn)(struct sk_buff *))
74{
75 return ipt_do_table(skb, hook, in, out,
76 nf_local_in_net(in, out)->ipv4.iptable_security);
77}
78
79static unsigned int
80ipt_forward_hook(unsigned int hook,
81 struct sk_buff *skb,
82 const struct net_device *in,
83 const struct net_device *out,
84 int (*okfn)(struct sk_buff *))
85{
86 return ipt_do_table(skb, hook, in, out,
87 nf_forward_net(in, out)->ipv4.iptable_security);
88}
89
90static unsigned int
91ipt_local_out_hook(unsigned int hook,
92 struct sk_buff *skb,
93 const struct net_device *in,
94 const struct net_device *out,
95 int (*okfn)(struct sk_buff *))
96{
97 /* Somebody is playing with raw sockets. */
98 if (skb->len < sizeof(struct iphdr)
99 || ip_hdrlen(skb) < sizeof(struct iphdr)) {
100 if (net_ratelimit())
101 printk(KERN_INFO "iptable_security: ignoring short "
102 "SOCK_RAW packet.\n");
103 return NF_ACCEPT;
104 }
105 return ipt_do_table(skb, hook, in, out,
106 nf_local_out_net(in, out)->ipv4.iptable_security);
107}
108
109static struct nf_hook_ops ipt_ops[] __read_mostly = {
110 {
111 .hook = ipt_local_in_hook,
112 .owner = THIS_MODULE,
113 .pf = PF_INET,
114 .hooknum = NF_INET_LOCAL_IN,
115 .priority = NF_IP_PRI_SECURITY,
116 },
117 {
118 .hook = ipt_forward_hook,
119 .owner = THIS_MODULE,
120 .pf = PF_INET,
121 .hooknum = NF_INET_FORWARD,
122 .priority = NF_IP_PRI_SECURITY,
123 },
124 {
125 .hook = ipt_local_out_hook,
126 .owner = THIS_MODULE,
127 .pf = PF_INET,
128 .hooknum = NF_INET_LOCAL_OUT,
129 .priority = NF_IP_PRI_SECURITY,
130 },
131};
132
133static int __net_init iptable_security_net_init(struct net *net)
134{
135 net->ipv4.iptable_security =
136 ipt_register_table(net, &security_table, &initial_table.repl);
137
138 if (IS_ERR(net->ipv4.iptable_security))
139 return PTR_ERR(net->ipv4.iptable_security);
140
141 return 0;
142}
143
144static void __net_exit iptable_security_net_exit(struct net *net)
145{
146 ipt_unregister_table(net->ipv4.iptable_security);
147}
148
149static struct pernet_operations iptable_security_net_ops = {
150 .init = iptable_security_net_init,
151 .exit = iptable_security_net_exit,
152};
153
154static int __init iptable_security_init(void)
155{
156 int ret;
157
158 ret = register_pernet_subsys(&iptable_security_net_ops);
159 if (ret < 0)
160 return ret;
161
162 ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
163 if (ret < 0)
164 goto cleanup_table;
165
166 return ret;
167
168cleanup_table:
169 unregister_pernet_subsys(&iptable_security_net_ops);
170 return ret;
171}
172
173static void __exit iptable_security_fini(void)
174{
175 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
176 unregister_pernet_subsys(&iptable_security_net_ops);
177}
178
179module_init(iptable_security_init);
180module_exit(iptable_security_fini);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 78ab19accace..97791048fa9b 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -87,9 +87,8 @@ static int icmp_packet(struct nf_conn *ct,
87 means this will only run once even if count hits zero twice 87 means this will only run once even if count hits zero twice
88 (theoretically possible with SMP) */ 88 (theoretically possible with SMP) */
89 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { 89 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
90 if (atomic_dec_and_test(&ct->proto.icmp.count) 90 if (atomic_dec_and_test(&ct->proto.icmp.count))
91 && del_timer(&ct->timeout)) 91 nf_ct_kill_acct(ct, ctinfo, skb);
92 ct->timeout.function((unsigned long)ct);
93 } else { 92 } else {
94 atomic_inc(&ct->proto.icmp.count); 93 atomic_inc(&ct->proto.icmp.count);
95 nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); 94 nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
index 82e4c0e286b8..65e470bc6123 100644
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -36,7 +36,7 @@ sctp_manip_pkt(struct sk_buff *skb,
36 sctp_sctphdr_t *hdr; 36 sctp_sctphdr_t *hdr;
37 unsigned int hdroff = iphdroff + iph->ihl*4; 37 unsigned int hdroff = iphdroff + iph->ihl*4;
38 __be32 oldip, newip; 38 __be32 oldip, newip;
39 u32 crc32; 39 __be32 crc32;
40 40
41 if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) 41 if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
42 return false; 42 return false;
@@ -61,7 +61,7 @@ sctp_manip_pkt(struct sk_buff *skb,
61 crc32 = sctp_update_cksum((u8 *)skb->data, skb_headlen(skb), 61 crc32 = sctp_update_cksum((u8 *)skb->data, skb_headlen(skb),
62 crc32); 62 crc32);
63 crc32 = sctp_end_cksum(crc32); 63 crc32 = sctp_end_cksum(crc32);
64 hdr->checksum = htonl(crc32); 64 hdr->checksum = crc32;
65 65
66 return true; 66 return true;
67} 67}
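[Editor's note] The type change above matters on little-endian hosts: the hunk implies that sctp_end_cksum() now yields the checksum already in network byte order (__be32), so the old htonl() would have swapped the bytes a second time. A tiny illustration of that failure mode (the values are made up):

	/* On a little-endian host: */
	__be32 wire = (__force __be32)0x12345678;  /* already network order */
	__be32 bad  = htonl((__force u32)wire);    /* swapped again: wrong on the wire */

Typing the local variable as __be32 also lets sparse catch any future reintroduction of the double conversion.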
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 552169b41b16..834356ea99df 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -7,8 +7,6 @@
7 * PROC file system. It is mainly used for debugging and 7 * PROC file system. It is mainly used for debugging and
8 * statistics. 8 * statistics.
9 * 9 *
10 * Version: $Id: proc.c,v 1.45 2001/05/16 16:45:35 davem Exp $
11 *
12 * Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 10 * Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
13 * Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de> 11 * Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de>
14 * Fred Baumgarten, <dc6iq@insu1.etec.uni-karlsruhe.de> 12 * Fred Baumgarten, <dc6iq@insu1.etec.uni-karlsruhe.de>
@@ -73,32 +71,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
73 71
74static int sockstat_seq_open(struct inode *inode, struct file *file) 72static int sockstat_seq_open(struct inode *inode, struct file *file)
75{ 73{
76 int err; 74 return single_open_net(inode, file, sockstat_seq_show);
77 struct net *net;
78
79 err = -ENXIO;
80 net = get_proc_net(inode);
81 if (net == NULL)
82 goto err_net;
83
84 err = single_open(file, sockstat_seq_show, net);
85 if (err < 0)
86 goto err_open;
87
88 return 0;
89
90err_open:
91 put_net(net);
92err_net:
93 return err;
94}
95
96static int sockstat_seq_release(struct inode *inode, struct file *file)
97{
98 struct net *net = ((struct seq_file *)file->private_data)->private;
99
100 put_net(net);
101 return single_release(inode, file);
102} 75}
103 76
104static const struct file_operations sockstat_seq_fops = { 77static const struct file_operations sockstat_seq_fops = {
@@ -106,7 +79,7 @@ static const struct file_operations sockstat_seq_fops = {
106 .open = sockstat_seq_open, 79 .open = sockstat_seq_open,
107 .read = seq_read, 80 .read = seq_read,
108 .llseek = seq_lseek, 81 .llseek = seq_lseek,
109 .release = sockstat_seq_release, 82 .release = single_release_net,
110}; 83};
111 84
112/* snmp items */ 85/* snmp items */
@@ -268,11 +241,12 @@ static void icmpmsg_put(struct seq_file *seq)
268 241
269 int j, i, count; 242 int j, i, count;
270 static int out[PERLINE]; 243 static int out[PERLINE];
244 struct net *net = seq->private;
271 245
272 count = 0; 246 count = 0;
273 for (i = 0; i < ICMPMSG_MIB_MAX; i++) { 247 for (i = 0; i < ICMPMSG_MIB_MAX; i++) {
274 248
275 if (snmp_fold_field((void **) icmpmsg_statistics, i)) 249 if (snmp_fold_field((void **) net->mib.icmpmsg_statistics, i))
276 out[count++] = i; 250 out[count++] = i;
277 if (count < PERLINE) 251 if (count < PERLINE)
278 continue; 252 continue;
@@ -284,7 +258,7 @@ static void icmpmsg_put(struct seq_file *seq)
284 seq_printf(seq, "\nIcmpMsg: "); 258 seq_printf(seq, "\nIcmpMsg: ");
285 for (j = 0; j < PERLINE; ++j) 259 for (j = 0; j < PERLINE; ++j)
286 seq_printf(seq, " %lu", 260 seq_printf(seq, " %lu",
287 snmp_fold_field((void **) icmpmsg_statistics, 261 snmp_fold_field((void **) net->mib.icmpmsg_statistics,
288 out[j])); 262 out[j]));
289 seq_putc(seq, '\n'); 263 seq_putc(seq, '\n');
290 } 264 }
@@ -296,7 +270,7 @@ static void icmpmsg_put(struct seq_file *seq)
296 seq_printf(seq, "\nIcmpMsg:"); 270 seq_printf(seq, "\nIcmpMsg:");
297 for (j = 0; j < count; ++j) 271 for (j = 0; j < count; ++j)
298 seq_printf(seq, " %lu", snmp_fold_field((void **) 272 seq_printf(seq, " %lu", snmp_fold_field((void **)
299 icmpmsg_statistics, out[j])); 273 net->mib.icmpmsg_statistics, out[j]));
300 } 274 }
301 275
302#undef PERLINE 276#undef PERLINE
@@ -305,6 +279,7 @@ static void icmpmsg_put(struct seq_file *seq)
305static void icmp_put(struct seq_file *seq) 279static void icmp_put(struct seq_file *seq)
306{ 280{
307 int i; 281 int i;
282 struct net *net = seq->private;
308 283
309 seq_puts(seq, "\nIcmp: InMsgs InErrors"); 284 seq_puts(seq, "\nIcmp: InMsgs InErrors");
310 for (i=0; icmpmibmap[i].name != NULL; i++) 285 for (i=0; icmpmibmap[i].name != NULL; i++)
@@ -313,18 +288,18 @@ static void icmp_put(struct seq_file *seq)
313 for (i=0; icmpmibmap[i].name != NULL; i++) 288 for (i=0; icmpmibmap[i].name != NULL; i++)
314 seq_printf(seq, " Out%s", icmpmibmap[i].name); 289 seq_printf(seq, " Out%s", icmpmibmap[i].name);
315 seq_printf(seq, "\nIcmp: %lu %lu", 290 seq_printf(seq, "\nIcmp: %lu %lu",
316 snmp_fold_field((void **) icmp_statistics, ICMP_MIB_INMSGS), 291 snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INMSGS),
317 snmp_fold_field((void **) icmp_statistics, ICMP_MIB_INERRORS)); 292 snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INERRORS));
318 for (i=0; icmpmibmap[i].name != NULL; i++) 293 for (i=0; icmpmibmap[i].name != NULL; i++)
319 seq_printf(seq, " %lu", 294 seq_printf(seq, " %lu",
320 snmp_fold_field((void **) icmpmsg_statistics, 295 snmp_fold_field((void **) net->mib.icmpmsg_statistics,
321 icmpmibmap[i].index)); 296 icmpmibmap[i].index));
322 seq_printf(seq, " %lu %lu", 297 seq_printf(seq, " %lu %lu",
323 snmp_fold_field((void **) icmp_statistics, ICMP_MIB_OUTMSGS), 298 snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
324 snmp_fold_field((void **) icmp_statistics, ICMP_MIB_OUTERRORS)); 299 snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
325 for (i=0; icmpmibmap[i].name != NULL; i++) 300 for (i=0; icmpmibmap[i].name != NULL; i++)
326 seq_printf(seq, " %lu", 301 seq_printf(seq, " %lu",
327 snmp_fold_field((void **) icmpmsg_statistics, 302 snmp_fold_field((void **) net->mib.icmpmsg_statistics,
328 icmpmibmap[i].index | 0x100)); 303 icmpmibmap[i].index | 0x100));
329} 304}
330 305
@@ -334,6 +309,7 @@ static void icmp_put(struct seq_file *seq)
334static int snmp_seq_show(struct seq_file *seq, void *v) 309static int snmp_seq_show(struct seq_file *seq, void *v)
335{ 310{
336 int i; 311 int i;
312 struct net *net = seq->private;
337 313
338 seq_puts(seq, "Ip: Forwarding DefaultTTL"); 314 seq_puts(seq, "Ip: Forwarding DefaultTTL");
339 315
@@ -341,12 +317,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
341 seq_printf(seq, " %s", snmp4_ipstats_list[i].name); 317 seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
342 318
343 seq_printf(seq, "\nIp: %d %d", 319 seq_printf(seq, "\nIp: %d %d",
344 IPV4_DEVCONF_ALL(&init_net, FORWARDING) ? 1 : 2, 320 IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
345 sysctl_ip_default_ttl); 321 sysctl_ip_default_ttl);
346 322
347 for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) 323 for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
348 seq_printf(seq, " %lu", 324 seq_printf(seq, " %lu",
349 snmp_fold_field((void **)ip_statistics, 325 snmp_fold_field((void **)net->mib.ip_statistics,
350 snmp4_ipstats_list[i].entry)); 326 snmp4_ipstats_list[i].entry));
351 327
352 icmp_put(seq); /* RFC 2011 compatibility */ 328 icmp_put(seq); /* RFC 2011 compatibility */
@@ -361,11 +337,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
361 /* MaxConn field is signed, RFC 2012 */ 337 /* MaxConn field is signed, RFC 2012 */
362 if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) 338 if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
363 seq_printf(seq, " %ld", 339 seq_printf(seq, " %ld",
364 snmp_fold_field((void **)tcp_statistics, 340 snmp_fold_field((void **)net->mib.tcp_statistics,
365 snmp4_tcp_list[i].entry)); 341 snmp4_tcp_list[i].entry));
366 else 342 else
367 seq_printf(seq, " %lu", 343 seq_printf(seq, " %lu",
368 snmp_fold_field((void **)tcp_statistics, 344 snmp_fold_field((void **)net->mib.tcp_statistics,
369 snmp4_tcp_list[i].entry)); 345 snmp4_tcp_list[i].entry));
370 } 346 }
371 347
@@ -376,7 +352,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
376 seq_puts(seq, "\nUdp:"); 352 seq_puts(seq, "\nUdp:");
377 for (i = 0; snmp4_udp_list[i].name != NULL; i++) 353 for (i = 0; snmp4_udp_list[i].name != NULL; i++)
378 seq_printf(seq, " %lu", 354 seq_printf(seq, " %lu",
379 snmp_fold_field((void **)udp_statistics, 355 snmp_fold_field((void **)net->mib.udp_statistics,
380 snmp4_udp_list[i].entry)); 356 snmp4_udp_list[i].entry));
381 357
382 /* the UDP and UDP-Lite MIBs are the same */ 358 /* the UDP and UDP-Lite MIBs are the same */
@@ -387,7 +363,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
387 seq_puts(seq, "\nUdpLite:"); 363 seq_puts(seq, "\nUdpLite:");
388 for (i = 0; snmp4_udp_list[i].name != NULL; i++) 364 for (i = 0; snmp4_udp_list[i].name != NULL; i++)
389 seq_printf(seq, " %lu", 365 seq_printf(seq, " %lu",
390 snmp_fold_field((void **)udplite_statistics, 366 snmp_fold_field((void **)net->mib.udplite_statistics,
391 snmp4_udp_list[i].entry)); 367 snmp4_udp_list[i].entry));
392 368
393 seq_putc(seq, '\n'); 369 seq_putc(seq, '\n');
@@ -396,7 +372,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
396 372
397static int snmp_seq_open(struct inode *inode, struct file *file) 373static int snmp_seq_open(struct inode *inode, struct file *file)
398{ 374{
399 return single_open(file, snmp_seq_show, NULL); 375 return single_open_net(inode, file, snmp_seq_show);
400} 376}
401 377
402static const struct file_operations snmp_seq_fops = { 378static const struct file_operations snmp_seq_fops = {
@@ -404,7 +380,7 @@ static const struct file_operations snmp_seq_fops = {
404 .open = snmp_seq_open, 380 .open = snmp_seq_open,
405 .read = seq_read, 381 .read = seq_read,
406 .llseek = seq_lseek, 382 .llseek = seq_lseek,
407 .release = single_release, 383 .release = single_release_net,
408}; 384};
409 385
410 386
@@ -415,6 +391,7 @@ static const struct file_operations snmp_seq_fops = {
415static int netstat_seq_show(struct seq_file *seq, void *v) 391static int netstat_seq_show(struct seq_file *seq, void *v)
416{ 392{
417 int i; 393 int i;
394 struct net *net = seq->private;
418 395
419 seq_puts(seq, "TcpExt:"); 396 seq_puts(seq, "TcpExt:");
420 for (i = 0; snmp4_net_list[i].name != NULL; i++) 397 for (i = 0; snmp4_net_list[i].name != NULL; i++)
@@ -423,7 +400,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
423 seq_puts(seq, "\nTcpExt:"); 400 seq_puts(seq, "\nTcpExt:");
424 for (i = 0; snmp4_net_list[i].name != NULL; i++) 401 for (i = 0; snmp4_net_list[i].name != NULL; i++)
425 seq_printf(seq, " %lu", 402 seq_printf(seq, " %lu",
426 snmp_fold_field((void **)net_statistics, 403 snmp_fold_field((void **)net->mib.net_statistics,
427 snmp4_net_list[i].entry)); 404 snmp4_net_list[i].entry));
428 405
429 seq_puts(seq, "\nIpExt:"); 406 seq_puts(seq, "\nIpExt:");
@@ -433,7 +410,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
433 seq_puts(seq, "\nIpExt:"); 410 seq_puts(seq, "\nIpExt:");
434 for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) 411 for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
435 seq_printf(seq, " %lu", 412 seq_printf(seq, " %lu",
436 snmp_fold_field((void **)ip_statistics, 413 snmp_fold_field((void **)net->mib.ip_statistics,
437 snmp4_ipextstats_list[i].entry)); 414 snmp4_ipextstats_list[i].entry));
438 415
439 seq_putc(seq, '\n'); 416 seq_putc(seq, '\n');
@@ -442,7 +419,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
442 419
443static int netstat_seq_open(struct inode *inode, struct file *file) 420static int netstat_seq_open(struct inode *inode, struct file *file)
444{ 421{
445 return single_open(file, netstat_seq_show, NULL); 422 return single_open_net(inode, file, netstat_seq_show);
446} 423}
447 424
448static const struct file_operations netstat_seq_fops = { 425static const struct file_operations netstat_seq_fops = {
@@ -450,18 +427,32 @@ static const struct file_operations netstat_seq_fops = {
450 .open = netstat_seq_open, 427 .open = netstat_seq_open,
451 .read = seq_read, 428 .read = seq_read,
452 .llseek = seq_lseek, 429 .llseek = seq_lseek,
453 .release = single_release, 430 .release = single_release_net,
454}; 431};
455 432
456static __net_init int ip_proc_init_net(struct net *net) 433static __net_init int ip_proc_init_net(struct net *net)
457{ 434{
458 if (!proc_net_fops_create(net, "sockstat", S_IRUGO, &sockstat_seq_fops)) 435 if (!proc_net_fops_create(net, "sockstat", S_IRUGO, &sockstat_seq_fops))
459 return -ENOMEM; 436 goto out_sockstat;
437 if (!proc_net_fops_create(net, "netstat", S_IRUGO, &netstat_seq_fops))
438 goto out_netstat;
439 if (!proc_net_fops_create(net, "snmp", S_IRUGO, &snmp_seq_fops))
440 goto out_snmp;
441
460 return 0; 442 return 0;
443
444out_snmp:
445 proc_net_remove(net, "netstat");
446out_netstat:
447 proc_net_remove(net, "sockstat");
448out_sockstat:
449 return -ENOMEM;
461} 450}
462 451
463static __net_exit void ip_proc_exit_net(struct net *net) 452static __net_exit void ip_proc_exit_net(struct net *net)
464{ 453{
454 proc_net_remove(net, "snmp");
455 proc_net_remove(net, "netstat");
465 proc_net_remove(net, "sockstat"); 456 proc_net_remove(net, "sockstat");
466} 457}
467 458
@@ -472,24 +463,6 @@ static __net_initdata struct pernet_operations ip_proc_ops = {
472 463
473int __init ip_misc_proc_init(void) 464int __init ip_misc_proc_init(void)
474{ 465{
475 int rc = 0; 466 return register_pernet_subsys(&ip_proc_ops);
476
477 if (register_pernet_subsys(&ip_proc_ops))
478 goto out_pernet;
479
480 if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops))
481 goto out_netstat;
482
483 if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops))
484 goto out_snmp;
485out:
486 return rc;
487out_snmp:
488 proc_net_remove(&init_net, "netstat");
489out_netstat:
490 unregister_pernet_subsys(&ip_proc_ops);
491out_pernet:
492 rc = -ENOMEM;
493 goto out;
494} 467}
495 468
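[Editor's note] With the proc.c changes above, ip_proc_init_net() now owns all three per-namespace files and uses the usual kernel goto-unwind style: each failure label undoes, in reverse order, exactly the registrations that succeeded before it, and ip_misc_proc_init() shrinks to a single register_pernet_subsys() call. A generic sketch of that unwinding shape, with placeholder names:

static __net_init int example_init_net(struct net *net)
{
	if (!proc_net_fops_create(net, "first", S_IRUGO, &first_fops))
		goto out;
	if (!proc_net_fops_create(net, "second", S_IRUGO, &second_fops))
		goto out_first;
	return 0;

out_first:
	proc_net_remove(net, "first");	/* undo only what succeeded */
out:
	return -ENOMEM;
}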
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 971ab9356e51..ea50da0649fd 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * INET protocol dispatch tables. 6 * INET protocol dispatch tables.
7 * 7 *
8 * Version: $Id: protocol.c,v 1.14 2001/05/18 02:25:49 davem Exp $
9 *
10 * Authors: Ross Biro 8 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * 10 *
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 37a1ecd9d600..cd975743bcd2 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * RAW - implementation of IP "raw" sockets. 6 * RAW - implementation of IP "raw" sockets.
7 * 7 *
8 * Version: $Id: raw.c,v 1.64 2002/02/01 22:01:04 davem Exp $
9 *
10 * Authors: Ross Biro 8 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * 10 *
@@ -322,6 +320,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
322 unsigned int flags) 320 unsigned int flags)
323{ 321{
324 struct inet_sock *inet = inet_sk(sk); 322 struct inet_sock *inet = inet_sk(sk);
323 struct net *net = sock_net(sk);
325 struct iphdr *iph; 324 struct iphdr *iph;
326 struct sk_buff *skb; 325 struct sk_buff *skb;
327 unsigned int iphlen; 326 unsigned int iphlen;
@@ -370,7 +369,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
370 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 369 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
371 } 370 }
372 if (iph->protocol == IPPROTO_ICMP) 371 if (iph->protocol == IPPROTO_ICMP)
373 icmp_out_count(((struct icmphdr *) 372 icmp_out_count(net, ((struct icmphdr *)
374 skb_transport_header(skb))->type); 373 skb_transport_header(skb))->type);
375 374
376 err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, 375 err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
@@ -386,7 +385,7 @@ error_fault:
386 err = -EFAULT; 385 err = -EFAULT;
387 kfree_skb(skb); 386 kfree_skb(skb);
388error: 387error:
389 IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS); 388 IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
390 return err; 389 return err;
391} 390}
392 391
@@ -608,12 +607,11 @@ static void raw_close(struct sock *sk, long timeout)
608 sk_common_release(sk); 607 sk_common_release(sk);
609} 608}
610 609
611static int raw_destroy(struct sock *sk) 610static void raw_destroy(struct sock *sk)
612{ 611{
613 lock_sock(sk); 612 lock_sock(sk);
614 ip_flush_pending_frames(sk); 613 ip_flush_pending_frames(sk);
615 release_sock(sk); 614 release_sock(sk);
616 return 0;
617} 615}
618 616
619/* This gets rid of all the nasties in af_inet. -DaveM */ 617/* This gets rid of all the nasties in af_inet. -DaveM */
@@ -947,7 +945,7 @@ static int raw_seq_show(struct seq_file *seq, void *v)
947 if (v == SEQ_START_TOKEN) 945 if (v == SEQ_START_TOKEN)
948 seq_printf(seq, " sl local_address rem_address st tx_queue " 946 seq_printf(seq, " sl local_address rem_address st tx_queue "
949 "rx_queue tr tm->when retrnsmt uid timeout " 947 "rx_queue tr tm->when retrnsmt uid timeout "
950 "inode drops\n"); 948 "inode ref pointer drops\n");
951 else 949 else
952 raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); 950 raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
953 return 0; 951 return 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 96be336064fb..e4ab0ac94f92 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * ROUTE - implementation of the IP router. 6 * ROUTE - implementation of the IP router.
7 * 7 *
8 * Version: $Id: route.c,v 1.103 2002/01/12 07:44:09 davem Exp $
9 *
10 * Authors: Ross Biro 8 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Alan Cox, <gw4pts@gw4pts.ampr.org> 10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
@@ -134,7 +132,6 @@ static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
134 132
135static void rt_worker_func(struct work_struct *work); 133static void rt_worker_func(struct work_struct *work);
136static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); 134static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
137static struct timer_list rt_secret_timer;
138 135
139/* 136/*
140 * Interface to generic destination cache. 137 * Interface to generic destination cache.
@@ -253,20 +250,25 @@ static inline void rt_hash_lock_init(void)
253static struct rt_hash_bucket *rt_hash_table __read_mostly; 250static struct rt_hash_bucket *rt_hash_table __read_mostly;
254static unsigned rt_hash_mask __read_mostly; 251static unsigned rt_hash_mask __read_mostly;
255static unsigned int rt_hash_log __read_mostly; 252static unsigned int rt_hash_log __read_mostly;
256static atomic_t rt_genid __read_mostly;
257 253
258static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); 254static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
259#define RT_CACHE_STAT_INC(field) \ 255#define RT_CACHE_STAT_INC(field) \
260 (__raw_get_cpu_var(rt_cache_stat).field++) 256 (__raw_get_cpu_var(rt_cache_stat).field++)
261 257
262static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx) 258static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
259 int genid)
263{ 260{
264 return jhash_3words((__force u32)(__be32)(daddr), 261 return jhash_3words((__force u32)(__be32)(daddr),
265 (__force u32)(__be32)(saddr), 262 (__force u32)(__be32)(saddr),
266 idx, atomic_read(&rt_genid)) 263 idx, genid)
267 & rt_hash_mask; 264 & rt_hash_mask;
268} 265}
269 266
267static inline int rt_genid(struct net *net)
268{
269 return atomic_read(&net->ipv4.rt_genid);
270}
271
270#ifdef CONFIG_PROC_FS 272#ifdef CONFIG_PROC_FS
271struct rt_cache_iter_state { 273struct rt_cache_iter_state {
272 struct seq_net_private p; 274 struct seq_net_private p;
@@ -336,7 +338,7 @@ static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
336 struct rt_cache_iter_state *st = seq->private; 338 struct rt_cache_iter_state *st = seq->private;
337 if (*pos) 339 if (*pos)
338 return rt_cache_get_idx(seq, *pos - 1); 340 return rt_cache_get_idx(seq, *pos - 1);
339 st->genid = atomic_read(&rt_genid); 341 st->genid = rt_genid(seq_file_net(seq));
340 return SEQ_START_TOKEN; 342 return SEQ_START_TOKEN;
341} 343}
342 344
@@ -683,6 +685,11 @@ static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
683 return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev); 685 return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev);
684} 686}
685 687
688static inline int rt_is_expired(struct rtable *rth)
689{
690 return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev));
691}
692
686/* 693/*
687 * Perform a full scan of hash table and free all entries. 694 * Perform a full scan of hash table and free all entries.
688 * Can be called by a softirq or a process. 695 * Can be called by a softirq or a process.
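[Editor's note] rt_is_expired(), added just above, is the heart of the per-namespace flush scheme: each cached route records its namespace's generation number at creation time, and bumping net->ipv4.rt_genid makes every older entry compare stale, so lookups skip it and the flush/GC paths reap it lazily instead of walking the whole hash table at once. A schematic of the idea, simplified and not using the kernel's actual structures:

/* Simplified sketch of generation-count invalidation. */
struct cache_entry {
	int genid;		/* generation at insertion time */
	/* ... payload ... */
};

static atomic_t cache_genid;

static void entry_init(struct cache_entry *e)
{
	e->genid = atomic_read(&cache_genid);
}

static int entry_is_expired(const struct cache_entry *e)
{
	return e->genid != atomic_read(&cache_genid);
}

static void cache_invalidate(void)
{
	atomic_inc(&cache_genid);	/* O(1): all existing entries go stale */
}

The real rt_cache_invalidate() adds a random amount rather than 1, which also reseeds the hash distribution so attackers cannot predict chain placement after a flush.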
@@ -692,6 +699,7 @@ static void rt_do_flush(int process_context)
692{ 699{
693 unsigned int i; 700 unsigned int i;
694 struct rtable *rth, *next; 701 struct rtable *rth, *next;
702 struct rtable * tail;
695 703
696 for (i = 0; i <= rt_hash_mask; i++) { 704 for (i = 0; i <= rt_hash_mask; i++) {
697 if (process_context && need_resched()) 705 if (process_context && need_resched())
@@ -701,11 +709,39 @@ static void rt_do_flush(int process_context)
701 continue; 709 continue;
702 710
703 spin_lock_bh(rt_hash_lock_addr(i)); 711 spin_lock_bh(rt_hash_lock_addr(i));
712#ifdef CONFIG_NET_NS
713 {
714 struct rtable ** prev, * p;
715
716 rth = rt_hash_table[i].chain;
717
718 /* defer releasing the head of the list after spin_unlock */
719 for (tail = rth; tail; tail = tail->u.dst.rt_next)
720 if (!rt_is_expired(tail))
721 break;
722 if (rth != tail)
723 rt_hash_table[i].chain = tail;
724
725 /* call rt_free on entries after the tail requiring flush */
726 prev = &rt_hash_table[i].chain;
727 for (p = *prev; p; p = next) {
728 next = p->u.dst.rt_next;
729 if (!rt_is_expired(p)) {
730 prev = &p->u.dst.rt_next;
731 } else {
732 *prev = next;
733 rt_free(p);
734 }
735 }
736 }
737#else
704 rth = rt_hash_table[i].chain; 738 rth = rt_hash_table[i].chain;
705 rt_hash_table[i].chain = NULL; 739 rt_hash_table[i].chain = NULL;
740 tail = NULL;
741#endif
706 spin_unlock_bh(rt_hash_lock_addr(i)); 742 spin_unlock_bh(rt_hash_lock_addr(i));
707 743
708 for (; rth; rth = next) { 744 for (; rth != tail; rth = next) {
709 next = rth->u.dst.rt_next; 745 next = rth->u.dst.rt_next;
710 rt_free(rth); 746 rt_free(rth);
711 } 747 }
@@ -738,7 +774,7 @@ static void rt_check_expire(void)
738 continue; 774 continue;
739 spin_lock_bh(rt_hash_lock_addr(i)); 775 spin_lock_bh(rt_hash_lock_addr(i));
740 while ((rth = *rthp) != NULL) { 776 while ((rth = *rthp) != NULL) {
741 if (rth->rt_genid != atomic_read(&rt_genid)) { 777 if (rt_is_expired(rth)) {
742 *rthp = rth->u.dst.rt_next; 778 *rthp = rth->u.dst.rt_next;
743 rt_free(rth); 779 rt_free(rth);
744 continue; 780 continue;
@@ -781,21 +817,21 @@ static void rt_worker_func(struct work_struct *work)
781 * many times (2^24) without giving recent rt_genid. 817 * many times (2^24) without giving recent rt_genid.
782 * Jenkins hash is strong enough that little changes of rt_genid are OK. 818
783 */ 819 */
784static void rt_cache_invalidate(void) 820static void rt_cache_invalidate(struct net *net)
785{ 821{
786 unsigned char shuffle; 822 unsigned char shuffle;
787 823
788 get_random_bytes(&shuffle, sizeof(shuffle)); 824 get_random_bytes(&shuffle, sizeof(shuffle));
789 atomic_add(shuffle + 1U, &rt_genid); 825 atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
790} 826}
791 827
792/* 828/*
793 * delay < 0 : invalidate cache (fast : entries will be deleted later) 829 * delay < 0 : invalidate cache (fast : entries will be deleted later)
794 * delay >= 0 : invalidate & flush cache (can be long) 830 * delay >= 0 : invalidate & flush cache (can be long)
795 */ 831 */
796void rt_cache_flush(int delay) 832void rt_cache_flush(struct net *net, int delay)
797{ 833{
798 rt_cache_invalidate(); 834 rt_cache_invalidate(net);
799 if (delay >= 0) 835 if (delay >= 0)
800 rt_do_flush(!in_softirq()); 836 rt_do_flush(!in_softirq());
801} 837}
@@ -803,10 +839,11 @@ void rt_cache_flush(int delay)
803/* 839/*
804 * We change rt_genid and let gc do the cleanup 840 * We change rt_genid and let gc do the cleanup
805 */ 841 */
806static void rt_secret_rebuild(unsigned long dummy) 842static void rt_secret_rebuild(unsigned long __net)
807{ 843{
808 rt_cache_invalidate(); 844 struct net *net = (struct net *)__net;
809 mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval); 845 rt_cache_invalidate(net);
846 mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
810} 847}
811 848
812/* 849/*
@@ -882,7 +919,7 @@ static int rt_garbage_collect(struct dst_ops *ops)
882 rthp = &rt_hash_table[k].chain; 919 rthp = &rt_hash_table[k].chain;
883 spin_lock_bh(rt_hash_lock_addr(k)); 920 spin_lock_bh(rt_hash_lock_addr(k));
884 while ((rth = *rthp) != NULL) { 921 while ((rth = *rthp) != NULL) {
885 if (rth->rt_genid == atomic_read(&rt_genid) && 922 if (!rt_is_expired(rth) &&
886 !rt_may_expire(rth, tmo, expire)) { 923 !rt_may_expire(rth, tmo, expire)) {
887 tmo >>= 1; 924 tmo >>= 1;
888 rthp = &rth->u.dst.rt_next; 925 rthp = &rth->u.dst.rt_next;
@@ -964,7 +1001,7 @@ restart:
964 1001
965 spin_lock_bh(rt_hash_lock_addr(hash)); 1002 spin_lock_bh(rt_hash_lock_addr(hash));
966 while ((rth = *rthp) != NULL) { 1003 while ((rth = *rthp) != NULL) {
967 if (rth->rt_genid != atomic_read(&rt_genid)) { 1004 if (rt_is_expired(rth)) {
968 *rthp = rth->u.dst.rt_next; 1005 *rthp = rth->u.dst.rt_next;
969 rt_free(rth); 1006 rt_free(rth);
970 continue; 1007 continue;
@@ -1140,7 +1177,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
1140 spin_lock_bh(rt_hash_lock_addr(hash)); 1177 spin_lock_bh(rt_hash_lock_addr(hash));
1141 ip_rt_put(rt); 1178 ip_rt_put(rt);
1142 while ((aux = *rthp) != NULL) { 1179 while ((aux = *rthp) != NULL) {
1143 if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) { 1180 if (aux == rt || rt_is_expired(aux)) {
1144 *rthp = aux->u.dst.rt_next; 1181 *rthp = aux->u.dst.rt_next;
1145 rt_free(aux); 1182 rt_free(aux);
1146 continue; 1183 continue;
@@ -1182,7 +1219,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1182 1219
1183 for (i = 0; i < 2; i++) { 1220 for (i = 0; i < 2; i++) {
1184 for (k = 0; k < 2; k++) { 1221 for (k = 0; k < 2; k++) {
1185 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]); 1222 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
1223 rt_genid(net));
1186 1224
1187 rthp=&rt_hash_table[hash].chain; 1225 rthp=&rt_hash_table[hash].chain;
1188 1226
@@ -1194,7 +1232,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1194 rth->fl.fl4_src != skeys[i] || 1232 rth->fl.fl4_src != skeys[i] ||
1195 rth->fl.oif != ikeys[k] || 1233 rth->fl.oif != ikeys[k] ||
1196 rth->fl.iif != 0 || 1234 rth->fl.iif != 0 ||
1197 rth->rt_genid != atomic_read(&rt_genid) || 1235 rt_is_expired(rth) ||
1198 !net_eq(dev_net(rth->u.dst.dev), net)) { 1236 !net_eq(dev_net(rth->u.dst.dev), net)) {
1199 rthp = &rth->u.dst.rt_next; 1237 rthp = &rth->u.dst.rt_next;
1200 continue; 1238 continue;
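
ip_rt_redirect() now passes rt_genid(net) into rt_hash(). Folding the generation into the hash key means a flush not only expires old entries but also sends new lookups to different buckets, so stale chains simply stop being visited. A toy sketch of the effect, assuming the generation is mixed in as the hash seed (hash_mix() below is a stand-in for the kernel's jhash_3words(), not its code):

/* Sketch: folding the namespace generation into the hash key, so a
 * genid bump effectively empties the table without touching it. */
#include <stdint.h>
#include <stdio.h>

#define RT_HASH_BITS 8
#define RT_HASH_MASK ((1u << RT_HASH_BITS) - 1)

static uint32_t hash_mix(uint32_t a, uint32_t b, uint32_t initval)
{
	/* toy avalanche; the kernel uses a Jenkins hash here */
	uint32_t h = a * 0x9e3779b1u;

	h ^= b + 0x85ebca6bu + (h << 6) + (h >> 2);
	h ^= initval + (h << 6) + (h >> 2);
	return h;
}

static unsigned rt_hash_sketch(uint32_t daddr, uint32_t saddr,
			       int ifindex, int genid)
{
	return hash_mix(daddr, saddr ^ ((uint32_t)ifindex << 5),
			(uint32_t)genid) & RT_HASH_MASK;
}

int main(void)
{
	unsigned before = rt_hash_sketch(0x0a000001, 0x0a000002, 3, 1);
	unsigned after  = rt_hash_sketch(0x0a000001, 0x0a000002, 3, 2);

	printf("bucket before flush: %u, after: %u\n", before, after);
	return 0;
}
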
@@ -1233,7 +1271,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1233 rt->u.dst.neighbour = NULL; 1271 rt->u.dst.neighbour = NULL;
1234 rt->u.dst.hh = NULL; 1272 rt->u.dst.hh = NULL;
1235 rt->u.dst.xfrm = NULL; 1273 rt->u.dst.xfrm = NULL;
1236 rt->rt_genid = atomic_read(&rt_genid); 1274 rt->rt_genid = rt_genid(net);
1237 rt->rt_flags |= RTCF_REDIRECTED; 1275 rt->rt_flags |= RTCF_REDIRECTED;
1238 1276
1239 /* Gateway is different ... */ 1277 /* Gateway is different ... */
@@ -1297,7 +1335,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1297 } else if ((rt->rt_flags & RTCF_REDIRECTED) || 1335 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
1298 rt->u.dst.expires) { 1336 rt->u.dst.expires) {
1299 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, 1337 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
1300 rt->fl.oif); 1338 rt->fl.oif,
1339 rt_genid(dev_net(dst->dev)));
1301#if RT_CACHE_DEBUG >= 1 1340#if RT_CACHE_DEBUG >= 1
1302 printk(KERN_DEBUG "ipv4_negative_advice: redirect to " 1341 printk(KERN_DEBUG "ipv4_negative_advice: redirect to "
1303 NIPQUAD_FMT "/%02x dropped\n", 1342 NIPQUAD_FMT "/%02x dropped\n",
@@ -1390,7 +1429,8 @@ static int ip_error(struct sk_buff *skb)
1390 break; 1429 break;
1391 case ENETUNREACH: 1430 case ENETUNREACH:
1392 code = ICMP_NET_UNREACH; 1431 code = ICMP_NET_UNREACH;
1393 IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES); 1432 IP_INC_STATS_BH(dev_net(rt->u.dst.dev),
1433 IPSTATS_MIB_INNOROUTES);
1394 break; 1434 break;
1395 case EACCES: 1435 case EACCES:
1396 code = ICMP_PKT_FILTERED; 1436 code = ICMP_PKT_FILTERED;
@@ -1446,7 +1486,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1446 1486
1447 for (k = 0; k < 2; k++) { 1487 for (k = 0; k < 2; k++) {
1448 for (i = 0; i < 2; i++) { 1488 for (i = 0; i < 2; i++) {
1449 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]); 1489 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
1490 rt_genid(net));
1450 1491
1451 rcu_read_lock(); 1492 rcu_read_lock();
1452 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 1493 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -1461,7 +1502,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1461 rth->fl.iif != 0 || 1502 rth->fl.iif != 0 ||
1462 dst_metric_locked(&rth->u.dst, RTAX_MTU) || 1503 dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
1463 !net_eq(dev_net(rth->u.dst.dev), net) || 1504 !net_eq(dev_net(rth->u.dst.dev), net) ||
1464 rth->rt_genid != atomic_read(&rt_genid)) 1505 rt_is_expired(rth))
1465 continue; 1506 continue;
1466 1507
1467 if (new_mtu < 68 || new_mtu >= old_mtu) { 1508 if (new_mtu < 68 || new_mtu >= old_mtu) {
@@ -1696,7 +1737,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1696 rth->fl.oif = 0; 1737 rth->fl.oif = 0;
1697 rth->rt_gateway = daddr; 1738 rth->rt_gateway = daddr;
1698 rth->rt_spec_dst= spec_dst; 1739 rth->rt_spec_dst= spec_dst;
1699 rth->rt_genid = atomic_read(&rt_genid); 1740 rth->rt_genid = rt_genid(dev_net(dev));
1700 rth->rt_flags = RTCF_MULTICAST; 1741 rth->rt_flags = RTCF_MULTICAST;
1701 rth->rt_type = RTN_MULTICAST; 1742 rth->rt_type = RTN_MULTICAST;
1702 if (our) { 1743 if (our) {
@@ -1711,7 +1752,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1711 RT_CACHE_STAT_INC(in_slow_mc); 1752 RT_CACHE_STAT_INC(in_slow_mc);
1712 1753
1713 in_dev_put(in_dev); 1754 in_dev_put(in_dev);
1714 hash = rt_hash(daddr, saddr, dev->ifindex); 1755 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
1715 return rt_intern_hash(hash, rth, &skb->rtable); 1756 return rt_intern_hash(hash, rth, &skb->rtable);
1716 1757
1717e_nobufs: 1758e_nobufs:
@@ -1837,7 +1878,7 @@ static int __mkroute_input(struct sk_buff *skb,
1837 1878
1838 rth->u.dst.input = ip_forward; 1879 rth->u.dst.input = ip_forward;
1839 rth->u.dst.output = ip_output; 1880 rth->u.dst.output = ip_output;
1840 rth->rt_genid = atomic_read(&rt_genid); 1881 rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));
1841 1882
1842 rt_set_nexthop(rth, res, itag); 1883 rt_set_nexthop(rth, res, itag);
1843 1884
@@ -1872,7 +1913,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
1872 return err; 1913 return err;
1873 1914
1874 /* put it into the cache */ 1915 /* put it into the cache */
1875 hash = rt_hash(daddr, saddr, fl->iif); 1916 hash = rt_hash(daddr, saddr, fl->iif,
1917 rt_genid(dev_net(rth->u.dst.dev)));
1876 return rt_intern_hash(hash, rth, &skb->rtable); 1918 return rt_intern_hash(hash, rth, &skb->rtable);
1877} 1919}
1878 1920
@@ -1998,7 +2040,7 @@ local_input:
1998 goto e_nobufs; 2040 goto e_nobufs;
1999 2041
2000 rth->u.dst.output= ip_rt_bug; 2042 rth->u.dst.output= ip_rt_bug;
2001 rth->rt_genid = atomic_read(&rt_genid); 2043 rth->rt_genid = rt_genid(net);
2002 2044
2003 atomic_set(&rth->u.dst.__refcnt, 1); 2045 atomic_set(&rth->u.dst.__refcnt, 1);
2004 rth->u.dst.flags= DST_HOST; 2046 rth->u.dst.flags= DST_HOST;
@@ -2028,7 +2070,7 @@ local_input:
2028 rth->rt_flags &= ~RTCF_LOCAL; 2070 rth->rt_flags &= ~RTCF_LOCAL;
2029 } 2071 }
2030 rth->rt_type = res.type; 2072 rth->rt_type = res.type;
2031 hash = rt_hash(daddr, saddr, fl.iif); 2073 hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
2032 err = rt_intern_hash(hash, rth, &skb->rtable); 2074 err = rt_intern_hash(hash, rth, &skb->rtable);
2033 goto done; 2075 goto done;
2034 2076
@@ -2079,7 +2121,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2079 2121
2080 net = dev_net(dev); 2122 net = dev_net(dev);
2081 tos &= IPTOS_RT_MASK; 2123 tos &= IPTOS_RT_MASK;
2082 hash = rt_hash(daddr, saddr, iif); 2124 hash = rt_hash(daddr, saddr, iif, rt_genid(net));
2083 2125
2084 rcu_read_lock(); 2126 rcu_read_lock();
2085 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2127 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2091,7 +2133,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2091 (rth->fl.fl4_tos ^ tos)) == 0 && 2133 (rth->fl.fl4_tos ^ tos)) == 0 &&
2092 rth->fl.mark == skb->mark && 2134 rth->fl.mark == skb->mark &&
2093 net_eq(dev_net(rth->u.dst.dev), net) && 2135 net_eq(dev_net(rth->u.dst.dev), net) &&
2094 rth->rt_genid == atomic_read(&rt_genid)) { 2136 !rt_is_expired(rth)) {
2095 dst_use(&rth->u.dst, jiffies); 2137 dst_use(&rth->u.dst, jiffies);
2096 RT_CACHE_STAT_INC(in_hit); 2138 RT_CACHE_STAT_INC(in_hit);
2097 rcu_read_unlock(); 2139 rcu_read_unlock();
@@ -2219,7 +2261,7 @@ static int __mkroute_output(struct rtable **result,
2219 rth->rt_spec_dst= fl->fl4_src; 2261 rth->rt_spec_dst= fl->fl4_src;
2220 2262
2221 rth->u.dst.output=ip_output; 2263 rth->u.dst.output=ip_output;
2222 rth->rt_genid = atomic_read(&rt_genid); 2264 rth->rt_genid = rt_genid(dev_net(dev_out));
2223 2265
2224 RT_CACHE_STAT_INC(out_slow_tot); 2266 RT_CACHE_STAT_INC(out_slow_tot);
2225 2267
@@ -2268,7 +2310,8 @@ static int ip_mkroute_output(struct rtable **rp,
2268 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); 2310 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
2269 unsigned hash; 2311 unsigned hash;
2270 if (err == 0) { 2312 if (err == 0) {
2271 hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif); 2313 hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
2314 rt_genid(dev_net(dev_out)));
2272 err = rt_intern_hash(hash, rth, rp); 2315 err = rt_intern_hash(hash, rth, rp);
2273 } 2316 }
2274 2317
@@ -2480,7 +2523,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2480 unsigned hash; 2523 unsigned hash;
2481 struct rtable *rth; 2524 struct rtable *rth;
2482 2525
2483 hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif); 2526 hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
2484 2527
2485 rcu_read_lock_bh(); 2528 rcu_read_lock_bh();
2486 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2529 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2493,7 +2536,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2493 !((rth->fl.fl4_tos ^ flp->fl4_tos) & 2536 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
2494 (IPTOS_RT_MASK | RTO_ONLINK)) && 2537 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2495 net_eq(dev_net(rth->u.dst.dev), net) && 2538 net_eq(dev_net(rth->u.dst.dev), net) &&
2496 rth->rt_genid == atomic_read(&rt_genid)) { 2539 !rt_is_expired(rth)) {
2497 dst_use(&rth->u.dst, jiffies); 2540 dst_use(&rth->u.dst, jiffies);
2498 RT_CACHE_STAT_INC(out_hit); 2541 RT_CACHE_STAT_INC(out_hit);
2499 rcu_read_unlock_bh(); 2542 rcu_read_unlock_bh();
@@ -2524,7 +2567,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2524}; 2567};
2525 2568
2526 2569
2527static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp) 2570static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp)
2528{ 2571{
2529 struct rtable *ort = *rp; 2572 struct rtable *ort = *rp;
2530 struct rtable *rt = (struct rtable *) 2573 struct rtable *rt = (struct rtable *)
@@ -2548,7 +2591,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
2548 rt->idev = ort->idev; 2591 rt->idev = ort->idev;
2549 if (rt->idev) 2592 if (rt->idev)
2550 in_dev_hold(rt->idev); 2593 in_dev_hold(rt->idev);
2551 rt->rt_genid = atomic_read(&rt_genid); 2594 rt->rt_genid = rt_genid(net);
2552 rt->rt_flags = ort->rt_flags; 2595 rt->rt_flags = ort->rt_flags;
2553 rt->rt_type = ort->rt_type; 2596 rt->rt_type = ort->rt_type;
2554 rt->rt_dst = ort->rt_dst; 2597 rt->rt_dst = ort->rt_dst;
@@ -2584,7 +2627,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
2584 err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, 2627 err = __xfrm_lookup((struct dst_entry **)rp, flp, sk,
2585 flags ? XFRM_LOOKUP_WAIT : 0); 2628 flags ? XFRM_LOOKUP_WAIT : 0);
2586 if (err == -EREMOTE) 2629 if (err == -EREMOTE)
2587 err = ipv4_dst_blackhole(rp, flp); 2630 err = ipv4_dst_blackhole(net, rp, flp);
2588 2631
2589 return err; 2632 return err;
2590 } 2633 }
@@ -2803,7 +2846,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2803 rt = rcu_dereference(rt->u.dst.rt_next), idx++) { 2846 rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
2804 if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) 2847 if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
2805 continue; 2848 continue;
2806 if (rt->rt_genid != atomic_read(&rt_genid)) 2849 if (rt_is_expired(rt))
2807 continue; 2850 continue;
2808 skb->dst = dst_clone(&rt->u.dst); 2851 skb->dst = dst_clone(&rt->u.dst);
2809 if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, 2852 if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
@@ -2827,19 +2870,25 @@ done:
2827 2870
2828void ip_rt_multicast_event(struct in_device *in_dev) 2871void ip_rt_multicast_event(struct in_device *in_dev)
2829{ 2872{
2830 rt_cache_flush(0); 2873 rt_cache_flush(dev_net(in_dev->dev), 0);
2831} 2874}
2832 2875
2833#ifdef CONFIG_SYSCTL 2876#ifdef CONFIG_SYSCTL
2834static int flush_delay; 2877static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
2835
2836static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
2837 struct file *filp, void __user *buffer, 2878 struct file *filp, void __user *buffer,
2838 size_t *lenp, loff_t *ppos) 2879 size_t *lenp, loff_t *ppos)
2839{ 2880{
2840 if (write) { 2881 if (write) {
2841 proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 2882 int flush_delay;
2842 rt_cache_flush(flush_delay); 2883 ctl_table ctl;
2884 struct net *net;
2885
2886 memcpy(&ctl, __ctl, sizeof(ctl));
2887 ctl.data = &flush_delay;
2888 proc_dointvec(&ctl, write, filp, buffer, lenp, ppos);
2889
2890 net = (struct net *)__ctl->extra1;
2891 rt_cache_flush(net, flush_delay);
2843 return 0; 2892 return 0;
2844 } 2893 }
2845 2894
@@ -2855,25 +2904,18 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
2855 size_t newlen) 2904 size_t newlen)
2856{ 2905{
2857 int delay; 2906 int delay;
2907 struct net *net;
2858 if (newlen != sizeof(int)) 2908 if (newlen != sizeof(int))
2859 return -EINVAL; 2909 return -EINVAL;
2860 if (get_user(delay, (int __user *)newval)) 2910 if (get_user(delay, (int __user *)newval))
2861 return -EFAULT; 2911 return -EFAULT;
2862 rt_cache_flush(delay); 2912 net = (struct net *)table->extra1;
2913 rt_cache_flush(net, delay);
2863 return 0; 2914 return 0;
2864} 2915}
2865 2916
2866ctl_table ipv4_route_table[] = { 2917ctl_table ipv4_route_table[] = {
2867 { 2918 {
2868 .ctl_name = NET_IPV4_ROUTE_FLUSH,
2869 .procname = "flush",
2870 .data = &flush_delay,
2871 .maxlen = sizeof(int),
2872 .mode = 0200,
2873 .proc_handler = &ipv4_sysctl_rtcache_flush,
2874 .strategy = &ipv4_sysctl_rtcache_flush_strategy,
2875 },
2876 {
2877 .ctl_name = NET_IPV4_ROUTE_GC_THRESH, 2919 .ctl_name = NET_IPV4_ROUTE_GC_THRESH,
2878 .procname = "gc_thresh", 2920 .procname = "gc_thresh",
2879 .data = &ipv4_dst_ops.gc_thresh, 2921 .data = &ipv4_dst_ops.gc_thresh,
@@ -3011,8 +3053,97 @@ ctl_table ipv4_route_table[] = {
3011 }, 3053 },
3012 { .ctl_name = 0 } 3054 { .ctl_name = 0 }
3013}; 3055};
3056
3057static __net_initdata struct ctl_path ipv4_route_path[] = {
3058 { .procname = "net", .ctl_name = CTL_NET, },
3059 { .procname = "ipv4", .ctl_name = NET_IPV4, },
3060 { .procname = "route", .ctl_name = NET_IPV4_ROUTE, },
3061 { },
3062};
3063
3064
3065static struct ctl_table ipv4_route_flush_table[] = {
3066 {
3067 .ctl_name = NET_IPV4_ROUTE_FLUSH,
3068 .procname = "flush",
3069 .maxlen = sizeof(int),
3070 .mode = 0200,
3071 .proc_handler = &ipv4_sysctl_rtcache_flush,
3072 .strategy = &ipv4_sysctl_rtcache_flush_strategy,
3073 },
3074 { .ctl_name = 0 },
3075};
3076
3077static __net_init int sysctl_route_net_init(struct net *net)
3078{
3079 struct ctl_table *tbl;
3080
3081 tbl = ipv4_route_flush_table;
3082 if (net != &init_net) {
3083 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
3084 if (tbl == NULL)
3085 goto err_dup;
3086 }
3087 tbl[0].extra1 = net;
3088
3089 net->ipv4.route_hdr =
3090 register_net_sysctl_table(net, ipv4_route_path, tbl);
3091 if (net->ipv4.route_hdr == NULL)
3092 goto err_reg;
3093 return 0;
3094
3095err_reg:
3096 if (tbl != ipv4_route_flush_table)
3097 kfree(tbl);
3098err_dup:
3099 return -ENOMEM;
3100}
3101
3102static __net_exit void sysctl_route_net_exit(struct net *net)
3103{
3104 struct ctl_table *tbl;
3105
3106 tbl = net->ipv4.route_hdr->ctl_table_arg;
3107 unregister_net_sysctl_table(net->ipv4.route_hdr);
3108 BUG_ON(tbl == ipv4_route_flush_table);
3109 kfree(tbl);
3110}
3111
3112static __net_initdata struct pernet_operations sysctl_route_ops = {
3113 .init = sysctl_route_net_init,
3114 .exit = sysctl_route_net_exit,
3115};
3014#endif 3116#endif
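
sysctl_route_net_init() above also demonstrates the usual per-namespace registration idiom: init_net keeps the static template table, every other namespace gets a kmemdup()'d copy so its ->extra1 can differ, and failures unwind through labelled gotos that free only what was duplicated. A compilable userspace sketch of the same control flow (malloc/memcpy stand in for kmemdup; register_table() is a stub):

/* Sketch of dup-then-register with goto unwind; stand-in functions. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct entry { void *extra1; };

static struct entry template_table[1];    /* shared, read-only template */

static int register_table(struct entry *t) { return t ? 0 : -1; } /* stub */

static int net_sysctl_init(void *net, int is_init_net)
{
	struct entry *tbl = template_table;

	if (!is_init_net) {
		tbl = malloc(sizeof(template_table));
		if (!tbl)
			goto err_dup;
		memcpy(tbl, template_table, sizeof(template_table));
	}
	tbl[0].extra1 = net;              /* per-namespace back-pointer */

	if (register_table(tbl) < 0)
		goto err_reg;
	return 0;

err_reg:
	if (tbl != template_table)        /* free only what we duplicated */
		free(tbl);
err_dup:
	return -1;
}

int main(void)
{
	int dummy_net;

	printf("init net: %d, other net: %d\n",
	       net_sysctl_init(&dummy_net, 1),
	       net_sysctl_init(&dummy_net, 0));
	return 0;
}
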
3015 3117
3118
3119static __net_init int rt_secret_timer_init(struct net *net)
3120{
3121 atomic_set(&net->ipv4.rt_genid,
3122 (int) ((num_physpages ^ (num_physpages>>8)) ^
3123 (jiffies ^ (jiffies >> 7))));
3124
3125 net->ipv4.rt_secret_timer.function = rt_secret_rebuild;
3126 net->ipv4.rt_secret_timer.data = (unsigned long)net;
3127 init_timer_deferrable(&net->ipv4.rt_secret_timer);
3128
3129 net->ipv4.rt_secret_timer.expires =
3130 jiffies + net_random() % ip_rt_secret_interval +
3131 ip_rt_secret_interval;
3132 add_timer(&net->ipv4.rt_secret_timer);
3133 return 0;
3134}
3135
3136static __net_exit void rt_secret_timer_exit(struct net *net)
3137{
3138 del_timer_sync(&net->ipv4.rt_secret_timer);
3139}
3140
3141static __net_initdata struct pernet_operations rt_secret_timer_ops = {
3142 .init = rt_secret_timer_init,
3143 .exit = rt_secret_timer_exit,
3144};
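
rt_secret_timer_init() seeds the per-namespace generation from num_physpages and jiffies, then arms a deferrable timer at interval + random % interval. The jitter matters: many namespaces created together would otherwise rebuild their caches in lockstep, exactly the synchronization the ip_rt_init() comment below warns about. A small sketch of the expiry arithmetic (plain C, not kernel timers; the 600-second interval is an assumed example):

/* Sketch of the start-up jitter: each namespace fires at
 * interval + rand() % interval so periodic work never synchronizes. */
#include <stdio.h>
#include <stdlib.h>

static unsigned long jittered_expiry(unsigned long now, unsigned long interval)
{
	return now + interval + (unsigned long)rand() % interval;
}

int main(void)
{
	unsigned long now = 1000, interval = 600;   /* e.g. 600 s rebuild */

	for (int net = 0; net < 3; net++)
		printf("net %d fires at %lu\n", net,
		       jittered_expiry(now, interval));
	return 0;
}
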
3145
3146
3016#ifdef CONFIG_NET_CLS_ROUTE 3147#ifdef CONFIG_NET_CLS_ROUTE
3017struct ip_rt_acct *ip_rt_acct __read_mostly; 3148struct ip_rt_acct *ip_rt_acct __read_mostly;
3018#endif /* CONFIG_NET_CLS_ROUTE */ 3149#endif /* CONFIG_NET_CLS_ROUTE */
@@ -3031,9 +3162,6 @@ int __init ip_rt_init(void)
3031{ 3162{
3032 int rc = 0; 3163 int rc = 0;
3033 3164
3034 atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^
3035 (jiffies ^ (jiffies >> 7))));
3036
3037#ifdef CONFIG_NET_CLS_ROUTE 3165#ifdef CONFIG_NET_CLS_ROUTE
3038 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct)); 3166 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
3039 if (!ip_rt_acct) 3167 if (!ip_rt_acct)
@@ -3065,19 +3193,14 @@ int __init ip_rt_init(void)
3065 devinet_init(); 3193 devinet_init();
3066 ip_fib_init(); 3194 ip_fib_init();
3067 3195
3068 rt_secret_timer.function = rt_secret_rebuild;
3069 rt_secret_timer.data = 0;
3070 init_timer_deferrable(&rt_secret_timer);
3071
3072 /* All the timers, started at system startup tend 3196 /* All the timers, started at system startup tend
3073 to synchronize. Perturb it a bit. 3197 to synchronize. Perturb it a bit.
3074 */ 3198 */
3075 schedule_delayed_work(&expires_work, 3199 schedule_delayed_work(&expires_work,
3076 net_random() % ip_rt_gc_interval + ip_rt_gc_interval); 3200 net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
3077 3201
3078 rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval + 3202 if (register_pernet_subsys(&rt_secret_timer_ops))
3079 ip_rt_secret_interval; 3203 printk(KERN_ERR "Unable to setup rt_secret_timer\n");
3080 add_timer(&rt_secret_timer);
3081 3204
3082 if (ip_rt_proc_init()) 3205 if (ip_rt_proc_init())
3083 printk(KERN_ERR "Unable to create route proc files\n"); 3206 printk(KERN_ERR "Unable to create route proc files\n");
@@ -3087,6 +3210,9 @@ int __init ip_rt_init(void)
3087#endif 3210#endif
3088 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); 3211 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
3089 3212
3213#ifdef CONFIG_SYSCTL
3214 register_pernet_subsys(&sysctl_route_ops);
3215#endif
3090 return rc; 3216 return rc;
3091} 3217}
3092 3218
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index d182a2a26291..51bc24d3b8a7 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -8,8 +8,6 @@
8 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version. 10 * 2 of the License, or (at your option) any later version.
11 *
12 * $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $
13 */ 11 */
14 12
15#include <linux/tcp.h> 13#include <linux/tcp.h>
@@ -175,7 +173,7 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
175 ; 173 ;
176 *mssp = msstab[mssind] + 1; 174 *mssp = msstab[mssind] + 1;
177 175
178 NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT); 176 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
179 177
180 return secure_tcp_syn_cookie(iph->saddr, iph->daddr, 178 return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
181 th->source, th->dest, ntohl(th->seq), 179 th->source, th->dest, ntohl(th->seq),
@@ -271,11 +269,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
271 269
272 if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) || 270 if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
273 (mss = cookie_check(skb, cookie)) == 0) { 271 (mss = cookie_check(skb, cookie)) == 0) {
274 NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESFAILED); 272 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
275 goto out; 273 goto out;
276 } 274 }
277 275
278 NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV); 276 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
279 277
280 /* check for timestamp cookie support */ 278 /* check for timestamp cookie support */
281 memset(&tcp_opt, 0, sizeof(tcp_opt)); 279 memset(&tcp_opt, 0, sizeof(tcp_opt));
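
All three syncookie counters now take sock_net(sk): the SNMP MIBs are becoming per-namespace, so each increment must be routed through the socket's owning struct net rather than a single global array. A miniature sketch of what that indirection buys (hypothetical types, not the kernel's SNMP machinery):

/* Sketch of per-namespace counters keyed off the socket. */
#include <stdio.h>

enum { MIB_SYNCOOKIES_SENT, MIB_MAX };

struct net  { unsigned long mib[MIB_MAX]; };
struct sock { struct net *net; };

static struct net *sock_net(const struct sock *sk) { return sk->net; }

static void net_inc_stats(struct net *net, int field) { net->mib[field]++; }

int main(void)
{
	struct net a = {{0}}, b = {{0}};
	struct sock s1 = { &a }, s2 = { &b };

	net_inc_stats(sock_net(&s1), MIB_SYNCOOKIES_SENT);
	net_inc_stats(sock_net(&s1), MIB_SYNCOOKIES_SENT);
	net_inc_stats(sock_net(&s2), MIB_SYNCOOKIES_SENT);
	printf("net a: %lu, net b: %lu\n",
	       a.mib[MIB_SYNCOOKIES_SENT], b.mib[MIB_SYNCOOKIES_SENT]);
	return 0;
}
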
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c437f804ee38..14ef202a2254 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem. 2 * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
3 * 3 *
4 * $Id: sysctl_net_ipv4.c,v 1.50 2001/10/20 00:00:11 davem Exp $
5 *
6 * Begun April 1, 1996, Mike Shaver. 4 * Begun April 1, 1996, Mike Shaver.
7 * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS] 5 * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
8 */ 6 */
@@ -795,7 +793,8 @@ static struct ctl_table ipv4_net_table[] = {
795 .data = &init_net.ipv4.sysctl_icmp_ratelimit, 793 .data = &init_net.ipv4.sysctl_icmp_ratelimit,
796 .maxlen = sizeof(int), 794 .maxlen = sizeof(int),
797 .mode = 0644, 795 .mode = 0644,
798 .proc_handler = &proc_dointvec 796 .proc_handler = &proc_dointvec_ms_jiffies,
797 .strategy = &sysctl_ms_jiffies
799 }, 798 },
800 { 799 {
801 .ctl_name = NET_IPV4_ICMP_RATEMASK, 800 .ctl_name = NET_IPV4_ICMP_RATEMASK,
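
The icmp_ratelimit knob switches from proc_dointvec to proc_dointvec_ms_jiffies, so userspace reads and writes milliseconds while the kernel keeps jiffies internally. A sketch of the conversion such a handler performs, assuming the generic round-up rule and an example HZ of 250:

/* Sketch of the ms<->jiffies conversion; HZ=250 is an assumption. */
#include <stdio.h>

#define HZ 250

static unsigned long msecs_to_jiffies_sketch(unsigned int ms)
{
	return ((unsigned long)ms * HZ + 999) / 1000;   /* round up */
}

static unsigned int jiffies_to_msecs_sketch(unsigned long j)
{
	return (unsigned int)(j * 1000 / HZ);
}

int main(void)
{
	unsigned long j = msecs_to_jiffies_sketch(1000); /* 1 s rate limit */

	printf("1000 ms -> %lu jiffies -> %u ms\n",
	       j, jiffies_to_msecs_sketch(j));
	return 0;
}
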
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1d723de18686..0b491bf03db4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * Implementation of the Transmission Control Protocol(TCP). 6 * Implementation of the Transmission Control Protocol(TCP).
7 * 7 *
8 * Version: $Id: tcp.c,v 1.216 2002/02/01 22:01:04 davem Exp $
9 *
10 * Authors: Ross Biro 8 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 10 * Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -279,8 +277,6 @@
279 277
280int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; 278int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
281 279
282DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
283
284atomic_t tcp_orphan_count = ATOMIC_INIT(0); 280atomic_t tcp_orphan_count = ATOMIC_INIT(0);
285 281
286EXPORT_SYMBOL_GPL(tcp_orphan_count); 282EXPORT_SYMBOL_GPL(tcp_orphan_count);
@@ -318,10 +314,10 @@ int tcp_memory_pressure __read_mostly;
318 314
319EXPORT_SYMBOL(tcp_memory_pressure); 315EXPORT_SYMBOL(tcp_memory_pressure);
320 316
321void tcp_enter_memory_pressure(void) 317void tcp_enter_memory_pressure(struct sock *sk)
322{ 318{
323 if (!tcp_memory_pressure) { 319 if (!tcp_memory_pressure) {
324 NET_INC_STATS(LINUX_MIB_TCPMEMORYPRESSURES); 320 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
325 tcp_memory_pressure = 1; 321 tcp_memory_pressure = 1;
326 } 322 }
327} 323}
@@ -346,8 +342,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
346 return inet_csk_listen_poll(sk); 342 return inet_csk_listen_poll(sk);
347 343
348 /* Socket is not locked. We are protected from async events 344 /* Socket is not locked. We are protected from async events
349 by poll logic and correct handling of state changes 345 * by poll logic and correct handling of state changes
350 made by another threads is impossible in any case. 346 * made by other threads is impossible in any case.
351 */ 347 */
352 348
353 mask = 0; 349 mask = 0;
@@ -373,10 +369,10 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
373 * in state CLOSE_WAIT. One solution is evident --- to set POLLHUP 369 * in state CLOSE_WAIT. One solution is evident --- to set POLLHUP
374 * if and only if shutdown has been made in both directions. 370 * if and only if shutdown has been made in both directions.
375 * Actually, it is interesting to look how Solaris and DUX 371 * Actually, it is interesting to look how Solaris and DUX
376 * solve this dilemma. I would prefer, if PULLHUP were maskable, 372 * solve this dilemma. I would prefer, if POLLHUP were maskable,
377 * then we could set it on SND_SHUTDOWN. BTW examples given 373 * then we could set it on SND_SHUTDOWN. BTW examples given
378 * in Stevens' books assume exactly this behaviour, it explains 374 * in Stevens' books assume exactly this behaviour, it explains
379 * why PULLHUP is incompatible with POLLOUT. --ANK 375 * why POLLHUP is incompatible with POLLOUT. --ANK
380 * 376 *
381 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent 377 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
382 * blocking on fresh not-connected or disconnected socket. --ANK 378 * blocking on fresh not-connected or disconnected socket. --ANK
@@ -651,7 +647,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
651 } 647 }
652 __kfree_skb(skb); 648 __kfree_skb(skb);
653 } else { 649 } else {
654 sk->sk_prot->enter_memory_pressure(); 650 sk->sk_prot->enter_memory_pressure(sk);
655 sk_stream_moderate_sndbuf(sk); 651 sk_stream_moderate_sndbuf(sk);
656 } 652 }
657 return NULL; 653 return NULL;
@@ -1155,7 +1151,7 @@ static void tcp_prequeue_process(struct sock *sk)
1155 struct sk_buff *skb; 1151 struct sk_buff *skb;
1156 struct tcp_sock *tp = tcp_sk(sk); 1152 struct tcp_sock *tp = tcp_sk(sk);
1157 1153
1158 NET_INC_STATS_USER(LINUX_MIB_TCPPREQUEUED); 1154 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
1159 1155
1160 /* RX process wants to run with disabled BHs, though it is not 1156 /* RX process wants to run with disabled BHs, though it is not
1161 * necessary */ 1157 * necessary */
@@ -1477,7 +1473,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1477 /* __ Restore normal policy in scheduler __ */ 1473 /* __ Restore normal policy in scheduler __ */
1478 1474
1479 if ((chunk = len - tp->ucopy.len) != 0) { 1475 if ((chunk = len - tp->ucopy.len) != 0) {
1480 NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); 1476 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
1481 len -= chunk; 1477 len -= chunk;
1482 copied += chunk; 1478 copied += chunk;
1483 } 1479 }
@@ -1488,7 +1484,7 @@ do_prequeue:
1488 tcp_prequeue_process(sk); 1484 tcp_prequeue_process(sk);
1489 1485
1490 if ((chunk = len - tp->ucopy.len) != 0) { 1486 if ((chunk = len - tp->ucopy.len) != 0) {
1491 NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); 1487 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
1492 len -= chunk; 1488 len -= chunk;
1493 copied += chunk; 1489 copied += chunk;
1494 } 1490 }
@@ -1603,7 +1599,7 @@ skip_copy:
1603 tcp_prequeue_process(sk); 1599 tcp_prequeue_process(sk);
1604 1600
1605 if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) { 1601 if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
1606 NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); 1602 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
1607 len -= chunk; 1603 len -= chunk;
1608 copied += chunk; 1604 copied += chunk;
1609 } 1605 }
@@ -1670,12 +1666,12 @@ void tcp_set_state(struct sock *sk, int state)
1670 switch (state) { 1666 switch (state) {
1671 case TCP_ESTABLISHED: 1667 case TCP_ESTABLISHED:
1672 if (oldstate != TCP_ESTABLISHED) 1668 if (oldstate != TCP_ESTABLISHED)
1673 TCP_INC_STATS(TCP_MIB_CURRESTAB); 1669 TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
1674 break; 1670 break;
1675 1671
1676 case TCP_CLOSE: 1672 case TCP_CLOSE:
1677 if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED) 1673 if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
1678 TCP_INC_STATS(TCP_MIB_ESTABRESETS); 1674 TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);
1679 1675
1680 sk->sk_prot->unhash(sk); 1676 sk->sk_prot->unhash(sk);
1681 if (inet_csk(sk)->icsk_bind_hash && 1677 if (inet_csk(sk)->icsk_bind_hash &&
@@ -1684,7 +1680,7 @@ void tcp_set_state(struct sock *sk, int state)
1684 /* fall through */ 1680 /* fall through */
1685 default: 1681 default:
1686 if (oldstate==TCP_ESTABLISHED) 1682 if (oldstate==TCP_ESTABLISHED)
1687 TCP_DEC_STATS(TCP_MIB_CURRESTAB); 1683 TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
1688 } 1684 }
1689 1685
1690 /* Change state AFTER socket is unhashed to avoid closed 1686 /* Change state AFTER socket is unhashed to avoid closed
@@ -1795,13 +1791,13 @@ void tcp_close(struct sock *sk, long timeout)
1795 */ 1791 */
1796 if (data_was_unread) { 1792 if (data_was_unread) {
1797 /* Unread data was tossed, zap the connection. */ 1793 /* Unread data was tossed, zap the connection. */
1798 NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE); 1794 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
1799 tcp_set_state(sk, TCP_CLOSE); 1795 tcp_set_state(sk, TCP_CLOSE);
1800 tcp_send_active_reset(sk, GFP_KERNEL); 1796 tcp_send_active_reset(sk, GFP_KERNEL);
1801 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { 1797 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1802 /* Check zero linger _after_ checking for unread data. */ 1798 /* Check zero linger _after_ checking for unread data. */
1803 sk->sk_prot->disconnect(sk, 0); 1799 sk->sk_prot->disconnect(sk, 0);
1804 NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA); 1800 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
1805 } else if (tcp_close_state(sk)) { 1801 } else if (tcp_close_state(sk)) {
1806 /* We FIN if the application ate all the data before 1802 /* We FIN if the application ate all the data before
1807 * zapping the connection. 1803 * zapping the connection.
@@ -1873,7 +1869,8 @@ adjudge_to_death:
1873 if (tp->linger2 < 0) { 1869 if (tp->linger2 < 0) {
1874 tcp_set_state(sk, TCP_CLOSE); 1870 tcp_set_state(sk, TCP_CLOSE);
1875 tcp_send_active_reset(sk, GFP_ATOMIC); 1871 tcp_send_active_reset(sk, GFP_ATOMIC);
1876 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER); 1872 NET_INC_STATS_BH(sock_net(sk),
1873 LINUX_MIB_TCPABORTONLINGER);
1877 } else { 1874 } else {
1878 const int tmo = tcp_fin_time(sk); 1875 const int tmo = tcp_fin_time(sk);
1879 1876
@@ -1895,7 +1892,8 @@ adjudge_to_death:
1895 "sockets\n"); 1892 "sockets\n");
1896 tcp_set_state(sk, TCP_CLOSE); 1893 tcp_set_state(sk, TCP_CLOSE);
1897 tcp_send_active_reset(sk, GFP_ATOMIC); 1894 tcp_send_active_reset(sk, GFP_ATOMIC);
1898 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY); 1895 NET_INC_STATS_BH(sock_net(sk),
1896 LINUX_MIB_TCPABORTONMEMORY);
1899 } 1897 }
1900 } 1898 }
1901 1899
@@ -2590,12 +2588,69 @@ void __tcp_put_md5sig_pool(void)
2590} 2588}
2591 2589
2592EXPORT_SYMBOL(__tcp_put_md5sig_pool); 2590EXPORT_SYMBOL(__tcp_put_md5sig_pool);
2591
2592int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
2593 struct tcphdr *th)
2594{
2595 struct scatterlist sg;
2596 int err;
2597
2598 __sum16 old_checksum = th->check;
2599 th->check = 0;
2600 /* options aren't included in the hash */
2601 sg_init_one(&sg, th, sizeof(struct tcphdr));
2602 err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr));
2603 th->check = old_checksum;
2604 return err;
2605}
2606
2607EXPORT_SYMBOL(tcp_md5_hash_header);
2608
2609int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
2610 struct sk_buff *skb, unsigned header_len)
2611{
2612 struct scatterlist sg;
2613 const struct tcphdr *tp = tcp_hdr(skb);
2614 struct hash_desc *desc = &hp->md5_desc;
2615 unsigned i;
2616 const unsigned head_data_len = skb_headlen(skb) > header_len ?
2617 skb_headlen(skb) - header_len : 0;
2618 const struct skb_shared_info *shi = skb_shinfo(skb);
2619
2620 sg_init_table(&sg, 1);
2621
2622 sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
2623 if (crypto_hash_update(desc, &sg, head_data_len))
2624 return 1;
2625
2626 for (i = 0; i < shi->nr_frags; ++i) {
2627 const struct skb_frag_struct *f = &shi->frags[i];
2628 sg_set_page(&sg, f->page, f->size, f->page_offset);
2629 if (crypto_hash_update(desc, &sg, f->size))
2630 return 1;
2631 }
2632
2633 return 0;
2634}
2635
2636EXPORT_SYMBOL(tcp_md5_hash_skb_data);
2637
2638int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
2639{
2640 struct scatterlist sg;
2641
2642 sg_init_one(&sg, key->key, key->keylen);
2643 return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
2644}
2645
2646EXPORT_SYMBOL(tcp_md5_hash_key);
2647
2593#endif 2648#endif
2594 2649
2595void tcp_done(struct sock *sk) 2650void tcp_done(struct sock *sk)
2596{ 2651{
2597 if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) 2652 if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
2598 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 2653 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
2599 2654
2600 tcp_set_state(sk, TCP_CLOSE); 2655 tcp_set_state(sk, TCP_CLOSE);
2601 tcp_clear_xmit_timers(sk); 2656 tcp_clear_xmit_timers(sk);
@@ -2732,4 +2787,3 @@ EXPORT_SYMBOL(tcp_splice_read);
2732EXPORT_SYMBOL(tcp_sendpage); 2787EXPORT_SYMBOL(tcp_sendpage);
2733EXPORT_SYMBOL(tcp_setsockopt); 2788EXPORT_SYMBOL(tcp_setsockopt);
2734EXPORT_SYMBOL(tcp_shutdown); 2789EXPORT_SYMBOL(tcp_shutdown);
2735EXPORT_SYMBOL(tcp_statistics);
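
The bulk of the tcp.c change is the new tcp_md5_hash_header()/tcp_md5_hash_skb_data()/tcp_md5_hash_key() trio, centralizing the MD5 signature computation over the header, the linear head, the page frags, and the key. One detail worth a sketch is the header step: the checksum field must not influence the signature, so it is saved, zeroed for the hash update, and restored afterwards. A standalone toy version (toy_hash_update() stands in for crypto_hash_update(); the struct is a cut-down stand-in, not struct tcphdr):

/* Sketch of the save/zero/restore dance around the checksum field. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_tcphdr {
	uint16_t source, dest;
	uint32_t seq, ack_seq;
	uint16_t flags, window;
	uint16_t check, urg_ptr;
};

static uint32_t toy_hash_update(uint32_t h, const void *p, size_t len)
{
	const uint8_t *b = p;

	while (len--)
		h = h * 31 + *b++;    /* toy hash, not crypto */
	return h;
}

static uint32_t hash_header(uint32_t h, struct toy_tcphdr *th)
{
	uint16_t old_check = th->check;

	th->check = 0;                            /* exclude from signature */
	h = toy_hash_update(h, th, sizeof(*th));
	th->check = old_check;                    /* put it back */
	return h;
}

int main(void)
{
	struct toy_tcphdr th;

	memset(&th, 0, sizeof(th));
	th.source = 80;
	th.dest = 12345;

	th.check = 0x1234;
	uint32_t h1 = hash_header(0, &th);
	th.check = 0xbeef;
	uint32_t h2 = hash_header(0, &th);
	printf("same signature regardless of checksum: %s\n",
	       h1 == h2 ? "yes" : "no");
	return 0;
}
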
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 2fbcc7d1b1a0..838d491dfda7 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * tcp_diag.c Module for monitoring TCP transport protocols sockets. 2 * tcp_diag.c Module for monitoring TCP transport protocols sockets.
3 * 3 *
4 * Version: $Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
5 *
6 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 4 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
7 * 5 *
8 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cad73b7dfef0..1f5e6049883e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * Implementation of the Transmission Control Protocol(TCP). 6 * Implementation of the Transmission Control Protocol(TCP).
7 * 7 *
8 * Version: $Id: tcp_input.c,v 1.243 2002/02/01 22:01:04 davem Exp $
9 *
10 * Authors: Ross Biro 8 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 10 * Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -604,7 +602,7 @@ static u32 tcp_rto_min(struct sock *sk)
604 u32 rto_min = TCP_RTO_MIN; 602 u32 rto_min = TCP_RTO_MIN;
605 603
606 if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) 604 if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
607 rto_min = dst_metric(dst, RTAX_RTO_MIN); 605 rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
608 return rto_min; 606 return rto_min;
609} 607}
610 608
@@ -731,6 +729,7 @@ void tcp_update_metrics(struct sock *sk)
731 if (dst && (dst->flags & DST_HOST)) { 729 if (dst && (dst->flags & DST_HOST)) {
732 const struct inet_connection_sock *icsk = inet_csk(sk); 730 const struct inet_connection_sock *icsk = inet_csk(sk);
733 int m; 731 int m;
732 unsigned long rtt;
734 733
735 if (icsk->icsk_backoff || !tp->srtt) { 734 if (icsk->icsk_backoff || !tp->srtt) {
736 /* This session failed to estimate rtt. Why? 735 /* This session failed to estimate rtt. Why?
@@ -742,7 +741,8 @@ void tcp_update_metrics(struct sock *sk)
742 return; 741 return;
743 } 742 }
744 743
745 m = dst_metric(dst, RTAX_RTT) - tp->srtt; 744 rtt = dst_metric_rtt(dst, RTAX_RTT);
745 m = rtt - tp->srtt;
746 746
747 /* If newly calculated rtt larger than stored one, 747 /* If newly calculated rtt larger than stored one,
748 * store new one. Otherwise, use EWMA. Remember, 748 * store new one. Otherwise, use EWMA. Remember,
@@ -750,12 +750,13 @@ void tcp_update_metrics(struct sock *sk)
750 */ 750 */
751 if (!(dst_metric_locked(dst, RTAX_RTT))) { 751 if (!(dst_metric_locked(dst, RTAX_RTT))) {
752 if (m <= 0) 752 if (m <= 0)
753 dst->metrics[RTAX_RTT - 1] = tp->srtt; 753 set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt);
754 else 754 else
755 dst->metrics[RTAX_RTT - 1] -= (m >> 3); 755 set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3));
756 } 756 }
757 757
758 if (!(dst_metric_locked(dst, RTAX_RTTVAR))) { 758 if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
759 unsigned long var;
759 if (m < 0) 760 if (m < 0)
760 m = -m; 761 m = -m;
761 762
@@ -764,11 +765,13 @@ void tcp_update_metrics(struct sock *sk)
764 if (m < tp->mdev) 765 if (m < tp->mdev)
765 m = tp->mdev; 766 m = tp->mdev;
766 767
767 if (m >= dst_metric(dst, RTAX_RTTVAR)) 768 var = dst_metric_rtt(dst, RTAX_RTTVAR);
768 dst->metrics[RTAX_RTTVAR - 1] = m; 769 if (m >= var)
770 var = m;
769 else 771 else
770 dst->metrics[RTAX_RTTVAR-1] -= 772 var -= (var - m) >> 2;
771 (dst_metric(dst, RTAX_RTTVAR) - m)>>2; 773
774 set_dst_metric_rtt(dst, RTAX_RTTVAR, var);
772 } 775 }
773 776
774 if (tp->snd_ssthresh >= 0xFFFF) { 777 if (tp->snd_ssthresh >= 0xFFFF) {
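
tcp_update_metrics() now goes through the dst_metric_rtt()/set_dst_metric_rtt() accessors, but the smoothing it performs is unchanged: the cached RTT jumps straight up to a larger sample and decays by 1/8 toward a smaller one, while the variance decays by 1/4 of its distance to the halved, mdev-floored error. A compilable sketch of exactly that arithmetic, keeping the kernel's shifted fixed-point units as an assumption rather than enforcing them:

/* Sketch of the RTT/RTTVAR EWMA above; units are assumed to be the
 * kernel's shifted fixed point (srtt is RTT<<3). */
#include <stdio.h>

static void update_rtt_metrics(unsigned long *rtt, unsigned long *rttvar,
			       unsigned long srtt, unsigned long mdev)
{
	long m = (long)*rtt - (long)srtt;

	if (m <= 0)
		*rtt = srtt;                      /* new sample is larger */
	else
		*rtt -= m >> 3;                   /* decay toward sample  */

	if (m < 0)
		m = -m;
	m >>= 1;
	if ((unsigned long)m < mdev)
		m = (long)mdev;                   /* floor at current mdev */

	if ((unsigned long)m >= *rttvar)
		*rttvar = (unsigned long)m;
	else
		*rttvar -= (*rttvar - (unsigned long)m) >> 2;
}

int main(void)
{
	unsigned long rtt = 800, var = 100;

	update_rtt_metrics(&rtt, &var, 640, 40);  /* smaller sample */
	printf("rtt=%lu rttvar=%lu\n", rtt, var);
	update_rtt_metrics(&rtt, &var, 1200, 40); /* larger sample  */
	printf("rtt=%lu rttvar=%lu\n", rtt, var);
	return 0;
}
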
@@ -899,7 +902,7 @@ static void tcp_init_metrics(struct sock *sk)
899 if (dst_metric(dst, RTAX_RTT) == 0) 902 if (dst_metric(dst, RTAX_RTT) == 0)
900 goto reset; 903 goto reset;
901 904
902 if (!tp->srtt && dst_metric(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3)) 905 if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
903 goto reset; 906 goto reset;
904 907
905 /* Initial rtt is determined from SYN,SYN-ACK. 908 /* Initial rtt is determined from SYN,SYN-ACK.
@@ -916,12 +919,12 @@ static void tcp_init_metrics(struct sock *sk)
916 * to low value, and then abruptly stops to do it and starts to delay 919 * to low value, and then abruptly stops to do it and starts to delay
917 * ACKs, wait for troubles. 920 * ACKs, wait for troubles.
918 */ 921 */
919 if (dst_metric(dst, RTAX_RTT) > tp->srtt) { 922 if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) {
920 tp->srtt = dst_metric(dst, RTAX_RTT); 923 tp->srtt = dst_metric_rtt(dst, RTAX_RTT);
921 tp->rtt_seq = tp->snd_nxt; 924 tp->rtt_seq = tp->snd_nxt;
922 } 925 }
923 if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) { 926 if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) {
924 tp->mdev = dst_metric(dst, RTAX_RTTVAR); 927 tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR);
925 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); 928 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
926 } 929 }
927 tcp_set_rto(sk); 930 tcp_set_rto(sk);
@@ -949,17 +952,21 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
949{ 952{
950 struct tcp_sock *tp = tcp_sk(sk); 953 struct tcp_sock *tp = tcp_sk(sk);
951 if (metric > tp->reordering) { 954 if (metric > tp->reordering) {
955 int mib_idx;
956
952 tp->reordering = min(TCP_MAX_REORDERING, metric); 957 tp->reordering = min(TCP_MAX_REORDERING, metric);
953 958
954 /* This exciting event is worth to be remembered. 8) */ 959 /* This exciting event is worth to be remembered. 8) */
955 if (ts) 960 if (ts)
956 NET_INC_STATS_BH(LINUX_MIB_TCPTSREORDER); 961 mib_idx = LINUX_MIB_TCPTSREORDER;
957 else if (tcp_is_reno(tp)) 962 else if (tcp_is_reno(tp))
958 NET_INC_STATS_BH(LINUX_MIB_TCPRENOREORDER); 963 mib_idx = LINUX_MIB_TCPRENOREORDER;
959 else if (tcp_is_fack(tp)) 964 else if (tcp_is_fack(tp))
960 NET_INC_STATS_BH(LINUX_MIB_TCPFACKREORDER); 965 mib_idx = LINUX_MIB_TCPFACKREORDER;
961 else 966 else
962 NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER); 967 mib_idx = LINUX_MIB_TCPSACKREORDER;
968
969 NET_INC_STATS_BH(sock_net(sk), mib_idx);
963#if FASTRETRANS_DEBUG > 1 970#if FASTRETRANS_DEBUG > 1
964 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", 971 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
965 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, 972 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
@@ -1155,7 +1162,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
1155 tp->lost_out += tcp_skb_pcount(skb); 1162 tp->lost_out += tcp_skb_pcount(skb);
1156 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1163 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1157 } 1164 }
1158 NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); 1165 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
1159 } else { 1166 } else {
1160 if (before(ack_seq, new_low_seq)) 1167 if (before(ack_seq, new_low_seq))
1161 new_low_seq = ack_seq; 1168 new_low_seq = ack_seq;
@@ -1167,10 +1174,11 @@ static void tcp_mark_lost_retrans(struct sock *sk)
1167 tp->lost_retrans_low = new_low_seq; 1174 tp->lost_retrans_low = new_low_seq;
1168} 1175}
1169 1176
1170static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, 1177static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
1171 struct tcp_sack_block_wire *sp, int num_sacks, 1178 struct tcp_sack_block_wire *sp, int num_sacks,
1172 u32 prior_snd_una) 1179 u32 prior_snd_una)
1173{ 1180{
1181 struct tcp_sock *tp = tcp_sk(sk);
1174 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq); 1182 u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
1175 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq); 1183 u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
1176 int dup_sack = 0; 1184 int dup_sack = 0;
@@ -1178,7 +1186,7 @@ static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
1178 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { 1186 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1179 dup_sack = 1; 1187 dup_sack = 1;
1180 tcp_dsack_seen(tp); 1188 tcp_dsack_seen(tp);
1181 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); 1189 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
1182 } else if (num_sacks > 1) { 1190 } else if (num_sacks > 1) {
1183 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq); 1191 u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
1184 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq); 1192 u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
@@ -1187,7 +1195,8 @@ static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
1187 !before(start_seq_0, start_seq_1)) { 1195 !before(start_seq_0, start_seq_1)) {
1188 dup_sack = 1; 1196 dup_sack = 1;
1189 tcp_dsack_seen(tp); 1197 tcp_dsack_seen(tp);
1190 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); 1198 NET_INC_STATS_BH(sock_net(sk),
1199 LINUX_MIB_TCPDSACKOFORECV);
1191 } 1200 }
1192 } 1201 }
1193 1202
@@ -1414,10 +1423,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
1414 unsigned char *ptr = (skb_transport_header(ack_skb) + 1423 unsigned char *ptr = (skb_transport_header(ack_skb) +
1415 TCP_SKB_CB(ack_skb)->sacked); 1424 TCP_SKB_CB(ack_skb)->sacked);
1416 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); 1425 struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
1417 struct tcp_sack_block sp[4]; 1426 struct tcp_sack_block sp[TCP_NUM_SACKS];
1418 struct tcp_sack_block *cache; 1427 struct tcp_sack_block *cache;
1419 struct sk_buff *skb; 1428 struct sk_buff *skb;
1420 int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE) >> 3; 1429 int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
1421 int used_sacks; 1430 int used_sacks;
1422 int reord = tp->packets_out; 1431 int reord = tp->packets_out;
1423 int flag = 0; 1432 int flag = 0;
@@ -1432,7 +1441,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
1432 tcp_highest_sack_reset(sk); 1441 tcp_highest_sack_reset(sk);
1433 } 1442 }
1434 1443
1435 found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire, 1444 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1436 num_sacks, prior_snd_una); 1445 num_sacks, prior_snd_una);
1437 if (found_dup_sack) 1446 if (found_dup_sack)
1438 flag |= FLAG_DSACKING_ACK; 1447 flag |= FLAG_DSACKING_ACK;
@@ -1458,18 +1467,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
1458 if (!tcp_is_sackblock_valid(tp, dup_sack, 1467 if (!tcp_is_sackblock_valid(tp, dup_sack,
1459 sp[used_sacks].start_seq, 1468 sp[used_sacks].start_seq,
1460 sp[used_sacks].end_seq)) { 1469 sp[used_sacks].end_seq)) {
1470 int mib_idx;
1471
1461 if (dup_sack) { 1472 if (dup_sack) {
1462 if (!tp->undo_marker) 1473 if (!tp->undo_marker)
1463 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO); 1474 mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
1464 else 1475 else
1465 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD); 1476 mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
1466 } else { 1477 } else {
1467 /* Don't count olds caused by ACK reordering */ 1478 /* Don't count olds caused by ACK reordering */
1468 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) && 1479 if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
1469 !after(sp[used_sacks].end_seq, tp->snd_una)) 1480 !after(sp[used_sacks].end_seq, tp->snd_una))
1470 continue; 1481 continue;
1471 NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD); 1482 mib_idx = LINUX_MIB_TCPSACKDISCARD;
1472 } 1483 }
1484
1485 NET_INC_STATS_BH(sock_net(sk), mib_idx);
1473 if (i == 0) 1486 if (i == 0)
1474 first_sack_index = -1; 1487 first_sack_index = -1;
1475 continue; 1488 continue;
@@ -1962,7 +1975,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag)
1962{ 1975{
1963 if (flag & FLAG_SACK_RENEGING) { 1976 if (flag & FLAG_SACK_RENEGING) {
1964 struct inet_connection_sock *icsk = inet_csk(sk); 1977 struct inet_connection_sock *icsk = inet_csk(sk);
1965 NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); 1978 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
1966 1979
1967 tcp_enter_loss(sk, 1); 1980 tcp_enter_loss(sk, 1);
1968 icsk->icsk_retransmits++; 1981 icsk->icsk_retransmits++;
@@ -2382,15 +2395,19 @@ static int tcp_try_undo_recovery(struct sock *sk)
2382 struct tcp_sock *tp = tcp_sk(sk); 2395 struct tcp_sock *tp = tcp_sk(sk);
2383 2396
2384 if (tcp_may_undo(tp)) { 2397 if (tcp_may_undo(tp)) {
2398 int mib_idx;
2399
2385 /* Happy end! We did not retransmit anything 2400 /* Happy end! We did not retransmit anything
2386 * or our original transmission succeeded. 2401 * or our original transmission succeeded.
2387 */ 2402 */
2388 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); 2403 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
2389 tcp_undo_cwr(sk, 1); 2404 tcp_undo_cwr(sk, 1);
2390 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) 2405 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
2391 NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); 2406 mib_idx = LINUX_MIB_TCPLOSSUNDO;
2392 else 2407 else
2393 NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO); 2408 mib_idx = LINUX_MIB_TCPFULLUNDO;
2409
2410 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2394 tp->undo_marker = 0; 2411 tp->undo_marker = 0;
2395 } 2412 }
2396 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { 2413 if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
@@ -2413,7 +2430,7 @@ static void tcp_try_undo_dsack(struct sock *sk)
2413 DBGUNDO(sk, "D-SACK"); 2430 DBGUNDO(sk, "D-SACK");
2414 tcp_undo_cwr(sk, 1); 2431 tcp_undo_cwr(sk, 1);
2415 tp->undo_marker = 0; 2432 tp->undo_marker = 0;
2416 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO); 2433 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
2417 } 2434 }
2418} 2435}
2419 2436
@@ -2436,7 +2453,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
2436 2453
2437 DBGUNDO(sk, "Hoe"); 2454 DBGUNDO(sk, "Hoe");
2438 tcp_undo_cwr(sk, 0); 2455 tcp_undo_cwr(sk, 0);
2439 NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO); 2456 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
2440 2457
2441 /* So... Do not make Hoe's retransmit yet. 2458 /* So... Do not make Hoe's retransmit yet.
2442 * If the first packet was delayed, the rest 2459 * If the first packet was delayed, the rest
@@ -2465,7 +2482,7 @@ static int tcp_try_undo_loss(struct sock *sk)
2465 DBGUNDO(sk, "partial loss"); 2482 DBGUNDO(sk, "partial loss");
2466 tp->lost_out = 0; 2483 tp->lost_out = 0;
2467 tcp_undo_cwr(sk, 1); 2484 tcp_undo_cwr(sk, 1);
2468 NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); 2485 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
2469 inet_csk(sk)->icsk_retransmits = 0; 2486 inet_csk(sk)->icsk_retransmits = 0;
2470 tp->undo_marker = 0; 2487 tp->undo_marker = 0;
2471 if (tcp_is_sack(tp)) 2488 if (tcp_is_sack(tp))
@@ -2562,7 +2579,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2562 int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); 2579 int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
2563 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && 2580 int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2564 (tcp_fackets_out(tp) > tp->reordering)); 2581 (tcp_fackets_out(tp) > tp->reordering));
2565 int fast_rexmit = 0; 2582 int fast_rexmit = 0, mib_idx;
2566 2583
2567 if (WARN_ON(!tp->packets_out && tp->sacked_out)) 2584 if (WARN_ON(!tp->packets_out && tp->sacked_out))
2568 tp->sacked_out = 0; 2585 tp->sacked_out = 0;
@@ -2584,7 +2601,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2584 icsk->icsk_ca_state != TCP_CA_Open && 2601 icsk->icsk_ca_state != TCP_CA_Open &&
2585 tp->fackets_out > tp->reordering) { 2602 tp->fackets_out > tp->reordering) {
2586 tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering); 2603 tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
2587 NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); 2604 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
2588 } 2605 }
2589 2606
2590 /* D. Check consistency of the current state. */ 2607 /* D. Check consistency of the current state. */
@@ -2685,9 +2702,11 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
2685 /* Otherwise enter Recovery state */ 2702 /* Otherwise enter Recovery state */
2686 2703
2687 if (tcp_is_reno(tp)) 2704 if (tcp_is_reno(tp))
2688 NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERY); 2705 mib_idx = LINUX_MIB_TCPRENORECOVERY;
2689 else 2706 else
2690 NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERY); 2707 mib_idx = LINUX_MIB_TCPSACKRECOVERY;
2708
2709 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2691 2710
2692 tp->high_seq = tp->snd_nxt; 2711 tp->high_seq = tp->snd_nxt;
2693 tp->prior_ssthresh = 0; 2712 tp->prior_ssthresh = 0;
@@ -3198,7 +3217,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
3198 } 3217 }
3199 tp->frto_counter = 0; 3218 tp->frto_counter = 0;
3200 tp->undo_marker = 0; 3219 tp->undo_marker = 0;
3201 NET_INC_STATS_BH(LINUX_MIB_TCPSPURIOUSRTOS); 3220 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
3202 } 3221 }
3203 return 0; 3222 return 0;
3204} 3223}
@@ -3251,12 +3270,12 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3251 3270
3252 tcp_ca_event(sk, CA_EVENT_FAST_ACK); 3271 tcp_ca_event(sk, CA_EVENT_FAST_ACK);
3253 3272
3254 NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS); 3273 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
3255 } else { 3274 } else {
3256 if (ack_seq != TCP_SKB_CB(skb)->end_seq) 3275 if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3257 flag |= FLAG_DATA; 3276 flag |= FLAG_DATA;
3258 else 3277 else
3259 NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS); 3278 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3260 3279
3261 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); 3280 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3262 3281
@@ -3450,6 +3469,43 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
3450 return 1; 3469 return 1;
3451} 3470}
3452 3471
3472#ifdef CONFIG_TCP_MD5SIG
3473/*
3474 * Parse MD5 Signature option
3475 */
3476u8 *tcp_parse_md5sig_option(struct tcphdr *th)
3477{
3478 int length = (th->doff << 2) - sizeof (*th);
3479 u8 *ptr = (u8*)(th + 1);
3480
3481 /* If the TCP option is too short, we can short cut */
3482 if (length < TCPOLEN_MD5SIG)
3483 return NULL;
3484
3485 while (length > 0) {
3486 int opcode = *ptr++;
3487 int opsize;
3488
3489 switch(opcode) {
3490 case TCPOPT_EOL:
3491 return NULL;
3492 case TCPOPT_NOP:
3493 length--;
3494 continue;
3495 default:
3496 opsize = *ptr++;
3497 if (opsize < 2 || opsize > length)
3498 return NULL;
3499 if (opcode == TCPOPT_MD5SIG)
3500 return ptr;
3501 }
3502 ptr += opsize - 2;
3503 length -= opsize;
3504 }
3505 return NULL;
3506}
3507#endif
3508
3453static inline void tcp_store_ts_recent(struct tcp_sock *tp) 3509static inline void tcp_store_ts_recent(struct tcp_sock *tp)
3454{ 3510{
3455 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; 3511 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
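
tcp_parse_md5sig_option() above is a textbook TCP option walk: EOL terminates, NOP consumes a single byte, and every other option is length-prefixed, with opsize < 2 or opsize > remaining length treated as corruption. A self-contained version of the same loop over a made-up buffer (the option numbers are the real IANA values; everything else is illustrative):

/* Standalone sketch of the TCP option walk used above. */
#include <stdio.h>

enum { TCPOPT_EOL = 0, TCPOPT_NOP = 1, TCPOPT_MD5SIG = 19 };

static const unsigned char *find_md5_opt(const unsigned char *ptr, int length)
{
	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return NULL;
		case TCPOPT_NOP:
			length--;
			continue;
		default:
			opsize = *ptr++;
			if (opsize < 2 || opsize > length)
				return NULL;      /* corrupt option list */
			if (opcode == TCPOPT_MD5SIG)
				return ptr;       /* points at the digest */
		}
		ptr += opsize - 2;
		length -= opsize;
	}
	return NULL;
}

int main(void)
{
	/* NOP, NOP, MD5SIG len 18, then 16 digest bytes (zeroed here) */
	unsigned char opts[20] = { 1, 1, 19, 18 };

	printf("md5 option %sfound\n", find_md5_opt(opts, 20) ? "" : "not ");
	return 0;
}
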
@@ -3662,26 +3718,33 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
3662 return 0; 3718 return 0;
3663} 3719}
3664 3720
3665static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) 3721static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
3666{ 3722{
3723 struct tcp_sock *tp = tcp_sk(sk);
3724
3667 if (tcp_is_sack(tp) && sysctl_tcp_dsack) { 3725 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
3726 int mib_idx;
3727
3668 if (before(seq, tp->rcv_nxt)) 3728 if (before(seq, tp->rcv_nxt))
3669 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT); 3729 mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
3670 else 3730 else
3671 NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFOSENT); 3731 mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
3732
3733 NET_INC_STATS_BH(sock_net(sk), mib_idx);
3672 3734
3673 tp->rx_opt.dsack = 1; 3735 tp->rx_opt.dsack = 1;
3674 tp->duplicate_sack[0].start_seq = seq; 3736 tp->duplicate_sack[0].start_seq = seq;
3675 tp->duplicate_sack[0].end_seq = end_seq; 3737 tp->duplicate_sack[0].end_seq = end_seq;
3676 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1, 3738 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + 1;
3677 4 - tp->rx_opt.tstamp_ok);
3678 } 3739 }
3679} 3740}
3680 3741
3681static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq) 3742static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
3682{ 3743{
3744 struct tcp_sock *tp = tcp_sk(sk);
3745
3683 if (!tp->rx_opt.dsack) 3746 if (!tp->rx_opt.dsack)
3684 tcp_dsack_set(tp, seq, end_seq); 3747 tcp_dsack_set(sk, seq, end_seq);
3685 else 3748 else
3686 tcp_sack_extend(tp->duplicate_sack, seq, end_seq); 3749 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
3687} 3750}
@@ -3692,7 +3755,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
3692 3755
3693 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && 3756 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
3694 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { 3757 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
3695 NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST); 3758 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
3696 tcp_enter_quickack_mode(sk); 3759 tcp_enter_quickack_mode(sk);
3697 3760
3698 if (tcp_is_sack(tp) && sysctl_tcp_dsack) { 3761 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
@@ -3700,7 +3763,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
3700 3763
3701 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) 3764 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
3702 end_seq = tp->rcv_nxt; 3765 end_seq = tp->rcv_nxt;
3703 tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, end_seq); 3766 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
3704 } 3767 }
3705 } 3768 }
3706 3769
@@ -3727,9 +3790,8 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
3727 * Decrease num_sacks. 3790 * Decrease num_sacks.
3728 */ 3791 */
3729 tp->rx_opt.num_sacks--; 3792 tp->rx_opt.num_sacks--;
3730 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 3793 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks +
3731 tp->rx_opt.dsack, 3794 tp->rx_opt.dsack;
3732 4 - tp->rx_opt.tstamp_ok);
3733 for (i = this_sack; i < tp->rx_opt.num_sacks; i++) 3795 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
3734 sp[i] = sp[i + 1]; 3796 sp[i] = sp[i + 1];
3735 continue; 3797 continue;
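
The eff_sacks hunks drop the old min(..., 4 - tp->rx_opt.tstamp_ok) clamp here; capping against available option space now happens where the options are actually written, with TCP_NUM_SACKS as the hard bound. The arithmetic the clamp encoded is simple enough to show: 40 bytes of TCP option space, 12 gone to an aligned timestamp, 8 per SACK block on top of an aligned 4-byte base. A sketch (the constant values match the usual kernel definitions, assumed here rather than included):

/* Sketch of why at most 4 SACK blocks fit, or 3 with timestamps. */
#include <stdio.h>

#define MAX_TCP_OPTION_SPACE      40
#define TCPOLEN_TSTAMP_ALIGNED    12
#define TCPOLEN_SACK_BASE_ALIGNED  4   /* kind + len, padded to 4 */
#define TCPOLEN_SACK_PERBLOCK      8

static int max_sack_blocks(int tstamp_ok)
{
	int space = MAX_TCP_OPTION_SPACE - TCPOLEN_SACK_BASE_ALIGNED;

	if (tstamp_ok)
		space -= TCPOLEN_TSTAMP_ALIGNED;
	return space / TCPOLEN_SACK_PERBLOCK;
}

int main(void)
{
	printf("blocks w/o timestamps: %d, with: %d\n",
	       max_sack_blocks(0), max_sack_blocks(1));
	return 0;
}

This reproduces the removed "4 - tstamp_ok" values (4 and 3), which is why the receive path can now track num_sacks + dsack unclamped.
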
@@ -3779,7 +3841,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
3779 * 3841 *
3780 * If the sack array is full, forget about the last one. 3842 * If the sack array is full, forget about the last one.
3781 */ 3843 */
3782 if (this_sack >= 4) { 3844 if (this_sack >= TCP_NUM_SACKS) {
3783 this_sack--; 3845 this_sack--;
3784 tp->rx_opt.num_sacks--; 3846 tp->rx_opt.num_sacks--;
3785 sp--; 3847 sp--;
@@ -3792,8 +3854,7 @@ new_sack:
3792 sp->start_seq = seq; 3854 sp->start_seq = seq;
3793 sp->end_seq = end_seq; 3855 sp->end_seq = end_seq;
3794 tp->rx_opt.num_sacks++; 3856 tp->rx_opt.num_sacks++;
3795 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 3857 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
3796 4 - tp->rx_opt.tstamp_ok);
3797} 3858}
3798 3859
3799/* RCV.NXT advances, some SACKs should be eaten. */ 3860/* RCV.NXT advances, some SACKs should be eaten. */
@@ -3830,9 +3891,8 @@ static void tcp_sack_remove(struct tcp_sock *tp)
3830 } 3891 }
3831 if (num_sacks != tp->rx_opt.num_sacks) { 3892 if (num_sacks != tp->rx_opt.num_sacks) {
3832 tp->rx_opt.num_sacks = num_sacks; 3893 tp->rx_opt.num_sacks = num_sacks;
3833 tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 3894 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks +
3834 tp->rx_opt.dsack, 3895 tp->rx_opt.dsack;
3835 4 - tp->rx_opt.tstamp_ok);
3836 } 3896 }
3837} 3897}
3838 3898
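Note: the eff_sacks hunks above all share one idea. The old code clamped the SACK count against the remaining option space at every update site (min(..., 4 - tp->rx_opt.tstamp_ok)); the new code stores the raw count and clamps once, at segment-build time, in tcp_established_options() (see the tcp_output.c diff below). A plain-C sketch of the arithmetic, using the kernel's aligned option sizes (40 bytes of option space, 12 for timestamps, 4 for the SACK base, 8 per block):

static unsigned max_sack_blocks(int tstamp_ok)
{
	unsigned remaining = 40 - (tstamp_ok ? 12 : 0);

	/* 3 blocks with timestamps, 4 without -- the same bound the
	 * old "4 - tstamp_ok" clamp produced. */
	return (remaining - 4) / 8;
}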
@@ -3853,7 +3913,7 @@ static void tcp_ofo_queue(struct sock *sk)
3853 __u32 dsack = dsack_high; 3913 __u32 dsack = dsack_high;
3854 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high)) 3914 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
3855 dsack_high = TCP_SKB_CB(skb)->end_seq; 3915 dsack_high = TCP_SKB_CB(skb)->end_seq;
3856 tcp_dsack_extend(tp, TCP_SKB_CB(skb)->seq, dsack); 3916 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
3857 } 3917 }
3858 3918
3859 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { 3919 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
@@ -3911,8 +3971,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
3911 3971
3912 if (tp->rx_opt.dsack) { 3972 if (tp->rx_opt.dsack) {
3913 tp->rx_opt.dsack = 0; 3973 tp->rx_opt.dsack = 0;
3914 tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks, 3974 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
3915 4 - tp->rx_opt.tstamp_ok);
3916 } 3975 }
3917 3976
3918 /* Queue data for delivery to the user. 3977 /* Queue data for delivery to the user.
@@ -3981,8 +4040,8 @@ queue_and_out:
3981 4040
3982 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { 4041 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
3983 /* A retransmit, 2nd most common case. Force an immediate ack. */ 4042 /* A retransmit, 2nd most common case. Force an immediate ack. */
3984 NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST); 4043 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
3985 tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); 4044 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
3986 4045
3987out_of_window: 4046out_of_window:
3988 tcp_enter_quickack_mode(sk); 4047 tcp_enter_quickack_mode(sk);
@@ -4004,7 +4063,7 @@ drop:
4004 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, 4063 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4005 TCP_SKB_CB(skb)->end_seq); 4064 TCP_SKB_CB(skb)->end_seq);
4006 4065
4007 tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, tp->rcv_nxt); 4066 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4008 4067
4009 /* If window is closed, drop tail of packet. But after 4068 /* If window is closed, drop tail of packet. But after
4010 * remembering D-SACK for its head made in previous line. 4069 * remembering D-SACK for its head made in previous line.
@@ -4069,12 +4128,12 @@ drop:
4069 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { 4128 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4070 /* All the bits are present. Drop. */ 4129 /* All the bits are present. Drop. */
4071 __kfree_skb(skb); 4130 __kfree_skb(skb);
4072 tcp_dsack_set(tp, seq, end_seq); 4131 tcp_dsack_set(sk, seq, end_seq);
4073 goto add_sack; 4132 goto add_sack;
4074 } 4133 }
4075 if (after(seq, TCP_SKB_CB(skb1)->seq)) { 4134 if (after(seq, TCP_SKB_CB(skb1)->seq)) {
4076 /* Partial overlap. */ 4135 /* Partial overlap. */
4077 tcp_dsack_set(tp, seq, 4136 tcp_dsack_set(sk, seq,
4078 TCP_SKB_CB(skb1)->end_seq); 4137 TCP_SKB_CB(skb1)->end_seq);
4079 } else { 4138 } else {
4080 skb1 = skb1->prev; 4139 skb1 = skb1->prev;
@@ -4087,12 +4146,12 @@ drop:
4087 (struct sk_buff *)&tp->out_of_order_queue && 4146 (struct sk_buff *)&tp->out_of_order_queue &&
4088 after(end_seq, TCP_SKB_CB(skb1)->seq)) { 4147 after(end_seq, TCP_SKB_CB(skb1)->seq)) {
4089 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { 4148 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4090 tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, 4149 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4091 end_seq); 4150 end_seq);
4092 break; 4151 break;
4093 } 4152 }
4094 __skb_unlink(skb1, &tp->out_of_order_queue); 4153 __skb_unlink(skb1, &tp->out_of_order_queue);
4095 tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, 4154 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
4096 TCP_SKB_CB(skb1)->end_seq); 4155 TCP_SKB_CB(skb1)->end_seq);
4097 __kfree_skb(skb1); 4156 __kfree_skb(skb1);
4098 } 4157 }
@@ -4123,7 +4182,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
4123 struct sk_buff *next = skb->next; 4182 struct sk_buff *next = skb->next;
4124 __skb_unlink(skb, list); 4183 __skb_unlink(skb, list);
4125 __kfree_skb(skb); 4184 __kfree_skb(skb);
4126 NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); 4185 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4127 skb = next; 4186 skb = next;
4128 continue; 4187 continue;
4129 } 4188 }
@@ -4191,7 +4250,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
4191 struct sk_buff *next = skb->next; 4250 struct sk_buff *next = skb->next;
4192 __skb_unlink(skb, list); 4251 __skb_unlink(skb, list);
4193 __kfree_skb(skb); 4252 __kfree_skb(skb);
4194 NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); 4253 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
4195 skb = next; 4254 skb = next;
4196 if (skb == tail || 4255 if (skb == tail ||
4197 tcp_hdr(skb)->syn || 4256 tcp_hdr(skb)->syn ||
@@ -4254,7 +4313,7 @@ static int tcp_prune_ofo_queue(struct sock *sk)
4254 int res = 0; 4313 int res = 0;
4255 4314
4256 if (!skb_queue_empty(&tp->out_of_order_queue)) { 4315 if (!skb_queue_empty(&tp->out_of_order_queue)) {
4257 NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED); 4316 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
4258 __skb_queue_purge(&tp->out_of_order_queue); 4317 __skb_queue_purge(&tp->out_of_order_queue);
4259 4318
4260 /* Reset SACK state. A conforming SACK implementation will 4319 /* Reset SACK state. A conforming SACK implementation will
@@ -4283,7 +4342,7 @@ static int tcp_prune_queue(struct sock *sk)
4283 4342
4284 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq); 4343 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
4285 4344
4286 NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED); 4345 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED);
4287 4346
4288 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) 4347 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
4289 tcp_clamp_window(sk); 4348 tcp_clamp_window(sk);
@@ -4312,7 +4371,7 @@ static int tcp_prune_queue(struct sock *sk)
4312 * drop receive data on the floor. It will get retransmitted 4371 * drop receive data on the floor. It will get retransmitted
4313 * and hopefully then we'll have sufficient space. 4372 * and hopefully then we'll have sufficient space.
4314 */ 4373 */
4315 NET_INC_STATS_BH(LINUX_MIB_RCVPRUNED); 4374 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED);
4316 4375
4317 /* Massive buffer overcommit. */ 4376 /* Massive buffer overcommit. */
4318 tp->pred_flags = 0; 4377 tp->pred_flags = 0;
@@ -4742,7 +4801,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
4742 tcp_data_snd_check(sk); 4801 tcp_data_snd_check(sk);
4743 return 0; 4802 return 0;
4744 } else { /* Header too small */ 4803 } else { /* Header too small */
4745 TCP_INC_STATS_BH(TCP_MIB_INERRS); 4804 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
4746 goto discard; 4805 goto discard;
4747 } 4806 }
4748 } else { 4807 } else {
@@ -4779,7 +4838,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
4779 4838
4780 __skb_pull(skb, tcp_header_len); 4839 __skb_pull(skb, tcp_header_len);
4781 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 4840 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4782 NET_INC_STATS_BH(LINUX_MIB_TCPHPHITSTOUSER); 4841 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
4783 } 4842 }
4784 if (copied_early) 4843 if (copied_early)
4785 tcp_cleanup_rbuf(sk, skb->len); 4844 tcp_cleanup_rbuf(sk, skb->len);
@@ -4802,7 +4861,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
4802 if ((int)skb->truesize > sk->sk_forward_alloc) 4861 if ((int)skb->truesize > sk->sk_forward_alloc)
4803 goto step5; 4862 goto step5;
4804 4863
4805 NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS); 4864 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
4806 4865
4807 /* Bulk data transfer: receiver */ 4866 /* Bulk data transfer: receiver */
4808 __skb_pull(skb, tcp_header_len); 4867 __skb_pull(skb, tcp_header_len);
@@ -4846,7 +4905,7 @@ slow_path:
4846 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && 4905 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
4847 tcp_paws_discard(sk, skb)) { 4906 tcp_paws_discard(sk, skb)) {
4848 if (!th->rst) { 4907 if (!th->rst) {
4849 NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); 4908 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
4850 tcp_send_dupack(sk, skb); 4909 tcp_send_dupack(sk, skb);
4851 goto discard; 4910 goto discard;
4852 } 4911 }
@@ -4881,8 +4940,8 @@ slow_path:
4881 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); 4940 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
4882 4941
4883 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { 4942 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4884 TCP_INC_STATS_BH(TCP_MIB_INERRS); 4943 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
4885 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); 4944 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
4886 tcp_reset(sk); 4945 tcp_reset(sk);
4887 return 1; 4946 return 1;
4888 } 4947 }
@@ -4904,7 +4963,7 @@ step5:
4904 return 0; 4963 return 0;
4905 4964
4906csum_error: 4965csum_error:
4907 TCP_INC_STATS_BH(TCP_MIB_INERRS); 4966 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
4908 4967
4909discard: 4968discard:
4910 __kfree_skb(skb); 4969 __kfree_skb(skb);
@@ -4938,7 +4997,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
4938 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && 4997 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
4939 !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, 4998 !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
4940 tcp_time_stamp)) { 4999 tcp_time_stamp)) {
4941 NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED); 5000 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
4942 goto reset_and_undo; 5001 goto reset_and_undo;
4943 } 5002 }
4944 5003
@@ -5222,7 +5281,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5222 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && 5281 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
5223 tcp_paws_discard(sk, skb)) { 5282 tcp_paws_discard(sk, skb)) {
5224 if (!th->rst) { 5283 if (!th->rst) {
5225 NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); 5284 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
5226 tcp_send_dupack(sk, skb); 5285 tcp_send_dupack(sk, skb);
5227 goto discard; 5286 goto discard;
5228 } 5287 }
@@ -5251,7 +5310,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5251 * Check for a SYN in window. 5310 * Check for a SYN in window.
5252 */ 5311 */
5253 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { 5312 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
5254 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); 5313 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
5255 tcp_reset(sk); 5314 tcp_reset(sk);
5256 return 1; 5315 return 1;
5257 } 5316 }
@@ -5333,7 +5392,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5333 (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && 5392 (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5334 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { 5393 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
5335 tcp_done(sk); 5394 tcp_done(sk);
5336 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONDATA); 5395 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5337 return 1; 5396 return 1;
5338 } 5397 }
5339 5398
@@ -5393,7 +5452,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5393 if (sk->sk_shutdown & RCV_SHUTDOWN) { 5452 if (sk->sk_shutdown & RCV_SHUTDOWN) {
5394 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && 5453 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
5395 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { 5454 after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
5396 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONDATA); 5455 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
5397 tcp_reset(sk); 5456 tcp_reset(sk);
5398 return 1; 5457 return 1;
5399 } 5458 }
@@ -5422,6 +5481,9 @@ EXPORT_SYMBOL(sysctl_tcp_ecn);
5422EXPORT_SYMBOL(sysctl_tcp_reordering); 5481EXPORT_SYMBOL(sysctl_tcp_reordering);
5423EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); 5482EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
5424EXPORT_SYMBOL(tcp_parse_options); 5483EXPORT_SYMBOL(tcp_parse_options);
5484#ifdef CONFIG_TCP_MD5SIG
5485EXPORT_SYMBOL(tcp_parse_md5sig_option);
5486#endif
5425EXPORT_SYMBOL(tcp_rcv_established); 5487EXPORT_SYMBOL(tcp_rcv_established);
5426EXPORT_SYMBOL(tcp_rcv_state_process); 5488EXPORT_SYMBOL(tcp_rcv_state_process);
5427EXPORT_SYMBOL(tcp_initialize_rcv_mss); 5489EXPORT_SYMBOL(tcp_initialize_rcv_mss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ffe869ac1bcf..a82df6307567 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * Implementation of the Transmission Control Protocol(TCP). 6 * Implementation of the Transmission Control Protocol(TCP).
7 * 7 *
8 * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
9 *
10 * IPv4 specific functions 8 * IPv4 specific functions
11 * 9 *
12 * 10 *
@@ -89,10 +87,14 @@ int sysctl_tcp_low_latency __read_mostly;
89#ifdef CONFIG_TCP_MD5SIG 87#ifdef CONFIG_TCP_MD5SIG
90static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, 88static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
91 __be32 addr); 89 __be32 addr);
92static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, 90static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
93 __be32 saddr, __be32 daddr, 91 __be32 daddr, __be32 saddr, struct tcphdr *th);
94 struct tcphdr *th, int protocol, 92#else
95 unsigned int tcplen); 93static inline
94struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
95{
96 return NULL;
97}
96#endif 98#endif
97 99
98struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { 100struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
@@ -172,7 +174,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
172 inet->sport, usin->sin_port, sk, 1); 174 inet->sport, usin->sin_port, sk, 1);
173 if (tmp < 0) { 175 if (tmp < 0) {
174 if (tmp == -ENETUNREACH) 176 if (tmp == -ENETUNREACH)
175 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 177 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
176 return tmp; 178 return tmp;
177 } 179 }
178 180
@@ -340,16 +342,17 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
340 struct sock *sk; 342 struct sock *sk;
341 __u32 seq; 343 __u32 seq;
342 int err; 344 int err;
345 struct net *net = dev_net(skb->dev);
343 346
344 if (skb->len < (iph->ihl << 2) + 8) { 347 if (skb->len < (iph->ihl << 2) + 8) {
345 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 348 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
346 return; 349 return;
347 } 350 }
348 351
349 sk = inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->daddr, th->dest, 352 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
350 iph->saddr, th->source, inet_iif(skb)); 353 iph->saddr, th->source, inet_iif(skb));
351 if (!sk) { 354 if (!sk) {
352 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 355 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
353 return; 356 return;
354 } 357 }
355 if (sk->sk_state == TCP_TIME_WAIT) { 358 if (sk->sk_state == TCP_TIME_WAIT) {
@@ -362,7 +365,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
362 * servers this needs to be solved differently. 365 * servers this needs to be solved differently.
363 */ 366 */
364 if (sock_owned_by_user(sk)) 367 if (sock_owned_by_user(sk))
365 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); 368 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
366 369
367 if (sk->sk_state == TCP_CLOSE) 370 if (sk->sk_state == TCP_CLOSE)
368 goto out; 371 goto out;
@@ -371,7 +374,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
371 seq = ntohl(th->seq); 374 seq = ntohl(th->seq);
372 if (sk->sk_state != TCP_LISTEN && 375 if (sk->sk_state != TCP_LISTEN &&
373 !between(seq, tp->snd_una, tp->snd_nxt)) { 376 !between(seq, tp->snd_una, tp->snd_nxt)) {
374 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); 377 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
375 goto out; 378 goto out;
376 } 379 }
377 380
@@ -418,7 +421,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
418 BUG_TRAP(!req->sk); 421 BUG_TRAP(!req->sk);
419 422
420 if (seq != tcp_rsk(req)->snt_isn) { 423 if (seq != tcp_rsk(req)->snt_isn) {
421 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); 424 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
422 goto out; 425 goto out;
423 } 426 }
424 427
@@ -540,6 +543,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
540#ifdef CONFIG_TCP_MD5SIG 543#ifdef CONFIG_TCP_MD5SIG
541 struct tcp_md5sig_key *key; 544 struct tcp_md5sig_key *key;
542#endif 545#endif
546 struct net *net;
543 547
544 /* Never send a reset in response to a reset. */ 548 /* Never send a reset in response to a reset. */
545 if (th->rst) 549 if (th->rst)
@@ -578,12 +582,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
578 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 582 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
579 rep.th.doff = arg.iov[0].iov_len / 4; 583 rep.th.doff = arg.iov[0].iov_len / 4;
580 584
581 tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1], 585 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
582 key, 586 key, ip_hdr(skb)->daddr,
583 ip_hdr(skb)->daddr, 587 ip_hdr(skb)->saddr, &rep.th);
584 ip_hdr(skb)->saddr,
585 &rep.th, IPPROTO_TCP,
586 arg.iov[0].iov_len);
587 } 588 }
588#endif 589#endif
589 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 590 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
@@ -591,20 +592,21 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
591 sizeof(struct tcphdr), IPPROTO_TCP, 0); 592 sizeof(struct tcphdr), IPPROTO_TCP, 0);
592 arg.csumoffset = offsetof(struct tcphdr, check) / 2; 593 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
593 594
594 ip_send_reply(dev_net(skb->dst->dev)->ipv4.tcp_sock, skb, 595 net = dev_net(skb->dst->dev);
596 ip_send_reply(net->ipv4.tcp_sock, skb,
595 &arg, arg.iov[0].iov_len); 597 &arg, arg.iov[0].iov_len);
596 598
597 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); 599 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
598 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); 600 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
599} 601}
600 602
601/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states 603
602 outside socket context, is ugly, certainly. What can I do? 604
603 */ 605 */
604 606
605static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, 607static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
606 struct sk_buff *skb, u32 seq, u32 ack, 608 u32 win, u32 ts, int oif,
607 u32 win, u32 ts) 609 struct tcp_md5sig_key *key)
608{ 610{
609 struct tcphdr *th = tcp_hdr(skb); 611 struct tcphdr *th = tcp_hdr(skb);
610 struct { 612 struct {
@@ -616,10 +618,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
616 ]; 618 ];
617 } rep; 619 } rep;
618 struct ip_reply_arg arg; 620 struct ip_reply_arg arg;
619#ifdef CONFIG_TCP_MD5SIG 621 struct net *net = dev_net(skb->dev);
620 struct tcp_md5sig_key *key;
621 struct tcp_md5sig_key tw_key;
622#endif
623 622
624 memset(&rep.th, 0, sizeof(struct tcphdr)); 623 memset(&rep.th, 0, sizeof(struct tcphdr));
625 memset(&arg, 0, sizeof(arg)); 624 memset(&arg, 0, sizeof(arg));
@@ -645,23 +644,6 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
645 rep.th.window = htons(win); 644 rep.th.window = htons(win);
646 645
647#ifdef CONFIG_TCP_MD5SIG 646#ifdef CONFIG_TCP_MD5SIG
648 /*
649 * The SKB holds an incoming packet, but may not have a valid ->sk
650 * pointer. This is especially the case when we're dealing with a
651 * TIME_WAIT ack, because the sk structure is long gone, and only
652 * the tcp_timewait_sock remains. So the md5 key is stashed in that
653 * structure, and we use it in preference. I believe that (twsk ||
654 * skb->sk) holds true, but we program defensively.
655 */
656 if (!twsk && skb->sk) {
657 key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr);
658 } else if (twsk && twsk->tw_md5_keylen) {
659 tw_key.key = twsk->tw_md5_key;
660 tw_key.keylen = twsk->tw_md5_keylen;
661 key = &tw_key;
662 } else
663 key = NULL;
664
665 if (key) { 647 if (key) {
666 int offset = (ts) ? 3 : 0; 648 int offset = (ts) ? 3 : 0;
667 649
@@ -672,25 +654,22 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
672 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 654 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
673 rep.th.doff = arg.iov[0].iov_len/4; 655 rep.th.doff = arg.iov[0].iov_len/4;
674 656
675 tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset], 657 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
676 key, 658 key, ip_hdr(skb)->daddr,
677 ip_hdr(skb)->daddr, 659 ip_hdr(skb)->saddr, &rep.th);
678 ip_hdr(skb)->saddr,
679 &rep.th, IPPROTO_TCP,
680 arg.iov[0].iov_len);
681 } 660 }
682#endif 661#endif
683 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 662 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
684 ip_hdr(skb)->saddr, /* XXX */ 663 ip_hdr(skb)->saddr, /* XXX */
685 arg.iov[0].iov_len, IPPROTO_TCP, 0); 664 arg.iov[0].iov_len, IPPROTO_TCP, 0);
686 arg.csumoffset = offsetof(struct tcphdr, check) / 2; 665 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
687 if (twsk) 666 if (oif)
688 arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if; 667 arg.bound_dev_if = oif;
689 668
690 ip_send_reply(dev_net(skb->dev)->ipv4.tcp_sock, skb, 669 ip_send_reply(net->ipv4.tcp_sock, skb,
691 &arg, arg.iov[0].iov_len); 670 &arg, arg.iov[0].iov_len);
692 671
693 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); 672 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
694} 673}
695 674
696static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) 675static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -698,9 +677,12 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
698 struct inet_timewait_sock *tw = inet_twsk(sk); 677 struct inet_timewait_sock *tw = inet_twsk(sk);
699 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 678 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
700 679
701 tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 680 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
702 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 681 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
703 tcptw->tw_ts_recent); 682 tcptw->tw_ts_recent,
683 tw->tw_bound_dev_if,
684 tcp_twsk_md5_key(tcptw)
685 );
704 686
705 inet_twsk_put(tw); 687 inet_twsk_put(tw);
706} 688}
@@ -708,9 +690,11 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
708static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, 690static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
709 struct request_sock *req) 691 struct request_sock *req)
710{ 692{
711 tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, 693 tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
712 tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, 694 tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
713 req->ts_recent); 695 req->ts_recent,
696 0,
697 tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr));
714} 698}
715 699
716/* 700/*
@@ -1000,32 +984,13 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1000 newkey, cmd.tcpm_keylen); 984 newkey, cmd.tcpm_keylen);
1001} 985}
1002 986
1003static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, 987static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1004 __be32 saddr, __be32 daddr, 988 __be32 daddr, __be32 saddr, int nbytes)
1005 struct tcphdr *th, int protocol,
1006 unsigned int tcplen)
1007{ 989{
1008 struct scatterlist sg[4];
1009 __u16 data_len;
1010 int block = 0;
1011 __sum16 old_checksum;
1012 struct tcp_md5sig_pool *hp;
1013 struct tcp4_pseudohdr *bp; 990 struct tcp4_pseudohdr *bp;
1014 struct hash_desc *desc; 991 struct scatterlist sg;
1015 int err;
1016 unsigned int nbytes = 0;
1017
1018 /*
1019 * Okay, so RFC2385 is turned on for this connection,
1020 * so we need to generate the MD5 hash for the packet now.
1021 */
1022
1023 hp = tcp_get_md5sig_pool();
1024 if (!hp)
1025 goto clear_hash_noput;
1026 992
1027 bp = &hp->md5_blk.ip4; 993 bp = &hp->md5_blk.ip4;
1028 desc = &hp->md5_desc;
1029 994
1030 /* 995 /*
1031 * 1. the TCP pseudo-header (in the order: source IP address, 996 * 1. the TCP pseudo-header (in the order: source IP address,
@@ -1035,86 +1000,96 @@ static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
1035 bp->saddr = saddr; 1000 bp->saddr = saddr;
1036 bp->daddr = daddr; 1001 bp->daddr = daddr;
1037 bp->pad = 0; 1002 bp->pad = 0;
1038 bp->protocol = protocol; 1003 bp->protocol = IPPROTO_TCP;
1039 bp->len = htons(tcplen); 1004 bp->len = cpu_to_be16(nbytes);
1040
1041 sg_init_table(sg, 4);
1042
1043 sg_set_buf(&sg[block++], bp, sizeof(*bp));
1044 nbytes += sizeof(*bp);
1045
1046 /* 2. the TCP header, excluding options, and assuming a
1047 * checksum of zero.
1048 */
1049 old_checksum = th->check;
1050 th->check = 0;
1051 sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
1052 nbytes += sizeof(struct tcphdr);
1053 1005
1054 /* 3. the TCP segment data (if any) */ 1006 sg_init_one(&sg, bp, sizeof(*bp));
1055 data_len = tcplen - (th->doff << 2); 1007 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1056 if (data_len > 0) { 1008}
1057 unsigned char *data = (unsigned char *)th + (th->doff << 2);
1058 sg_set_buf(&sg[block++], data, data_len);
1059 nbytes += data_len;
1060 }
1061 1009
1062 /* 4. an independently-specified key or password, known to both 1010static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1063 * TCPs and presumably connection-specific 1011 __be32 daddr, __be32 saddr, struct tcphdr *th)
1064 */ 1012{
1065 sg_set_buf(&sg[block++], key->key, key->keylen); 1013 struct tcp_md5sig_pool *hp;
1066 nbytes += key->keylen; 1014 struct hash_desc *desc;
1067 1015
1068 sg_mark_end(&sg[block - 1]); 1016 hp = tcp_get_md5sig_pool();
1017 if (!hp)
1018 goto clear_hash_noput;
1019 desc = &hp->md5_desc;
1069 1020
1070 /* Now store the Hash into the packet */ 1021 if (crypto_hash_init(desc))
1071 err = crypto_hash_init(desc);
1072 if (err)
1073 goto clear_hash; 1022 goto clear_hash;
1074 err = crypto_hash_update(desc, sg, nbytes); 1023 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1075 if (err)
1076 goto clear_hash; 1024 goto clear_hash;
1077 err = crypto_hash_final(desc, md5_hash); 1025 if (tcp_md5_hash_header(hp, th))
1078 if (err) 1026 goto clear_hash;
1027 if (tcp_md5_hash_key(hp, key))
1028 goto clear_hash;
1029 if (crypto_hash_final(desc, md5_hash))
1079 goto clear_hash; 1030 goto clear_hash;
1080 1031
1081 /* Reset header, and free up the crypto */
1082 tcp_put_md5sig_pool(); 1032 tcp_put_md5sig_pool();
1083 th->check = old_checksum;
1084
1085out:
1086 return 0; 1033 return 0;
1034
1087clear_hash: 1035clear_hash:
1088 tcp_put_md5sig_pool(); 1036 tcp_put_md5sig_pool();
1089clear_hash_noput: 1037clear_hash_noput:
1090 memset(md5_hash, 0, 16); 1038 memset(md5_hash, 0, 16);
1091 goto out; 1039 return 1;
1092} 1040}
1093 1041
1094int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, 1042int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1095 struct sock *sk, 1043 struct sock *sk, struct request_sock *req,
1096 struct dst_entry *dst, 1044 struct sk_buff *skb)
1097 struct request_sock *req,
1098 struct tcphdr *th, int protocol,
1099 unsigned int tcplen)
1100{ 1045{
1046 struct tcp_md5sig_pool *hp;
1047 struct hash_desc *desc;
1048 struct tcphdr *th = tcp_hdr(skb);
1101 __be32 saddr, daddr; 1049 __be32 saddr, daddr;
1102 1050
1103 if (sk) { 1051 if (sk) {
1104 saddr = inet_sk(sk)->saddr; 1052 saddr = inet_sk(sk)->saddr;
1105 daddr = inet_sk(sk)->daddr; 1053 daddr = inet_sk(sk)->daddr;
1054 } else if (req) {
1055 saddr = inet_rsk(req)->loc_addr;
1056 daddr = inet_rsk(req)->rmt_addr;
1106 } else { 1057 } else {
1107 struct rtable *rt = (struct rtable *)dst; 1058 const struct iphdr *iph = ip_hdr(skb);
1108 BUG_ON(!rt); 1059 saddr = iph->saddr;
1109 saddr = rt->rt_src; 1060 daddr = iph->daddr;
1110 daddr = rt->rt_dst;
1111 } 1061 }
1112 return tcp_v4_do_calc_md5_hash(md5_hash, key, 1062
1113 saddr, daddr, 1063 hp = tcp_get_md5sig_pool();
1114 th, protocol, tcplen); 1064 if (!hp)
1065 goto clear_hash_noput;
1066 desc = &hp->md5_desc;
1067
1068 if (crypto_hash_init(desc))
1069 goto clear_hash;
1070
1071 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1072 goto clear_hash;
1073 if (tcp_md5_hash_header(hp, th))
1074 goto clear_hash;
1075 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1076 goto clear_hash;
1077 if (tcp_md5_hash_key(hp, key))
1078 goto clear_hash;
1079 if (crypto_hash_final(desc, md5_hash))
1080 goto clear_hash;
1081
1082 tcp_put_md5sig_pool();
1083 return 0;
1084
1085clear_hash:
1086 tcp_put_md5sig_pool();
1087clear_hash_noput:
1088 memset(md5_hash, 0, 16);
1089 return 1;
1115} 1090}
1116 1091
1117EXPORT_SYMBOL(tcp_v4_calc_md5_hash); 1092EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1118 1093
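Note: the rewrite above factors the RFC 2385 digest into reusable steps; the input order is unchanged from the deleted scatterlist version. A condensed sketch of the hashing sequence in tcp_v4_md5_hash_skb() (the tcp_md5_hash_header/_skb_data/_key helpers are assumed to be shared ones in net/ipv4/tcp.c, with the header hashed as if its checksum were zero):

	crypto_hash_init(desc);
	/* 1. pseudo-header: saddr, daddr, zero pad, IPPROTO_TCP, length */
	tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len);
	/* 2. the TCP header, checksum treated as zero */
	tcp_md5_hash_header(hp, th);
	/* 3. the payload, skipping header and options (th->doff << 2 bytes) */
	tcp_md5_hash_skb_data(hp, skb, th->doff << 2);
	/* 4. the connection's key */
	tcp_md5_hash_key(hp, key);
	crypto_hash_final(desc, md5_hash);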
1119static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) 1094static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1120{ 1095{
@@ -1130,52 +1105,12 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1130 struct tcp_md5sig_key *hash_expected; 1105 struct tcp_md5sig_key *hash_expected;
1131 const struct iphdr *iph = ip_hdr(skb); 1106 const struct iphdr *iph = ip_hdr(skb);
1132 struct tcphdr *th = tcp_hdr(skb); 1107 struct tcphdr *th = tcp_hdr(skb);
1133 int length = (th->doff << 2) - sizeof(struct tcphdr);
1134 int genhash; 1108 int genhash;
1135 unsigned char *ptr;
1136 unsigned char newhash[16]; 1109 unsigned char newhash[16];
1137 1110
1138 hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr); 1111 hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1112 hash_location = tcp_parse_md5sig_option(th);
1139 1113
1140 /*
1141 * If the TCP option length is less than the TCP_MD5SIG
1142 * option length, then we can shortcut
1143 */
1144 if (length < TCPOLEN_MD5SIG) {
1145 if (hash_expected)
1146 return 1;
1147 else
1148 return 0;
1149 }
1150
1151 /* Okay, we can't shortcut - we have to grub through the options */
1152 ptr = (unsigned char *)(th + 1);
1153 while (length > 0) {
1154 int opcode = *ptr++;
1155 int opsize;
1156
1157 switch (opcode) {
1158 case TCPOPT_EOL:
1159 goto done_opts;
1160 case TCPOPT_NOP:
1161 length--;
1162 continue;
1163 default:
1164 opsize = *ptr++;
1165 if (opsize < 2)
1166 goto done_opts;
1167 if (opsize > length)
1168 goto done_opts;
1169
1170 if (opcode == TCPOPT_MD5SIG) {
1171 hash_location = ptr;
1172 goto done_opts;
1173 }
1174 }
1175 ptr += opsize-2;
1176 length -= opsize;
1177 }
1178done_opts:
1179 /* We've parsed the options - do we have a hash? */ 1114 /* We've parsed the options - do we have a hash? */
1180 if (!hash_expected && !hash_location) 1115 if (!hash_expected && !hash_location)
1181 return 0; 1116 return 0;
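Note: with the open-coded option walk replaced by tcp_parse_md5sig_option(), the verification reduces to four cases (a hedged reconstruction; only the first case is visible in this hunk):

/*
 * key configured | option present | action
 * ---------------+----------------+-----------------------------------
 *       no       |       no       | accept -- MD5 not in use
 *       yes      |       no       | drop -- signature required
 *       no       |       yes      | drop -- unexpected signature
 *       yes      |       yes      | recompute and memcmp; drop on
 *                |                | mismatch (the genhash path below)
 */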
@@ -1199,11 +1134,9 @@ done_opts:
1199 /* Okay, so this is hash_expected and hash_location - 1134 /* Okay, so this is hash_expected and hash_location -
1200 * so we need to calculate the checksum. 1135 * so we need to calculate the checksum.
1201 */ 1136 */
1202 genhash = tcp_v4_do_calc_md5_hash(newhash, 1137 genhash = tcp_v4_md5_hash_skb(newhash,
1203 hash_expected, 1138 hash_expected,
1204 iph->saddr, iph->daddr, 1139 NULL, NULL, skb);
1205 th, sk->sk_protocol,
1206 skb->len);
1207 1140
1208 if (genhash || memcmp(hash_location, newhash, 16) != 0) { 1141 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1209 if (net_ratelimit()) { 1142 if (net_ratelimit()) {
@@ -1347,7 +1280,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1347 if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && 1280 if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1348 (s32)(peer->tcp_ts - req->ts_recent) > 1281 (s32)(peer->tcp_ts - req->ts_recent) >
1349 TCP_PAWS_WINDOW) { 1282 TCP_PAWS_WINDOW) {
1350 NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); 1283 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1351 goto drop_and_release; 1284 goto drop_and_release;
1352 } 1285 }
1353 } 1286 }
@@ -1452,6 +1385,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1452 if (newkey != NULL) 1385 if (newkey != NULL)
1453 tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr, 1386 tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
1454 newkey, key->keylen); 1387 newkey, key->keylen);
1388 newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1455 } 1389 }
1456#endif 1390#endif
1457 1391
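Note: here, and again in tcp_transmit_skb() in the tcp_output.c diff below, the patch strips the GSO bits from sk_route_caps whenever an MD5 key is attached. The signature covers the exact on-wire segment, so letting GSO re-segment the skb after the hash has been computed would invalidate every resulting segment:

	/* MD5 signs the final segment; don't let GSO re-slice it. */
	newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;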
@@ -1461,9 +1395,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1461 return newsk; 1395 return newsk;
1462 1396
1463exit_overflow: 1397exit_overflow:
1464 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); 1398 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1465exit: 1399exit:
1466 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); 1400 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1467 dst_release(dst); 1401 dst_release(dst);
1468 return NULL; 1402 return NULL;
1469} 1403}
@@ -1590,7 +1524,7 @@ discard:
1590 return 0; 1524 return 0;
1591 1525
1592csum_err: 1526csum_err:
1593 TCP_INC_STATS_BH(TCP_MIB_INERRS); 1527 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1594 goto discard; 1528 goto discard;
1595} 1529}
1596 1530
@@ -1604,12 +1538,13 @@ int tcp_v4_rcv(struct sk_buff *skb)
1604 struct tcphdr *th; 1538 struct tcphdr *th;
1605 struct sock *sk; 1539 struct sock *sk;
1606 int ret; 1540 int ret;
1541 struct net *net = dev_net(skb->dev);
1607 1542
1608 if (skb->pkt_type != PACKET_HOST) 1543 if (skb->pkt_type != PACKET_HOST)
1609 goto discard_it; 1544 goto discard_it;
1610 1545
1611 /* Count it even if it's bad */ 1546 /* Count it even if it's bad */
1612 TCP_INC_STATS_BH(TCP_MIB_INSEGS); 1547 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1613 1548
1614 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1549 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1615 goto discard_it; 1550 goto discard_it;
@@ -1638,7 +1573,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
1638 TCP_SKB_CB(skb)->flags = iph->tos; 1573 TCP_SKB_CB(skb)->flags = iph->tos;
1639 TCP_SKB_CB(skb)->sacked = 0; 1574 TCP_SKB_CB(skb)->sacked = 0;
1640 1575
1641 sk = __inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->saddr, 1576 sk = __inet_lookup(net, &tcp_hashinfo, iph->saddr,
1642 th->source, iph->daddr, th->dest, inet_iif(skb)); 1577 th->source, iph->daddr, th->dest, inet_iif(skb));
1643 if (!sk) 1578 if (!sk)
1644 goto no_tcp_socket; 1579 goto no_tcp_socket;
@@ -1685,7 +1620,7 @@ no_tcp_socket:
1685 1620
1686 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 1621 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1687bad_packet: 1622bad_packet:
1688 TCP_INC_STATS_BH(TCP_MIB_INERRS); 1623 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1689 } else { 1624 } else {
1690 tcp_v4_send_reset(NULL, skb); 1625 tcp_v4_send_reset(NULL, skb);
1691 } 1626 }
@@ -1706,7 +1641,7 @@ do_time_wait:
1706 } 1641 }
1707 1642
1708 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 1643 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1709 TCP_INC_STATS_BH(TCP_MIB_INERRS); 1644 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1710 inet_twsk_put(inet_twsk(sk)); 1645 inet_twsk_put(inet_twsk(sk));
1711 goto discard_it; 1646 goto discard_it;
1712 } 1647 }
@@ -1814,7 +1749,7 @@ struct inet_connection_sock_af_ops ipv4_specific = {
1814#ifdef CONFIG_TCP_MD5SIG 1749#ifdef CONFIG_TCP_MD5SIG
1815static struct tcp_sock_af_ops tcp_sock_ipv4_specific = { 1750static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1816 .md5_lookup = tcp_v4_md5_lookup, 1751 .md5_lookup = tcp_v4_md5_lookup,
1817 .calc_md5_hash = tcp_v4_calc_md5_hash, 1752 .calc_md5_hash = tcp_v4_md5_hash_skb,
1818 .md5_add = tcp_v4_md5_add_func, 1753 .md5_add = tcp_v4_md5_add_func,
1819 .md5_parse = tcp_v4_parse_md5_keys, 1754 .md5_parse = tcp_v4_parse_md5_keys,
1820}; 1755};
@@ -1871,7 +1806,7 @@ static int tcp_v4_init_sock(struct sock *sk)
1871 return 0; 1806 return 0;
1872} 1807}
1873 1808
1874int tcp_v4_destroy_sock(struct sock *sk) 1809void tcp_v4_destroy_sock(struct sock *sk)
1875{ 1810{
1876 struct tcp_sock *tp = tcp_sk(sk); 1811 struct tcp_sock *tp = tcp_sk(sk);
1877 1812
@@ -1915,8 +1850,6 @@ int tcp_v4_destroy_sock(struct sock *sk)
1915 } 1850 }
1916 1851
1917 atomic_dec(&tcp_sockets_allocated); 1852 atomic_dec(&tcp_sockets_allocated);
1918
1919 return 0;
1920} 1853}
1921 1854
1922EXPORT_SYMBOL(tcp_v4_destroy_sock); 1855EXPORT_SYMBOL(tcp_v4_destroy_sock);
@@ -1959,8 +1892,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
1959 req = req->dl_next; 1892 req = req->dl_next;
1960 while (1) { 1893 while (1) {
1961 while (req) { 1894 while (req) {
1962 if (req->rsk_ops->family == st->family && 1895 if (req->rsk_ops->family == st->family) {
1963 net_eq(sock_net(req->sk), net)) {
1964 cur = req; 1896 cur = req;
1965 goto out; 1897 goto out;
1966 } 1898 }
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 8245247a6ceb..204c42162660 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * Implementation of the Transmission Control Protocol(TCP). 6 * Implementation of the Transmission Control Protocol(TCP).
7 * 7 *
8 * Version: $Id: tcp_minisocks.c,v 1.15 2002/02/01 22:01:04 davem Exp $
9 *
10 * Authors: Ross Biro 8 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 10 * Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -246,7 +244,7 @@ kill:
246 } 244 }
247 245
248 if (paws_reject) 246 if (paws_reject)
249 NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); 247 NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED);
250 248
251 if (!th->rst) { 249 if (!th->rst) {
252 /* In this case we must reset the TIMEWAIT timer. 250 /* In this case we must reset the TIMEWAIT timer.
@@ -482,7 +480,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
482 newtp->rx_opt.mss_clamp = req->mss; 480 newtp->rx_opt.mss_clamp = req->mss;
483 TCP_ECN_openreq_child(newtp, req); 481 TCP_ECN_openreq_child(newtp, req);
484 482
485 TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS); 483 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
486 } 484 }
487 return newsk; 485 return newsk;
488} 486}
@@ -613,7 +611,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
613 if (!(flg & TCP_FLAG_RST)) 611 if (!(flg & TCP_FLAG_RST))
614 req->rsk_ops->send_ack(skb, req); 612 req->rsk_ops->send_ack(skb, req);
615 if (paws_reject) 613 if (paws_reject)
616 NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); 614 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
617 return NULL; 615 return NULL;
618 } 616 }
619 617
@@ -632,7 +630,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
632 * "fourth, check the SYN bit" 630 * "fourth, check the SYN bit"
633 */ 631 */
634 if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { 632 if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
635 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 633 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
636 goto embryonic_reset; 634 goto embryonic_reset;
637 } 635 }
638 636
@@ -697,7 +695,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
697 } 695 }
698 696
699 embryonic_reset: 697 embryonic_reset:
700 NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS); 698 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
701 if (!(flg & TCP_FLAG_RST)) 699 if (!(flg & TCP_FLAG_RST))
702 req->rsk_ops->send_reset(sk, skb); 700 req->rsk_ops->send_reset(sk, skb);
703 701
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ad993ecb4810..1fa683c0ba9b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * Implementation of the Transmission Control Protocol(TCP). 6 * Implementation of the Transmission Control Protocol(TCP).
7 * 7 *
8 * Version: $Id: tcp_output.c,v 1.146 2002/02/01 22:01:04 davem Exp $
9 *
10 * Authors: Ross Biro 8 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 10 * Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -347,28 +345,82 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
347 TCP_SKB_CB(skb)->end_seq = seq; 345 TCP_SKB_CB(skb)->end_seq = seq;
348} 346}
349 347
350static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, 348#define OPTION_SACK_ADVERTISE (1 << 0)
351 __u32 tstamp, __u8 **md5_hash) 349#define OPTION_TS (1 << 1)
352{ 350#define OPTION_MD5 (1 << 2)
353 if (tp->rx_opt.tstamp_ok) { 351
352struct tcp_out_options {
353 u8 options; /* bit field of OPTION_* */
354 u8 ws; /* window scale, 0 to disable */
355 u8 num_sack_blocks; /* number of SACK blocks to include */
356 u16 mss; /* 0 to disable */
357 __u32 tsval, tsecr; /* need to include OPTION_TS */
358};
359
360static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
361 const struct tcp_out_options *opts,
362 __u8 **md5_hash) {
363 if (unlikely(OPTION_MD5 & opts->options)) {
364 *ptr++ = htonl((TCPOPT_NOP << 24) |
365 (TCPOPT_NOP << 16) |
366 (TCPOPT_MD5SIG << 8) |
367 TCPOLEN_MD5SIG);
368 *md5_hash = (__u8 *)ptr;
369 ptr += 4;
370 } else {
371 *md5_hash = NULL;
372 }
373
374 if (likely(OPTION_TS & opts->options)) {
375 if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) {
376 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
377 (TCPOLEN_SACK_PERM << 16) |
378 (TCPOPT_TIMESTAMP << 8) |
379 TCPOLEN_TIMESTAMP);
380 } else {
381 *ptr++ = htonl((TCPOPT_NOP << 24) |
382 (TCPOPT_NOP << 16) |
383 (TCPOPT_TIMESTAMP << 8) |
384 TCPOLEN_TIMESTAMP);
385 }
386 *ptr++ = htonl(opts->tsval);
387 *ptr++ = htonl(opts->tsecr);
388 }
389
390 if (unlikely(opts->mss)) {
391 *ptr++ = htonl((TCPOPT_MSS << 24) |
392 (TCPOLEN_MSS << 16) |
393 opts->mss);
394 }
395
396 if (unlikely(OPTION_SACK_ADVERTISE & opts->options &&
397 !(OPTION_TS & opts->options))) {
354 *ptr++ = htonl((TCPOPT_NOP << 24) | 398 *ptr++ = htonl((TCPOPT_NOP << 24) |
355 (TCPOPT_NOP << 16) | 399 (TCPOPT_NOP << 16) |
356 (TCPOPT_TIMESTAMP << 8) | 400 (TCPOPT_SACK_PERM << 8) |
357 TCPOLEN_TIMESTAMP); 401 TCPOLEN_SACK_PERM);
358 *ptr++ = htonl(tstamp); 402 }
359 *ptr++ = htonl(tp->rx_opt.ts_recent); 403
404 if (unlikely(opts->ws)) {
405 *ptr++ = htonl((TCPOPT_NOP << 24) |
406 (TCPOPT_WINDOW << 16) |
407 (TCPOLEN_WINDOW << 8) |
408 opts->ws);
360 } 409 }
361 if (tp->rx_opt.eff_sacks) { 410
362 struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; 411 if (unlikely(opts->num_sack_blocks)) {
412 struct tcp_sack_block *sp = tp->rx_opt.dsack ?
413 tp->duplicate_sack : tp->selective_acks;
363 int this_sack; 414 int this_sack;
364 415
365 *ptr++ = htonl((TCPOPT_NOP << 24) | 416 *ptr++ = htonl((TCPOPT_NOP << 24) |
366 (TCPOPT_NOP << 16) | 417 (TCPOPT_NOP << 16) |
367 (TCPOPT_SACK << 8) | 418 (TCPOPT_SACK << 8) |
368 (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * 419 (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
369 TCPOLEN_SACK_PERBLOCK))); 420 TCPOLEN_SACK_PERBLOCK)));
370 421
371 for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { 422 for (this_sack = 0; this_sack < opts->num_sack_blocks;
423 ++this_sack) {
372 *ptr++ = htonl(sp[this_sack].start_seq); 424 *ptr++ = htonl(sp[this_sack].start_seq);
373 *ptr++ = htonl(sp[this_sack].end_seq); 425 *ptr++ = htonl(sp[this_sack].end_seq);
374 } 426 }
@@ -378,81 +430,137 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
378 tp->rx_opt.eff_sacks--; 430 tp->rx_opt.eff_sacks--;
379 } 431 }
380 } 432 }
433}
434
435static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
436 struct tcp_out_options *opts,
437 struct tcp_md5sig_key **md5) {
438 struct tcp_sock *tp = tcp_sk(sk);
439 unsigned size = 0;
440
381#ifdef CONFIG_TCP_MD5SIG 441#ifdef CONFIG_TCP_MD5SIG
382 if (md5_hash) { 442 *md5 = tp->af_specific->md5_lookup(sk, sk);
383 *ptr++ = htonl((TCPOPT_NOP << 24) | 443 if (*md5) {
384 (TCPOPT_NOP << 16) | 444 opts->options |= OPTION_MD5;
385 (TCPOPT_MD5SIG << 8) | 445 size += TCPOLEN_MD5SIG_ALIGNED;
386 TCPOLEN_MD5SIG);
387 *md5_hash = (__u8 *)ptr;
388 } 446 }
447#else
448 *md5 = NULL;
389#endif 449#endif
450
451 /* We always get an MSS option. The option bytes which will be seen in
452 * normal data packets should timestamps be used, must be in the MSS
453 * advertised. But we subtract them from tp->mss_cache so that
454 * calculations in tcp_sendmsg are simpler etc. So account for this
455 * fact here if necessary. If we don't do this correctly, as a
456 * receiver we won't recognize data packets as being full sized when we
457 * should, and thus we won't abide by the delayed ACK rules correctly.
458 * SACKs don't matter, we never delay an ACK when we have any of those
459 * going out. */
460 opts->mss = tcp_advertise_mss(sk);
461 size += TCPOLEN_MSS_ALIGNED;
462
463 if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
464 opts->options |= OPTION_TS;
465 opts->tsval = TCP_SKB_CB(skb)->when;
466 opts->tsecr = tp->rx_opt.ts_recent;
467 size += TCPOLEN_TSTAMP_ALIGNED;
468 }
469 if (likely(sysctl_tcp_window_scaling)) {
470 opts->ws = tp->rx_opt.rcv_wscale;
471 size += TCPOLEN_WSCALE_ALIGNED;
472 }
473 if (likely(sysctl_tcp_sack)) {
474 opts->options |= OPTION_SACK_ADVERTISE;
 475 if (unlikely(!(OPTION_TS & opts->options)))
476 size += TCPOLEN_SACKPERM_ALIGNED;
477 }
478
479 return size;
390} 480}
391 481
392/* Construct a tcp options header for a SYN or SYN_ACK packet. 482static unsigned tcp_synack_options(struct sock *sk,
393 * If this is ever changed make sure to change the definition of 483 static unsigned tcp_synack_options(struct sock *sk,
394 * MAX_SYN_SIZE to match the new maximum number of options that you 484 unsigned mss, struct sk_buff *skb,
395 * can generate. 485 struct tcp_out_options *opts,
396 * 486 struct tcp_md5sig_key **md5) {
397 * Note - that with the RFC2385 TCP option, we make room for the 487 unsigned size = 0;
398 * 16 byte MD5 hash. This will be filled in later, so the pointer for the 488 struct inet_request_sock *ireq = inet_rsk(req);
399 * location to be filled is passed back up. 489 char doing_ts;
400 */ 490
401static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
402 int offer_wscale, int wscale, __u32 tstamp,
403 __u32 ts_recent, __u8 **md5_hash)
404{
405 /* We always get an MSS option.
406 * The option bytes which will be seen in normal data
407 * packets should timestamps be used, must be in the MSS
408 * advertised. But we subtract them from tp->mss_cache so
409 * that calculations in tcp_sendmsg are simpler etc.
410 * So account for this fact here if necessary. If we
411 * don't do this correctly, as a receiver we won't
412 * recognize data packets as being full sized when we
413 * should, and thus we won't abide by the delayed ACK
414 * rules correctly.
415 * SACKs don't matter, we never delay an ACK when we
416 * have any of those going out.
417 */
418 *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
419 if (ts) {
420 if (sack)
421 *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
422 (TCPOLEN_SACK_PERM << 16) |
423 (TCPOPT_TIMESTAMP << 8) |
424 TCPOLEN_TIMESTAMP);
425 else
426 *ptr++ = htonl((TCPOPT_NOP << 24) |
427 (TCPOPT_NOP << 16) |
428 (TCPOPT_TIMESTAMP << 8) |
429 TCPOLEN_TIMESTAMP);
430 *ptr++ = htonl(tstamp); /* TSVAL */
431 *ptr++ = htonl(ts_recent); /* TSECR */
432 } else if (sack)
433 *ptr++ = htonl((TCPOPT_NOP << 24) |
434 (TCPOPT_NOP << 16) |
435 (TCPOPT_SACK_PERM << 8) |
436 TCPOLEN_SACK_PERM);
437 if (offer_wscale)
438 *ptr++ = htonl((TCPOPT_NOP << 24) |
439 (TCPOPT_WINDOW << 16) |
440 (TCPOLEN_WINDOW << 8) |
441 (wscale));
442#ifdef CONFIG_TCP_MD5SIG 491#ifdef CONFIG_TCP_MD5SIG
443 /* 492 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
444 * If MD5 is enabled, then we set the option, and include the size 493 if (*md5) {
445 * (always 18). The actual MD5 hash is added just before the 494 opts->options |= OPTION_MD5;
446 * packet is sent. 495 size += TCPOLEN_MD5SIG_ALIGNED;
447 */
448 if (md5_hash) {
449 *ptr++ = htonl((TCPOPT_NOP << 24) |
450 (TCPOPT_NOP << 16) |
451 (TCPOPT_MD5SIG << 8) |
452 TCPOLEN_MD5SIG);
453 *md5_hash = (__u8 *)ptr;
454 } 496 }
497#else
498 *md5 = NULL;
455#endif 499#endif
500
501 /* we can't fit any SACK blocks in a packet with MD5 + TS
502 options. There was discussion about disabling SACK rather than TS in
503 order to fit in better with old, buggy kernels, but that was deemed
504 to be unnecessary. */
505 doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok);
506
507 opts->mss = mss;
508 size += TCPOLEN_MSS_ALIGNED;
509
510 if (likely(ireq->wscale_ok)) {
511 opts->ws = ireq->rcv_wscale;
512 size += TCPOLEN_WSCALE_ALIGNED;
513 }
514 if (likely(doing_ts)) {
515 opts->options |= OPTION_TS;
516 opts->tsval = TCP_SKB_CB(skb)->when;
517 opts->tsecr = req->ts_recent;
518 size += TCPOLEN_TSTAMP_ALIGNED;
519 }
520 if (likely(ireq->sack_ok)) {
521 opts->options |= OPTION_SACK_ADVERTISE;
522 if (unlikely(!doing_ts))
523 size += TCPOLEN_SACKPERM_ALIGNED;
524 }
525
526 return size;
527}
528
529static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
530 struct tcp_out_options *opts,
531 struct tcp_md5sig_key **md5) {
532 struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
533 struct tcp_sock *tp = tcp_sk(sk);
534 unsigned size = 0;
535
536#ifdef CONFIG_TCP_MD5SIG
537 *md5 = tp->af_specific->md5_lookup(sk, sk);
538 if (unlikely(*md5)) {
539 opts->options |= OPTION_MD5;
540 size += TCPOLEN_MD5SIG_ALIGNED;
541 }
542#else
543 *md5 = NULL;
544#endif
545
546 if (likely(tp->rx_opt.tstamp_ok)) {
547 opts->options |= OPTION_TS;
548 opts->tsval = tcb ? tcb->when : 0;
549 opts->tsecr = tp->rx_opt.ts_recent;
550 size += TCPOLEN_TSTAMP_ALIGNED;
551 }
552
553 if (unlikely(tp->rx_opt.eff_sacks)) {
554 const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
555 opts->num_sack_blocks =
556 min_t(unsigned, tp->rx_opt.eff_sacks,
557 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
558 TCPOLEN_SACK_PERBLOCK);
559 size += TCPOLEN_SACK_BASE_ALIGNED +
560 opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
561 }
562
563 return size;
456} 564}
457 565
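Note: the hunk above replaces the two ad-hoc option builders with a describe-then-serialize pair -- tcp_syn_options()/tcp_synack_options()/tcp_established_options() fill a struct tcp_out_options and return its byte count, and tcp_options_write() emits the words. A sketch of the calling convention (illustrative fragment; the surrounding names mirror those used in tcp_transmit_skb() below):

	struct tcp_out_options opts;
	struct tcp_md5sig_key *md5;
	unsigned tcp_options_size;

	memset(&opts, 0, sizeof(opts));
	tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
	/* every tcp_*_options() result is 4-byte aligned, so doff is exact */
	th->doff = (sizeof(struct tcphdr) + tcp_options_size) >> 2;
	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);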
458/* This routine actually transmits TCP packets queued in by 566/* This routine actually transmits TCP packets queued in by
@@ -473,13 +581,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
473 struct inet_sock *inet; 581 struct inet_sock *inet;
474 struct tcp_sock *tp; 582 struct tcp_sock *tp;
475 struct tcp_skb_cb *tcb; 583 struct tcp_skb_cb *tcb;
476 int tcp_header_size; 584 struct tcp_out_options opts;
477#ifdef CONFIG_TCP_MD5SIG 585 unsigned tcp_options_size, tcp_header_size;
478 struct tcp_md5sig_key *md5; 586 struct tcp_md5sig_key *md5;
479 __u8 *md5_hash_location; 587 __u8 *md5_hash_location;
480#endif
481 struct tcphdr *th; 588 struct tcphdr *th;
482 int sysctl_flags;
483 int err; 589 int err;
484 590
485 BUG_ON(!skb || !tcp_skb_pcount(skb)); 591 BUG_ON(!skb || !tcp_skb_pcount(skb));
@@ -502,50 +608,18 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
502 inet = inet_sk(sk); 608 inet = inet_sk(sk);
503 tp = tcp_sk(sk); 609 tp = tcp_sk(sk);
504 tcb = TCP_SKB_CB(skb); 610 tcb = TCP_SKB_CB(skb);
505 tcp_header_size = tp->tcp_header_len; 611 memset(&opts, 0, sizeof(opts));
506
507#define SYSCTL_FLAG_TSTAMPS 0x1
508#define SYSCTL_FLAG_WSCALE 0x2
509#define SYSCTL_FLAG_SACK 0x4
510 612
511 sysctl_flags = 0; 613 if (unlikely(tcb->flags & TCPCB_FLAG_SYN))
512 if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { 614 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
513 tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; 615 else
514 if (sysctl_tcp_timestamps) { 616 tcp_options_size = tcp_established_options(sk, skb, &opts,
515 tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; 617 &md5);
516 sysctl_flags |= SYSCTL_FLAG_TSTAMPS; 618 tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
517 }
518 if (sysctl_tcp_window_scaling) {
519 tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
520 sysctl_flags |= SYSCTL_FLAG_WSCALE;
521 }
522 if (sysctl_tcp_sack) {
523 sysctl_flags |= SYSCTL_FLAG_SACK;
524 if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
525 tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
526 }
527 } else if (unlikely(tp->rx_opt.eff_sacks)) {
528 /* A SACK is 2 pad bytes, a 2 byte header, plus
529 * 2 32-bit sequence numbers for each SACK block.
530 */
531 tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
532 (tp->rx_opt.eff_sacks *
533 TCPOLEN_SACK_PERBLOCK));
534 }
535 619
536 if (tcp_packets_in_flight(tp) == 0) 620 if (tcp_packets_in_flight(tp) == 0)
537 tcp_ca_event(sk, CA_EVENT_TX_START); 621 tcp_ca_event(sk, CA_EVENT_TX_START);
538 622
539#ifdef CONFIG_TCP_MD5SIG
540 /*
541 * Are we doing MD5 on this segment? If so - make
542 * room for it.
543 */
544 md5 = tp->af_specific->md5_lookup(sk, sk);
545 if (md5)
546 tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
547#endif
548
549 skb_push(skb, tcp_header_size); 623 skb_push(skb, tcp_header_size);
550 skb_reset_transport_header(skb); 624 skb_reset_transport_header(skb);
551 skb_set_owner_w(skb, sk); 625 skb_set_owner_w(skb, sk);
@@ -576,39 +650,16 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
576 th->urg = 1; 650 th->urg = 1;
577 } 651 }
578 652
579 if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { 653 tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
580 tcp_syn_build_options((__be32 *)(th + 1), 654 if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0))
581 tcp_advertise_mss(sk),
582 (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
583 (sysctl_flags & SYSCTL_FLAG_SACK),
584 (sysctl_flags & SYSCTL_FLAG_WSCALE),
585 tp->rx_opt.rcv_wscale,
586 tcb->when,
587 tp->rx_opt.ts_recent,
588
589#ifdef CONFIG_TCP_MD5SIG
590 md5 ? &md5_hash_location :
591#endif
592 NULL);
593 } else {
594 tcp_build_and_update_options((__be32 *)(th + 1),
595 tp, tcb->when,
596#ifdef CONFIG_TCP_MD5SIG
597 md5 ? &md5_hash_location :
598#endif
599 NULL);
600 TCP_ECN_send(sk, skb, tcp_header_size); 655 TCP_ECN_send(sk, skb, tcp_header_size);
601 }
602 656
603#ifdef CONFIG_TCP_MD5SIG 657#ifdef CONFIG_TCP_MD5SIG
604 /* Calculate the MD5 hash, as we have all we need now */ 658 /* Calculate the MD5 hash, as we have all we need now */
605 if (md5) { 659 if (md5) {
660 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
606 tp->af_specific->calc_md5_hash(md5_hash_location, 661 tp->af_specific->calc_md5_hash(md5_hash_location,
607 md5, 662 md5, sk, NULL, skb);
608 sk, NULL, NULL,
609 tcp_hdr(skb),
610 sk->sk_protocol,
611 skb->len);
612 } 663 }
613#endif 664#endif
614 665
@@ -621,7 +672,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
621 tcp_event_data_sent(tp, skb, sk); 672 tcp_event_data_sent(tp, skb, sk);
622 673
623 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) 674 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
624 TCP_INC_STATS(TCP_MIB_OUTSEGS); 675 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
625 676
626 err = icsk->icsk_af_ops->queue_xmit(skb, 0); 677 err = icsk->icsk_af_ops->queue_xmit(skb, 0);
627 if (likely(err <= 0)) 678 if (likely(err <= 0))
@@ -630,10 +681,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
630 tcp_enter_cwr(sk, 1); 681 tcp_enter_cwr(sk, 1);
631 682
632 return net_xmit_eval(err); 683 return net_xmit_eval(err);
633
634#undef SYSCTL_FLAG_TSTAMPS
635#undef SYSCTL_FLAG_WSCALE
636#undef SYSCTL_FLAG_SACK
637} 684}
638 685
639/* This routine just queues the buffer 686
@@ -974,6 +1021,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
974 u32 mss_now; 1021 u32 mss_now;
975 u16 xmit_size_goal; 1022 u16 xmit_size_goal;
976 int doing_tso = 0; 1023 int doing_tso = 0;
1024 unsigned header_len;
1025 struct tcp_out_options opts;
1026 struct tcp_md5sig_key *md5;
977 1027
978 mss_now = tp->mss_cache; 1028 mss_now = tp->mss_cache;
979 1029
@@ -986,14 +1036,16 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
986 mss_now = tcp_sync_mss(sk, mtu); 1036 mss_now = tcp_sync_mss(sk, mtu);
987 } 1037 }
988 1038
989 if (tp->rx_opt.eff_sacks) 1039 header_len = tcp_established_options(sk, NULL, &opts, &md5) +
990 mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + 1040 sizeof(struct tcphdr);
991 (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)); 1041 /* The mss_cache is sized based on tp->tcp_header_len, which assumes
992 1042 * some common options. If this is an odd packet (because we have SACK
993#ifdef CONFIG_TCP_MD5SIG 1043 * blocks etc) then our calculated header_len will be different, and
994 if (tp->af_specific->md5_lookup(sk, sk)) 1044 * we have to adjust mss_now correspondingly */
995 mss_now -= TCPOLEN_MD5SIG_ALIGNED; 1045 if (header_len != tp->tcp_header_len) {
996#endif 1046 int delta = (int) header_len - tp->tcp_header_len;
1047 mss_now -= delta;
1048 }
997 1049
998 xmit_size_goal = mss_now; 1050 xmit_size_goal = mss_now;
999 1051
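The replacement above trades the old ad-hoc subtractions (eff_sacks, MD5) for a single computation: ask tcp_established_options() what this connection actually needs, then correct mss_now by the difference from the assumed tp->tcp_header_len. A minimal model of that correction, reusing the diff's names as plain parameters:

```c
/* Minimal model of the adjustment above: if this segment's real header
 * differs from the common-case size baked into tp->tcp_header_len,
 * shift mss_now by the difference so payload plus headers still fit
 * the path MTU. */
static unsigned int adjust_mss(unsigned int mss_now,
			       unsigned int header_len,      /* tcphdr + options actually needed */
			       unsigned int tcp_header_len)  /* tcphdr + commonly assumed options */
{
	int delta = (int)header_len - (int)tcp_header_len;
	return mss_now - delta;  /* delta can be negative, growing mss_now back */
}
```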
@@ -1913,7 +1965,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1913 1965
1914 if (err == 0) { 1966 if (err == 0) {
1915 /* Update global TCP statistics. */ 1967 /* Update global TCP statistics. */
1916 TCP_INC_STATS(TCP_MIB_RETRANSSEGS); 1968 TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
1917 1969
1918 tp->total_retrans++; 1970 tp->total_retrans++;
1919 1971
@@ -1988,14 +2040,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1988 2040
1989 if (sacked & TCPCB_LOST) { 2041 if (sacked & TCPCB_LOST) {
1990 if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { 2042 if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
2043 int mib_idx;
2044
1991 if (tcp_retransmit_skb(sk, skb)) { 2045 if (tcp_retransmit_skb(sk, skb)) {
1992 tp->retransmit_skb_hint = NULL; 2046 tp->retransmit_skb_hint = NULL;
1993 return; 2047 return;
1994 } 2048 }
1995 if (icsk->icsk_ca_state != TCP_CA_Loss) 2049 if (icsk->icsk_ca_state != TCP_CA_Loss)
1996 NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); 2050 mib_idx = LINUX_MIB_TCPFASTRETRANS;
1997 else 2051 else
1998 NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); 2052 mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
2053 NET_INC_STATS_BH(sock_net(sk), mib_idx);
1999 2054
2000 if (skb == tcp_write_queue_head(sk)) 2055 if (skb == tcp_write_queue_head(sk))
2001 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 2056 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
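This hunk introduces the pattern the whole series leans on: compute a mib_idx first, then make one NET_INC_STATS_BH() call that takes sock_net(sk), so the counter lands in the right network namespace. A self-contained model of the shape, with stand-ins for the kernel's LINUX_MIB_* ids and per-net SNMP storage:

```c
#include <stdatomic.h>

/* Stand-ins for LINUX_MIB_* ids and the per-namespace counter block. */
enum { MIB_TCPFASTRETRANS, MIB_TCPSLOWSTARTRETRANS, MIB_MAX };

struct net_model { atomic_long mibs[MIB_MAX]; };

static void count_retransmit(struct net_model *net, int ca_state_is_loss)
{
	int mib_idx = ca_state_is_loss ? MIB_TCPSLOWSTARTRETRANS
				       : MIB_TCPFASTRETRANS;
	atomic_fetch_add(&net->mibs[mib_idx], 1); /* one namespace-aware call site */
}
```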
@@ -2065,7 +2120,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
2065 inet_csk(sk)->icsk_rto, 2120 inet_csk(sk)->icsk_rto,
2066 TCP_RTO_MAX); 2121 TCP_RTO_MAX);
2067 2122
2068 NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); 2123 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFORWARDRETRANS);
2069 } 2124 }
2070} 2125}
2071 2126
@@ -2119,7 +2174,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2119 /* NOTE: No TCP options attached and we never retransmit this. */ 2174 /* NOTE: No TCP options attached and we never retransmit this. */
2120 skb = alloc_skb(MAX_TCP_HEADER, priority); 2175 skb = alloc_skb(MAX_TCP_HEADER, priority);
2121 if (!skb) { 2176 if (!skb) {
2122 NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); 2177 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2123 return; 2178 return;
2124 } 2179 }
2125 2180
@@ -2130,9 +2185,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2130 /* Send it off. */ 2185 /* Send it off. */
2131 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2186 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2132 if (tcp_transmit_skb(sk, skb, 0, priority)) 2187 if (tcp_transmit_skb(sk, skb, 0, priority))
2133 NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); 2188 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2134 2189
2135 TCP_INC_STATS(TCP_MIB_OUTRSTS); 2190 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
2136} 2191}
2137 2192
2138/* WARNING: This routine must only be called when we have already sent 2193/* WARNING: This routine must only be called when we have already sent
@@ -2180,11 +2235,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2180 struct tcp_sock *tp = tcp_sk(sk); 2235 struct tcp_sock *tp = tcp_sk(sk);
2181 struct tcphdr *th; 2236 struct tcphdr *th;
2182 int tcp_header_size; 2237 int tcp_header_size;
2238 struct tcp_out_options opts;
2183 struct sk_buff *skb; 2239 struct sk_buff *skb;
2184#ifdef CONFIG_TCP_MD5SIG
2185 struct tcp_md5sig_key *md5; 2240 struct tcp_md5sig_key *md5;
2186 __u8 *md5_hash_location; 2241 __u8 *md5_hash_location;
2187#endif
2188 2242
2189 skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); 2243 skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
2190 if (skb == NULL) 2244 if (skb == NULL)
@@ -2195,18 +2249,27 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2195 2249
2196 skb->dst = dst_clone(dst); 2250 skb->dst = dst_clone(dst);
2197 2251
2198 tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS + 2252 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
2199 (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) + 2253 __u8 rcv_wscale;
2200 (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) + 2254 /* Set this up on the first call only */
2201 /* SACK_PERM is in the place of NOP NOP of TS */ 2255 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
2202 ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0)); 2256 /* tcp_full_space because it is guaranteed to be the first packet */
2257 tcp_select_initial_window(tcp_full_space(sk),
2258 dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
2259 &req->rcv_wnd,
2260 &req->window_clamp,
2261 ireq->wscale_ok,
2262 &rcv_wscale);
2263 ireq->rcv_wscale = rcv_wscale;
2264 }
2265
2266 memset(&opts, 0, sizeof(opts));
2267 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2268 tcp_header_size = tcp_synack_options(sk, req,
2269 dst_metric(dst, RTAX_ADVMSS),
2270 skb, &opts, &md5) +
2271 sizeof(struct tcphdr);
2203 2272
2204#ifdef CONFIG_TCP_MD5SIG
2205 /* Are we doing MD5 on this segment? If so - make room for it */
2206 md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
2207 if (md5)
2208 tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
2209#endif
2210 skb_push(skb, tcp_header_size); 2273 skb_push(skb, tcp_header_size);
2211 skb_reset_transport_header(skb); 2274 skb_reset_transport_header(skb);
2212 2275
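Receive-window selection moves ahead of option generation here, and tcp_header_size is now derived from tcp_synack_options() plus the bare header rather than a hand-summed list of TCPOLEN_* terms. A quick, hedged sanity check of the sizing arithmetic (the 20-byte option total assumes the usual MSS + timestamp + window-scale set, with SACK-perm folded into the timestamp slot):

```c
#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int tcphdr_len = 20;  /* sizeof(struct tcphdr) */
	unsigned int opts_len = 20;    /* assumed: MSS(4) + TS(12) + wscale(4), aligned */
	unsigned int tcp_header_size = tcphdr_len + opts_len;

	/* doff is a 4-bit count of 32-bit words: the header must be
	 * 4-byte aligned and at most 60 bytes. */
	assert(tcp_header_size % 4 == 0 && tcp_header_size <= 60);
	printf("doff = %u\n", tcp_header_size >> 2);  /* prints 10 */
	return 0;
}
```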
@@ -2224,19 +2287,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2224 TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); 2287 TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
2225 th->seq = htonl(TCP_SKB_CB(skb)->seq); 2288 th->seq = htonl(TCP_SKB_CB(skb)->seq);
2226 th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); 2289 th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
2227 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
2228 __u8 rcv_wscale;
2229 /* Set this up on the first call only */
2230 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
2231 /* tcp_full_space because it is guaranteed to be the first packet */
2232 tcp_select_initial_window(tcp_full_space(sk),
2233 dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
2234 &req->rcv_wnd,
2235 &req->window_clamp,
2236 ireq->wscale_ok,
2237 &rcv_wscale);
2238 ireq->rcv_wscale = rcv_wscale;
2239 }
2240 2290
2241 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ 2291 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
2242 th->window = htons(min(req->rcv_wnd, 65535U)); 2292 th->window = htons(min(req->rcv_wnd, 65535U));
@@ -2245,29 +2295,15 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2245 TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); 2295 TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
2246 else 2296 else
2247#endif 2297#endif
2248 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2298 tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
2249 tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
2250 ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
2251 TCP_SKB_CB(skb)->when,
2252 req->ts_recent,
2253 (
2254#ifdef CONFIG_TCP_MD5SIG
2255 md5 ? &md5_hash_location :
2256#endif
2257 NULL)
2258 );
2259
2260 th->doff = (tcp_header_size >> 2); 2299 th->doff = (tcp_header_size >> 2);
2261 TCP_INC_STATS(TCP_MIB_OUTSEGS); 2300 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
2262 2301
2263#ifdef CONFIG_TCP_MD5SIG 2302#ifdef CONFIG_TCP_MD5SIG
2264 /* Okay, we have all we need - do the md5 hash if needed */ 2303 /* Okay, we have all we need - do the md5 hash if needed */
2265 if (md5) { 2304 if (md5) {
2266 tp->af_specific->calc_md5_hash(md5_hash_location, 2305 tp->af_specific->calc_md5_hash(md5_hash_location,
2267 md5, 2306 md5, NULL, req, skb);
2268 NULL, dst, req,
2269 tcp_hdr(skb), sk->sk_protocol,
2270 skb->len);
2271 } 2307 }
2272#endif 2308#endif
2273 2309
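The calc_md5_hash() call sites shrink from seven arguments to five: the dst, tcphdr pointer, protocol, and length drop out because they are all recoverable from sk/req and the skb. This is the signature the two calls imply, inferred from the call sites in this diff rather than copied from a header:

```c
/* Inferred from the two call sites above:
 *   (md5_hash_location, md5, sk, NULL, skb)   in tcp_transmit_skb()
 *   (md5_hash_location, md5, NULL, req, skb)  in tcp_make_synack()   */
struct sock; struct request_sock; struct sk_buff; struct tcp_md5sig_key;

typedef int (*tcp_md5_hash_fn)(char *md5_hash_location,
			       struct tcp_md5sig_key *key,
			       struct sock *sk,          /* established path */
			       struct request_sock *req, /* SYN-ACK path */
			       struct sk_buff *skb);
```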
@@ -2367,7 +2403,7 @@ int tcp_connect(struct sock *sk)
2367 */ 2403 */
2368 tp->snd_nxt = tp->write_seq; 2404 tp->snd_nxt = tp->write_seq;
2369 tp->pushed_seq = tp->write_seq; 2405 tp->pushed_seq = tp->write_seq;
2370 TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); 2406 TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
2371 2407
2372 /* Timer for repeating the SYN until an answer. */ 2408 /* Timer for repeating the SYN until an answer. */
2373 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 2409 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 63ed9d6830e7..328e0cf42b3c 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * Implementation of the Transmission Control Protocol(TCP). 6 * Implementation of the Transmission Control Protocol(TCP).
7 * 7 *
8 * Version: $Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $
9 *
10 * Authors: Ross Biro 8 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 10 * Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -50,7 +48,7 @@ static void tcp_write_err(struct sock *sk)
50 sk->sk_error_report(sk); 48 sk->sk_error_report(sk);
51 49
52 tcp_done(sk); 50 tcp_done(sk);
53 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT); 51 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
54} 52}
55 53
56/* Do not allow orphaned sockets to eat all our resources. 54/* Do not allow orphaned sockets to eat all our resources.
@@ -91,7 +89,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
91 if (do_reset) 89 if (do_reset)
92 tcp_send_active_reset(sk, GFP_ATOMIC); 90 tcp_send_active_reset(sk, GFP_ATOMIC);
93 tcp_done(sk); 91 tcp_done(sk);
94 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY); 92 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
95 return 1; 93 return 1;
96 } 94 }
97 return 0; 95 return 0;
@@ -181,7 +179,7 @@ static void tcp_delack_timer(unsigned long data)
181 if (sock_owned_by_user(sk)) { 179 if (sock_owned_by_user(sk)) {
182 /* Try again later. */ 180 /* Try again later. */
183 icsk->icsk_ack.blocked = 1; 181 icsk->icsk_ack.blocked = 1;
184 NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); 182 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
185 sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); 183 sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN);
186 goto out_unlock; 184 goto out_unlock;
187 } 185 }
@@ -200,7 +198,7 @@ static void tcp_delack_timer(unsigned long data)
200 if (!skb_queue_empty(&tp->ucopy.prequeue)) { 198 if (!skb_queue_empty(&tp->ucopy.prequeue)) {
201 struct sk_buff *skb; 199 struct sk_buff *skb;
202 200
203 NET_INC_STATS_BH(LINUX_MIB_TCPSCHEDULERFAILED); 201 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);
204 202
205 while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) 203 while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
206 sk->sk_backlog_rcv(sk, skb); 204 sk->sk_backlog_rcv(sk, skb);
@@ -220,7 +218,7 @@ static void tcp_delack_timer(unsigned long data)
220 icsk->icsk_ack.ato = TCP_ATO_MIN; 218 icsk->icsk_ack.ato = TCP_ATO_MIN;
221 } 219 }
222 tcp_send_ack(sk); 220 tcp_send_ack(sk);
223 NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); 221 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS);
224 } 222 }
225 TCP_CHECK_TIMER(sk); 223 TCP_CHECK_TIMER(sk);
226 224
@@ -328,24 +326,27 @@ static void tcp_retransmit_timer(struct sock *sk)
328 goto out; 326 goto out;
329 327
330 if (icsk->icsk_retransmits == 0) { 328 if (icsk->icsk_retransmits == 0) {
329 int mib_idx;
330
331 if (icsk->icsk_ca_state == TCP_CA_Disorder || 331 if (icsk->icsk_ca_state == TCP_CA_Disorder ||
332 icsk->icsk_ca_state == TCP_CA_Recovery) { 332 icsk->icsk_ca_state == TCP_CA_Recovery) {
333 if (tcp_is_sack(tp)) { 333 if (tcp_is_sack(tp)) {
334 if (icsk->icsk_ca_state == TCP_CA_Recovery) 334 if (icsk->icsk_ca_state == TCP_CA_Recovery)
335 NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL); 335 mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
336 else 336 else
337 NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES); 337 mib_idx = LINUX_MIB_TCPSACKFAILURES;
338 } else { 338 } else {
339 if (icsk->icsk_ca_state == TCP_CA_Recovery) 339 if (icsk->icsk_ca_state == TCP_CA_Recovery)
340 NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL); 340 mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
341 else 341 else
342 NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES); 342 mib_idx = LINUX_MIB_TCPRENOFAILURES;
343 } 343 }
344 } else if (icsk->icsk_ca_state == TCP_CA_Loss) { 344 } else if (icsk->icsk_ca_state == TCP_CA_Loss) {
345 NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES); 345 mib_idx = LINUX_MIB_TCPLOSSFAILURES;
346 } else { 346 } else {
347 NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS); 347 mib_idx = LINUX_MIB_TCPTIMEOUTS;
348 } 348 }
349 NET_INC_STATS_BH(sock_net(sk), mib_idx);
349 } 350 }
350 351
351 if (tcp_use_frto(sk)) { 352 if (tcp_use_frto(sk)) {
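The same mib_idx refactor lands in the retransmit timer. Pulled out as a pure function (with placeholder enums standing in for the kernel's LINUX_MIB_* values and congestion states), the selection above reads:

```c
/* Placeholder enums; the real ids and states are kernel-internal. */
enum {
	MIB_SACKRECOVERYFAIL, MIB_SACKFAILURES,
	MIB_RENORECOVERYFAIL, MIB_RENOFAILURES,
	MIB_LOSSFAILURES, MIB_TIMEOUTS,
};
enum ca_state { CA_OPEN, CA_DISORDER, CA_RECOVERY, CA_LOSS };

static int retransmit_fail_mib(enum ca_state s, int is_sack)
{
	if (s == CA_DISORDER || s == CA_RECOVERY) {
		if (is_sack)
			return s == CA_RECOVERY ? MIB_SACKRECOVERYFAIL
						: MIB_SACKFAILURES;
		return s == CA_RECOVERY ? MIB_RENORECOVERYFAIL
					: MIB_RENOFAILURES;
	}
	return s == CA_LOSS ? MIB_LOSSFAILURES : MIB_TIMEOUTS;
}
```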
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 56fcda3694ba..a751770947a3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -5,8 +5,6 @@
5 * 5 *
6 * The User Datagram Protocol (UDP). 6 * The User Datagram Protocol (UDP).
7 * 7 *
8 * Version: $Id: udp.c,v 1.102 2002/02/01 22:01:04 davem Exp $
9 *
10 * Authors: Ross Biro 8 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Arnt Gulbrandsen, <agulbra@nvg.unit.no> 10 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
@@ -110,9 +108,6 @@
110 * Snmp MIB for the UDP layer 108 * Snmp MIB for the UDP layer
111 */ 109 */
112 110
113DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
114EXPORT_SYMBOL(udp_statistics);
115
116DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; 111DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
117EXPORT_SYMBOL(udp_stats_in6); 112EXPORT_SYMBOL(udp_stats_in6);
118 113
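The global udp_statistics array goes away because the SNMP counters now live per network namespace. A hedged sketch of the replacement layout; the field names are guessed from the UDP_INC_STATS_*(net, ...) macros used throughout this diff, not taken from the netns headers:

```c
/* Illustrative only; names inferred from the macros above. */
struct udp_mib;  /* per-cpu counter block, as in the kernel */

struct netns_mib_model {
	struct udp_mib *udp_statistics;      /* replaces the deleted global here */
	struct udp_mib *udplite_statistics;  /* ditto, removed from udplite.c below */
};
```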
@@ -136,7 +131,7 @@ static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
136 struct sock *sk; 131 struct sock *sk;
137 struct hlist_node *node; 132 struct hlist_node *node;
138 133
139 sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) 134 sk_for_each(sk, node, &udptable[udp_hashfn(net, num)])
140 if (net_eq(sock_net(sk), net) && sk->sk_hash == num) 135 if (net_eq(sock_net(sk), net) && sk->sk_hash == num)
141 return 1; 136 return 1;
142 return 0; 137 return 0;
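Every `num & (UDP_HTABLE_SIZE - 1)` in this file becomes udp_hashfn(net, num). The helper itself is outside this diff; a plausible self-contained model of what it does, mixing a per-namespace value into the bucket index so that identical ports in different namespaces spread across the table:

```c
#define UDP_HTABLE_SIZE 128  /* assumed table size */

/* Stand-in for the kernel's per-namespace mixing value: anything
 * stable per namespace will do for the model. */
static unsigned int net_hash_mix_model(const void *net)
{
	return (unsigned int)(unsigned long)net;
}

static int udp_hashfn_model(const void *net, unsigned int num)
{
	/* Old code: num & (UDP_HTABLE_SIZE - 1).  Adding the per-netns
	 * value before masking separates identical ports by namespace. */
	return (num + net_hash_mix_model(net)) & (UDP_HTABLE_SIZE - 1);
}
```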
@@ -176,7 +171,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
176 for (i = 0; i < UDP_HTABLE_SIZE; i++) { 171 for (i = 0; i < UDP_HTABLE_SIZE; i++) {
177 int size = 0; 172 int size = 0;
178 173
179 head = &udptable[rover & (UDP_HTABLE_SIZE - 1)]; 174 head = &udptable[udp_hashfn(net, rover)];
180 if (hlist_empty(head)) 175 if (hlist_empty(head))
181 goto gotit; 176 goto gotit;
182 177
@@ -213,7 +208,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
213gotit: 208gotit:
214 snum = rover; 209 snum = rover;
215 } else { 210 } else {
216 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; 211 head = &udptable[udp_hashfn(net, snum)];
217 212
218 sk_for_each(sk2, node, head) 213 sk_for_each(sk2, node, head)
219 if (sk2->sk_hash == snum && 214 if (sk2->sk_hash == snum &&
@@ -229,7 +224,7 @@ gotit:
229 inet_sk(sk)->num = snum; 224 inet_sk(sk)->num = snum;
230 sk->sk_hash = snum; 225 sk->sk_hash = snum;
231 if (sk_unhashed(sk)) { 226 if (sk_unhashed(sk)) {
232 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; 227 head = &udptable[udp_hashfn(net, snum)];
233 sk_add_node(sk, head); 228 sk_add_node(sk, head);
234 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 229 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
235 } 230 }
@@ -266,7 +261,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
266 int badness = -1; 261 int badness = -1;
267 262
268 read_lock(&udp_hash_lock); 263 read_lock(&udp_hash_lock);
269 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { 264 sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
270 struct inet_sock *inet = inet_sk(sk); 265 struct inet_sock *inet = inet_sk(sk);
271 266
272 if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && 267 if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
@@ -356,11 +351,12 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
356 struct sock *sk; 351 struct sock *sk;
357 int harderr; 352 int harderr;
358 int err; 353 int err;
354 struct net *net = dev_net(skb->dev);
359 355
360 sk = __udp4_lib_lookup(dev_net(skb->dev), iph->daddr, uh->dest, 356 sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
361 iph->saddr, uh->source, skb->dev->ifindex, udptable); 357 iph->saddr, uh->source, skb->dev->ifindex, udptable);
362 if (sk == NULL) { 358 if (sk == NULL) {
363 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 359 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
364 return; /* No socket for error */ 360 return; /* No socket for error */
365 } 361 }
366 362
@@ -528,7 +524,8 @@ out:
528 up->len = 0; 524 up->len = 0;
529 up->pending = 0; 525 up->pending = 0;
530 if (!err) 526 if (!err)
531 UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); 527 UDP_INC_STATS_USER(sock_net(sk),
528 UDP_MIB_OUTDATAGRAMS, is_udplite);
532 return err; 529 return err;
533} 530}
534 531
@@ -656,11 +653,13 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
656 .uli_u = { .ports = 653 .uli_u = { .ports =
657 { .sport = inet->sport, 654 { .sport = inet->sport,
658 .dport = dport } } }; 655 .dport = dport } } };
656 struct net *net = sock_net(sk);
657
659 security_sk_classify_flow(sk, &fl); 658 security_sk_classify_flow(sk, &fl);
660 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1); 659 err = ip_route_output_flow(net, &rt, &fl, sk, 1);
661 if (err) { 660 if (err) {
662 if (err == -ENETUNREACH) 661 if (err == -ENETUNREACH)
663 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 662 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
664 goto out; 663 goto out;
665 } 664 }
666 665
@@ -727,7 +726,8 @@ out:
727 * seems like overkill. 726 * seems like overkill.
728 */ 727 */
729 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { 728 if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
730 UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); 729 UDP_INC_STATS_USER(sock_net(sk),
730 UDP_MIB_SNDBUFERRORS, is_udplite);
731 } 731 }
732 return err; 732 return err;
733 733
@@ -890,7 +890,8 @@ try_again:
890 goto out_free; 890 goto out_free;
891 891
892 if (!peeked) 892 if (!peeked)
893 UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); 893 UDP_INC_STATS_USER(sock_net(sk),
894 UDP_MIB_INDATAGRAMS, is_udplite);
894 895
895 sock_recv_timestamp(msg, sk, skb); 896 sock_recv_timestamp(msg, sk, skb);
896 897
@@ -919,7 +920,7 @@ out:
919csum_copy_err: 920csum_copy_err:
920 lock_sock(sk); 921 lock_sock(sk);
921 if (!skb_kill_datagram(sk, skb, flags)) 922 if (!skb_kill_datagram(sk, skb, flags))
922 UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); 923 UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
923 release_sock(sk); 924 release_sock(sk);
924 925
925 if (noblock) 926 if (noblock)
@@ -990,7 +991,8 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
990 991
991 ret = (*up->encap_rcv)(sk, skb); 992 ret = (*up->encap_rcv)(sk, skb);
992 if (ret <= 0) { 993 if (ret <= 0) {
993 UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, 994 UDP_INC_STATS_BH(sock_net(sk),
995 UDP_MIB_INDATAGRAMS,
994 is_udplite); 996 is_udplite);
995 return -ret; 997 return -ret;
996 } 998 }
@@ -1042,15 +1044,18 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1042 1044
1043 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { 1045 if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
1044 /* Note that an ENOMEM error is charged twice */ 1046 /* Note that an ENOMEM error is charged twice */
1045 if (rc == -ENOMEM) 1047 if (rc == -ENOMEM) {
1046 UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); 1048 UDP_INC_STATS_BH(sock_net(sk),
1049 UDP_MIB_RCVBUFERRORS, is_udplite);
1050 atomic_inc(&sk->sk_drops);
1051 }
1047 goto drop; 1052 goto drop;
1048 } 1053 }
1049 1054
1050 return 0; 1055 return 0;
1051 1056
1052drop: 1057drop:
1053 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); 1058 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1054 kfree_skb(skb); 1059 kfree_skb(skb);
1055 return -1; 1060 return -1;
1056} 1061}
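Two things happen in this hunk: the existing double charge on -ENOMEM (RCVBUFERRORS here, INERRORS again at the drop: label, as the original comment notes) is kept, and a per-socket sk_drops counter is added, which surfaces as the new "drops" column in /proc/net/udp further down. A minimal model of the accounting:

```c
#include <errno.h>

enum { MIB_RCVBUFERRORS, MIB_INERRORS, MIB_COUNT };

struct rx_counters {
	unsigned long mib[MIB_COUNT];
	unsigned long sk_drops;  /* the per-socket counter this diff adds */
};

static int queue_datagram(struct rx_counters *c, int rc /* sock_queue_rcv_skb() result */)
{
	if (rc < 0) {
		if (rc == -ENOMEM) {
			c->mib[MIB_RCVBUFERRORS]++;  /* first charge */
			c->sk_drops++;               /* new: also counted on the socket */
		}
		c->mib[MIB_INERRORS]++;  /* drop: label (the second charge) */
		return -1;
	}
	return 0;
}
```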
@@ -1061,7 +1066,7 @@ drop:
1061 * Note: called only from the BH handler context, 1066 * Note: called only from the BH handler context,
1062 * so we don't need to lock the hashes. 1067 * so we don't need to lock the hashes.
1063 */ 1068 */
1064static int __udp4_lib_mcast_deliver(struct sk_buff *skb, 1069static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
1065 struct udphdr *uh, 1070 struct udphdr *uh,
1066 __be32 saddr, __be32 daddr, 1071 __be32 saddr, __be32 daddr,
1067 struct hlist_head udptable[]) 1072 struct hlist_head udptable[])
@@ -1070,7 +1075,7 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
1070 int dif; 1075 int dif;
1071 1076
1072 read_lock(&udp_hash_lock); 1077 read_lock(&udp_hash_lock);
1073 sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); 1078 sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
1074 dif = skb->dev->ifindex; 1079 dif = skb->dev->ifindex;
1075 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); 1080 sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
1076 if (sk) { 1081 if (sk) {
@@ -1158,6 +1163,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1158 struct rtable *rt = (struct rtable*)skb->dst; 1163 struct rtable *rt = (struct rtable*)skb->dst;
1159 __be32 saddr = ip_hdr(skb)->saddr; 1164 __be32 saddr = ip_hdr(skb)->saddr;
1160 __be32 daddr = ip_hdr(skb)->daddr; 1165 __be32 daddr = ip_hdr(skb)->daddr;
1166 struct net *net = dev_net(skb->dev);
1161 1167
1162 /* 1168 /*
1163 * Validate the packet. 1169 * Validate the packet.
@@ -1180,9 +1186,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1180 goto csum_error; 1186 goto csum_error;
1181 1187
1182 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) 1188 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
1183 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); 1189 return __udp4_lib_mcast_deliver(net, skb, uh,
1190 saddr, daddr, udptable);
1184 1191
1185 sk = __udp4_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr, 1192 sk = __udp4_lib_lookup(net, saddr, uh->source, daddr,
1186 uh->dest, inet_iif(skb), udptable); 1193 uh->dest, inet_iif(skb), udptable);
1187 1194
1188 if (sk != NULL) { 1195 if (sk != NULL) {
@@ -1211,7 +1218,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1211 if (udp_lib_checksum_complete(skb)) 1218 if (udp_lib_checksum_complete(skb))
1212 goto csum_error; 1219 goto csum_error;
1213 1220
1214 UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); 1221 UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
1215 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 1222 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1216 1223
1217 /* 1224 /*
@@ -1245,7 +1252,7 @@ csum_error:
1245 ntohs(uh->dest), 1252 ntohs(uh->dest),
1246 ulen); 1253 ulen);
1247drop: 1254drop:
1248 UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); 1255 UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
1249 kfree_skb(skb); 1256 kfree_skb(skb);
1250 return 0; 1257 return 0;
1251} 1258}
@@ -1255,12 +1262,11 @@ int udp_rcv(struct sk_buff *skb)
1255 return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); 1262 return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
1256} 1263}
1257 1264
1258int udp_destroy_sock(struct sock *sk) 1265void udp_destroy_sock(struct sock *sk)
1259{ 1266{
1260 lock_sock(sk); 1267 lock_sock(sk);
1261 udp_flush_pending_frames(sk); 1268 udp_flush_pending_frames(sk);
1262 release_sock(sk); 1269 release_sock(sk);
1263 return 0;
1264} 1270}
1265 1271
1266/* 1272/*
@@ -1453,7 +1459,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1453 spin_lock_bh(&rcvq->lock); 1459 spin_lock_bh(&rcvq->lock);
1454 while ((skb = skb_peek(rcvq)) != NULL && 1460 while ((skb = skb_peek(rcvq)) != NULL &&
1455 udp_lib_checksum_complete(skb)) { 1461 udp_lib_checksum_complete(skb)) {
1456 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite); 1462 UDP_INC_STATS_BH(sock_net(sk),
1463 UDP_MIB_INERRORS, is_lite);
1457 __skb_unlink(skb, rcvq); 1464 __skb_unlink(skb, rcvq);
1458 kfree_skb(skb); 1465 kfree_skb(skb);
1459 } 1466 }
@@ -1629,12 +1636,13 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
1629 __u16 srcp = ntohs(inet->sport); 1636 __u16 srcp = ntohs(inet->sport);
1630 1637
1631 seq_printf(f, "%4d: %08X:%04X %08X:%04X" 1638 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
1632 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p%n", 1639 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
1633 bucket, src, srcp, dest, destp, sp->sk_state, 1640 bucket, src, srcp, dest, destp, sp->sk_state,
1634 atomic_read(&sp->sk_wmem_alloc), 1641 atomic_read(&sp->sk_wmem_alloc),
1635 atomic_read(&sp->sk_rmem_alloc), 1642 atomic_read(&sp->sk_rmem_alloc),
1636 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), 1643 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
1637 atomic_read(&sp->sk_refcnt), sp, len); 1644 atomic_read(&sp->sk_refcnt), sp,
1645 atomic_read(&sp->sk_drops), len);
1638} 1646}
1639 1647
1640int udp4_seq_show(struct seq_file *seq, void *v) 1648int udp4_seq_show(struct seq_file *seq, void *v)
@@ -1643,7 +1651,7 @@ int udp4_seq_show(struct seq_file *seq, void *v)
1643 seq_printf(seq, "%-127s\n", 1651 seq_printf(seq, "%-127s\n",
1644 " sl local_address rem_address st tx_queue " 1652 " sl local_address rem_address st tx_queue "
1645 "rx_queue tr tm->when retrnsmt uid timeout " 1653 "rx_queue tr tm->when retrnsmt uid timeout "
1646 "inode"); 1654 "inode ref pointer drops");
1647 else { 1655 else {
1648 struct udp_iter_state *state = seq->private; 1656 struct udp_iter_state *state = seq->private;
1649 int len; 1657 int len;
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 7288bf7977fb..2e9bad2fa1bc 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -26,7 +26,7 @@ extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
26extern int udp_sendpage(struct sock *sk, struct page *page, int offset, 26extern int udp_sendpage(struct sock *sk, struct page *page, int offset,
27 size_t size, int flags); 27 size_t size, int flags);
28extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); 28extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb);
29extern int udp_destroy_sock(struct sock *sk); 29extern void udp_destroy_sock(struct sock *sk);
30 30
31#ifdef CONFIG_PROC_FS 31#ifdef CONFIG_PROC_FS
32extern int udp4_seq_show(struct seq_file *seq, void *v); 32extern int udp4_seq_show(struct seq_file *seq, void *v);
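udp_destroy_sock() loses its never-checked return value, and the prototype here changes to match. That lines up with the proto ->destroy hook being invoked without using a result; a minimal model of the hook after the change:

```c
struct sock;

/* Model of the hook table after this series: the result of ->destroy
 * was never consumed by callers, hence the int -> void change here and
 * in udp.c above. */
struct proto_model {
	void (*destroy)(struct sock *sk);  /* was int */
};
```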
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 72ce26b6c4d3..3c807964da96 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * UDPLITE An implementation of the UDP-Lite protocol (RFC 3828). 2 * UDPLITE An implementation of the UDP-Lite protocol (RFC 3828).
3 * 3 *
4 * Version: $Id: udplite.c,v 1.25 2006/10/19 07:22:36 gerrit Exp $
5 *
6 * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk> 4 * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk>
7 * 5 *
8 * Changes: 6 * Changes:
@@ -13,7 +11,6 @@
13 * 2 of the License, or (at your option) any later version. 11 * 2 of the License, or (at your option) any later version.
14 */ 12 */
15#include "udp_impl.h" 13#include "udp_impl.h"
16DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly;
17 14
18struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; 15struct hlist_head udplite_hash[UDP_HTABLE_SIZE];
19 16