about summary refs log tree commit diff stats
path: root/net/ipv4
diff options
context:
space:
mode:
author Patrick McHardy <kaber@trash.net> 2010-04-20 10:02:01 -0400
committer Patrick McHardy <kaber@trash.net> 2010-04-20 10:02:01 -0400
commit 62910554656cdcd6b6f84a5154c4155aae4ca231 (patch)
tree dcf14004f6fd2ef7154362ff948bfeba0f3ea92d /net/ipv4
parent 22265a5c3c103cf8c50be62e6c90d045eb649e6d (diff)
parent ab9304717f7624c41927f442e6b6d418b2d8b3e4 (diff)
Merge branch 'master' of /repos/git/net-next-2.6
Conflicts:
	Documentation/feature-removal-schedule.txt
	net/ipv6/netfilter/ip6t_REJECT.c
	net/netfilter/xt_limit.c

Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/Kconfig 14
-rw-r--r-- net/ipv4/af_inet.c 41
-rw-r--r-- net/ipv4/ah4.c 1
-rw-r--r-- net/ipv4/arp.c 1
-rw-r--r-- net/ipv4/cipso_ipv4.c 1
-rw-r--r-- net/ipv4/devinet.c 7
-rw-r--r-- net/ipv4/fib_frontend.c 1
-rw-r--r-- net/ipv4/fib_hash.c 1
-rw-r--r-- net/ipv4/fib_rules.c 22
-rw-r--r-- net/ipv4/fib_semantics.c 1
-rw-r--r-- net/ipv4/fib_trie.c 5
-rw-r--r-- net/ipv4/icmp.c 6
-rw-r--r-- net/ipv4/igmp.c 5
-rw-r--r-- net/ipv4/inet_diag.c 1
-rw-r--r-- net/ipv4/inet_fragment.c 1
-rw-r--r-- net/ipv4/inet_timewait_sock.c 1
-rw-r--r-- net/ipv4/ip_forward.c 1
-rw-r--r-- net/ipv4/ip_fragment.c 1
-rw-r--r-- net/ipv4/ip_gre.c 5
-rw-r--r-- net/ipv4/ip_input.c 1
-rw-r--r-- net/ipv4/ip_options.c 1
-rw-r--r-- net/ipv4/ip_output.c 5
-rw-r--r-- net/ipv4/ip_sockglue.c 5
-rw-r--r-- net/ipv4/ipconfig.c 3
-rw-r--r-- net/ipv4/ipip.c 1
-rw-r--r-- net/ipv4/ipmr.c 829
-rw-r--r-- net/ipv4/netfilter.c 1
-rw-r--r-- net/ipv4/netfilter/arptable_filter.c 1
-rw-r--r-- net/ipv4/netfilter/ip_queue.c 1
-rw-r--r-- net/ipv4/netfilter/ipt_CLUSTERIP.c 5
-rw-r--r-- net/ipv4/netfilter/ipt_REJECT.c 1
-rw-r--r-- net/ipv4/netfilter/ipt_ULOG.c 1
-rw-r--r-- net/ipv4/netfilter/iptable_filter.c 1
-rw-r--r-- net/ipv4/netfilter/iptable_mangle.c 1
-rw-r--r-- net/ipv4/netfilter/iptable_raw.c 1
-rw-r--r-- net/ipv4/netfilter/iptable_security.c 1
-rw-r--r-- net/ipv4/netfilter/nf_nat_core.c 1
-rw-r--r-- net/ipv4/netfilter/nf_nat_helper.c 1
-rw-r--r-- net/ipv4/netfilter/nf_nat_rule.c 1
-rw-r--r-- net/ipv4/netfilter/nf_nat_snmp_basic.c 1
-rw-r--r-- net/ipv4/netfilter/nf_nat_standalone.c 1
-rw-r--r-- net/ipv4/proc.c 1
-rw-r--r-- net/ipv4/raw.c 1
-rw-r--r-- net/ipv4/route.c 39
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c 1
-rw-r--r-- net/ipv4/tcp.c 67
-rw-r--r-- net/ipv4/tcp_cong.c 1
-rw-r--r-- net/ipv4/tcp_input.c 10
-rw-r--r-- net/ipv4/tcp_ipv4.c 39
-rw-r--r-- net/ipv4/tcp_minisocks.c 2
-rw-r--r-- net/ipv4/tcp_output.c 6
-rw-r--r-- net/ipv4/tcp_probe.c 1
-rw-r--r-- net/ipv4/tcp_timer.c 5
-rw-r--r-- net/ipv4/tunnel4.c 1
-rw-r--r-- net/ipv4/udp.c 12
-rw-r--r-- net/ipv4/xfrm4_input.c 1
-rw-r--r-- net/ipv4/xfrm4_mode_tunnel.c 1
-rw-r--r-- net/ipv4/xfrm4_policy.c 22
58 files changed, 798 insertions, 392 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index c9a1c68767ff..8e3a1fd938ab 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -250,6 +250,20 @@ config IP_MROUTE
250 <file:Documentation/networking/multicast.txt>. If you haven't heard 250 <file:Documentation/networking/multicast.txt>. If you haven't heard
251 about it, you don't need it. 251 about it, you don't need it.
252 252
253config IP_MROUTE_MULTIPLE_TABLES
254 bool "IP: multicast policy routing"
255 depends on IP_MROUTE && IP_ADVANCED_ROUTER
256 select FIB_RULES
257 help
258 Normally, a multicast router runs a userspace daemon and decides
259 what to do with a multicast packet based on the source and
260 destination addresses. If you say Y here, the multicast router
261 will also be able to take interfaces and packet marks into
262 account and run multiple instances of userspace daemons
263 simultaneously, each one handling a single table.
264
265 If unsure, say N.
266
253config IP_PIMSM_V1 267config IP_PIMSM_V1
254 bool "IP: PIM-SM version 1 support" 268 bool "IP: PIM-SM version 1 support"
255 depends on IP_MROUTE 269 depends on IP_MROUTE
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 33b7dffa7732..c5376c725503 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -86,6 +86,7 @@
86#include <linux/poll.h> 86#include <linux/poll.h>
87#include <linux/netfilter_ipv4.h> 87#include <linux/netfilter_ipv4.h>
88#include <linux/random.h> 88#include <linux/random.h>
89#include <linux/slab.h>
89 90
90#include <asm/uaccess.h> 91#include <asm/uaccess.h>
91#include <asm/system.h> 92#include <asm/system.h>
@@ -153,7 +154,7 @@ void inet_sock_destruct(struct sock *sk)
153 WARN_ON(sk->sk_forward_alloc); 154 WARN_ON(sk->sk_forward_alloc);
154 155
155 kfree(inet->opt); 156 kfree(inet->opt);
156 dst_release(sk->sk_dst_cache); 157 dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
157 sk_refcnt_debug_dec(sk); 158 sk_refcnt_debug_dec(sk);
158} 159}
159EXPORT_SYMBOL(inet_sock_destruct); 160EXPORT_SYMBOL(inet_sock_destruct);
@@ -418,6 +419,8 @@ int inet_release(struct socket *sock)
418 if (sk) { 419 if (sk) {
419 long timeout; 420 long timeout;
420 421
422 inet_rps_reset_flow(sk);
423
421 /* Applications forget to leave groups before exiting */ 424 /* Applications forget to leave groups before exiting */
422 ip_mc_drop_socket(sk); 425 ip_mc_drop_socket(sk);
423 426
@@ -530,6 +533,8 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
530{ 533{
531 struct sock *sk = sock->sk; 534 struct sock *sk = sock->sk;
532 535
536 if (addr_len < sizeof(uaddr->sa_family))
537 return -EINVAL;
533 if (uaddr->sa_family == AF_UNSPEC) 538 if (uaddr->sa_family == AF_UNSPEC)
534 return sk->sk_prot->disconnect(sk, flags); 539 return sk->sk_prot->disconnect(sk, flags);
535 540
@@ -573,6 +578,9 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
573 int err; 578 int err;
574 long timeo; 579 long timeo;
575 580
581 if (addr_len < sizeof(uaddr->sa_family))
582 return -EINVAL;
583
576 lock_sock(sk); 584 lock_sock(sk);
577 585
578 if (uaddr->sa_family == AF_UNSPEC) { 586 if (uaddr->sa_family == AF_UNSPEC) {
@@ -714,6 +722,8 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
714{ 722{
715 struct sock *sk = sock->sk; 723 struct sock *sk = sock->sk;
716 724
725 inet_rps_record_flow(sk);
726
717 /* We may need to bind the socket. */ 727 /* We may need to bind the socket. */
718 if (!inet_sk(sk)->inet_num && inet_autobind(sk)) 728 if (!inet_sk(sk)->inet_num && inet_autobind(sk))
719 return -EAGAIN; 729 return -EAGAIN;
@@ -722,12 +732,13 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
722} 732}
723EXPORT_SYMBOL(inet_sendmsg); 733EXPORT_SYMBOL(inet_sendmsg);
724 734
725
726static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, 735static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
727 size_t size, int flags) 736 size_t size, int flags)
728{ 737{
729 struct sock *sk = sock->sk; 738 struct sock *sk = sock->sk;
730 739
740 inet_rps_record_flow(sk);
741
731 /* We may need to bind the socket. */ 742 /* We may need to bind the socket. */
732 if (!inet_sk(sk)->inet_num && inet_autobind(sk)) 743 if (!inet_sk(sk)->inet_num && inet_autobind(sk))
733 return -EAGAIN; 744 return -EAGAIN;
@@ -737,6 +748,22 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
737 return sock_no_sendpage(sock, page, offset, size, flags); 748 return sock_no_sendpage(sock, page, offset, size, flags);
738} 749}
739 750
751int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
752 size_t size, int flags)
753{
754 struct sock *sk = sock->sk;
755 int addr_len = 0;
756 int err;
757
758 inet_rps_record_flow(sk);
759
760 err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
761 flags & ~MSG_DONTWAIT, &addr_len);
762 if (err >= 0)
763 msg->msg_namelen = addr_len;
764 return err;
765}
766EXPORT_SYMBOL(inet_recvmsg);
740 767
741int inet_shutdown(struct socket *sock, int how) 768int inet_shutdown(struct socket *sock, int how)
742{ 769{
@@ -866,7 +893,7 @@ const struct proto_ops inet_stream_ops = {
866 .setsockopt = sock_common_setsockopt, 893 .setsockopt = sock_common_setsockopt,
867 .getsockopt = sock_common_getsockopt, 894 .getsockopt = sock_common_getsockopt,
868 .sendmsg = tcp_sendmsg, 895 .sendmsg = tcp_sendmsg,
869 .recvmsg = sock_common_recvmsg, 896 .recvmsg = inet_recvmsg,
870 .mmap = sock_no_mmap, 897 .mmap = sock_no_mmap,
871 .sendpage = tcp_sendpage, 898 .sendpage = tcp_sendpage,
872 .splice_read = tcp_splice_read, 899 .splice_read = tcp_splice_read,
@@ -893,7 +920,7 @@ const struct proto_ops inet_dgram_ops = {
893 .setsockopt = sock_common_setsockopt, 920 .setsockopt = sock_common_setsockopt,
894 .getsockopt = sock_common_getsockopt, 921 .getsockopt = sock_common_getsockopt,
895 .sendmsg = inet_sendmsg, 922 .sendmsg = inet_sendmsg,
896 .recvmsg = sock_common_recvmsg, 923 .recvmsg = inet_recvmsg,
897 .mmap = sock_no_mmap, 924 .mmap = sock_no_mmap,
898 .sendpage = inet_sendpage, 925 .sendpage = inet_sendpage,
899#ifdef CONFIG_COMPAT 926#ifdef CONFIG_COMPAT
@@ -923,7 +950,7 @@ static const struct proto_ops inet_sockraw_ops = {
923 .setsockopt = sock_common_setsockopt, 950 .setsockopt = sock_common_setsockopt,
924 .getsockopt = sock_common_getsockopt, 951 .getsockopt = sock_common_getsockopt,
925 .sendmsg = inet_sendmsg, 952 .sendmsg = inet_sendmsg,
926 .recvmsg = sock_common_recvmsg, 953 .recvmsg = inet_recvmsg,
927 .mmap = sock_no_mmap, 954 .mmap = sock_no_mmap,
928 .sendpage = inet_sendpage, 955 .sendpage = inet_sendpage,
929#ifdef CONFIG_COMPAT 956#ifdef CONFIG_COMPAT
@@ -1401,10 +1428,10 @@ EXPORT_SYMBOL_GPL(snmp_fold_field);
1401int snmp_mib_init(void __percpu *ptr[2], size_t mibsize) 1428int snmp_mib_init(void __percpu *ptr[2], size_t mibsize)
1402{ 1429{
1403 BUG_ON(ptr == NULL); 1430 BUG_ON(ptr == NULL);
1404 ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); 1431 ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long));
1405 if (!ptr[0]) 1432 if (!ptr[0])
1406 goto err0; 1433 goto err0;
1407 ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); 1434 ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long));
1408 if (!ptr[1]) 1435 if (!ptr[1])
1409 goto err1; 1436 goto err1;
1410 return 0; 1437 return 0;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 987b47dc69ad..880a5ec6dce0 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -1,6 +1,7 @@
1#include <crypto/hash.h> 1#include <crypto/hash.h>
2#include <linux/err.h> 2#include <linux/err.h>
3#include <linux/module.h> 3#include <linux/module.h>
4#include <linux/slab.h>
4#include <net/ip.h> 5#include <net/ip.h>
5#include <net/xfrm.h> 6#include <net/xfrm.h>
6#include <net/ah.h> 7#include <net/ah.h>
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index c4dd13542802..6e747065c202 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -98,6 +98,7 @@
98#include <linux/net.h> 98#include <linux/net.h>
99#include <linux/rcupdate.h> 99#include <linux/rcupdate.h>
100#include <linux/jhash.h> 100#include <linux/jhash.h>
101#include <linux/slab.h>
101#ifdef CONFIG_SYSCTL 102#ifdef CONFIG_SYSCTL
102#include <linux/sysctl.h> 103#include <linux/sysctl.h>
103#endif 104#endif
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 1e029dc75455..c97cd9ff697e 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -44,6 +44,7 @@
44#include <linux/string.h> 44#include <linux/string.h>
45#include <linux/jhash.h> 45#include <linux/jhash.h>
46#include <linux/audit.h> 46#include <linux/audit.h>
47#include <linux/slab.h>
47#include <net/ip.h> 48#include <net/ip.h>
48#include <net/icmp.h> 49#include <net/icmp.h>
49#include <net/tcp.h> 50#include <net/tcp.h>
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 51ca946e3392..382bc768ed56 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -50,6 +50,7 @@
50#include <linux/notifier.h> 50#include <linux/notifier.h>
51#include <linux/inetdevice.h> 51#include <linux/inetdevice.h>
52#include <linux/igmp.h> 52#include <linux/igmp.h>
53#include <linux/slab.h>
53#ifdef CONFIG_SYSCTL 54#ifdef CONFIG_SYSCTL
54#include <linux/sysctl.h> 55#include <linux/sysctl.h>
55#endif 56#endif
@@ -1095,10 +1096,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1095 case NETDEV_DOWN: 1096 case NETDEV_DOWN:
1096 ip_mc_down(in_dev); 1097 ip_mc_down(in_dev);
1097 break; 1098 break;
1098 case NETDEV_BONDING_OLDTYPE: 1099 case NETDEV_PRE_TYPE_CHANGE:
1099 ip_mc_unmap(in_dev); 1100 ip_mc_unmap(in_dev);
1100 break; 1101 break;
1101 case NETDEV_BONDING_NEWTYPE: 1102 case NETDEV_POST_TYPE_CHANGE:
1102 ip_mc_remap(in_dev); 1103 ip_mc_remap(in_dev);
1103 break; 1104 break;
1104 case NETDEV_CHANGEMTU: 1105 case NETDEV_CHANGEMTU:
@@ -1194,7 +1195,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1194 hlist_for_each_entry_rcu(dev, node, head, index_hlist) { 1195 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1195 if (idx < s_idx) 1196 if (idx < s_idx)
1196 goto cont; 1197 goto cont;
1197 if (idx > s_idx) 1198 if (h > s_h || idx > s_idx)
1198 s_ip_idx = 0; 1199 s_ip_idx = 0;
1199 in_dev = __in_dev_get_rcu(dev); 1200 in_dev = __in_dev_get_rcu(dev);
1200 if (!in_dev) 1201 if (!in_dev)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 9b3e28ed5240..4f0ed458c883 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -34,6 +34,7 @@
34#include <linux/skbuff.h> 34#include <linux/skbuff.h>
35#include <linux/init.h> 35#include <linux/init.h>
36#include <linux/list.h> 36#include <linux/list.h>
37#include <linux/slab.h>
37 38
38#include <net/ip.h> 39#include <net/ip.h>
39#include <net/protocol.h> 40#include <net/protocol.h>
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 14972017b9c2..4ed7e0dea1bc 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -32,6 +32,7 @@
32#include <linux/skbuff.h> 32#include <linux/skbuff.h>
33#include <linux/netlink.h> 33#include <linux/netlink.h>
34#include <linux/init.h> 34#include <linux/init.h>
35#include <linux/slab.h>
35 36
36#include <net/net_namespace.h> 37#include <net/net_namespace.h>
37#include <net/ip.h> 38#include <net/ip.h>
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index ca2d07b1c706..3ec84fea5b71 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -213,7 +213,6 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
213{ 213{
214 struct fib4_rule *rule4 = (struct fib4_rule *) rule; 214 struct fib4_rule *rule4 = (struct fib4_rule *) rule;
215 215
216 frh->family = AF_INET;
217 frh->dst_len = rule4->dst_len; 216 frh->dst_len = rule4->dst_len;
218 frh->src_len = rule4->src_len; 217 frh->src_len = rule4->src_len;
219 frh->tos = rule4->tos; 218 frh->tos = rule4->tos;
@@ -234,23 +233,6 @@ nla_put_failure:
234 return -ENOBUFS; 233 return -ENOBUFS;
235} 234}
236 235
237static u32 fib4_rule_default_pref(struct fib_rules_ops *ops)
238{
239 struct list_head *pos;
240 struct fib_rule *rule;
241
242 if (!list_empty(&ops->rules_list)) {
243 pos = ops->rules_list.next;
244 if (pos->next != &ops->rules_list) {
245 rule = list_entry(pos->next, struct fib_rule, list);
246 if (rule->pref)
247 return rule->pref - 1;
248 }
249 }
250
251 return 0;
252}
253
254static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) 236static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
255{ 237{
256 return nla_total_size(4) /* dst */ 238 return nla_total_size(4) /* dst */
@@ -264,7 +246,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
264} 246}
265 247
266static struct fib_rules_ops fib4_rules_ops_template = { 248static struct fib_rules_ops fib4_rules_ops_template = {
267 .family = AF_INET, 249 .family = FIB_RULES_IPV4,
268 .rule_size = sizeof(struct fib4_rule), 250 .rule_size = sizeof(struct fib4_rule),
269 .addr_size = sizeof(u32), 251 .addr_size = sizeof(u32),
270 .action = fib4_rule_action, 252 .action = fib4_rule_action,
@@ -272,7 +254,7 @@ static struct fib_rules_ops fib4_rules_ops_template = {
272 .configure = fib4_rule_configure, 254 .configure = fib4_rule_configure,
273 .compare = fib4_rule_compare, 255 .compare = fib4_rule_compare,
274 .fill = fib4_rule_fill, 256 .fill = fib4_rule_fill,
275 .default_pref = fib4_rule_default_pref, 257 .default_pref = fib_default_rule_pref,
276 .nlmsg_payload = fib4_rule_nlmsg_payload, 258 .nlmsg_payload = fib4_rule_nlmsg_payload,
277 .flush_cache = fib4_rule_flush_cache, 259 .flush_cache = fib4_rule_flush_cache,
278 .nlgroup = RTNLGRP_IPV4_RULE, 260 .nlgroup = RTNLGRP_IPV4_RULE,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 1af0ea0fb6a2..20f09c5b31e8 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -32,6 +32,7 @@
32#include <linux/proc_fs.h> 32#include <linux/proc_fs.h>
33#include <linux/skbuff.h> 33#include <linux/skbuff.h>
34#include <linux/init.h> 34#include <linux/init.h>
35#include <linux/slab.h>
35 36
36#include <net/arp.h> 37#include <net/arp.h>
37#include <net/ip.h> 38#include <net/ip.h>
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index af5d89792860..59a838795e3e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -71,6 +71,7 @@
71#include <linux/netlink.h> 71#include <linux/netlink.h>
72#include <linux/init.h> 72#include <linux/init.h>
73#include <linux/list.h> 73#include <linux/list.h>
74#include <linux/slab.h>
74#include <net/net_namespace.h> 75#include <net/net_namespace.h>
75#include <net/ip.h> 76#include <net/ip.h>
76#include <net/protocol.h> 77#include <net/protocol.h>
@@ -961,7 +962,9 @@ fib_find_node(struct trie *t, u32 key)
961 struct node *n; 962 struct node *n;
962 963
963 pos = 0; 964 pos = 0;
964 n = rcu_dereference(t->trie); 965 n = rcu_dereference_check(t->trie,
966 rcu_read_lock_held() ||
967 lockdep_rtnl_is_held());
965 968
966 while (n != NULL && NODE_TYPE(n) == T_TNODE) { 969 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
967 tn = (struct tnode *) n; 970 tn = (struct tnode *) n;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4b4c2bcd15db..f3d339f728b0 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -74,6 +74,7 @@
74#include <linux/netdevice.h> 74#include <linux/netdevice.h>
75#include <linux/string.h> 75#include <linux/string.h>
76#include <linux/netfilter_ipv4.h> 76#include <linux/netfilter_ipv4.h>
77#include <linux/slab.h>
77#include <net/snmp.h> 78#include <net/snmp.h>
78#include <net/ip.h> 79#include <net/ip.h>
79#include <net/route.h> 80#include <net/route.h>
@@ -330,9 +331,10 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
330 if (ip_append_data(sk, icmp_glue_bits, icmp_param, 331 if (ip_append_data(sk, icmp_glue_bits, icmp_param,
331 icmp_param->data_len+icmp_param->head_len, 332 icmp_param->data_len+icmp_param->head_len,
332 icmp_param->head_len, 333 icmp_param->head_len,
333 ipc, rt, MSG_DONTWAIT) < 0) 334 ipc, rt, MSG_DONTWAIT) < 0) {
335 ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS);
334 ip_flush_pending_frames(sk); 336 ip_flush_pending_frames(sk);
335 else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { 337 } else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
336 struct icmphdr *icmph = icmp_hdr(skb); 338 struct icmphdr *icmph = icmp_hdr(skb);
337 __wsum csum = 0; 339 __wsum csum = 0;
338 struct sk_buff *skb1; 340 struct sk_buff *skb1;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 63bf298ca109..5fff865a4fa7 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -71,6 +71,7 @@
71 */ 71 */
72 72
73#include <linux/module.h> 73#include <linux/module.h>
74#include <linux/slab.h>
74#include <asm/uaccess.h> 75#include <asm/uaccess.h>
75#include <asm/system.h> 76#include <asm/system.h>
76#include <linux/types.h> 77#include <linux/types.h>
@@ -997,7 +998,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr)
997 --ANK 998 --ANK
998 */ 999 */
999 if (arp_mc_map(addr, buf, dev, 0) == 0) 1000 if (arp_mc_map(addr, buf, dev, 0) == 0)
1000 dev_mc_add(dev, buf, dev->addr_len, 0); 1001 dev_mc_add(dev, buf);
1001} 1002}
1002 1003
1003/* 1004/*
@@ -1010,7 +1011,7 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr)
1010 struct net_device *dev = in_dev->dev; 1011 struct net_device *dev = in_dev->dev;
1011 1012
1012 if (arp_mc_map(addr, buf, dev, 0) == 0) 1013 if (arp_mc_map(addr, buf, dev, 0) == 0)
1013 dev_mc_delete(dev, buf, dev->addr_len, 0); 1014 dev_mc_del(dev, buf);
1014} 1015}
1015 1016
1016#ifdef CONFIG_IP_MULTICAST 1017#ifdef CONFIG_IP_MULTICAST
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 1aaa8110d84b..e5fa2ddce320 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -14,6 +14,7 @@
14#include <linux/types.h> 14#include <linux/types.h>
15#include <linux/fcntl.h> 15#include <linux/fcntl.h>
16#include <linux/random.h> 16#include <linux/random.h>
17#include <linux/slab.h>
17#include <linux/cache.h> 18#include <linux/cache.h>
18#include <linux/init.h> 19#include <linux/init.h>
19#include <linux/time.h> 20#include <linux/time.h>
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index eaf3e2c8646a..a2ca6aed763b 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -19,6 +19,7 @@
19#include <linux/random.h> 19#include <linux/random.h>
20#include <linux/skbuff.h> 20#include <linux/skbuff.h>
21#include <linux/rtnetlink.h> 21#include <linux/rtnetlink.h>
22#include <linux/slab.h>
22 23
23#include <net/inet_frag.h> 24#include <net/inet_frag.h>
24 25
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index cc94cc2d8b2d..c5af909cf701 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12#include <linux/kmemcheck.h> 12#include <linux/kmemcheck.h>
13#include <linux/slab.h>
13#include <net/inet_hashtables.h> 14#include <net/inet_hashtables.h>
14#include <net/inet_timewait_sock.h> 15#include <net/inet_timewait_sock.h>
15#include <net/ip.h> 16#include <net/ip.h>
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 9f2cd47ceeb7..56cdf68a074c 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -25,6 +25,7 @@
25#include <linux/ip.h> 25#include <linux/ip.h>
26#include <linux/icmp.h> 26#include <linux/icmp.h>
27#include <linux/netdevice.h> 27#include <linux/netdevice.h>
28#include <linux/slab.h>
28#include <net/sock.h> 29#include <net/sock.h>
29#include <net/ip.h> 30#include <net/ip.h>
30#include <net/tcp.h> 31#include <net/tcp.h>
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b59430bc041c..75347ea70ea0 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -32,6 +32,7 @@
32#include <linux/netdevice.h> 32#include <linux/netdevice.h>
33#include <linux/jhash.h> 33#include <linux/jhash.h>
34#include <linux/random.h> 34#include <linux/random.h>
35#include <linux/slab.h>
35#include <net/route.h> 36#include <net/route.h>
36#include <net/dst.h> 37#include <net/dst.h>
37#include <net/sock.h> 38#include <net/sock.h>
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index f47c9f76754b..fe381d12ecdd 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -14,6 +14,7 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/types.h> 15#include <linux/types.h>
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/slab.h>
17#include <asm/uaccess.h> 18#include <asm/uaccess.h>
18#include <linux/skbuff.h> 19#include <linux/skbuff.h>
19#include <linux/netdevice.h> 20#include <linux/netdevice.h>
@@ -810,11 +811,13 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
810 tunnel->err_count = 0; 811 tunnel->err_count = 0;
811 } 812 }
812 813
813 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen; 814 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len;
814 815
815 if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| 816 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
816 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { 817 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
817 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 818 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
819 if (max_headroom > dev->needed_headroom)
820 dev->needed_headroom = max_headroom;
818 if (!new_skb) { 821 if (!new_skb) {
819 ip_rt_put(rt); 822 ip_rt_put(rt);
820 txq->tx_dropped++; 823 txq->tx_dropped++;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 091b5c7e04e1..af76de5f76de 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -119,6 +119,7 @@
119#include <linux/kernel.h> 119#include <linux/kernel.h>
120#include <linux/string.h> 120#include <linux/string.h>
121#include <linux/errno.h> 121#include <linux/errno.h>
122#include <linux/slab.h>
122 123
123#include <linux/net.h> 124#include <linux/net.h>
124#include <linux/socket.h> 125#include <linux/socket.h>
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 94bf105ef3c9..4c09a31fd140 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -11,6 +11,7 @@
11 11
12#include <linux/capability.h> 12#include <linux/capability.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/slab.h>
14#include <linux/types.h> 15#include <linux/types.h>
15#include <asm/uaccess.h> 16#include <asm/uaccess.h>
16#include <linux/skbuff.h> 17#include <linux/skbuff.h>
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f09135e1e14f..b0b2e3059f11 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -51,6 +51,7 @@
51#include <linux/string.h> 51#include <linux/string.h>
52#include <linux/errno.h> 52#include <linux/errno.h>
53#include <linux/highmem.h> 53#include <linux/highmem.h>
54#include <linux/slab.h>
54 55
55#include <linux/socket.h> 56#include <linux/socket.h>
56#include <linux/sockios.h> 57#include <linux/sockios.h>
@@ -310,7 +311,7 @@ int ip_output(struct sk_buff *skb)
310 !(IPCB(skb)->flags & IPSKB_REROUTED)); 311 !(IPCB(skb)->flags & IPSKB_REROUTED));
311} 312}
312 313
313int ip_queue_xmit(struct sk_buff *skb, int ipfragok) 314int ip_queue_xmit(struct sk_buff *skb)
314{ 315{
315 struct sock *sk = skb->sk; 316 struct sock *sk = skb->sk;
316 struct inet_sock *inet = inet_sk(sk); 317 struct inet_sock *inet = inet_sk(sk);
@@ -369,7 +370,7 @@ packet_routed:
369 skb_reset_network_header(skb); 370 skb_reset_network_header(skb);
370 iph = ip_hdr(skb); 371 iph = ip_hdr(skb);
371 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); 372 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
372 if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok) 373 if (ip_dont_fragment(sk, &rt->u.dst) && !skb->local_df)
373 iph->frag_off = htons(IP_DF); 374 iph->frag_off = htons(IP_DF);
374 else 375 else
375 iph->frag_off = 0; 376 iph->frag_off = 0;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 644dc43a55de..b0aa0546a3b3 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -23,6 +23,7 @@
23#include <linux/icmp.h> 23#include <linux/icmp.h>
24#include <linux/inetdevice.h> 24#include <linux/inetdevice.h>
25#include <linux/netdevice.h> 25#include <linux/netdevice.h>
26#include <linux/slab.h>
26#include <net/sock.h> 27#include <net/sock.h>
27#include <net/ip.h> 28#include <net/ip.h>
28#include <net/icmp.h> 29#include <net/icmp.h>
@@ -286,12 +287,8 @@ int ip_ra_control(struct sock *sk, unsigned char on,
286void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, 287void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
287 __be16 port, u32 info, u8 *payload) 288 __be16 port, u32 info, u8 *payload)
288{ 289{
289 struct inet_sock *inet = inet_sk(sk);
290 struct sock_exterr_skb *serr; 290 struct sock_exterr_skb *serr;
291 291
292 if (!inet->recverr)
293 return;
294
295 skb = skb_clone(skb, GFP_ATOMIC); 292 skb = skb_clone(skb, GFP_ATOMIC);
296 if (!skb) 293 if (!skb)
297 return; 294 return;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 678909281648..b9d84e800cf4 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -53,6 +53,7 @@
53#include <linux/root_dev.h> 53#include <linux/root_dev.h>
54#include <linux/delay.h> 54#include <linux/delay.h>
55#include <linux/nfs_fs.h> 55#include <linux/nfs_fs.h>
56#include <linux/slab.h>
56#include <net/net_namespace.h> 57#include <net/net_namespace.h>
57#include <net/arp.h> 58#include <net/arp.h>
58#include <net/ip.h> 59#include <net/ip.h>
@@ -975,7 +976,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
975 /* Is it a reply for the device we are configuring? */ 976 /* Is it a reply for the device we are configuring? */
976 if (b->xid != ic_dev_xid) { 977 if (b->xid != ic_dev_xid) {
977 if (net_ratelimit()) 978 if (net_ratelimit())
978 printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n"); 979 printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet\n");
979 goto drop_unlock; 980 goto drop_unlock;
980 } 981 }
981 982
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 2f302d3ac9a3..0b27b14dcc9d 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -95,6 +95,7 @@
95#include <linux/module.h> 95#include <linux/module.h>
96#include <linux/types.h> 96#include <linux/types.h>
97#include <linux/kernel.h> 97#include <linux/kernel.h>
98#include <linux/slab.h>
98#include <asm/uaccess.h> 99#include <asm/uaccess.h>
99#include <linux/skbuff.h> 100#include <linux/skbuff.h>
100#include <linux/netdevice.h> 101#include <linux/netdevice.h>
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 1d42f6103c8d..1aa498d7a0a5 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -47,6 +47,7 @@
47#include <linux/mroute.h> 47#include <linux/mroute.h>
48#include <linux/init.h> 48#include <linux/init.h>
49#include <linux/if_ether.h> 49#include <linux/if_ether.h>
50#include <linux/slab.h>
50#include <net/net_namespace.h> 51#include <net/net_namespace.h>
51#include <net/ip.h> 52#include <net/ip.h>
52#include <net/protocol.h> 53#include <net/protocol.h>
@@ -62,11 +63,40 @@
62#include <net/ipip.h> 63#include <net/ipip.h>
63#include <net/checksum.h> 64#include <net/checksum.h>
64#include <net/netlink.h> 65#include <net/netlink.h>
66#include <net/fib_rules.h>
65 67
66#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 68#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67#define CONFIG_IP_PIMSM 1 69#define CONFIG_IP_PIMSM 1
68#endif 70#endif
69 71
72struct mr_table {
73 struct list_head list;
74#ifdef CONFIG_NET_NS
75 struct net *net;
76#endif
77 u32 id;
78 struct sock *mroute_sk;
79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES];
82 struct vif_device vif_table[MAXVIFS];
83 int maxvif;
84 atomic_t cache_resolve_queue_len;
85 int mroute_do_assert;
86 int mroute_do_pim;
87#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88 int mroute_reg_vif_num;
89#endif
90};
91
92struct ipmr_rule {
93 struct fib_rule common;
94};
95
96struct ipmr_result {
97 struct mr_table *mrt;
98};
99
70/* Big lock, protecting vif table, mrt cache and mroute socket state. 100/* Big lock, protecting vif table, mrt cache and mroute socket state.
71 Note that the changes are semaphored via rtnl_lock. 101 Note that the changes are semaphored via rtnl_lock.
72 */ 102 */
@@ -77,9 +107,7 @@ static DEFINE_RWLOCK(mrt_lock);
77 * Multicast router control variables 107 * Multicast router control variables
78 */ 108 */
79 109
80#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL) 110#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
81
82static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
83 111
84/* Special spinlock for queue of unresolved entries */ 112/* Special spinlock for queue of unresolved entries */
85static DEFINE_SPINLOCK(mfc_unres_lock); 113static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -94,12 +122,215 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
94 122
95static struct kmem_cache *mrt_cachep __read_mostly; 123static struct kmem_cache *mrt_cachep __read_mostly;
96 124
97static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); 125static struct mr_table *ipmr_new_table(struct net *net, u32 id);
98static int ipmr_cache_report(struct net *net, 126static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127 struct sk_buff *skb, struct mfc_cache *cache,
128 int local);
129static int ipmr_cache_report(struct mr_table *mrt,
99 struct sk_buff *pkt, vifi_t vifi, int assert); 130 struct sk_buff *pkt, vifi_t vifi, int assert);
100static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); 131static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132 struct mfc_cache *c, struct rtmsg *rtm);
133static void ipmr_expire_process(unsigned long arg);
134
135#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136#define ipmr_for_each_table(mrt, net) \
137 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
138
139static struct mr_table *ipmr_get_table(struct net *net, u32 id)
140{
141 struct mr_table *mrt;
142
143 ipmr_for_each_table(mrt, net) {
144 if (mrt->id == id)
145 return mrt;
146 }
147 return NULL;
148}
149
150static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151 struct mr_table **mrt)
152{
153 struct ipmr_result res;
154 struct fib_lookup_arg arg = { .result = &res, };
155 int err;
156
157 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
158 if (err < 0)
159 return err;
160 *mrt = res.mrt;
161 return 0;
162}
163
164static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165 int flags, struct fib_lookup_arg *arg)
166{
167 struct ipmr_result *res = arg->result;
168 struct mr_table *mrt;
169
170 switch (rule->action) {
171 case FR_ACT_TO_TBL:
172 break;
173 case FR_ACT_UNREACHABLE:
174 return -ENETUNREACH;
175 case FR_ACT_PROHIBIT:
176 return -EACCES;
177 case FR_ACT_BLACKHOLE:
178 default:
179 return -EINVAL;
180 }
181
182 mrt = ipmr_get_table(rule->fr_net, rule->table);
183 if (mrt == NULL)
184 return -EAGAIN;
185 res->mrt = mrt;
186 return 0;
187}
188
189static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
190{
191 return 1;
192}
193
194static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
195 FRA_GENERIC_POLICY,
196};
197
198static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
199 struct fib_rule_hdr *frh, struct nlattr **tb)
200{
201 return 0;
202}
203
204static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
205 struct nlattr **tb)
206{
207 return 1;
208}
209
210static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211 struct fib_rule_hdr *frh)
212{
213 frh->dst_len = 0;
214 frh->src_len = 0;
215 frh->tos = 0;
216 return 0;
217}
218
219static struct fib_rules_ops ipmr_rules_ops_template = {
220 .family = FIB_RULES_IPMR,
221 .rule_size = sizeof(struct ipmr_rule),
222 .addr_size = sizeof(u32),
223 .action = ipmr_rule_action,
224 .match = ipmr_rule_match,
225 .configure = ipmr_rule_configure,
226 .compare = ipmr_rule_compare,
227 .default_pref = fib_default_rule_pref,
228 .fill = ipmr_rule_fill,
229 .nlgroup = RTNLGRP_IPV4_RULE,
230 .policy = ipmr_rule_policy,
231 .owner = THIS_MODULE,
232};
233
234static int __net_init ipmr_rules_init(struct net *net)
235{
236 struct fib_rules_ops *ops;
237 struct mr_table *mrt;
238 int err;
239
240 ops = fib_rules_register(&ipmr_rules_ops_template, net);
241 if (IS_ERR(ops))
242 return PTR_ERR(ops);
243
244 INIT_LIST_HEAD(&net->ipv4.mr_tables);
245
246 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
247 if (mrt == NULL) {
248 err = -ENOMEM;
249 goto err1;
250 }
251
252 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
253 if (err < 0)
254 goto err2;
255
256 net->ipv4.mr_rules_ops = ops;
257 return 0;
258
259err2:
260 kfree(mrt);
261err1:
262 fib_rules_unregister(ops);
263 return err;
264}
265
266static void __net_exit ipmr_rules_exit(struct net *net)
267{
268 struct mr_table *mrt, *next;
269
270 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
271 kfree(mrt);
272 fib_rules_unregister(net->ipv4.mr_rules_ops);
273}
274#else
275#define ipmr_for_each_table(mrt, net) \
276 for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
277
278static struct mr_table *ipmr_get_table(struct net *net, u32 id)
279{
280 return net->ipv4.mrt;
281}
282
283static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
284 struct mr_table **mrt)
285{
286 *mrt = net->ipv4.mrt;
287 return 0;
288}
289
290static int __net_init ipmr_rules_init(struct net *net)
291{
292 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
293 return net->ipv4.mrt ? 0 : -ENOMEM;
294}
295
296static void __net_exit ipmr_rules_exit(struct net *net)
297{
298 kfree(net->ipv4.mrt);
299}
300#endif
301
302static struct mr_table *ipmr_new_table(struct net *net, u32 id)
303{
304 struct mr_table *mrt;
305 unsigned int i;
101 306
102static struct timer_list ipmr_expire_timer; 307 mrt = ipmr_get_table(net, id);
308 if (mrt != NULL)
309 return mrt;
310
311 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
312 if (mrt == NULL)
313 return NULL;
314 write_pnet(&mrt->net, net);
315 mrt->id = id;
316
317 /* Forwarding cache */
318 for (i = 0; i < MFC_LINES; i++)
319 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
320
321 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
322
323 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
324 (unsigned long)mrt);
325
326#ifdef CONFIG_IP_PIMSM
327 mrt->mroute_reg_vif_num = -1;
328#endif
329#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
330 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
331#endif
332 return mrt;
333}
103 334
104/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 335/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
105 336
@@ -200,12 +431,22 @@ failure:
200static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 431static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
201{ 432{
202 struct net *net = dev_net(dev); 433 struct net *net = dev_net(dev);
434 struct mr_table *mrt;
435 struct flowi fl = {
436 .oif = dev->ifindex,
437 .iif = skb->skb_iif,
438 .mark = skb->mark,
439 };
440 int err;
441
442 err = ipmr_fib_lookup(net, &fl, &mrt);
443 if (err < 0)
444 return err;
203 445
204 read_lock(&mrt_lock); 446 read_lock(&mrt_lock);
205 dev->stats.tx_bytes += skb->len; 447 dev->stats.tx_bytes += skb->len;
206 dev->stats.tx_packets++; 448 dev->stats.tx_packets++;
207 ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num, 449 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
208 IGMPMSG_WHOLEPKT);
209 read_unlock(&mrt_lock); 450 read_unlock(&mrt_lock);
210 kfree_skb(skb); 451 kfree_skb(skb);
211 return NETDEV_TX_OK; 452 return NETDEV_TX_OK;
@@ -225,12 +466,18 @@ static void reg_vif_setup(struct net_device *dev)
225 dev->features |= NETIF_F_NETNS_LOCAL; 466 dev->features |= NETIF_F_NETNS_LOCAL;
226} 467}
227 468
228static struct net_device *ipmr_reg_vif(struct net *net) 469static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
229{ 470{
230 struct net_device *dev; 471 struct net_device *dev;
231 struct in_device *in_dev; 472 struct in_device *in_dev;
473 char name[IFNAMSIZ];
232 474
233 dev = alloc_netdev(0, "pimreg", reg_vif_setup); 475 if (mrt->id == RT_TABLE_DEFAULT)
476 sprintf(name, "pimreg");
477 else
478 sprintf(name, "pimreg%u", mrt->id);
479
480 dev = alloc_netdev(0, name, reg_vif_setup);
234 481
235 if (dev == NULL) 482 if (dev == NULL)
236 return NULL; 483 return NULL;
@@ -275,17 +522,17 @@ failure:
275 * @notify: Set to 1, if the caller is a notifier_call 522 * @notify: Set to 1, if the caller is a notifier_call
276 */ 523 */
277 524
278static int vif_delete(struct net *net, int vifi, int notify, 525static int vif_delete(struct mr_table *mrt, int vifi, int notify,
279 struct list_head *head) 526 struct list_head *head)
280{ 527{
281 struct vif_device *v; 528 struct vif_device *v;
282 struct net_device *dev; 529 struct net_device *dev;
283 struct in_device *in_dev; 530 struct in_device *in_dev;
284 531
285 if (vifi < 0 || vifi >= net->ipv4.maxvif) 532 if (vifi < 0 || vifi >= mrt->maxvif)
286 return -EADDRNOTAVAIL; 533 return -EADDRNOTAVAIL;
287 534
288 v = &net->ipv4.vif_table[vifi]; 535 v = &mrt->vif_table[vifi];
289 536
290 write_lock_bh(&mrt_lock); 537 write_lock_bh(&mrt_lock);
291 dev = v->dev; 538 dev = v->dev;
@@ -297,17 +544,17 @@ static int vif_delete(struct net *net, int vifi, int notify,
297 } 544 }
298 545
299#ifdef CONFIG_IP_PIMSM 546#ifdef CONFIG_IP_PIMSM
300 if (vifi == net->ipv4.mroute_reg_vif_num) 547 if (vifi == mrt->mroute_reg_vif_num)
301 net->ipv4.mroute_reg_vif_num = -1; 548 mrt->mroute_reg_vif_num = -1;
302#endif 549#endif
303 550
304 if (vifi+1 == net->ipv4.maxvif) { 551 if (vifi+1 == mrt->maxvif) {
305 int tmp; 552 int tmp;
306 for (tmp=vifi-1; tmp>=0; tmp--) { 553 for (tmp=vifi-1; tmp>=0; tmp--) {
307 if (VIF_EXISTS(net, tmp)) 554 if (VIF_EXISTS(mrt, tmp))
308 break; 555 break;
309 } 556 }
310 net->ipv4.maxvif = tmp+1; 557 mrt->maxvif = tmp+1;
311 } 558 }
312 559
313 write_unlock_bh(&mrt_lock); 560 write_unlock_bh(&mrt_lock);
@@ -328,7 +575,6 @@ static int vif_delete(struct net *net, int vifi, int notify,
328 575
329static inline void ipmr_cache_free(struct mfc_cache *c) 576static inline void ipmr_cache_free(struct mfc_cache *c)
330{ 577{
331 release_net(mfc_net(c));
332 kmem_cache_free(mrt_cachep, c); 578 kmem_cache_free(mrt_cachep, c);
333} 579}
334 580
@@ -336,13 +582,13 @@ static inline void ipmr_cache_free(struct mfc_cache *c)
336 and reporting error to netlink readers. 582 and reporting error to netlink readers.
337 */ 583 */
338 584
339static void ipmr_destroy_unres(struct mfc_cache *c) 585static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
340{ 586{
587 struct net *net = read_pnet(&mrt->net);
341 struct sk_buff *skb; 588 struct sk_buff *skb;
342 struct nlmsgerr *e; 589 struct nlmsgerr *e;
343 struct net *net = mfc_net(c);
344 590
345 atomic_dec(&net->ipv4.cache_resolve_queue_len); 591 atomic_dec(&mrt->cache_resolve_queue_len);
346 592
347 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { 593 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
348 if (ip_hdr(skb)->version == 0) { 594 if (ip_hdr(skb)->version == 0) {
@@ -363,42 +609,40 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
363} 609}
364 610
365 611
366/* Single timer process for all the unresolved queue. */ 612/* Timer process for the unresolved queue. */
367 613
368static void ipmr_expire_process(unsigned long dummy) 614static void ipmr_expire_process(unsigned long arg)
369{ 615{
616 struct mr_table *mrt = (struct mr_table *)arg;
370 unsigned long now; 617 unsigned long now;
371 unsigned long expires; 618 unsigned long expires;
372 struct mfc_cache *c, **cp; 619 struct mfc_cache *c, *next;
373 620
374 if (!spin_trylock(&mfc_unres_lock)) { 621 if (!spin_trylock(&mfc_unres_lock)) {
375 mod_timer(&ipmr_expire_timer, jiffies+HZ/10); 622 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
376 return; 623 return;
377 } 624 }
378 625
379 if (mfc_unres_queue == NULL) 626 if (list_empty(&mrt->mfc_unres_queue))
380 goto out; 627 goto out;
381 628
382 now = jiffies; 629 now = jiffies;
383 expires = 10*HZ; 630 expires = 10*HZ;
384 cp = &mfc_unres_queue;
385 631
386 while ((c=*cp) != NULL) { 632 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
387 if (time_after(c->mfc_un.unres.expires, now)) { 633 if (time_after(c->mfc_un.unres.expires, now)) {
388 unsigned long interval = c->mfc_un.unres.expires - now; 634 unsigned long interval = c->mfc_un.unres.expires - now;
389 if (interval < expires) 635 if (interval < expires)
390 expires = interval; 636 expires = interval;
391 cp = &c->next;
392 continue; 637 continue;
393 } 638 }
394 639
395 *cp = c->next; 640 list_del(&c->list);
396 641 ipmr_destroy_unres(mrt, c);
397 ipmr_destroy_unres(c);
398 } 642 }
399 643
400 if (mfc_unres_queue != NULL) 644 if (!list_empty(&mrt->mfc_unres_queue))
401 mod_timer(&ipmr_expire_timer, jiffies + expires); 645 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
402 646
403out: 647out:
404 spin_unlock(&mfc_unres_lock); 648 spin_unlock(&mfc_unres_lock);
@@ -406,17 +650,17 @@ out:
406 650
407/* Fill oifs list. It is called under write locked mrt_lock. */ 651/* Fill oifs list. It is called under write locked mrt_lock. */
408 652
409static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) 653static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
654 unsigned char *ttls)
410{ 655{
411 int vifi; 656 int vifi;
412 struct net *net = mfc_net(cache);
413 657
414 cache->mfc_un.res.minvif = MAXVIFS; 658 cache->mfc_un.res.minvif = MAXVIFS;
415 cache->mfc_un.res.maxvif = 0; 659 cache->mfc_un.res.maxvif = 0;
416 memset(cache->mfc_un.res.ttls, 255, MAXVIFS); 660 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
417 661
418 for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) { 662 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
419 if (VIF_EXISTS(net, vifi) && 663 if (VIF_EXISTS(mrt, vifi) &&
420 ttls[vifi] && ttls[vifi] < 255) { 664 ttls[vifi] && ttls[vifi] < 255) {
421 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 665 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
422 if (cache->mfc_un.res.minvif > vifi) 666 if (cache->mfc_un.res.minvif > vifi)
@@ -427,16 +671,17 @@ static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
427 } 671 }
428} 672}
429 673
430static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) 674static int vif_add(struct net *net, struct mr_table *mrt,
675 struct vifctl *vifc, int mrtsock)
431{ 676{
432 int vifi = vifc->vifc_vifi; 677 int vifi = vifc->vifc_vifi;
433 struct vif_device *v = &net->ipv4.vif_table[vifi]; 678 struct vif_device *v = &mrt->vif_table[vifi];
434 struct net_device *dev; 679 struct net_device *dev;
435 struct in_device *in_dev; 680 struct in_device *in_dev;
436 int err; 681 int err;
437 682
438 /* Is vif busy ? */ 683 /* Is vif busy ? */
439 if (VIF_EXISTS(net, vifi)) 684 if (VIF_EXISTS(mrt, vifi))
440 return -EADDRINUSE; 685 return -EADDRINUSE;
441 686
442 switch (vifc->vifc_flags) { 687 switch (vifc->vifc_flags) {
@@ -446,9 +691,9 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
446 * Special Purpose VIF in PIM 691 * Special Purpose VIF in PIM
447 * All the packets will be sent to the daemon 692 * All the packets will be sent to the daemon
448 */ 693 */
449 if (net->ipv4.mroute_reg_vif_num >= 0) 694 if (mrt->mroute_reg_vif_num >= 0)
450 return -EADDRINUSE; 695 return -EADDRINUSE;
451 dev = ipmr_reg_vif(net); 696 dev = ipmr_reg_vif(net, mrt);
452 if (!dev) 697 if (!dev)
453 return -ENOBUFS; 698 return -ENOBUFS;
454 err = dev_set_allmulti(dev, 1); 699 err = dev_set_allmulti(dev, 1);
@@ -524,49 +769,47 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
524 v->dev = dev; 769 v->dev = dev;
525#ifdef CONFIG_IP_PIMSM 770#ifdef CONFIG_IP_PIMSM
526 if (v->flags&VIFF_REGISTER) 771 if (v->flags&VIFF_REGISTER)
527 net->ipv4.mroute_reg_vif_num = vifi; 772 mrt->mroute_reg_vif_num = vifi;
528#endif 773#endif
529 if (vifi+1 > net->ipv4.maxvif) 774 if (vifi+1 > mrt->maxvif)
530 net->ipv4.maxvif = vifi+1; 775 mrt->maxvif = vifi+1;
531 write_unlock_bh(&mrt_lock); 776 write_unlock_bh(&mrt_lock);
532 return 0; 777 return 0;
533} 778}
534 779
535static struct mfc_cache *ipmr_cache_find(struct net *net, 780static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
536 __be32 origin, 781 __be32 origin,
537 __be32 mcastgrp) 782 __be32 mcastgrp)
538{ 783{
539 int line = MFC_HASH(mcastgrp, origin); 784 int line = MFC_HASH(mcastgrp, origin);
540 struct mfc_cache *c; 785 struct mfc_cache *c;
541 786
542 for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) { 787 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
543 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp) 788 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
544 break; 789 return c;
545 } 790 }
546 return c; 791 return NULL;
547} 792}
548 793
549/* 794/*
550 * Allocate a multicast cache entry 795 * Allocate a multicast cache entry
551 */ 796 */
552static struct mfc_cache *ipmr_cache_alloc(struct net *net) 797static struct mfc_cache *ipmr_cache_alloc(void)
553{ 798{
554 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 799 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
555 if (c == NULL) 800 if (c == NULL)
556 return NULL; 801 return NULL;
557 c->mfc_un.res.minvif = MAXVIFS; 802 c->mfc_un.res.minvif = MAXVIFS;
558 mfc_net_set(c, net);
559 return c; 803 return c;
560} 804}
561 805
562static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net) 806static struct mfc_cache *ipmr_cache_alloc_unres(void)
563{ 807{
564 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 808 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
565 if (c == NULL) 809 if (c == NULL)
566 return NULL; 810 return NULL;
567 skb_queue_head_init(&c->mfc_un.unres.unresolved); 811 skb_queue_head_init(&c->mfc_un.unres.unresolved);
568 c->mfc_un.unres.expires = jiffies + 10*HZ; 812 c->mfc_un.unres.expires = jiffies + 10*HZ;
569 mfc_net_set(c, net);
570 return c; 813 return c;
571} 814}
572 815
@@ -574,7 +817,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
574 * A cache entry has gone into a resolved state from queued 817 * A cache entry has gone into a resolved state from queued
575 */ 818 */
576 819
577static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) 820static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
821 struct mfc_cache *uc, struct mfc_cache *c)
578{ 822{
579 struct sk_buff *skb; 823 struct sk_buff *skb;
580 struct nlmsgerr *e; 824 struct nlmsgerr *e;
@@ -587,7 +831,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
587 if (ip_hdr(skb)->version == 0) { 831 if (ip_hdr(skb)->version == 0) {
588 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 832 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
589 833
590 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { 834 if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
591 nlh->nlmsg_len = (skb_tail_pointer(skb) - 835 nlh->nlmsg_len = (skb_tail_pointer(skb) -
592 (u8 *)nlh); 836 (u8 *)nlh);
593 } else { 837 } else {
@@ -599,9 +843,9 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
599 memset(&e->msg, 0, sizeof(e->msg)); 843 memset(&e->msg, 0, sizeof(e->msg));
600 } 844 }
601 845
602 rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid); 846 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
603 } else 847 } else
604 ip_mr_forward(skb, c, 0); 848 ip_mr_forward(net, mrt, skb, c, 0);
605 } 849 }
606} 850}
607 851
@@ -612,7 +856,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
612 * Called under mrt_lock. 856 * Called under mrt_lock.
613 */ 857 */
614 858
615static int ipmr_cache_report(struct net *net, 859static int ipmr_cache_report(struct mr_table *mrt,
616 struct sk_buff *pkt, vifi_t vifi, int assert) 860 struct sk_buff *pkt, vifi_t vifi, int assert)
617{ 861{
618 struct sk_buff *skb; 862 struct sk_buff *skb;
@@ -645,7 +889,7 @@ static int ipmr_cache_report(struct net *net,
645 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); 889 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
646 msg->im_msgtype = IGMPMSG_WHOLEPKT; 890 msg->im_msgtype = IGMPMSG_WHOLEPKT;
647 msg->im_mbz = 0; 891 msg->im_mbz = 0;
648 msg->im_vif = net->ipv4.mroute_reg_vif_num; 892 msg->im_vif = mrt->mroute_reg_vif_num;
649 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; 893 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
650 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + 894 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
651 sizeof(struct iphdr)); 895 sizeof(struct iphdr));
@@ -677,7 +921,7 @@ static int ipmr_cache_report(struct net *net,
677 skb->transport_header = skb->network_header; 921 skb->transport_header = skb->network_header;
678 } 922 }
679 923
680 if (net->ipv4.mroute_sk == NULL) { 924 if (mrt->mroute_sk == NULL) {
681 kfree_skb(skb); 925 kfree_skb(skb);
682 return -EINVAL; 926 return -EINVAL;
683 } 927 }
@@ -685,7 +929,7 @@ static int ipmr_cache_report(struct net *net,
685 /* 929 /*
686 * Deliver to mrouted 930 * Deliver to mrouted
687 */ 931 */
688 ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb); 932 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
689 if (ret < 0) { 933 if (ret < 0) {
690 if (net_ratelimit()) 934 if (net_ratelimit())
691 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); 935 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
@@ -700,27 +944,29 @@ static int ipmr_cache_report(struct net *net,
700 */ 944 */
701 945
702static int 946static int
703ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) 947ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
704{ 948{
949 bool found = false;
705 int err; 950 int err;
706 struct mfc_cache *c; 951 struct mfc_cache *c;
707 const struct iphdr *iph = ip_hdr(skb); 952 const struct iphdr *iph = ip_hdr(skb);
708 953
709 spin_lock_bh(&mfc_unres_lock); 954 spin_lock_bh(&mfc_unres_lock);
710 for (c=mfc_unres_queue; c; c=c->next) { 955 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
711 if (net_eq(mfc_net(c), net) && 956 if (c->mfc_mcastgrp == iph->daddr &&
712 c->mfc_mcastgrp == iph->daddr && 957 c->mfc_origin == iph->saddr) {
713 c->mfc_origin == iph->saddr) 958 found = true;
714 break; 959 break;
960 }
715 } 961 }
716 962
717 if (c == NULL) { 963 if (!found) {
718 /* 964 /*
719 * Create a new entry if allowable 965 * Create a new entry if allowable
720 */ 966 */
721 967
722 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 || 968 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
723 (c = ipmr_cache_alloc_unres(net)) == NULL) { 969 (c = ipmr_cache_alloc_unres()) == NULL) {
724 spin_unlock_bh(&mfc_unres_lock); 970 spin_unlock_bh(&mfc_unres_lock);
725 971
726 kfree_skb(skb); 972 kfree_skb(skb);
@@ -737,7 +983,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
737 /* 983 /*
738 * Reflect first query at mrouted. 984 * Reflect first query at mrouted.
739 */ 985 */
740 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE); 986 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
741 if (err < 0) { 987 if (err < 0) {
742 /* If the report failed throw the cache entry 988 /* If the report failed throw the cache entry
743 out - Brad Parker 989 out - Brad Parker
@@ -749,11 +995,10 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
749 return err; 995 return err;
750 } 996 }
751 997
752 atomic_inc(&net->ipv4.cache_resolve_queue_len); 998 atomic_inc(&mrt->cache_resolve_queue_len);
753 c->next = mfc_unres_queue; 999 list_add(&c->list, &mrt->mfc_unres_queue);
754 mfc_unres_queue = c;
755 1000
756 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires); 1001 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
757 } 1002 }
758 1003
759 /* 1004 /*
@@ -775,19 +1020,18 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
775 * MFC cache manipulation by user space mroute daemon 1020 * MFC cache manipulation by user space mroute daemon
776 */ 1021 */
777 1022
778static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc) 1023static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
779{ 1024{
780 int line; 1025 int line;
781 struct mfc_cache *c, **cp; 1026 struct mfc_cache *c, *next;
782 1027
783 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 1028 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
784 1029
785 for (cp = &net->ipv4.mfc_cache_array[line]; 1030 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
786 (c = *cp) != NULL; cp = &c->next) {
787 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 1031 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
788 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { 1032 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
789 write_lock_bh(&mrt_lock); 1033 write_lock_bh(&mrt_lock);
790 *cp = c->next; 1034 list_del(&c->list);
791 write_unlock_bh(&mrt_lock); 1035 write_unlock_bh(&mrt_lock);
792 1036
793 ipmr_cache_free(c); 1037 ipmr_cache_free(c);
@@ -797,24 +1041,30 @@ static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
797 return -ENOENT; 1041 return -ENOENT;
798} 1042}
799 1043
800static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) 1044static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1045 struct mfcctl *mfc, int mrtsock)
801{ 1046{
1047 bool found = false;
802 int line; 1048 int line;
803 struct mfc_cache *uc, *c, **cp; 1049 struct mfc_cache *uc, *c;
1050
1051 if (mfc->mfcc_parent >= MAXVIFS)
1052 return -ENFILE;
804 1053
805 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 1054 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
806 1055
807 for (cp = &net->ipv4.mfc_cache_array[line]; 1056 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
808 (c = *cp) != NULL; cp = &c->next) {
809 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 1057 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
810 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) 1058 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1059 found = true;
811 break; 1060 break;
1061 }
812 } 1062 }
813 1063
814 if (c != NULL) { 1064 if (found) {
815 write_lock_bh(&mrt_lock); 1065 write_lock_bh(&mrt_lock);
816 c->mfc_parent = mfc->mfcc_parent; 1066 c->mfc_parent = mfc->mfcc_parent;
817 ipmr_update_thresholds(c, mfc->mfcc_ttls); 1067 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
818 if (!mrtsock) 1068 if (!mrtsock)
819 c->mfc_flags |= MFC_STATIC; 1069 c->mfc_flags |= MFC_STATIC;
820 write_unlock_bh(&mrt_lock); 1070 write_unlock_bh(&mrt_lock);
@@ -824,43 +1074,42 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
824 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) 1074 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
825 return -EINVAL; 1075 return -EINVAL;
826 1076
827 c = ipmr_cache_alloc(net); 1077 c = ipmr_cache_alloc();
828 if (c == NULL) 1078 if (c == NULL)
829 return -ENOMEM; 1079 return -ENOMEM;
830 1080
831 c->mfc_origin = mfc->mfcc_origin.s_addr; 1081 c->mfc_origin = mfc->mfcc_origin.s_addr;
832 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; 1082 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
833 c->mfc_parent = mfc->mfcc_parent; 1083 c->mfc_parent = mfc->mfcc_parent;
834 ipmr_update_thresholds(c, mfc->mfcc_ttls); 1084 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
835 if (!mrtsock) 1085 if (!mrtsock)
836 c->mfc_flags |= MFC_STATIC; 1086 c->mfc_flags |= MFC_STATIC;
837 1087
838 write_lock_bh(&mrt_lock); 1088 write_lock_bh(&mrt_lock);
839 c->next = net->ipv4.mfc_cache_array[line]; 1089 list_add(&c->list, &mrt->mfc_cache_array[line]);
840 net->ipv4.mfc_cache_array[line] = c;
841 write_unlock_bh(&mrt_lock); 1090 write_unlock_bh(&mrt_lock);
842 1091
843 /* 1092 /*
844 * Check to see if we resolved a queued list. If so we 1093 * Check to see if we resolved a queued list. If so we
845 * need to send on the frames and tidy up. 1094 * need to send on the frames and tidy up.
846 */ 1095 */
1096 found = false;
847 spin_lock_bh(&mfc_unres_lock); 1097 spin_lock_bh(&mfc_unres_lock);
848 for (cp = &mfc_unres_queue; (uc=*cp) != NULL; 1098 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
849 cp = &uc->next) { 1099 if (uc->mfc_origin == c->mfc_origin &&
850 if (net_eq(mfc_net(uc), net) &&
851 uc->mfc_origin == c->mfc_origin &&
852 uc->mfc_mcastgrp == c->mfc_mcastgrp) { 1100 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
853 *cp = uc->next; 1101 list_del(&uc->list);
854 atomic_dec(&net->ipv4.cache_resolve_queue_len); 1102 atomic_dec(&mrt->cache_resolve_queue_len);
1103 found = true;
855 break; 1104 break;
856 } 1105 }
857 } 1106 }
858 if (mfc_unres_queue == NULL) 1107 if (list_empty(&mrt->mfc_unres_queue))
859 del_timer(&ipmr_expire_timer); 1108 del_timer(&mrt->ipmr_expire_timer);
860 spin_unlock_bh(&mfc_unres_lock); 1109 spin_unlock_bh(&mfc_unres_lock);
861 1110
862 if (uc) { 1111 if (found) {
863 ipmr_cache_resolve(uc, c); 1112 ipmr_cache_resolve(net, mrt, uc, c);
864 ipmr_cache_free(uc); 1113 ipmr_cache_free(uc);
865 } 1114 }
866 return 0; 1115 return 0;
@@ -870,53 +1119,41 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
870 * Close the multicast socket, and clear the vif tables etc 1119 * Close the multicast socket, and clear the vif tables etc
871 */ 1120 */
872 1121
873static void mroute_clean_tables(struct net *net) 1122static void mroute_clean_tables(struct mr_table *mrt)
874{ 1123{
875 int i; 1124 int i;
876 LIST_HEAD(list); 1125 LIST_HEAD(list);
1126 struct mfc_cache *c, *next;
877 1127
878 /* 1128 /*
879 * Shut down all active vif entries 1129 * Shut down all active vif entries
880 */ 1130 */
881 for (i = 0; i < net->ipv4.maxvif; i++) { 1131 for (i = 0; i < mrt->maxvif; i++) {
882 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC)) 1132 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
883 vif_delete(net, i, 0, &list); 1133 vif_delete(mrt, i, 0, &list);
884 } 1134 }
885 unregister_netdevice_many(&list); 1135 unregister_netdevice_many(&list);
886 1136
887 /* 1137 /*
888 * Wipe the cache 1138 * Wipe the cache
889 */ 1139 */
890 for (i=0; i<MFC_LINES; i++) { 1140 for (i = 0; i < MFC_LINES; i++) {
891 struct mfc_cache *c, **cp; 1141 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
892 1142 if (c->mfc_flags&MFC_STATIC)
893 cp = &net->ipv4.mfc_cache_array[i];
894 while ((c = *cp) != NULL) {
895 if (c->mfc_flags&MFC_STATIC) {
896 cp = &c->next;
897 continue; 1143 continue;
898 }
899 write_lock_bh(&mrt_lock); 1144 write_lock_bh(&mrt_lock);
900 *cp = c->next; 1145 list_del(&c->list);
901 write_unlock_bh(&mrt_lock); 1146 write_unlock_bh(&mrt_lock);
902 1147
903 ipmr_cache_free(c); 1148 ipmr_cache_free(c);
904 } 1149 }
905 } 1150 }
906 1151
907 if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) { 1152 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
908 struct mfc_cache *c, **cp;
909
910 spin_lock_bh(&mfc_unres_lock); 1153 spin_lock_bh(&mfc_unres_lock);
911 cp = &mfc_unres_queue; 1154 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
912 while ((c = *cp) != NULL) { 1155 list_del(&c->list);
913 if (!net_eq(mfc_net(c), net)) { 1156 ipmr_destroy_unres(mrt, c);
914 cp = &c->next;
915 continue;
916 }
917 *cp = c->next;
918
919 ipmr_destroy_unres(c);
920 } 1157 }
921 spin_unlock_bh(&mfc_unres_lock); 1158 spin_unlock_bh(&mfc_unres_lock);
922 } 1159 }
@@ -925,16 +1162,19 @@ static void mroute_clean_tables(struct net *net)
925static void mrtsock_destruct(struct sock *sk) 1162static void mrtsock_destruct(struct sock *sk)
926{ 1163{
927 struct net *net = sock_net(sk); 1164 struct net *net = sock_net(sk);
1165 struct mr_table *mrt;
928 1166
929 rtnl_lock(); 1167 rtnl_lock();
930 if (sk == net->ipv4.mroute_sk) { 1168 ipmr_for_each_table(mrt, net) {
931 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1169 if (sk == mrt->mroute_sk) {
1170 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
932 1171
933 write_lock_bh(&mrt_lock); 1172 write_lock_bh(&mrt_lock);
934 net->ipv4.mroute_sk = NULL; 1173 mrt->mroute_sk = NULL;
935 write_unlock_bh(&mrt_lock); 1174 write_unlock_bh(&mrt_lock);
936 1175
937 mroute_clean_tables(net); 1176 mroute_clean_tables(mrt);
1177 }
938 } 1178 }
939 rtnl_unlock(); 1179 rtnl_unlock();
940} 1180}
@@ -952,9 +1192,14 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
952 struct vifctl vif; 1192 struct vifctl vif;
953 struct mfcctl mfc; 1193 struct mfcctl mfc;
954 struct net *net = sock_net(sk); 1194 struct net *net = sock_net(sk);
1195 struct mr_table *mrt;
1196
1197 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1198 if (mrt == NULL)
1199 return -ENOENT;
955 1200
956 if (optname != MRT_INIT) { 1201 if (optname != MRT_INIT) {
957 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN)) 1202 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
958 return -EACCES; 1203 return -EACCES;
959 } 1204 }
960 1205
@@ -967,7 +1212,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
967 return -ENOPROTOOPT; 1212 return -ENOPROTOOPT;
968 1213
969 rtnl_lock(); 1214 rtnl_lock();
970 if (net->ipv4.mroute_sk) { 1215 if (mrt->mroute_sk) {
971 rtnl_unlock(); 1216 rtnl_unlock();
972 return -EADDRINUSE; 1217 return -EADDRINUSE;
973 } 1218 }
@@ -975,7 +1220,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
975 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1220 ret = ip_ra_control(sk, 1, mrtsock_destruct);
976 if (ret == 0) { 1221 if (ret == 0) {
977 write_lock_bh(&mrt_lock); 1222 write_lock_bh(&mrt_lock);
978 net->ipv4.mroute_sk = sk; 1223 mrt->mroute_sk = sk;
979 write_unlock_bh(&mrt_lock); 1224 write_unlock_bh(&mrt_lock);
980 1225
981 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1226 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
@@ -983,7 +1228,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
983 rtnl_unlock(); 1228 rtnl_unlock();
984 return ret; 1229 return ret;
985 case MRT_DONE: 1230 case MRT_DONE:
986 if (sk != net->ipv4.mroute_sk) 1231 if (sk != mrt->mroute_sk)
987 return -EACCES; 1232 return -EACCES;
988 return ip_ra_control(sk, 0, NULL); 1233 return ip_ra_control(sk, 0, NULL);
989 case MRT_ADD_VIF: 1234 case MRT_ADD_VIF:
@@ -996,9 +1241,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
996 return -ENFILE; 1241 return -ENFILE;
997 rtnl_lock(); 1242 rtnl_lock();
998 if (optname == MRT_ADD_VIF) { 1243 if (optname == MRT_ADD_VIF) {
999 ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk); 1244 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
1000 } else { 1245 } else {
1001 ret = vif_delete(net, vif.vifc_vifi, 0, NULL); 1246 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1002 } 1247 }
1003 rtnl_unlock(); 1248 rtnl_unlock();
1004 return ret; 1249 return ret;
@@ -1015,9 +1260,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1015 return -EFAULT; 1260 return -EFAULT;
1016 rtnl_lock(); 1261 rtnl_lock();
1017 if (optname == MRT_DEL_MFC) 1262 if (optname == MRT_DEL_MFC)
1018 ret = ipmr_mfc_delete(net, &mfc); 1263 ret = ipmr_mfc_delete(mrt, &mfc);
1019 else 1264 else
1020 ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk); 1265 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
1021 rtnl_unlock(); 1266 rtnl_unlock();
1022 return ret; 1267 return ret;
1023 /* 1268 /*
@@ -1028,7 +1273,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1028 int v; 1273 int v;
1029 if (get_user(v,(int __user *)optval)) 1274 if (get_user(v,(int __user *)optval))
1030 return -EFAULT; 1275 return -EFAULT;
1031 net->ipv4.mroute_do_assert = (v) ? 1 : 0; 1276 mrt->mroute_do_assert = (v) ? 1 : 0;
1032 return 0; 1277 return 0;
1033 } 1278 }
1034#ifdef CONFIG_IP_PIMSM 1279#ifdef CONFIG_IP_PIMSM
@@ -1042,14 +1287,35 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1042 1287
1043 rtnl_lock(); 1288 rtnl_lock();
1044 ret = 0; 1289 ret = 0;
1045 if (v != net->ipv4.mroute_do_pim) { 1290 if (v != mrt->mroute_do_pim) {
1046 net->ipv4.mroute_do_pim = v; 1291 mrt->mroute_do_pim = v;
1047 net->ipv4.mroute_do_assert = v; 1292 mrt->mroute_do_assert = v;
1048 } 1293 }
1049 rtnl_unlock(); 1294 rtnl_unlock();
1050 return ret; 1295 return ret;
1051 } 1296 }
1052#endif 1297#endif
1298#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1299 case MRT_TABLE:
1300 {
1301 u32 v;
1302
1303 if (optlen != sizeof(u32))
1304 return -EINVAL;
1305 if (get_user(v, (u32 __user *)optval))
1306 return -EFAULT;
1307 if (sk == mrt->mroute_sk)
1308 return -EBUSY;
1309
1310 rtnl_lock();
1311 ret = 0;
1312 if (!ipmr_new_table(net, v))
1313 ret = -ENOMEM;
1314 raw_sk(sk)->ipmr_table = v;
1315 rtnl_unlock();
1316 return ret;
1317 }
1318#endif
1053 /* 1319 /*
1054 * Spurious command, or MRT_VERSION which you cannot 1320 * Spurious command, or MRT_VERSION which you cannot
1055 * set. 1321 * set.
@@ -1068,6 +1334,11 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
1068 int olr; 1334 int olr;
1069 int val; 1335 int val;
1070 struct net *net = sock_net(sk); 1336 struct net *net = sock_net(sk);
1337 struct mr_table *mrt;
1338
1339 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1340 if (mrt == NULL)
1341 return -ENOENT;
1071 1342
1072 if (optname != MRT_VERSION && 1343 if (optname != MRT_VERSION &&
1073#ifdef CONFIG_IP_PIMSM 1344#ifdef CONFIG_IP_PIMSM
@@ -1089,10 +1360,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
1089 val = 0x0305; 1360 val = 0x0305;
1090#ifdef CONFIG_IP_PIMSM 1361#ifdef CONFIG_IP_PIMSM
1091 else if (optname == MRT_PIM) 1362 else if (optname == MRT_PIM)
1092 val = net->ipv4.mroute_do_pim; 1363 val = mrt->mroute_do_pim;
1093#endif 1364#endif
1094 else 1365 else
1095 val = net->ipv4.mroute_do_assert; 1366 val = mrt->mroute_do_assert;
1096 if (copy_to_user(optval, &val, olr)) 1367 if (copy_to_user(optval, &val, olr))
1097 return -EFAULT; 1368 return -EFAULT;
1098 return 0; 1369 return 0;
@@ -1109,16 +1380,21 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1109 struct vif_device *vif; 1380 struct vif_device *vif;
1110 struct mfc_cache *c; 1381 struct mfc_cache *c;
1111 struct net *net = sock_net(sk); 1382 struct net *net = sock_net(sk);
1383 struct mr_table *mrt;
1384
1385 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1386 if (mrt == NULL)
1387 return -ENOENT;
1112 1388
1113 switch (cmd) { 1389 switch (cmd) {
1114 case SIOCGETVIFCNT: 1390 case SIOCGETVIFCNT:
1115 if (copy_from_user(&vr, arg, sizeof(vr))) 1391 if (copy_from_user(&vr, arg, sizeof(vr)))
1116 return -EFAULT; 1392 return -EFAULT;
1117 if (vr.vifi >= net->ipv4.maxvif) 1393 if (vr.vifi >= mrt->maxvif)
1118 return -EINVAL; 1394 return -EINVAL;
1119 read_lock(&mrt_lock); 1395 read_lock(&mrt_lock);
1120 vif = &net->ipv4.vif_table[vr.vifi]; 1396 vif = &mrt->vif_table[vr.vifi];
1121 if (VIF_EXISTS(net, vr.vifi)) { 1397 if (VIF_EXISTS(mrt, vr.vifi)) {
1122 vr.icount = vif->pkt_in; 1398 vr.icount = vif->pkt_in;
1123 vr.ocount = vif->pkt_out; 1399 vr.ocount = vif->pkt_out;
1124 vr.ibytes = vif->bytes_in; 1400 vr.ibytes = vif->bytes_in;
@@ -1136,7 +1412,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1136 return -EFAULT; 1412 return -EFAULT;
1137 1413
1138 read_lock(&mrt_lock); 1414 read_lock(&mrt_lock);
1139 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr); 1415 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1140 if (c) { 1416 if (c) {
1141 sr.pktcnt = c->mfc_un.res.pkt; 1417 sr.pktcnt = c->mfc_un.res.pkt;
1142 sr.bytecnt = c->mfc_un.res.bytes; 1418 sr.bytecnt = c->mfc_un.res.bytes;
@@ -1159,16 +1435,20 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
1159{ 1435{
1160 struct net_device *dev = ptr; 1436 struct net_device *dev = ptr;
1161 struct net *net = dev_net(dev); 1437 struct net *net = dev_net(dev);
1438 struct mr_table *mrt;
1162 struct vif_device *v; 1439 struct vif_device *v;
1163 int ct; 1440 int ct;
1164 LIST_HEAD(list); 1441 LIST_HEAD(list);
1165 1442
1166 if (event != NETDEV_UNREGISTER) 1443 if (event != NETDEV_UNREGISTER)
1167 return NOTIFY_DONE; 1444 return NOTIFY_DONE;
1168 v = &net->ipv4.vif_table[0]; 1445
1169 for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) { 1446 ipmr_for_each_table(mrt, net) {
1170 if (v->dev == dev) 1447 v = &mrt->vif_table[0];
1171 vif_delete(net, ct, 1, &list); 1448 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1449 if (v->dev == dev)
1450 vif_delete(mrt, ct, 1, &list);
1451 }
1172 } 1452 }
1173 unregister_netdevice_many(&list); 1453 unregister_netdevice_many(&list);
1174 return NOTIFY_DONE; 1454 return NOTIFY_DONE;
@@ -1227,11 +1507,11 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
1227 * Processing handlers for ipmr_forward 1507 * Processing handlers for ipmr_forward
1228 */ 1508 */
1229 1509
1230static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) 1510static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1511 struct sk_buff *skb, struct mfc_cache *c, int vifi)
1231{ 1512{
1232 struct net *net = mfc_net(c);
1233 const struct iphdr *iph = ip_hdr(skb); 1513 const struct iphdr *iph = ip_hdr(skb);
1234 struct vif_device *vif = &net->ipv4.vif_table[vifi]; 1514 struct vif_device *vif = &mrt->vif_table[vifi];
1235 struct net_device *dev; 1515 struct net_device *dev;
1236 struct rtable *rt; 1516 struct rtable *rt;
1237 int encap = 0; 1517 int encap = 0;
@@ -1245,7 +1525,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1245 vif->bytes_out += skb->len; 1525 vif->bytes_out += skb->len;
1246 vif->dev->stats.tx_bytes += skb->len; 1526 vif->dev->stats.tx_bytes += skb->len;
1247 vif->dev->stats.tx_packets++; 1527 vif->dev->stats.tx_packets++;
1248 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT); 1528 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1249 goto out_free; 1529 goto out_free;
1250 } 1530 }
1251#endif 1531#endif
@@ -1328,12 +1608,12 @@ out_free:
1328 return; 1608 return;
1329} 1609}
1330 1610
1331static int ipmr_find_vif(struct net_device *dev) 1611static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1332{ 1612{
1333 struct net *net = dev_net(dev);
1334 int ct; 1613 int ct;
1335 for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) { 1614
1336 if (net->ipv4.vif_table[ct].dev == dev) 1615 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1616 if (mrt->vif_table[ct].dev == dev)
1337 break; 1617 break;
1338 } 1618 }
1339 return ct; 1619 return ct;
@@ -1341,11 +1621,12 @@ static int ipmr_find_vif(struct net_device *dev)
1341 1621
1342/* "local" means that we should preserve one skb (for local delivery) */ 1622/* "local" means that we should preserve one skb (for local delivery) */
1343 1623
1344static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local) 1624static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1625 struct sk_buff *skb, struct mfc_cache *cache,
1626 int local)
1345{ 1627{
1346 int psend = -1; 1628 int psend = -1;
1347 int vif, ct; 1629 int vif, ct;
1348 struct net *net = mfc_net(cache);
1349 1630
1350 vif = cache->mfc_parent; 1631 vif = cache->mfc_parent;
1351 cache->mfc_un.res.pkt++; 1632 cache->mfc_un.res.pkt++;
@@ -1354,7 +1635,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1354 /* 1635 /*
1355 * Wrong interface: drop packet and (maybe) send PIM assert. 1636 * Wrong interface: drop packet and (maybe) send PIM assert.
1356 */ 1637 */
1357 if (net->ipv4.vif_table[vif].dev != skb->dev) { 1638 if (mrt->vif_table[vif].dev != skb->dev) {
1358 int true_vifi; 1639 int true_vifi;
1359 1640
1360 if (skb_rtable(skb)->fl.iif == 0) { 1641 if (skb_rtable(skb)->fl.iif == 0) {
@@ -1373,26 +1654,26 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1373 } 1654 }
1374 1655
1375 cache->mfc_un.res.wrong_if++; 1656 cache->mfc_un.res.wrong_if++;
1376 true_vifi = ipmr_find_vif(skb->dev); 1657 true_vifi = ipmr_find_vif(mrt, skb->dev);
1377 1658
1378 if (true_vifi >= 0 && net->ipv4.mroute_do_assert && 1659 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1379 /* pimsm uses asserts, when switching from RPT to SPT, 1660 /* pimsm uses asserts, when switching from RPT to SPT,
1380 so that we cannot check that packet arrived on an oif. 1661 so that we cannot check that packet arrived on an oif.
1381 It is bad, but otherwise we would need to move pretty 1662 It is bad, but otherwise we would need to move pretty
1382 large chunk of pimd to kernel. Ough... --ANK 1663 large chunk of pimd to kernel. Ough... --ANK
1383 */ 1664 */
1384 (net->ipv4.mroute_do_pim || 1665 (mrt->mroute_do_pim ||
1385 cache->mfc_un.res.ttls[true_vifi] < 255) && 1666 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1386 time_after(jiffies, 1667 time_after(jiffies,
1387 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { 1668 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1388 cache->mfc_un.res.last_assert = jiffies; 1669 cache->mfc_un.res.last_assert = jiffies;
1389 ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF); 1670 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1390 } 1671 }
1391 goto dont_forward; 1672 goto dont_forward;
1392 } 1673 }
1393 1674
1394 net->ipv4.vif_table[vif].pkt_in++; 1675 mrt->vif_table[vif].pkt_in++;
1395 net->ipv4.vif_table[vif].bytes_in += skb->len; 1676 mrt->vif_table[vif].bytes_in += skb->len;
1396 1677
1397 /* 1678 /*
1398 * Forward the frame 1679 * Forward the frame
@@ -1402,7 +1683,8 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1402 if (psend != -1) { 1683 if (psend != -1) {
1403 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1684 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1404 if (skb2) 1685 if (skb2)
1405 ipmr_queue_xmit(skb2, cache, psend); 1686 ipmr_queue_xmit(net, mrt, skb2, cache,
1687 psend);
1406 } 1688 }
1407 psend = ct; 1689 psend = ct;
1408 } 1690 }
@@ -1411,9 +1693,9 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1411 if (local) { 1693 if (local) {
1412 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1694 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1413 if (skb2) 1695 if (skb2)
1414 ipmr_queue_xmit(skb2, cache, psend); 1696 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1415 } else { 1697 } else {
1416 ipmr_queue_xmit(skb, cache, psend); 1698 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1417 return 0; 1699 return 0;
1418 } 1700 }
1419 } 1701 }
@@ -1434,6 +1716,8 @@ int ip_mr_input(struct sk_buff *skb)
1434 struct mfc_cache *cache; 1716 struct mfc_cache *cache;
1435 struct net *net = dev_net(skb->dev); 1717 struct net *net = dev_net(skb->dev);
1436 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 1718 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1719 struct mr_table *mrt;
1720 int err;
1437 1721
1438 /* Packet is looped back after forward, it should not be 1722 /* Packet is looped back after forward, it should not be
1439 forwarded second time, but still can be delivered locally. 1723 forwarded second time, but still can be delivered locally.
@@ -1441,6 +1725,10 @@ int ip_mr_input(struct sk_buff *skb)
1441 if (IPCB(skb)->flags&IPSKB_FORWARDED) 1725 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1442 goto dont_forward; 1726 goto dont_forward;
1443 1727
1728 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
1729 if (err < 0)
1730 return err;
1731
1444 if (!local) { 1732 if (!local) {
1445 if (IPCB(skb)->opt.router_alert) { 1733 if (IPCB(skb)->opt.router_alert) {
1446 if (ip_call_ra_chain(skb)) 1734 if (ip_call_ra_chain(skb))
@@ -1453,9 +1741,9 @@ int ip_mr_input(struct sk_buff *skb)
1453 that we can forward NO IGMP messages. 1741 that we can forward NO IGMP messages.
1454 */ 1742 */
1455 read_lock(&mrt_lock); 1743 read_lock(&mrt_lock);
1456 if (net->ipv4.mroute_sk) { 1744 if (mrt->mroute_sk) {
1457 nf_reset(skb); 1745 nf_reset(skb);
1458 raw_rcv(net->ipv4.mroute_sk, skb); 1746 raw_rcv(mrt->mroute_sk, skb);
1459 read_unlock(&mrt_lock); 1747 read_unlock(&mrt_lock);
1460 return 0; 1748 return 0;
1461 } 1749 }
@@ -1464,7 +1752,7 @@ int ip_mr_input(struct sk_buff *skb)
1464 } 1752 }
1465 1753
1466 read_lock(&mrt_lock); 1754 read_lock(&mrt_lock);
1467 cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 1755 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1468 1756
1469 /* 1757 /*
1470 * No usable cache entry 1758 * No usable cache entry
@@ -1482,9 +1770,9 @@ int ip_mr_input(struct sk_buff *skb)
1482 skb = skb2; 1770 skb = skb2;
1483 } 1771 }
1484 1772
1485 vif = ipmr_find_vif(skb->dev); 1773 vif = ipmr_find_vif(mrt, skb->dev);
1486 if (vif >= 0) { 1774 if (vif >= 0) {
1487 int err = ipmr_cache_unresolved(net, vif, skb); 1775 int err = ipmr_cache_unresolved(mrt, vif, skb);
1488 read_unlock(&mrt_lock); 1776 read_unlock(&mrt_lock);
1489 1777
1490 return err; 1778 return err;
@@ -1494,7 +1782,7 @@ int ip_mr_input(struct sk_buff *skb)
1494 return -ENODEV; 1782 return -ENODEV;
1495 } 1783 }
1496 1784
1497 ip_mr_forward(skb, cache, local); 1785 ip_mr_forward(net, mrt, skb, cache, local);
1498 1786
1499 read_unlock(&mrt_lock); 1787 read_unlock(&mrt_lock);
1500 1788
@@ -1511,11 +1799,11 @@ dont_forward:
1511} 1799}
1512 1800
1513#ifdef CONFIG_IP_PIMSM 1801#ifdef CONFIG_IP_PIMSM
1514static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) 1802static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1803 unsigned int pimlen)
1515{ 1804{
1516 struct net_device *reg_dev = NULL; 1805 struct net_device *reg_dev = NULL;
1517 struct iphdr *encap; 1806 struct iphdr *encap;
1518 struct net *net = dev_net(skb->dev);
1519 1807
1520 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); 1808 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1521 /* 1809 /*
@@ -1530,8 +1818,8 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1530 return 1; 1818 return 1;
1531 1819
1532 read_lock(&mrt_lock); 1820 read_lock(&mrt_lock);
1533 if (net->ipv4.mroute_reg_vif_num >= 0) 1821 if (mrt->mroute_reg_vif_num >= 0)
1534 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev; 1822 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1535 if (reg_dev) 1823 if (reg_dev)
1536 dev_hold(reg_dev); 1824 dev_hold(reg_dev);
1537 read_unlock(&mrt_lock); 1825 read_unlock(&mrt_lock);
@@ -1566,17 +1854,21 @@ int pim_rcv_v1(struct sk_buff * skb)
1566{ 1854{
1567 struct igmphdr *pim; 1855 struct igmphdr *pim;
1568 struct net *net = dev_net(skb->dev); 1856 struct net *net = dev_net(skb->dev);
1857 struct mr_table *mrt;
1569 1858
1570 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 1859 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1571 goto drop; 1860 goto drop;
1572 1861
1573 pim = igmp_hdr(skb); 1862 pim = igmp_hdr(skb);
1574 1863
1575 if (!net->ipv4.mroute_do_pim || 1864 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1865 goto drop;
1866
1867 if (!mrt->mroute_do_pim ||
1576 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 1868 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1577 goto drop; 1869 goto drop;
1578 1870
1579 if (__pim_rcv(skb, sizeof(*pim))) { 1871 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1580drop: 1872drop:
1581 kfree_skb(skb); 1873 kfree_skb(skb);
1582 } 1874 }
@@ -1588,6 +1880,8 @@ drop:
1588static int pim_rcv(struct sk_buff * skb) 1880static int pim_rcv(struct sk_buff * skb)
1589{ 1881{
1590 struct pimreghdr *pim; 1882 struct pimreghdr *pim;
1883 struct net *net = dev_net(skb->dev);
1884 struct mr_table *mrt;
1591 1885
1592 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 1886 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1593 goto drop; 1887 goto drop;
@@ -1599,7 +1893,10 @@ static int pim_rcv(struct sk_buff * skb)
1599 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 1893 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1600 goto drop; 1894 goto drop;
1601 1895
1602 if (__pim_rcv(skb, sizeof(*pim))) { 1896 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1897 goto drop;
1898
1899 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1603drop: 1900drop:
1604 kfree_skb(skb); 1901 kfree_skb(skb);
1605 } 1902 }
@@ -1608,28 +1905,31 @@ drop:
1608#endif 1905#endif
1609 1906
1610static int 1907static int
1611ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) 1908ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c,
1909 struct rtmsg *rtm)
1612{ 1910{
1613 int ct; 1911 int ct;
1614 struct rtnexthop *nhp; 1912 struct rtnexthop *nhp;
1615 struct net *net = mfc_net(c);
1616 struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1617 u8 *b = skb_tail_pointer(skb); 1913 u8 *b = skb_tail_pointer(skb);
1618 struct rtattr *mp_head; 1914 struct rtattr *mp_head;
1619 1915
1620 if (dev) 1916 /* If cache is unresolved, don't try to parse IIF and OIF */
1621 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex); 1917 if (c->mfc_parent > MAXVIFS)
1918 return -ENOENT;
1919
1920 if (VIF_EXISTS(mrt, c->mfc_parent))
1921 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
1622 1922
1623 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); 1923 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1624 1924
1625 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 1925 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1626 if (c->mfc_un.res.ttls[ct] < 255) { 1926 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1627 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) 1927 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1628 goto rtattr_failure; 1928 goto rtattr_failure;
1629 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 1929 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1630 nhp->rtnh_flags = 0; 1930 nhp->rtnh_flags = 0;
1631 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 1931 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1632 nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex; 1932 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1633 nhp->rtnh_len = sizeof(*nhp); 1933 nhp->rtnh_len = sizeof(*nhp);
1634 } 1934 }
1635 } 1935 }
@@ -1647,11 +1947,16 @@ int ipmr_get_route(struct net *net,
1647 struct sk_buff *skb, struct rtmsg *rtm, int nowait) 1947 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1648{ 1948{
1649 int err; 1949 int err;
1950 struct mr_table *mrt;
1650 struct mfc_cache *cache; 1951 struct mfc_cache *cache;
1651 struct rtable *rt = skb_rtable(skb); 1952 struct rtable *rt = skb_rtable(skb);
1652 1953
1954 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1955 if (mrt == NULL)
1956 return -ENOENT;
1957
1653 read_lock(&mrt_lock); 1958 read_lock(&mrt_lock);
1654 cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst); 1959 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1655 1960
1656 if (cache == NULL) { 1961 if (cache == NULL) {
1657 struct sk_buff *skb2; 1962 struct sk_buff *skb2;
@@ -1665,7 +1970,7 @@ int ipmr_get_route(struct net *net,
1665 } 1970 }
1666 1971
1667 dev = skb->dev; 1972 dev = skb->dev;
1668 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) { 1973 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
1669 read_unlock(&mrt_lock); 1974 read_unlock(&mrt_lock);
1670 return -ENODEV; 1975 return -ENODEV;
1671 } 1976 }
@@ -1682,14 +1987,14 @@ int ipmr_get_route(struct net *net,
1682 iph->saddr = rt->rt_src; 1987 iph->saddr = rt->rt_src;
1683 iph->daddr = rt->rt_dst; 1988 iph->daddr = rt->rt_dst;
1684 iph->version = 0; 1989 iph->version = 0;
1685 err = ipmr_cache_unresolved(net, vif, skb2); 1990 err = ipmr_cache_unresolved(mrt, vif, skb2);
1686 read_unlock(&mrt_lock); 1991 read_unlock(&mrt_lock);
1687 return err; 1992 return err;
1688 } 1993 }
1689 1994
1690 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) 1995 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1691 cache->mfc_flags |= MFC_NOTIFY; 1996 cache->mfc_flags |= MFC_NOTIFY;
1692 err = ipmr_fill_mroute(skb, cache, rtm); 1997 err = ipmr_fill_mroute(mrt, skb, cache, rtm);
1693 read_unlock(&mrt_lock); 1998 read_unlock(&mrt_lock);
1694 return err; 1999 return err;
1695} 2000}
@@ -1700,6 +2005,7 @@ int ipmr_get_route(struct net *net,
1700 */ 2005 */
1701struct ipmr_vif_iter { 2006struct ipmr_vif_iter {
1702 struct seq_net_private p; 2007 struct seq_net_private p;
2008 struct mr_table *mrt;
1703 int ct; 2009 int ct;
1704}; 2010};
1705 2011
@@ -1707,11 +2013,13 @@ static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1707 struct ipmr_vif_iter *iter, 2013 struct ipmr_vif_iter *iter,
1708 loff_t pos) 2014 loff_t pos)
1709{ 2015{
1710 for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) { 2016 struct mr_table *mrt = iter->mrt;
1711 if (!VIF_EXISTS(net, iter->ct)) 2017
2018 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2019 if (!VIF_EXISTS(mrt, iter->ct))
1712 continue; 2020 continue;
1713 if (pos-- == 0) 2021 if (pos-- == 0)
1714 return &net->ipv4.vif_table[iter->ct]; 2022 return &mrt->vif_table[iter->ct];
1715 } 2023 }
1716 return NULL; 2024 return NULL;
1717} 2025}
@@ -1719,7 +2027,15 @@ static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1719static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 2027static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1720 __acquires(mrt_lock) 2028 __acquires(mrt_lock)
1721{ 2029{
2030 struct ipmr_vif_iter *iter = seq->private;
1722 struct net *net = seq_file_net(seq); 2031 struct net *net = seq_file_net(seq);
2032 struct mr_table *mrt;
2033
2034 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2035 if (mrt == NULL)
2036 return ERR_PTR(-ENOENT);
2037
2038 iter->mrt = mrt;
1723 2039
1724 read_lock(&mrt_lock); 2040 read_lock(&mrt_lock);
1725 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1) 2041 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
@@ -1730,15 +2046,16 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1730{ 2046{
1731 struct ipmr_vif_iter *iter = seq->private; 2047 struct ipmr_vif_iter *iter = seq->private;
1732 struct net *net = seq_file_net(seq); 2048 struct net *net = seq_file_net(seq);
2049 struct mr_table *mrt = iter->mrt;
1733 2050
1734 ++*pos; 2051 ++*pos;
1735 if (v == SEQ_START_TOKEN) 2052 if (v == SEQ_START_TOKEN)
1736 return ipmr_vif_seq_idx(net, iter, 0); 2053 return ipmr_vif_seq_idx(net, iter, 0);
1737 2054
1738 while (++iter->ct < net->ipv4.maxvif) { 2055 while (++iter->ct < mrt->maxvif) {
1739 if (!VIF_EXISTS(net, iter->ct)) 2056 if (!VIF_EXISTS(mrt, iter->ct))
1740 continue; 2057 continue;
1741 return &net->ipv4.vif_table[iter->ct]; 2058 return &mrt->vif_table[iter->ct];
1742 } 2059 }
1743 return NULL; 2060 return NULL;
1744} 2061}
@@ -1751,7 +2068,8 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1751 2068
1752static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 2069static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1753{ 2070{
1754 struct net *net = seq_file_net(seq); 2071 struct ipmr_vif_iter *iter = seq->private;
2072 struct mr_table *mrt = iter->mrt;
1755 2073
1756 if (v == SEQ_START_TOKEN) { 2074 if (v == SEQ_START_TOKEN) {
1757 seq_puts(seq, 2075 seq_puts(seq,
@@ -1762,7 +2080,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1762 2080
1763 seq_printf(seq, 2081 seq_printf(seq,
1764 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 2082 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1765 vif - net->ipv4.vif_table, 2083 vif - mrt->vif_table,
1766 name, vif->bytes_in, vif->pkt_in, 2084 name, vif->bytes_in, vif->pkt_in,
1767 vif->bytes_out, vif->pkt_out, 2085 vif->bytes_out, vif->pkt_out,
1768 vif->flags, vif->local, vif->remote); 2086 vif->flags, vif->local, vif->remote);
@@ -1793,7 +2111,8 @@ static const struct file_operations ipmr_vif_fops = {
1793 2111
1794struct ipmr_mfc_iter { 2112struct ipmr_mfc_iter {
1795 struct seq_net_private p; 2113 struct seq_net_private p;
1796 struct mfc_cache **cache; 2114 struct mr_table *mrt;
2115 struct list_head *cache;
1797 int ct; 2116 int ct;
1798}; 2117};
1799 2118
@@ -1801,22 +2120,22 @@ struct ipmr_mfc_iter {
1801static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, 2120static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1802 struct ipmr_mfc_iter *it, loff_t pos) 2121 struct ipmr_mfc_iter *it, loff_t pos)
1803{ 2122{
2123 struct mr_table *mrt = it->mrt;
1804 struct mfc_cache *mfc; 2124 struct mfc_cache *mfc;
1805 2125
1806 it->cache = net->ipv4.mfc_cache_array;
1807 read_lock(&mrt_lock); 2126 read_lock(&mrt_lock);
1808 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) 2127 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
1809 for (mfc = net->ipv4.mfc_cache_array[it->ct]; 2128 it->cache = &mrt->mfc_cache_array[it->ct];
1810 mfc; mfc = mfc->next) 2129 list_for_each_entry(mfc, it->cache, list)
1811 if (pos-- == 0) 2130 if (pos-- == 0)
1812 return mfc; 2131 return mfc;
2132 }
1813 read_unlock(&mrt_lock); 2133 read_unlock(&mrt_lock);
1814 2134
1815 it->cache = &mfc_unres_queue;
1816 spin_lock_bh(&mfc_unres_lock); 2135 spin_lock_bh(&mfc_unres_lock);
1817 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) 2136 it->cache = &mrt->mfc_unres_queue;
1818 if (net_eq(mfc_net(mfc), net) && 2137 list_for_each_entry(mfc, it->cache, list)
1819 pos-- == 0) 2138 if (pos-- == 0)
1820 return mfc; 2139 return mfc;
1821 spin_unlock_bh(&mfc_unres_lock); 2140 spin_unlock_bh(&mfc_unres_lock);
1822 2141
@@ -1829,7 +2148,13 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1829{ 2148{
1830 struct ipmr_mfc_iter *it = seq->private; 2149 struct ipmr_mfc_iter *it = seq->private;
1831 struct net *net = seq_file_net(seq); 2150 struct net *net = seq_file_net(seq);
2151 struct mr_table *mrt;
2152
2153 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2154 if (mrt == NULL)
2155 return ERR_PTR(-ENOENT);
1832 2156
2157 it->mrt = mrt;
1833 it->cache = NULL; 2158 it->cache = NULL;
1834 it->ct = 0; 2159 it->ct = 0;
1835 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) 2160 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
@@ -1841,37 +2166,36 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1841 struct mfc_cache *mfc = v; 2166 struct mfc_cache *mfc = v;
1842 struct ipmr_mfc_iter *it = seq->private; 2167 struct ipmr_mfc_iter *it = seq->private;
1843 struct net *net = seq_file_net(seq); 2168 struct net *net = seq_file_net(seq);
2169 struct mr_table *mrt = it->mrt;
1844 2170
1845 ++*pos; 2171 ++*pos;
1846 2172
1847 if (v == SEQ_START_TOKEN) 2173 if (v == SEQ_START_TOKEN)
1848 return ipmr_mfc_seq_idx(net, seq->private, 0); 2174 return ipmr_mfc_seq_idx(net, seq->private, 0);
1849 2175
1850 if (mfc->next) 2176 if (mfc->list.next != it->cache)
1851 return mfc->next; 2177 return list_entry(mfc->list.next, struct mfc_cache, list);
1852 2178
1853 if (it->cache == &mfc_unres_queue) 2179 if (it->cache == &mrt->mfc_unres_queue)
1854 goto end_of_list; 2180 goto end_of_list;
1855 2181
1856 BUG_ON(it->cache != net->ipv4.mfc_cache_array); 2182 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
1857 2183
1858 while (++it->ct < MFC_LINES) { 2184 while (++it->ct < MFC_LINES) {
1859 mfc = net->ipv4.mfc_cache_array[it->ct]; 2185 it->cache = &mrt->mfc_cache_array[it->ct];
1860 if (mfc) 2186 if (list_empty(it->cache))
1861 return mfc; 2187 continue;
2188 return list_first_entry(it->cache, struct mfc_cache, list);
1862 } 2189 }
1863 2190
1864 /* exhausted cache_array, show unresolved */ 2191 /* exhausted cache_array, show unresolved */
1865 read_unlock(&mrt_lock); 2192 read_unlock(&mrt_lock);
1866 it->cache = &mfc_unres_queue; 2193 it->cache = &mrt->mfc_unres_queue;
1867 it->ct = 0; 2194 it->ct = 0;
1868 2195
1869 spin_lock_bh(&mfc_unres_lock); 2196 spin_lock_bh(&mfc_unres_lock);
1870 mfc = mfc_unres_queue; 2197 if (!list_empty(it->cache))
1871 while (mfc && !net_eq(mfc_net(mfc), net)) 2198 return list_first_entry(it->cache, struct mfc_cache, list);
1872 mfc = mfc->next;
1873 if (mfc)
1874 return mfc;
1875 2199
1876 end_of_list: 2200 end_of_list:
1877 spin_unlock_bh(&mfc_unres_lock); 2201 spin_unlock_bh(&mfc_unres_lock);
@@ -1883,18 +2207,17 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1883static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) 2207static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1884{ 2208{
1885 struct ipmr_mfc_iter *it = seq->private; 2209 struct ipmr_mfc_iter *it = seq->private;
1886 struct net *net = seq_file_net(seq); 2210 struct mr_table *mrt = it->mrt;
1887 2211
1888 if (it->cache == &mfc_unres_queue) 2212 if (it->cache == &mrt->mfc_unres_queue)
1889 spin_unlock_bh(&mfc_unres_lock); 2213 spin_unlock_bh(&mfc_unres_lock);
1890 else if (it->cache == net->ipv4.mfc_cache_array) 2214 else if (it->cache == &mrt->mfc_cache_array[it->ct])
1891 read_unlock(&mrt_lock); 2215 read_unlock(&mrt_lock);
1892} 2216}
1893 2217
1894static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 2218static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1895{ 2219{
1896 int n; 2220 int n;
1897 struct net *net = seq_file_net(seq);
1898 2221
1899 if (v == SEQ_START_TOKEN) { 2222 if (v == SEQ_START_TOKEN) {
1900 seq_puts(seq, 2223 seq_puts(seq,
@@ -1902,20 +2225,21 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1902 } else { 2225 } else {
1903 const struct mfc_cache *mfc = v; 2226 const struct mfc_cache *mfc = v;
1904 const struct ipmr_mfc_iter *it = seq->private; 2227 const struct ipmr_mfc_iter *it = seq->private;
2228 const struct mr_table *mrt = it->mrt;
1905 2229
1906 seq_printf(seq, "%08lX %08lX %-3hd", 2230 seq_printf(seq, "%08lX %08lX %-3hd",
1907 (unsigned long) mfc->mfc_mcastgrp, 2231 (unsigned long) mfc->mfc_mcastgrp,
1908 (unsigned long) mfc->mfc_origin, 2232 (unsigned long) mfc->mfc_origin,
1909 mfc->mfc_parent); 2233 mfc->mfc_parent);
1910 2234
1911 if (it->cache != &mfc_unres_queue) { 2235 if (it->cache != &mrt->mfc_unres_queue) {
1912 seq_printf(seq, " %8lu %8lu %8lu", 2236 seq_printf(seq, " %8lu %8lu %8lu",
1913 mfc->mfc_un.res.pkt, 2237 mfc->mfc_un.res.pkt,
1914 mfc->mfc_un.res.bytes, 2238 mfc->mfc_un.res.bytes,
1915 mfc->mfc_un.res.wrong_if); 2239 mfc->mfc_un.res.wrong_if);
1916 for (n = mfc->mfc_un.res.minvif; 2240 for (n = mfc->mfc_un.res.minvif;
1917 n < mfc->mfc_un.res.maxvif; n++ ) { 2241 n < mfc->mfc_un.res.maxvif; n++ ) {
1918 if (VIF_EXISTS(net, n) && 2242 if (VIF_EXISTS(mrt, n) &&
1919 mfc->mfc_un.res.ttls[n] < 255) 2243 mfc->mfc_un.res.ttls[n] < 255)
1920 seq_printf(seq, 2244 seq_printf(seq,
1921 " %2d:%-3d", 2245 " %2d:%-3d",
@@ -1967,27 +2291,11 @@ static const struct net_protocol pim_protocol = {
1967 */ 2291 */
1968static int __net_init ipmr_net_init(struct net *net) 2292static int __net_init ipmr_net_init(struct net *net)
1969{ 2293{
1970 int err = 0; 2294 int err;
1971 2295
1972 net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device), 2296 err = ipmr_rules_init(net);
1973 GFP_KERNEL); 2297 if (err < 0)
1974 if (!net->ipv4.vif_table) {
1975 err = -ENOMEM;
1976 goto fail; 2298 goto fail;
1977 }
1978
1979 /* Forwarding cache */
1980 net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1981 sizeof(struct mfc_cache *),
1982 GFP_KERNEL);
1983 if (!net->ipv4.mfc_cache_array) {
1984 err = -ENOMEM;
1985 goto fail_mfc_cache;
1986 }
1987
1988#ifdef CONFIG_IP_PIMSM
1989 net->ipv4.mroute_reg_vif_num = -1;
1990#endif
1991 2299
1992#ifdef CONFIG_PROC_FS 2300#ifdef CONFIG_PROC_FS
1993 err = -ENOMEM; 2301 err = -ENOMEM;
@@ -2002,10 +2310,8 @@ static int __net_init ipmr_net_init(struct net *net)
2002proc_cache_fail: 2310proc_cache_fail:
2003 proc_net_remove(net, "ip_mr_vif"); 2311 proc_net_remove(net, "ip_mr_vif");
2004proc_vif_fail: 2312proc_vif_fail:
2005 kfree(net->ipv4.mfc_cache_array); 2313 ipmr_rules_exit(net);
2006#endif 2314#endif
2007fail_mfc_cache:
2008 kfree(net->ipv4.vif_table);
2009fail: 2315fail:
2010 return err; 2316 return err;
2011} 2317}
@@ -2016,8 +2322,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
2016 proc_net_remove(net, "ip_mr_cache"); 2322 proc_net_remove(net, "ip_mr_cache");
2017 proc_net_remove(net, "ip_mr_vif"); 2323 proc_net_remove(net, "ip_mr_vif");
2018#endif 2324#endif
2019 kfree(net->ipv4.mfc_cache_array); 2325 ipmr_rules_exit(net);
2020 kfree(net->ipv4.vif_table);
2021} 2326}
2022 2327
2023static struct pernet_operations ipmr_net_ops = { 2328static struct pernet_operations ipmr_net_ops = {
@@ -2040,7 +2345,6 @@ int __init ip_mr_init(void)
2040 if (err) 2345 if (err)
2041 goto reg_pernet_fail; 2346 goto reg_pernet_fail;
2042 2347
2043 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2044 err = register_netdevice_notifier(&ip_mr_notifier); 2348 err = register_netdevice_notifier(&ip_mr_notifier);
2045 if (err) 2349 if (err)
2046 goto reg_notif_fail; 2350 goto reg_notif_fail;
@@ -2058,7 +2362,6 @@ add_proto_fail:
2058 unregister_netdevice_notifier(&ip_mr_notifier); 2362 unregister_netdevice_notifier(&ip_mr_notifier);
2059#endif 2363#endif
2060reg_notif_fail: 2364reg_notif_fail:
2061 del_timer(&ipmr_expire_timer);
2062 unregister_pernet_subsys(&ipmr_net_ops); 2365 unregister_pernet_subsys(&ipmr_net_ops);
2063reg_pernet_fail: 2366reg_pernet_fail:
2064 kmem_cache_destroy(mrt_cachep); 2367 kmem_cache_destroy(mrt_cachep);
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index c14623fc4d5e..82fb43c5c59e 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -4,6 +4,7 @@
4#include <linux/netfilter_ipv4.h> 4#include <linux/netfilter_ipv4.h>
5#include <linux/ip.h> 5#include <linux/ip.h>
6#include <linux/skbuff.h> 6#include <linux/skbuff.h>
7#include <linux/gfp.h>
7#include <net/route.h> 8#include <net/route.h>
8#include <net/xfrm.h> 9#include <net/xfrm.h>
9#include <net/ip.h> 10#include <net/ip.h>
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index bfe26f32b930..79ca5e70d497 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -8,6 +8,7 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/netfilter/x_tables.h> 9#include <linux/netfilter/x_tables.h>
10#include <linux/netfilter_arp/arp_tables.h> 10#include <linux/netfilter_arp/arp_tables.h>
11#include <linux/slab.h>
11 12
12MODULE_LICENSE("GPL"); 13MODULE_LICENSE("GPL");
13MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); 14MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index d781513282d4..c838238104f5 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -26,6 +26,7 @@
26#include <linux/security.h> 26#include <linux/security.h>
27#include <linux/net.h> 27#include <linux/net.h>
28#include <linux/mutex.h> 28#include <linux/mutex.h>
29#include <linux/slab.h>
29#include <net/net_namespace.h> 30#include <net/net_namespace.h>
30#include <net/sock.h> 31#include <net/sock.h>
31#include <net/route.h> 32#include <net/route.h>
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index c6be74e57264..8815d458de46 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -15,6 +15,7 @@
15#include <linux/jhash.h> 15#include <linux/jhash.h>
16#include <linux/bitops.h> 16#include <linux/bitops.h>
17#include <linux/skbuff.h> 17#include <linux/skbuff.h>
18#include <linux/slab.h>
18#include <linux/ip.h> 19#include <linux/ip.h>
19#include <linux/tcp.h> 20#include <linux/tcp.h>
20#include <linux/udp.h> 21#include <linux/udp.h>
@@ -88,7 +89,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
88 list_del(&c->list); 89 list_del(&c->list);
89 write_unlock_bh(&clusterip_lock); 90 write_unlock_bh(&clusterip_lock);
90 91
91 dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); 92 dev_mc_del(c->dev, c->clustermac);
92 dev_put(c->dev); 93 dev_put(c->dev);
93 94
94 /* In case anyone still accesses the file, the open/close 95 /* In case anyone still accesses the file, the open/close
@@ -397,7 +398,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
397 dev_put(dev); 398 dev_put(dev);
398 return -ENOMEM; 399 return -ENOMEM;
399 } 400 }
400 dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); 401 dev_mc_add(config->dev, config->clustermac);
401 } 402 }
402 } 403 }
403 cipinfo->config = config; 404 cipinfo->config = config;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 038fa0bb8f6b..a86135a28058 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -12,6 +12,7 @@
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/slab.h>
15#include <linux/ip.h> 16#include <linux/ip.h>
16#include <linux/udp.h> 17#include <linux/udp.h>
17#include <linux/icmp.h> 18#include <linux/icmp.h>
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 04c86dc5d538..8f60749e87a3 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -33,6 +33,7 @@
33#include <linux/module.h> 33#include <linux/module.h>
34#include <linux/spinlock.h> 34#include <linux/spinlock.h>
35#include <linux/socket.h> 35#include <linux/socket.h>
36#include <linux/slab.h>
36#include <linux/skbuff.h> 37#include <linux/skbuff.h>
37#include <linux/kernel.h> 38#include <linux/kernel.h>
38#include <linux/timer.h> 39#include <linux/timer.h>
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index c8dc9800d620..55392466daa4 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -13,6 +13,7 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/moduleparam.h> 14#include <linux/moduleparam.h>
15#include <linux/netfilter_ipv4/ip_tables.h> 15#include <linux/netfilter_ipv4/ip_tables.h>
16#include <linux/slab.h>
16#include <net/ip.h> 17#include <net/ip.h>
17 18
18MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index b9b83464cbf4..294a2a32f293 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -12,6 +12,7 @@
12#include <linux/netfilter_ipv4/ip_tables.h> 12#include <linux/netfilter_ipv4/ip_tables.h>
13#include <linux/netdevice.h> 13#include <linux/netdevice.h>
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/slab.h>
15#include <net/sock.h> 16#include <net/sock.h>
16#include <net/route.h> 17#include <net/route.h>
17#include <linux/ip.h> 18#include <linux/ip.h>
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 06fb9d11953c..07fb710cd722 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -5,6 +5,7 @@
5 */ 5 */
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/netfilter_ipv4/ip_tables.h> 7#include <linux/netfilter_ipv4/ip_tables.h>
8#include <linux/slab.h>
8#include <net/ip.h> 9#include <net/ip.h>
9 10
10#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) 11#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index cce2f64e6f21..be45bdc4c602 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -17,6 +17,7 @@
17 */ 17 */
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/netfilter_ipv4/ip_tables.h> 19#include <linux/netfilter_ipv4/ip_tables.h>
20#include <linux/slab.h>
20#include <net/ip.h> 21#include <net/ip.h>
21 22
22MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 4595281c2863..4f8bddb760c9 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -12,6 +12,7 @@
12#include <linux/types.h> 12#include <linux/types.h>
13#include <linux/timer.h> 13#include <linux/timer.h>
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/gfp.h>
15#include <net/checksum.h> 16#include <net/checksum.h>
16#include <net/icmp.h> 17#include <net/icmp.h>
17#include <net/ip.h> 18#include <net/ip.h>
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 4b6af4bb1f50..4a0c6b548eee 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -8,6 +8,7 @@
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/gfp.h>
11#include <linux/kmod.h> 12#include <linux/kmod.h>
12#include <linux/types.h> 13#include <linux/types.h>
13#include <linux/timer.h> 14#include <linux/timer.h>
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index b66137c80bc7..b48a0fc3d9ed 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -16,6 +16,7 @@
16#include <linux/kmod.h> 16#include <linux/kmod.h>
17#include <linux/skbuff.h> 17#include <linux/skbuff.h>
18#include <linux/proc_fs.h> 18#include <linux/proc_fs.h>
19#include <linux/slab.h>
19#include <net/checksum.h> 20#include <net/checksum.h>
20#include <net/route.h> 21#include <net/route.h>
21#include <linux/bitops.h> 22#include <linux/bitops.h>
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 0b9c7ce3d6c5..4d85b6e55f29 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -43,6 +43,7 @@
43#include <linux/moduleparam.h> 43#include <linux/moduleparam.h>
44#include <linux/types.h> 44#include <linux/types.h>
45#include <linux/kernel.h> 45#include <linux/kernel.h>
46#include <linux/slab.h>
46#include <linux/in.h> 47#include <linux/in.h>
47#include <linux/ip.h> 48#include <linux/ip.h>
48#include <linux/udp.h> 49#include <linux/udp.h>
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 0b49248e34fa..84c7974f5830 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -7,6 +7,7 @@
7 */ 7 */
8#include <linux/types.h> 8#include <linux/types.h>
9#include <linux/icmp.h> 9#include <linux/icmp.h>
10#include <linux/gfp.h>
10#include <linux/ip.h> 11#include <linux/ip.h>
11#include <linux/netfilter.h> 12#include <linux/netfilter.h>
12#include <linux/netfilter_ipv4.h> 13#include <linux/netfilter_ipv4.h>
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4f1f337f4337..3dc9914c1dce 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -251,6 +251,7 @@ static const struct snmp_mib snmp4_net_list[] = {
251 SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), 251 SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK),
252 SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), 252 SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP),
253 SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), 253 SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
254 SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
254 SNMP_MIB_SENTINEL 255 SNMP_MIB_SENTINEL
255}; 256};
256 257
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 34d9adb83590..bbda0d5f9244 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -60,7 +60,6 @@
60#include <net/net_namespace.h> 60#include <net/net_namespace.h>
61#include <net/dst.h> 61#include <net/dst.h>
62#include <net/sock.h> 62#include <net/sock.h>
63#include <linux/gfp.h>
64#include <linux/ip.h> 63#include <linux/ip.h>
65#include <linux/net.h> 64#include <linux/net.h>
66#include <net/ip.h> 65#include <net/ip.h>
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a770df2493d2..cb562fdd9b9a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -90,6 +90,7 @@
90#include <linux/jhash.h> 90#include <linux/jhash.h>
91#include <linux/rcupdate.h> 91#include <linux/rcupdate.h>
92#include <linux/times.h> 92#include <linux/times.h>
93#include <linux/slab.h>
93#include <net/dst.h> 94#include <net/dst.h>
94#include <net/net_namespace.h> 95#include <net/net_namespace.h>
95#include <net/protocol.h> 96#include <net/protocol.h>
@@ -1097,7 +1098,7 @@ static int slow_chain_length(const struct rtable *head)
1097} 1098}
1098 1099
1099static int rt_intern_hash(unsigned hash, struct rtable *rt, 1100static int rt_intern_hash(unsigned hash, struct rtable *rt,
1100 struct rtable **rp, struct sk_buff *skb) 1101 struct rtable **rp, struct sk_buff *skb, int ifindex)
1101{ 1102{
1102 struct rtable *rth, **rthp; 1103 struct rtable *rth, **rthp;
1103 unsigned long now; 1104 unsigned long now;
@@ -1212,11 +1213,16 @@ restart:
1212 slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { 1213 slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
1213 struct net *net = dev_net(rt->u.dst.dev); 1214 struct net *net = dev_net(rt->u.dst.dev);
1214 int num = ++net->ipv4.current_rt_cache_rebuild_count; 1215 int num = ++net->ipv4.current_rt_cache_rebuild_count;
1215 if (!rt_caching(dev_net(rt->u.dst.dev))) { 1216 if (!rt_caching(net)) {
1216 printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", 1217 printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n",
1217 rt->u.dst.dev->name, num); 1218 rt->u.dst.dev->name, num);
1218 } 1219 }
1219 rt_emergency_hash_rebuild(dev_net(rt->u.dst.dev)); 1220 rt_emergency_hash_rebuild(net);
1221 spin_unlock_bh(rt_hash_lock_addr(hash));
1222
1223 hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
1224 ifindex, rt_genid(net));
1225 goto restart;
1220 } 1226 }
1221 } 1227 }
1222 1228
@@ -1441,7 +1447,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1441 dev_hold(rt->u.dst.dev); 1447 dev_hold(rt->u.dst.dev);
1442 if (rt->idev) 1448 if (rt->idev)
1443 in_dev_hold(rt->idev); 1449 in_dev_hold(rt->idev);
1444 rt->u.dst.obsolete = 0; 1450 rt->u.dst.obsolete = -1;
1445 rt->u.dst.lastuse = jiffies; 1451 rt->u.dst.lastuse = jiffies;
1446 rt->u.dst.path = &rt->u.dst; 1452 rt->u.dst.path = &rt->u.dst;
1447 rt->u.dst.neighbour = NULL; 1453 rt->u.dst.neighbour = NULL;
@@ -1477,7 +1483,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1477 &netevent); 1483 &netevent);
1478 1484
1479 rt_del(hash, rth); 1485 rt_del(hash, rth);
1480 if (!rt_intern_hash(hash, rt, &rt, NULL)) 1486 if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif))
1481 ip_rt_put(rt); 1487 ip_rt_put(rt);
1482 goto do_next; 1488 goto do_next;
1483 } 1489 }
@@ -1506,11 +1512,12 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1506 struct dst_entry *ret = dst; 1512 struct dst_entry *ret = dst;
1507 1513
1508 if (rt) { 1514 if (rt) {
1509 if (dst->obsolete) { 1515 if (dst->obsolete > 0) {
1510 ip_rt_put(rt); 1516 ip_rt_put(rt);
1511 ret = NULL; 1517 ret = NULL;
1512 } else if ((rt->rt_flags & RTCF_REDIRECTED) || 1518 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
1513 rt->u.dst.expires) { 1519 (rt->u.dst.expires &&
1520 time_after_eq(jiffies, rt->u.dst.expires))) {
1514 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, 1521 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
1515 rt->fl.oif, 1522 rt->fl.oif,
1516 rt_genid(dev_net(dst->dev))); 1523 rt_genid(dev_net(dst->dev)));
@@ -1726,7 +1733,9 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1726 1733
1727static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) 1734static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1728{ 1735{
1729 return NULL; 1736 if (rt_is_expired((struct rtable *)dst))
1737 return NULL;
1738 return dst;
1730} 1739}
1731 1740
1732static void ipv4_dst_destroy(struct dst_entry *dst) 1741static void ipv4_dst_destroy(struct dst_entry *dst)
@@ -1888,7 +1897,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1888 if (!rth) 1897 if (!rth)
1889 goto e_nobufs; 1898 goto e_nobufs;
1890 1899
1891 rth->u.dst.output= ip_rt_bug; 1900 rth->u.dst.output = ip_rt_bug;
1901 rth->u.dst.obsolete = -1;
1892 1902
1893 atomic_set(&rth->u.dst.__refcnt, 1); 1903 atomic_set(&rth->u.dst.__refcnt, 1);
1894 rth->u.dst.flags= DST_HOST; 1904 rth->u.dst.flags= DST_HOST;
@@ -1927,7 +1937,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1927 1937
1928 in_dev_put(in_dev); 1938 in_dev_put(in_dev);
1929 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); 1939 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
1930 return rt_intern_hash(hash, rth, NULL, skb); 1940 return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex);
1931 1941
1932e_nobufs: 1942e_nobufs:
1933 in_dev_put(in_dev); 1943 in_dev_put(in_dev);
@@ -2054,6 +2064,7 @@ static int __mkroute_input(struct sk_buff *skb,
2054 rth->fl.oif = 0; 2064 rth->fl.oif = 0;
2055 rth->rt_spec_dst= spec_dst; 2065 rth->rt_spec_dst= spec_dst;
2056 2066
2067 rth->u.dst.obsolete = -1;
2057 rth->u.dst.input = ip_forward; 2068 rth->u.dst.input = ip_forward;
2058 rth->u.dst.output = ip_output; 2069 rth->u.dst.output = ip_output;
2059 rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); 2070 rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));
@@ -2093,7 +2104,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
2093 /* put it into the cache */ 2104 /* put it into the cache */
2094 hash = rt_hash(daddr, saddr, fl->iif, 2105 hash = rt_hash(daddr, saddr, fl->iif,
2095 rt_genid(dev_net(rth->u.dst.dev))); 2106 rt_genid(dev_net(rth->u.dst.dev)));
2096 return rt_intern_hash(hash, rth, NULL, skb); 2107 return rt_intern_hash(hash, rth, NULL, skb, fl->iif);
2097} 2108}
2098 2109
2099/* 2110/*
@@ -2218,6 +2229,7 @@ local_input:
2218 goto e_nobufs; 2229 goto e_nobufs;
2219 2230
2220 rth->u.dst.output= ip_rt_bug; 2231 rth->u.dst.output= ip_rt_bug;
2232 rth->u.dst.obsolete = -1;
2221 rth->rt_genid = rt_genid(net); 2233 rth->rt_genid = rt_genid(net);
2222 2234
2223 atomic_set(&rth->u.dst.__refcnt, 1); 2235 atomic_set(&rth->u.dst.__refcnt, 1);
@@ -2249,7 +2261,7 @@ local_input:
2249 } 2261 }
2250 rth->rt_type = res.type; 2262 rth->rt_type = res.type;
2251 hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); 2263 hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
2252 err = rt_intern_hash(hash, rth, NULL, skb); 2264 err = rt_intern_hash(hash, rth, NULL, skb, fl.iif);
2253 goto done; 2265 goto done;
2254 2266
2255no_route: 2267no_route:
@@ -2444,6 +2456,7 @@ static int __mkroute_output(struct rtable **result,
2444 rth->rt_spec_dst= fl->fl4_src; 2456 rth->rt_spec_dst= fl->fl4_src;
2445 2457
2446 rth->u.dst.output=ip_output; 2458 rth->u.dst.output=ip_output;
2459 rth->u.dst.obsolete = -1;
2447 rth->rt_genid = rt_genid(dev_net(dev_out)); 2460 rth->rt_genid = rt_genid(dev_net(dev_out));
2448 2461
2449 RT_CACHE_STAT_INC(out_slow_tot); 2462 RT_CACHE_STAT_INC(out_slow_tot);
@@ -2495,7 +2508,7 @@ static int ip_mkroute_output(struct rtable **rp,
2495 if (err == 0) { 2508 if (err == 0) {
2496 hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, 2509 hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
2497 rt_genid(dev_net(dev_out))); 2510 rt_genid(dev_net(dev_out)));
2498 err = rt_intern_hash(hash, rth, rp, NULL); 2511 err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif);
2499 } 2512 }
2500 2513
2501 return err; 2514 return err;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c1bc074f61b7..1cd5c15174b8 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -12,6 +12,7 @@
12#include <linux/inetdevice.h> 12#include <linux/inetdevice.h>
13#include <linux/seqlock.h> 13#include <linux/seqlock.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/slab.h>
15#include <net/snmp.h> 16#include <net/snmp.h>
16#include <net/icmp.h> 17#include <net/icmp.h>
17#include <net/ip.h> 18#include <net/ip.h>
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5901010fad55..0f8caf64caa3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -265,6 +265,7 @@
265#include <linux/err.h> 265#include <linux/err.h>
266#include <linux/crypto.h> 266#include <linux/crypto.h>
267#include <linux/time.h> 267#include <linux/time.h>
268#include <linux/slab.h>
268 269
269#include <net/icmp.h> 270#include <net/icmp.h>
270#include <net/tcp.h> 271#include <net/tcp.h>
@@ -429,7 +430,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
429 if (tp->urg_seq == tp->copied_seq && 430 if (tp->urg_seq == tp->copied_seq &&
430 !sock_flag(sk, SOCK_URGINLINE) && 431 !sock_flag(sk, SOCK_URGINLINE) &&
431 tp->urg_data) 432 tp->urg_data)
432 target--; 433 target++;
433 434
434 /* Potential race condition. If read of tp below will 435 /* Potential race condition. If read of tp below will
435 * escape above sk->sk_state, we can be illegally awaken 436 * escape above sk->sk_state, we can be illegally awaken
@@ -1254,6 +1255,39 @@ static void tcp_prequeue_process(struct sock *sk)
1254 tp->ucopy.memory = 0; 1255 tp->ucopy.memory = 0;
1255} 1256}
1256 1257
1258#ifdef CONFIG_NET_DMA
1259static void tcp_service_net_dma(struct sock *sk, bool wait)
1260{
1261 dma_cookie_t done, used;
1262 dma_cookie_t last_issued;
1263 struct tcp_sock *tp = tcp_sk(sk);
1264
1265 if (!tp->ucopy.dma_chan)
1266 return;
1267
1268 last_issued = tp->ucopy.dma_cookie;
1269 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
1270
1271 do {
1272 if (dma_async_memcpy_complete(tp->ucopy.dma_chan,
1273 last_issued, &done,
1274 &used) == DMA_SUCCESS) {
1275 /* Safe to free early-copied skbs now */
1276 __skb_queue_purge(&sk->sk_async_wait_queue);
1277 break;
1278 } else {
1279 struct sk_buff *skb;
1280 while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
1281 (dma_async_is_complete(skb->dma_cookie, done,
1282 used) == DMA_SUCCESS)) {
1283 __skb_dequeue(&sk->sk_async_wait_queue);
1284 kfree_skb(skb);
1285 }
1286 }
1287 } while (wait);
1288}
1289#endif
1290
1257static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) 1291static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
1258{ 1292{
1259 struct sk_buff *skb; 1293 struct sk_buff *skb;
@@ -1335,6 +1369,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1335 sk_eat_skb(sk, skb, 0); 1369 sk_eat_skb(sk, skb, 0);
1336 if (!desc->count) 1370 if (!desc->count)
1337 break; 1371 break;
1372 tp->copied_seq = seq;
1338 } 1373 }
1339 tp->copied_seq = seq; 1374 tp->copied_seq = seq;
1340 1375
@@ -1546,6 +1581,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1546 /* __ Set realtime policy in scheduler __ */ 1581 /* __ Set realtime policy in scheduler __ */
1547 } 1582 }
1548 1583
1584#ifdef CONFIG_NET_DMA
1585 if (tp->ucopy.dma_chan)
1586 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
1587#endif
1549 if (copied >= target) { 1588 if (copied >= target) {
1550 /* Do not sleep, just process backlog. */ 1589 /* Do not sleep, just process backlog. */
1551 release_sock(sk); 1590 release_sock(sk);
@@ -1554,6 +1593,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1554 sk_wait_data(sk, &timeo); 1593 sk_wait_data(sk, &timeo);
1555 1594
1556#ifdef CONFIG_NET_DMA 1595#ifdef CONFIG_NET_DMA
1596 tcp_service_net_dma(sk, false); /* Don't block */
1557 tp->ucopy.wakeup = 0; 1597 tp->ucopy.wakeup = 0;
1558#endif 1598#endif
1559 1599
@@ -1633,6 +1673,9 @@ do_prequeue:
1633 copied = -EFAULT; 1673 copied = -EFAULT;
1634 break; 1674 break;
1635 } 1675 }
1676
1677 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
1678
1636 if ((offset + used) == skb->len) 1679 if ((offset + used) == skb->len)
1637 copied_early = 1; 1680 copied_early = 1;
1638 1681
@@ -1702,27 +1745,9 @@ skip_copy:
1702 } 1745 }
1703 1746
1704#ifdef CONFIG_NET_DMA 1747#ifdef CONFIG_NET_DMA
1705 if (tp->ucopy.dma_chan) { 1748 tcp_service_net_dma(sk, true); /* Wait for queue to drain */
1706 dma_cookie_t done, used; 1749 tp->ucopy.dma_chan = NULL;
1707
1708 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
1709
1710 while (dma_async_memcpy_complete(tp->ucopy.dma_chan,
1711 tp->ucopy.dma_cookie, &done,
1712 &used) == DMA_IN_PROGRESS) {
1713 /* do partial cleanup of sk_async_wait_queue */
1714 while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
1715 (dma_async_is_complete(skb->dma_cookie, done,
1716 used) == DMA_SUCCESS)) {
1717 __skb_dequeue(&sk->sk_async_wait_queue);
1718 kfree_skb(skb);
1719 }
1720 }
1721 1750
1722 /* Safe to free early-copied skbs now */
1723 __skb_queue_purge(&sk->sk_async_wait_queue);
1724 tp->ucopy.dma_chan = NULL;
1725 }
1726 if (tp->ucopy.pinned_list) { 1751 if (tp->ucopy.pinned_list) {
1727 dma_unpin_iovec_pages(tp->ucopy.pinned_list); 1752 dma_unpin_iovec_pages(tp->ucopy.pinned_list);
1728 tp->ucopy.pinned_list = NULL; 1753 tp->ucopy.pinned_list = NULL;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 6428b342b164..0ec9bd0ae94f 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -10,6 +10,7 @@
10#include <linux/mm.h> 10#include <linux/mm.h>
11#include <linux/types.h> 11#include <linux/types.h>
12#include <linux/list.h> 12#include <linux/list.h>
13#include <linux/gfp.h>
13#include <net/tcp.h> 14#include <net/tcp.h>
14 15
15int sysctl_tcp_max_ssthresh = 0; 16int sysctl_tcp_max_ssthresh = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 788851ca8c5d..ae3ec15fb630 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -62,6 +62,7 @@
62 */ 62 */
63 63
64#include <linux/mm.h> 64#include <linux/mm.h>
65#include <linux/slab.h>
65#include <linux/module.h> 66#include <linux/module.h>
66#include <linux/sysctl.h> 67#include <linux/sysctl.h>
67#include <linux/kernel.h> 68#include <linux/kernel.h>
@@ -2511,6 +2512,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
2511 int err; 2512 int err;
2512 unsigned int mss; 2513 unsigned int mss;
2513 2514
2515 if (packets == 0)
2516 return;
2517
2514 WARN_ON(packets > tp->packets_out); 2518 WARN_ON(packets > tp->packets_out);
2515 if (tp->lost_skb_hint) { 2519 if (tp->lost_skb_hint) {
2516 skb = tp->lost_skb_hint; 2520 skb = tp->lost_skb_hint;
@@ -3706,7 +3710,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3706 } 3710 }
3707 3711
3708 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) 3712 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3709 dst_confirm(sk->sk_dst_cache); 3713 dst_confirm(__sk_dst_get(sk));
3710 3714
3711 return 1; 3715 return 1;
3712 3716
@@ -4315,7 +4319,7 @@ static void tcp_ofo_queue(struct sock *sk)
4315 } 4319 }
4316 4320
4317 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { 4321 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4318 SOCK_DEBUG(sk, "ofo packet was already received \n"); 4322 SOCK_DEBUG(sk, "ofo packet was already received\n");
4319 __skb_unlink(skb, &tp->out_of_order_queue); 4323 __skb_unlink(skb, &tp->out_of_order_queue);
4320 __kfree_skb(skb); 4324 __kfree_skb(skb);
4321 continue; 4325 continue;
@@ -5829,7 +5833,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5829 if (tp->snd_una == tp->write_seq) { 5833 if (tp->snd_una == tp->write_seq) {
5830 tcp_set_state(sk, TCP_FIN_WAIT2); 5834 tcp_set_state(sk, TCP_FIN_WAIT2);
5831 sk->sk_shutdown |= SEND_SHUTDOWN; 5835 sk->sk_shutdown |= SEND_SHUTDOWN;
5832 dst_confirm(sk->sk_dst_cache); 5836 dst_confirm(__sk_dst_get(sk));
5833 5837
5834 if (!sock_flag(sk, SOCK_DEAD)) 5838 if (!sock_flag(sk, SOCK_DEAD))
5835 /* Wake up lingering close() */ 5839 /* Wake up lingering close() */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 70df40980a87..ad08392a738c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -60,6 +60,7 @@
60#include <linux/jhash.h> 60#include <linux/jhash.h>
61#include <linux/init.h> 61#include <linux/init.h>
62#include <linux/times.h> 62#include <linux/times.h>
63#include <linux/slab.h>
63 64
64#include <net/net_namespace.h> 65#include <net/net_namespace.h>
65#include <net/icmp.h> 66#include <net/icmp.h>
@@ -370,6 +371,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
370 if (sk->sk_state == TCP_CLOSE) 371 if (sk->sk_state == TCP_CLOSE)
371 goto out; 372 goto out;
372 373
374 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
375 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
376 goto out;
377 }
378
373 icsk = inet_csk(sk); 379 icsk = inet_csk(sk);
374 tp = tcp_sk(sk); 380 tp = tcp_sk(sk);
375 seq = ntohl(th->seq); 381 seq = ntohl(th->seq);
@@ -513,26 +519,31 @@ out:
513 sock_put(sk); 519 sock_put(sk);
514} 520}
515 521
516/* This routine computes an IPv4 TCP checksum. */ 522static void __tcp_v4_send_check(struct sk_buff *skb,
517void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) 523 __be32 saddr, __be32 daddr)
518{ 524{
519 struct inet_sock *inet = inet_sk(sk);
520 struct tcphdr *th = tcp_hdr(skb); 525 struct tcphdr *th = tcp_hdr(skb);
521 526
522 if (skb->ip_summed == CHECKSUM_PARTIAL) { 527 if (skb->ip_summed == CHECKSUM_PARTIAL) {
523 th->check = ~tcp_v4_check(len, inet->inet_saddr, 528 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
524 inet->inet_daddr, 0);
525 skb->csum_start = skb_transport_header(skb) - skb->head; 529 skb->csum_start = skb_transport_header(skb) - skb->head;
526 skb->csum_offset = offsetof(struct tcphdr, check); 530 skb->csum_offset = offsetof(struct tcphdr, check);
527 } else { 531 } else {
528 th->check = tcp_v4_check(len, inet->inet_saddr, 532 th->check = tcp_v4_check(skb->len, saddr, daddr,
529 inet->inet_daddr,
530 csum_partial(th, 533 csum_partial(th,
531 th->doff << 2, 534 th->doff << 2,
532 skb->csum)); 535 skb->csum));
533 } 536 }
534} 537}
535 538
539/* This routine computes an IPv4 TCP checksum. */
540void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
541{
542 struct inet_sock *inet = inet_sk(sk);
543
544 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
545}
546
536int tcp_v4_gso_send_check(struct sk_buff *skb) 547int tcp_v4_gso_send_check(struct sk_buff *skb)
537{ 548{
538 const struct iphdr *iph; 549 const struct iphdr *iph;
@@ -545,10 +556,8 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
545 th = tcp_hdr(skb); 556 th = tcp_hdr(skb);
546 557
547 th->check = 0; 558 th->check = 0;
548 th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
549 skb->csum_start = skb_transport_header(skb) - skb->head;
550 skb->csum_offset = offsetof(struct tcphdr, check);
551 skb->ip_summed = CHECKSUM_PARTIAL; 559 skb->ip_summed = CHECKSUM_PARTIAL;
560 __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
552 return 0; 561 return 0;
553} 562}
554 563
@@ -757,13 +766,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
757 skb = tcp_make_synack(sk, dst, req, rvp); 766 skb = tcp_make_synack(sk, dst, req, rvp);
758 767
759 if (skb) { 768 if (skb) {
760 struct tcphdr *th = tcp_hdr(skb); 769 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
761
762 th->check = tcp_v4_check(skb->len,
763 ireq->loc_addr,
764 ireq->rmt_addr,
765 csum_partial(th, skb->len,
766 skb->csum));
767 770
768 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, 771 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
769 ireq->rmt_addr, 772 ireq->rmt_addr,
@@ -1669,6 +1672,8 @@ process:
1669 1672
1670 skb->dev = NULL; 1673 skb->dev = NULL;
1671 1674
1675 inet_rps_save_rxhash(sk, skb->rxhash);
1676
1672 bh_lock_sock_nested(sk); 1677 bh_lock_sock_nested(sk);
1673 ret = 0; 1678 ret = 0;
1674 if (!sock_owned_by_user(sk)) { 1679 if (!sock_owned_by_user(sk)) {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4199bc6915c5..794c2e122a41 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -20,6 +20,7 @@
20 20
21#include <linux/mm.h> 21#include <linux/mm.h>
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/slab.h>
23#include <linux/sysctl.h> 24#include <linux/sysctl.h>
24#include <linux/workqueue.h> 25#include <linux/workqueue.h>
25#include <net/tcp.h> 26#include <net/tcp.h>
@@ -671,6 +672,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
671 if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && 672 if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
672 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { 673 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
673 inet_rsk(req)->acked = 1; 674 inet_rsk(req)->acked = 1;
675 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
674 return NULL; 676 return NULL;
675 } 677 }
676 678
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f181b78f2385..2b7d71fb8439 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -37,6 +37,7 @@
37#include <net/tcp.h> 37#include <net/tcp.h>
38 38
39#include <linux/compiler.h> 39#include <linux/compiler.h>
40#include <linux/gfp.h>
40#include <linux/module.h> 41#include <linux/module.h>
41 42
42/* People can turn this off for buggy TCP's found in printers etc. */ 43/* People can turn this off for buggy TCP's found in printers etc. */
@@ -349,6 +350,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
349 */ 350 */
350static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) 351static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
351{ 352{
353 skb->ip_summed = CHECKSUM_PARTIAL;
352 skb->csum = 0; 354 skb->csum = 0;
353 355
354 TCP_SKB_CB(skb)->flags = flags; 356 TCP_SKB_CB(skb)->flags = flags;
@@ -877,7 +879,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
877 } 879 }
878#endif 880#endif
879 881
880 icsk->icsk_af_ops->send_check(sk, skb->len, skb); 882 icsk->icsk_af_ops->send_check(sk, skb);
881 883
882 if (likely(tcb->flags & TCPCB_FLAG_ACK)) 884 if (likely(tcb->flags & TCPCB_FLAG_ACK))
883 tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); 885 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
@@ -888,7 +890,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
888 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) 890 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
889 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); 891 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
890 892
891 err = icsk->icsk_af_ops->queue_xmit(skb, 0); 893 err = icsk->icsk_af_ops->queue_xmit(skb);
892 if (likely(err <= 0)) 894 if (likely(err <= 0))
893 return err; 895 return err;
894 896
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 9bc805df95d2..f8efada580e8 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -22,6 +22,7 @@
22#include <linux/kprobes.h> 22#include <linux/kprobes.h>
23#include <linux/socket.h> 23#include <linux/socket.h>
24#include <linux/tcp.h> 24#include <linux/tcp.h>
25#include <linux/slab.h>
25#include <linux/proc_fs.h> 26#include <linux/proc_fs.h>
26#include <linux/module.h> 27#include <linux/module.h>
27#include <linux/ktime.h> 28#include <linux/ktime.h>
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b2e6bbccaee1..c732be00606b 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -19,6 +19,7 @@
19 */ 19 */
20 20
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/gfp.h>
22#include <net/tcp.h> 23#include <net/tcp.h>
23 24
24int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; 25int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
@@ -171,14 +172,14 @@ static int tcp_write_timeout(struct sock *sk)
171 172
172 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 173 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
173 if (icsk->icsk_retransmits) 174 if (icsk->icsk_retransmits)
174 dst_negative_advice(&sk->sk_dst_cache, sk); 175 dst_negative_advice(sk);
175 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 176 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
176 } else { 177 } else {
177 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { 178 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
178 /* Black hole detection */ 179 /* Black hole detection */
179 tcp_mtu_probing(icsk, sk); 180 tcp_mtu_probing(icsk, sk);
180 181
181 dst_negative_advice(&sk->sk_dst_cache, sk); 182 dst_negative_advice(sk);
182 } 183 }
183 184
184 retry_until = sysctl_tcp_retries2; 185 retry_until = sysctl_tcp_retries2;
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 3959e0ca456a..3b3813cc80b9 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -8,6 +8,7 @@
8#include <linux/mutex.h> 8#include <linux/mutex.h>
9#include <linux/netdevice.h> 9#include <linux/netdevice.h>
10#include <linux/skbuff.h> 10#include <linux/skbuff.h>
11#include <linux/slab.h>
11#include <net/icmp.h> 12#include <net/icmp.h>
12#include <net/ip.h> 13#include <net/ip.h>
13#include <net/protocol.h> 14#include <net/protocol.h>
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7af756d0f931..666b963496ff 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -95,6 +95,7 @@
95#include <linux/mm.h> 95#include <linux/mm.h>
96#include <linux/inet.h> 96#include <linux/inet.h>
97#include <linux/netdevice.h> 97#include <linux/netdevice.h>
98#include <linux/slab.h>
98#include <net/tcp_states.h> 99#include <net/tcp_states.h>
99#include <linux/skbuff.h> 100#include <linux/skbuff.h>
100#include <linux/proc_fs.h> 101#include <linux/proc_fs.h>
@@ -471,8 +472,8 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
471 if (hslot->count < hslot2->count) 472 if (hslot->count < hslot2->count)
472 goto begin; 473 goto begin;
473 474
474 result = udp4_lib_lookup2(net, INADDR_ANY, sport, 475 result = udp4_lib_lookup2(net, saddr, sport,
475 daddr, hnum, dif, 476 INADDR_ANY, hnum, dif,
476 hslot2, slot2); 477 hslot2, slot2);
477 } 478 }
478 rcu_read_unlock(); 479 rcu_read_unlock();
@@ -1216,6 +1217,7 @@ int udp_disconnect(struct sock *sk, int flags)
1216 sk->sk_state = TCP_CLOSE; 1217 sk->sk_state = TCP_CLOSE;
1217 inet->inet_daddr = 0; 1218 inet->inet_daddr = 0;
1218 inet->inet_dport = 0; 1219 inet->inet_dport = 0;
1220 inet_rps_save_rxhash(sk, 0);
1219 sk->sk_bound_dev_if = 0; 1221 sk->sk_bound_dev_if = 0;
1220 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 1222 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
1221 inet_reset_saddr(sk); 1223 inet_reset_saddr(sk);
@@ -1257,8 +1259,12 @@ EXPORT_SYMBOL(udp_lib_unhash);
1257 1259
1258static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 1260static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1259{ 1261{
1260 int rc = sock_queue_rcv_skb(sk, skb); 1262 int rc;
1263
1264 if (inet_sk(sk)->inet_daddr)
1265 inet_rps_save_rxhash(sk, skb->rxhash);
1261 1266
1267 rc = sock_queue_rcv_skb(sk, skb);
1262 if (rc < 0) { 1268 if (rc < 0) {
1263 int is_udplite = IS_UDPLITE(sk); 1269 int is_udplite = IS_UDPLITE(sk);
1264 1270
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index c3969e0f96c3..abcd7ed65db1 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -9,6 +9,7 @@
9 * 9 *
10 */ 10 */
11 11
12#include <linux/slab.h>
12#include <linux/module.h> 13#include <linux/module.h>
13#include <linux/string.h> 14#include <linux/string.h>
14#include <linux/netfilter.h> 15#include <linux/netfilter.h>
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 3444f3b34eca..6f368413eb0e 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -4,6 +4,7 @@
4 * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> 4 * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
5 */ 5 */
6 6
7#include <linux/gfp.h>
7#include <linux/init.h> 8#include <linux/init.h>
8#include <linux/kernel.h> 9#include <linux/kernel.h>
9#include <linux/module.h> 10#include <linux/module.h>
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index e4a1483fba77..1705476670ef 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -59,27 +59,6 @@ static int xfrm4_get_saddr(struct net *net,
59 return 0; 59 return 0;
60} 60}
61 61
62static struct dst_entry *
63__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
64{
65 struct dst_entry *dst;
66
67 read_lock_bh(&policy->lock);
68 for (dst = policy->bundles; dst; dst = dst->next) {
69 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
70 if (xdst->u.rt.fl.oif == fl->oif && /*XXX*/
71 xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
72 xdst->u.rt.fl.fl4_src == fl->fl4_src &&
73 xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
74 xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) {
75 dst_clone(dst);
76 break;
77 }
78 }
79 read_unlock_bh(&policy->lock);
80 return dst;
81}
82
83static int xfrm4_get_tos(struct flowi *fl) 62static int xfrm4_get_tos(struct flowi *fl)
84{ 63{
85 return fl->fl4_tos; 64 return fl->fl4_tos;
@@ -259,7 +238,6 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
259 .dst_ops = &xfrm4_dst_ops, 238 .dst_ops = &xfrm4_dst_ops,
260 .dst_lookup = xfrm4_dst_lookup, 239 .dst_lookup = xfrm4_dst_lookup,
261 .get_saddr = xfrm4_get_saddr, 240 .get_saddr = xfrm4_get_saddr,
262 .find_bundle = __xfrm4_find_bundle,
263 .decode_session = _decode_session4, 241 .decode_session = _decode_session4,
264 .get_tos = xfrm4_get_tos, 242 .get_tos = xfrm4_get_tos,
265 .init_path = xfrm4_init_path, 243 .init_path = xfrm4_init_path,