Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/Kconfig                    |  22
-rw-r--r--  net/ipv4/af_inet.c                  |  49
-rw-r--r--  net/ipv4/devinet.c                  |   4
-rw-r--r--  net/ipv4/fib_rules.c                |  22
-rw-r--r--  net/ipv4/icmp.c                     |   5
-rw-r--r--  net/ipv4/igmp.c                     |   4
-rw-r--r--  net/ipv4/inet_connection_sock.c     |   4
-rw-r--r--  net/ipv4/ip_output.c                |   4
-rw-r--r--  net/ipv4/ip_sockglue.c              |  20
-rw-r--r--  net/ipv4/ipconfig.c                 |   2
-rw-r--r--  net/ipv4/ipmr.c                     | 912
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c  |   4
-rw-r--r--  net/ipv4/proc.c                     |   1
-rw-r--r--  net/ipv4/raw.c                      |   2
-rw-r--r--  net/ipv4/route.c                    |  29
-rw-r--r--  net/ipv4/tcp.c                      |  19
-rw-r--r--  net/ipv4/tcp_input.c                |   7
-rw-r--r--  net/ipv4/tcp_ipv4.c                 |  37
-rw-r--r--  net/ipv4/tcp_minisocks.c            |   1
-rw-r--r--  net/ipv4/tcp_output.c               |  14
-rw-r--r--  net/ipv4/tcp_timer.c                |   8
-rw-r--r--  net/ipv4/udp.c                      |  31
-rw-r--r--  net/ipv4/xfrm4_policy.c             |  22
23 files changed, 815 insertions(+), 408 deletions(-)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 0c94a1ac2946..8e3a1fd938ab 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -250,6 +250,20 @@ config IP_MROUTE
	  <file:Documentation/networking/multicast.txt>. If you haven't heard
	  about it, you don't need it.
 
+config IP_MROUTE_MULTIPLE_TABLES
+	bool "IP: multicast policy routing"
+	depends on IP_MROUTE && IP_ADVANCED_ROUTER
+	select FIB_RULES
+	help
+	  Normally, a multicast router runs a userspace daemon and decides
+	  what to do with a multicast packet based on the source and
+	  destination addresses. If you say Y here, the multicast router
+	  will also be able to take interfaces and packet marks into
+	  account and run multiple instances of userspace daemons
+	  simultaneously, each one handling a single table.
+
+	  If unsure, say N.
+
 config IP_PIMSM_V1
	bool "IP: PIM-SM version 1 support"
	depends on IP_MROUTE
@@ -587,9 +601,15 @@ choice
	config DEFAULT_HTCP
		bool "Htcp" if TCP_CONG_HTCP=y
 
+	config DEFAULT_HYBLA
+		bool "Hybla" if TCP_CONG_HYBLA=y
+
	config DEFAULT_VEGAS
		bool "Vegas" if TCP_CONG_VEGAS=y
 
+	config DEFAULT_VENO
+		bool "Veno" if TCP_CONG_VENO=y
+
	config DEFAULT_WESTWOOD
		bool "Westwood" if TCP_CONG_WESTWOOD=y
 
@@ -610,8 +630,10 @@ config DEFAULT_TCP_CONG
	default "bic" if DEFAULT_BIC
	default "cubic" if DEFAULT_CUBIC
	default "htcp" if DEFAULT_HTCP
+	default "hybla" if DEFAULT_HYBLA
	default "vegas" if DEFAULT_VEGAS
	default "westwood" if DEFAULT_WESTWOOD
+	default "veno" if DEFAULT_VENO
	default "reno" if DEFAULT_RENO
	default "cubic"
 
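The new DEFAULT_HYBLA and DEFAULT_VENO entries only choose the compiled-in default; any built algorithm can still be selected per socket at runtime through the standard TCP_CONGESTION socket option. A minimal userspace sketch, illustrative only and not part of this patch (the function name is hypothetical; it assumes TCP_CONG_HYBLA is enabled):

#include <string.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Illustrative only: opt one socket in to Hybla regardless of the
 * DEFAULT_* choice compiled into the kernel. */
int tcp_socket_with_hybla(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	const char *algo = "hybla";

	if (fd >= 0 &&
	    setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION,
		       algo, strlen(algo)) < 0) {
		/* algorithm not available; the compiled default still applies */
	}
	return fd;
}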
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f71357422380..c6c43bcd1c6f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -154,7 +154,7 @@ void inet_sock_destruct(struct sock *sk)
	WARN_ON(sk->sk_forward_alloc);
 
	kfree(inet->opt);
-	dst_release(sk->sk_dst_cache);
+	dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
	sk_refcnt_debug_dec(sk);
 }
 EXPORT_SYMBOL(inet_sock_destruct);
@@ -419,6 +419,8 @@ int inet_release(struct socket *sock)
	if (sk) {
		long timeout;
 
+		sock_rps_reset_flow(sk);
+
		/* Applications forget to leave groups before exiting */
		ip_mc_drop_socket(sk);
 
@@ -546,7 +548,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo)
 {
	DEFINE_WAIT(wait);
 
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
	/* Basic assumption: if someone sets sk->sk_err, he _must_
	 * change state of the socket from TCP_SYN_*.
@@ -559,9 +561,9 @@ static long inet_wait_for_connect(struct sock *sk, long timeo)
		lock_sock(sk);
		if (signal_pending(current) || !timeo)
			break;
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
	return timeo;
 }
 
@@ -720,6 +722,8 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 {
	struct sock *sk = sock->sk;
 
+	sock_rps_record_flow(sk);
+
	/* We may need to bind the socket. */
	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
		return -EAGAIN;
@@ -728,12 +732,13 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 }
 EXPORT_SYMBOL(inet_sendmsg);
 
-
 static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
			     size_t size, int flags)
 {
	struct sock *sk = sock->sk;
 
+	sock_rps_record_flow(sk);
+
	/* We may need to bind the socket. */
	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
		return -EAGAIN;
@@ -743,6 +748,22 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
	return sock_no_sendpage(sock, page, offset, size, flags);
 }
 
+int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+		 size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	int addr_len = 0;
+	int err;
+
+	sock_rps_record_flow(sk);
+
+	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
+				   flags & ~MSG_DONTWAIT, &addr_len);
+	if (err >= 0)
+		msg->msg_namelen = addr_len;
+	return err;
+}
+EXPORT_SYMBOL(inet_recvmsg);
 
 int inet_shutdown(struct socket *sock, int how)
 {
@@ -872,7 +893,7 @@ const struct proto_ops inet_stream_ops = {
	.setsockopt	   = sock_common_setsockopt,
	.getsockopt	   = sock_common_getsockopt,
	.sendmsg	   = tcp_sendmsg,
-	.recvmsg	   = sock_common_recvmsg,
+	.recvmsg	   = inet_recvmsg,
	.mmap		   = sock_no_mmap,
	.sendpage	   = tcp_sendpage,
	.splice_read	   = tcp_splice_read,
@@ -899,7 +920,7 @@ const struct proto_ops inet_dgram_ops = {
	.setsockopt	   = sock_common_setsockopt,
	.getsockopt	   = sock_common_getsockopt,
	.sendmsg	   = inet_sendmsg,
-	.recvmsg	   = sock_common_recvmsg,
+	.recvmsg	   = inet_recvmsg,
	.mmap		   = sock_no_mmap,
	.sendpage	   = inet_sendpage,
 #ifdef CONFIG_COMPAT
@@ -929,7 +950,7 @@ static const struct proto_ops inet_sockraw_ops = {
	.setsockopt	   = sock_common_setsockopt,
	.getsockopt	   = sock_common_getsockopt,
	.sendmsg	   = inet_sendmsg,
-	.recvmsg	   = sock_common_recvmsg,
+	.recvmsg	   = inet_recvmsg,
	.mmap		   = sock_no_mmap,
	.sendpage	   = inet_sendpage,
 #ifdef CONFIG_COMPAT
@@ -1302,8 +1323,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
		goto out_unlock;
 
-	id = ntohl(*(u32 *)&iph->id);
-	flush = (u16)((ntohl(*(u32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF));
+	id = ntohl(*(__be32 *)&iph->id);
+	flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF));
	id >>= 16;
 
	for (p = *head; p; p = p->next) {
@@ -1316,8 +1337,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 
		if ((iph->protocol ^ iph2->protocol) |
		    (iph->tos ^ iph2->tos) |
-		    (iph->saddr ^ iph2->saddr) |
-		    (iph->daddr ^ iph2->daddr)) {
+		    ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
+		    ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
@@ -1407,10 +1428,10 @@ EXPORT_SYMBOL_GPL(snmp_fold_field);
 int snmp_mib_init(void __percpu *ptr[2], size_t mibsize)
 {
	BUG_ON(ptr == NULL);
-	ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
+	ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long));
	if (!ptr[0])
		goto err0;
-	ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
+	ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long));
	if (!ptr[1])
		goto err1;
	return 0;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 90e3d6379a42..382bc768ed56 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1096,10 +1096,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
-	case NETDEV_BONDING_OLDTYPE:
+	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
-	case NETDEV_BONDING_NEWTYPE:
+	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index ca2d07b1c706..76daeb5ff564 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -213,7 +213,6 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 {
	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
 
-	frh->family = AF_INET;
	frh->dst_len = rule4->dst_len;
	frh->src_len = rule4->src_len;
	frh->tos = rule4->tos;
@@ -234,23 +233,6 @@ nla_put_failure:
	return -ENOBUFS;
 }
 
-static u32 fib4_rule_default_pref(struct fib_rules_ops *ops)
-{
-	struct list_head *pos;
-	struct fib_rule *rule;
-
-	if (!list_empty(&ops->rules_list)) {
-		pos = ops->rules_list.next;
-		if (pos->next != &ops->rules_list) {
-			rule = list_entry(pos->next, struct fib_rule, list);
-			if (rule->pref)
-				return rule->pref - 1;
-		}
-	}
-
-	return 0;
-}
-
 static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
 {
	return nla_total_size(4) /* dst */
@@ -263,7 +245,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
	rt_cache_flush(ops->fro_net, -1);
 }
 
-static struct fib_rules_ops fib4_rules_ops_template = {
+static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = {
	.family		= AF_INET,
	.rule_size	= sizeof(struct fib4_rule),
	.addr_size	= sizeof(u32),
@@ -272,7 +254,7 @@ static struct fib_rules_ops fib4_rules_ops_template = {
	.configure	= fib4_rule_configure,
	.compare	= fib4_rule_compare,
	.fill		= fib4_rule_fill,
-	.default_pref	= fib4_rule_default_pref,
+	.default_pref	= fib_default_rule_pref,
	.nlmsg_payload	= fib4_rule_nlmsg_payload,
	.flush_cache	= fib4_rule_flush_cache,
	.nlgroup	= RTNLGRP_IPV4_RULE,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ac4dec132735..f3d339f728b0 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -331,9 +331,10 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
	if (ip_append_data(sk, icmp_glue_bits, icmp_param,
			   icmp_param->data_len+icmp_param->head_len,
			   icmp_param->head_len,
-			   ipc, rt, MSG_DONTWAIT) < 0)
+			   ipc, rt, MSG_DONTWAIT) < 0) {
+		ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS);
		ip_flush_pending_frames(sk);
-	else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
+	} else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
		struct icmphdr *icmph = icmp_hdr(skb);
		__wsum csum = 0;
		struct sk_buff *skb1;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 15d3eeda92f5..5fff865a4fa7 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -998,7 +998,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr)
	   --ANK
	   */
	if (arp_mc_map(addr, buf, dev, 0) == 0)
-		dev_mc_add(dev, buf, dev->addr_len, 0);
+		dev_mc_add(dev, buf);
 }
 
 /*
@@ -1011,7 +1011,7 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr)
	struct net_device *dev = in_dev->dev;
 
	if (arp_mc_map(addr, buf, dev, 0) == 0)
-		dev_mc_delete(dev, buf, dev->addr_len, 0);
+		dev_mc_del(dev, buf);
 }
 
 #ifdef CONFIG_IP_MULTICAST
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8da6429269dd..e0a3e3537b14 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -234,7 +234,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
	 * having to remove and re-insert us on the wait queue.
	 */
	for (;;) {
-		prepare_to_wait_exclusive(sk->sk_sleep, &wait,
+		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		release_sock(sk);
		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
@@ -253,7 +253,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
		if (!timeo)
			break;
	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
	return err;
 }
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d1bcc9f21d4f..f0392191740b 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -311,7 +311,7 @@ int ip_output(struct sk_buff *skb)
	       !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
 
-int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
+int ip_queue_xmit(struct sk_buff *skb)
 {
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
@@ -370,7 +370,7 @@ packet_routed:
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
-	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
+	if (ip_dont_fragment(sk, &rt->u.dst) && !skb->local_df)
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;
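With the ipfragok parameter gone, a transport that needs to permit local fragmentation marks the skb itself before handing it down, as the new !skb->local_df test above implies. A sketch of the calling convention (the wrapper function is hypothetical):

/* Hypothetical caller: request "may fragment" via the skb bit
 * instead of the removed ipfragok argument. */
static int xmit_allow_frag(struct sk_buff *skb)
{
	skb->local_df = 1;		/* was: ip_queue_xmit(skb, 1) */
	return ip_queue_xmit(skb);	/* new single-argument form */
}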
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 1e64dabbd232..ce231780a2b1 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -287,12 +287,8 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
		   __be16 port, u32 info, u8 *payload)
 {
-	struct inet_sock *inet = inet_sk(sk);
	struct sock_exterr_skb *serr;
 
-	if (!inet->recverr)
-		return;
-
	skb = skb_clone(skb, GFP_ATOMIC);
	if (!skb)
		return;
@@ -958,6 +954,22 @@ e_inval:
	return -EINVAL;
 }
 
+/**
+ *	ip_queue_rcv_skb - Queue an skb into sock receive queue
+ *	@sk: socket
+ *	@skb: buffer
+ *
+ *	Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option
+ *	is not set, we drop skb dst entry now, while dst cache line is hot.
+ */
+int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO))
+		skb_dst_drop(skb);
+	return sock_queue_rcv_skb(sk, skb);
+}
+EXPORT_SYMBOL(ip_queue_rcv_skb);
+
 int ip_setsockopt(struct sock *sk, int level,
		int optname, char __user *optval, unsigned int optlen)
 {
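A protocol receive path adopts the new helper by substituting it for a direct sock_queue_rcv_skb() call; raw.c and udp.c are converted this way elsewhere in this series (see the diffstat above). A sketch, with proto_deliver() as a hypothetical stand-in name:

/* Sketch: deliver to a socket, letting ip_queue_rcv_skb() drop the
 * dst reference early unless IP_PKTINFO was requested. */
static int proto_deliver(struct sock *sk, struct sk_buff *skb)
{
	int rc = ip_queue_rcv_skb(sk, skb);	/* was sock_queue_rcv_skb() */

	if (rc < 0)
		kfree_skb(skb);		/* caller still owns the skb on error */
	return rc;
}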
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 067ce9e043dc..b9d84e800cf4 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -976,7 +976,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
	/* Is it a reply for the device we are configuring? */
	if (b->xid != ic_dev_xid) {
		if (net_ratelimit())
-			printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n");
+			printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet\n");
		goto drop_unlock;
	}
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9d4f6d1340a4..eddfd12f55b8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -63,11 +63,40 @@
63#include <net/ipip.h> 63#include <net/ipip.h>
64#include <net/checksum.h> 64#include <net/checksum.h>
65#include <net/netlink.h> 65#include <net/netlink.h>
66#include <net/fib_rules.h>
66 67
67#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 68#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
68#define CONFIG_IP_PIMSM 1 69#define CONFIG_IP_PIMSM 1
69#endif 70#endif
70 71
72struct mr_table {
73 struct list_head list;
74#ifdef CONFIG_NET_NS
75 struct net *net;
76#endif
77 u32 id;
78 struct sock *mroute_sk;
79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES];
82 struct vif_device vif_table[MAXVIFS];
83 int maxvif;
84 atomic_t cache_resolve_queue_len;
85 int mroute_do_assert;
86 int mroute_do_pim;
87#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88 int mroute_reg_vif_num;
89#endif
90};
91
92struct ipmr_rule {
93 struct fib_rule common;
94};
95
96struct ipmr_result {
97 struct mr_table *mrt;
98};
99
71/* Big lock, protecting vif table, mrt cache and mroute socket state. 100/* Big lock, protecting vif table, mrt cache and mroute socket state.
72 Note that the changes are semaphored via rtnl_lock. 101 Note that the changes are semaphored via rtnl_lock.
73 */ 102 */
@@ -78,9 +107,7 @@ static DEFINE_RWLOCK(mrt_lock);
78 * Multicast router control variables 107 * Multicast router control variables
79 */ 108 */
80 109
81#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL) 110#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
82
83static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
84 111
85/* Special spinlock for queue of unresolved entries */ 112/* Special spinlock for queue of unresolved entries */
86static DEFINE_SPINLOCK(mfc_unres_lock); 113static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -95,12 +122,215 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
95 122
96static struct kmem_cache *mrt_cachep __read_mostly; 123static struct kmem_cache *mrt_cachep __read_mostly;
97 124
98static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); 125static struct mr_table *ipmr_new_table(struct net *net, u32 id);
99static int ipmr_cache_report(struct net *net, 126static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127 struct sk_buff *skb, struct mfc_cache *cache,
128 int local);
129static int ipmr_cache_report(struct mr_table *mrt,
100 struct sk_buff *pkt, vifi_t vifi, int assert); 130 struct sk_buff *pkt, vifi_t vifi, int assert);
101static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); 131static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132 struct mfc_cache *c, struct rtmsg *rtm);
133static void ipmr_expire_process(unsigned long arg);
134
135#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136#define ipmr_for_each_table(mrt, net) \
137 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
138
139static struct mr_table *ipmr_get_table(struct net *net, u32 id)
140{
141 struct mr_table *mrt;
142
143 ipmr_for_each_table(mrt, net) {
144 if (mrt->id == id)
145 return mrt;
146 }
147 return NULL;
148}
149
150static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151 struct mr_table **mrt)
152{
153 struct ipmr_result res;
154 struct fib_lookup_arg arg = { .result = &res, };
155 int err;
156
157 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
158 if (err < 0)
159 return err;
160 *mrt = res.mrt;
161 return 0;
162}
163
164static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165 int flags, struct fib_lookup_arg *arg)
166{
167 struct ipmr_result *res = arg->result;
168 struct mr_table *mrt;
169
170 switch (rule->action) {
171 case FR_ACT_TO_TBL:
172 break;
173 case FR_ACT_UNREACHABLE:
174 return -ENETUNREACH;
175 case FR_ACT_PROHIBIT:
176 return -EACCES;
177 case FR_ACT_BLACKHOLE:
178 default:
179 return -EINVAL;
180 }
181
182 mrt = ipmr_get_table(rule->fr_net, rule->table);
183 if (mrt == NULL)
184 return -EAGAIN;
185 res->mrt = mrt;
186 return 0;
187}
188
189static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
190{
191 return 1;
192}
102 193
103static struct timer_list ipmr_expire_timer; 194static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
195 FRA_GENERIC_POLICY,
196};
197
198static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
199 struct fib_rule_hdr *frh, struct nlattr **tb)
200{
201 return 0;
202}
203
204static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
205 struct nlattr **tb)
206{
207 return 1;
208}
209
210static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211 struct fib_rule_hdr *frh)
212{
213 frh->dst_len = 0;
214 frh->src_len = 0;
215 frh->tos = 0;
216 return 0;
217}
218
219static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
220 .family = RTNL_FAMILY_IPMR,
221 .rule_size = sizeof(struct ipmr_rule),
222 .addr_size = sizeof(u32),
223 .action = ipmr_rule_action,
224 .match = ipmr_rule_match,
225 .configure = ipmr_rule_configure,
226 .compare = ipmr_rule_compare,
227 .default_pref = fib_default_rule_pref,
228 .fill = ipmr_rule_fill,
229 .nlgroup = RTNLGRP_IPV4_RULE,
230 .policy = ipmr_rule_policy,
231 .owner = THIS_MODULE,
232};
233
234static int __net_init ipmr_rules_init(struct net *net)
235{
236 struct fib_rules_ops *ops;
237 struct mr_table *mrt;
238 int err;
239
240 ops = fib_rules_register(&ipmr_rules_ops_template, net);
241 if (IS_ERR(ops))
242 return PTR_ERR(ops);
243
244 INIT_LIST_HEAD(&net->ipv4.mr_tables);
245
246 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
247 if (mrt == NULL) {
248 err = -ENOMEM;
249 goto err1;
250 }
251
252 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
253 if (err < 0)
254 goto err2;
255
256 net->ipv4.mr_rules_ops = ops;
257 return 0;
258
259err2:
260 kfree(mrt);
261err1:
262 fib_rules_unregister(ops);
263 return err;
264}
265
266static void __net_exit ipmr_rules_exit(struct net *net)
267{
268 struct mr_table *mrt, *next;
269
270 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
271 kfree(mrt);
272 fib_rules_unregister(net->ipv4.mr_rules_ops);
273}
274#else
275#define ipmr_for_each_table(mrt, net) \
276 for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
277
278static struct mr_table *ipmr_get_table(struct net *net, u32 id)
279{
280 return net->ipv4.mrt;
281}
282
283static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
284 struct mr_table **mrt)
285{
286 *mrt = net->ipv4.mrt;
287 return 0;
288}
289
290static int __net_init ipmr_rules_init(struct net *net)
291{
292 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
293 return net->ipv4.mrt ? 0 : -ENOMEM;
294}
295
296static void __net_exit ipmr_rules_exit(struct net *net)
297{
298 kfree(net->ipv4.mrt);
299}
300#endif
301
302static struct mr_table *ipmr_new_table(struct net *net, u32 id)
303{
304 struct mr_table *mrt;
305 unsigned int i;
306
307 mrt = ipmr_get_table(net, id);
308 if (mrt != NULL)
309 return mrt;
310
311 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
312 if (mrt == NULL)
313 return NULL;
314 write_pnet(&mrt->net, net);
315 mrt->id = id;
316
317 /* Forwarding cache */
318 for (i = 0; i < MFC_LINES; i++)
319 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
320
321 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
322
323 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
324 (unsigned long)mrt);
325
326#ifdef CONFIG_IP_PIMSM
327 mrt->mroute_reg_vif_num = -1;
328#endif
329#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
330 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
331#endif
332 return mrt;
333}
104 334
105/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 335/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
106 336
@@ -201,12 +431,22 @@ failure:
201static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 431static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
202{ 432{
203 struct net *net = dev_net(dev); 433 struct net *net = dev_net(dev);
434 struct mr_table *mrt;
435 struct flowi fl = {
436 .oif = dev->ifindex,
437 .iif = skb->skb_iif,
438 .mark = skb->mark,
439 };
440 int err;
441
442 err = ipmr_fib_lookup(net, &fl, &mrt);
443 if (err < 0)
444 return err;
204 445
205 read_lock(&mrt_lock); 446 read_lock(&mrt_lock);
206 dev->stats.tx_bytes += skb->len; 447 dev->stats.tx_bytes += skb->len;
207 dev->stats.tx_packets++; 448 dev->stats.tx_packets++;
208 ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num, 449 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
209 IGMPMSG_WHOLEPKT);
210 read_unlock(&mrt_lock); 450 read_unlock(&mrt_lock);
211 kfree_skb(skb); 451 kfree_skb(skb);
212 return NETDEV_TX_OK; 452 return NETDEV_TX_OK;
@@ -226,12 +466,18 @@ static void reg_vif_setup(struct net_device *dev)
226 dev->features |= NETIF_F_NETNS_LOCAL; 466 dev->features |= NETIF_F_NETNS_LOCAL;
227} 467}
228 468
229static struct net_device *ipmr_reg_vif(struct net *net) 469static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
230{ 470{
231 struct net_device *dev; 471 struct net_device *dev;
232 struct in_device *in_dev; 472 struct in_device *in_dev;
473 char name[IFNAMSIZ];
233 474
234 dev = alloc_netdev(0, "pimreg", reg_vif_setup); 475 if (mrt->id == RT_TABLE_DEFAULT)
476 sprintf(name, "pimreg");
477 else
478 sprintf(name, "pimreg%u", mrt->id);
479
480 dev = alloc_netdev(0, name, reg_vif_setup);
235 481
236 if (dev == NULL) 482 if (dev == NULL)
237 return NULL; 483 return NULL;
@@ -276,17 +522,17 @@ failure:
276 * @notify: Set to 1, if the caller is a notifier_call 522 * @notify: Set to 1, if the caller is a notifier_call
277 */ 523 */
278 524
279static int vif_delete(struct net *net, int vifi, int notify, 525static int vif_delete(struct mr_table *mrt, int vifi, int notify,
280 struct list_head *head) 526 struct list_head *head)
281{ 527{
282 struct vif_device *v; 528 struct vif_device *v;
283 struct net_device *dev; 529 struct net_device *dev;
284 struct in_device *in_dev; 530 struct in_device *in_dev;
285 531
286 if (vifi < 0 || vifi >= net->ipv4.maxvif) 532 if (vifi < 0 || vifi >= mrt->maxvif)
287 return -EADDRNOTAVAIL; 533 return -EADDRNOTAVAIL;
288 534
289 v = &net->ipv4.vif_table[vifi]; 535 v = &mrt->vif_table[vifi];
290 536
291 write_lock_bh(&mrt_lock); 537 write_lock_bh(&mrt_lock);
292 dev = v->dev; 538 dev = v->dev;
@@ -298,17 +544,17 @@ static int vif_delete(struct net *net, int vifi, int notify,
298 } 544 }
299 545
300#ifdef CONFIG_IP_PIMSM 546#ifdef CONFIG_IP_PIMSM
301 if (vifi == net->ipv4.mroute_reg_vif_num) 547 if (vifi == mrt->mroute_reg_vif_num)
302 net->ipv4.mroute_reg_vif_num = -1; 548 mrt->mroute_reg_vif_num = -1;
303#endif 549#endif
304 550
305 if (vifi+1 == net->ipv4.maxvif) { 551 if (vifi+1 == mrt->maxvif) {
306 int tmp; 552 int tmp;
307 for (tmp=vifi-1; tmp>=0; tmp--) { 553 for (tmp=vifi-1; tmp>=0; tmp--) {
308 if (VIF_EXISTS(net, tmp)) 554 if (VIF_EXISTS(mrt, tmp))
309 break; 555 break;
310 } 556 }
311 net->ipv4.maxvif = tmp+1; 557 mrt->maxvif = tmp+1;
312 } 558 }
313 559
314 write_unlock_bh(&mrt_lock); 560 write_unlock_bh(&mrt_lock);
@@ -329,7 +575,6 @@ static int vif_delete(struct net *net, int vifi, int notify,
329 575
330static inline void ipmr_cache_free(struct mfc_cache *c) 576static inline void ipmr_cache_free(struct mfc_cache *c)
331{ 577{
332 release_net(mfc_net(c));
333 kmem_cache_free(mrt_cachep, c); 578 kmem_cache_free(mrt_cachep, c);
334} 579}
335 580
@@ -337,13 +582,13 @@ static inline void ipmr_cache_free(struct mfc_cache *c)
337 and reporting error to netlink readers. 582 and reporting error to netlink readers.
338 */ 583 */
339 584
340static void ipmr_destroy_unres(struct mfc_cache *c) 585static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
341{ 586{
587 struct net *net = read_pnet(&mrt->net);
342 struct sk_buff *skb; 588 struct sk_buff *skb;
343 struct nlmsgerr *e; 589 struct nlmsgerr *e;
344 struct net *net = mfc_net(c);
345 590
346 atomic_dec(&net->ipv4.cache_resolve_queue_len); 591 atomic_dec(&mrt->cache_resolve_queue_len);
347 592
348 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { 593 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
349 if (ip_hdr(skb)->version == 0) { 594 if (ip_hdr(skb)->version == 0) {
@@ -364,42 +609,40 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
364} 609}
365 610
366 611
367/* Single timer process for all the unresolved queue. */ 612/* Timer process for the unresolved queue. */
368 613
369static void ipmr_expire_process(unsigned long dummy) 614static void ipmr_expire_process(unsigned long arg)
370{ 615{
616 struct mr_table *mrt = (struct mr_table *)arg;
371 unsigned long now; 617 unsigned long now;
372 unsigned long expires; 618 unsigned long expires;
373 struct mfc_cache *c, **cp; 619 struct mfc_cache *c, *next;
374 620
375 if (!spin_trylock(&mfc_unres_lock)) { 621 if (!spin_trylock(&mfc_unres_lock)) {
376 mod_timer(&ipmr_expire_timer, jiffies+HZ/10); 622 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
377 return; 623 return;
378 } 624 }
379 625
380 if (mfc_unres_queue == NULL) 626 if (list_empty(&mrt->mfc_unres_queue))
381 goto out; 627 goto out;
382 628
383 now = jiffies; 629 now = jiffies;
384 expires = 10*HZ; 630 expires = 10*HZ;
385 cp = &mfc_unres_queue;
386 631
387 while ((c=*cp) != NULL) { 632 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
388 if (time_after(c->mfc_un.unres.expires, now)) { 633 if (time_after(c->mfc_un.unres.expires, now)) {
389 unsigned long interval = c->mfc_un.unres.expires - now; 634 unsigned long interval = c->mfc_un.unres.expires - now;
390 if (interval < expires) 635 if (interval < expires)
391 expires = interval; 636 expires = interval;
392 cp = &c->next;
393 continue; 637 continue;
394 } 638 }
395 639
396 *cp = c->next; 640 list_del(&c->list);
397 641 ipmr_destroy_unres(mrt, c);
398 ipmr_destroy_unres(c);
399 } 642 }
400 643
401 if (mfc_unres_queue != NULL) 644 if (!list_empty(&mrt->mfc_unres_queue))
402 mod_timer(&ipmr_expire_timer, jiffies + expires); 645 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
403 646
404out: 647out:
405 spin_unlock(&mfc_unres_lock); 648 spin_unlock(&mfc_unres_lock);
@@ -407,17 +650,17 @@ out:
407 650
408/* Fill oifs list. It is called under write locked mrt_lock. */ 651/* Fill oifs list. It is called under write locked mrt_lock. */
409 652
410static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) 653static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
654 unsigned char *ttls)
411{ 655{
412 int vifi; 656 int vifi;
413 struct net *net = mfc_net(cache);
414 657
415 cache->mfc_un.res.minvif = MAXVIFS; 658 cache->mfc_un.res.minvif = MAXVIFS;
416 cache->mfc_un.res.maxvif = 0; 659 cache->mfc_un.res.maxvif = 0;
417 memset(cache->mfc_un.res.ttls, 255, MAXVIFS); 660 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
418 661
419 for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) { 662 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
420 if (VIF_EXISTS(net, vifi) && 663 if (VIF_EXISTS(mrt, vifi) &&
421 ttls[vifi] && ttls[vifi] < 255) { 664 ttls[vifi] && ttls[vifi] < 255) {
422 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 665 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
423 if (cache->mfc_un.res.minvif > vifi) 666 if (cache->mfc_un.res.minvif > vifi)
@@ -428,16 +671,17 @@ static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
428 } 671 }
429} 672}
430 673
431static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) 674static int vif_add(struct net *net, struct mr_table *mrt,
675 struct vifctl *vifc, int mrtsock)
432{ 676{
433 int vifi = vifc->vifc_vifi; 677 int vifi = vifc->vifc_vifi;
434 struct vif_device *v = &net->ipv4.vif_table[vifi]; 678 struct vif_device *v = &mrt->vif_table[vifi];
435 struct net_device *dev; 679 struct net_device *dev;
436 struct in_device *in_dev; 680 struct in_device *in_dev;
437 int err; 681 int err;
438 682
439 /* Is vif busy ? */ 683 /* Is vif busy ? */
440 if (VIF_EXISTS(net, vifi)) 684 if (VIF_EXISTS(mrt, vifi))
441 return -EADDRINUSE; 685 return -EADDRINUSE;
442 686
443 switch (vifc->vifc_flags) { 687 switch (vifc->vifc_flags) {
@@ -447,9 +691,9 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
447 * Special Purpose VIF in PIM 691 * Special Purpose VIF in PIM
448 * All the packets will be sent to the daemon 692 * All the packets will be sent to the daemon
449 */ 693 */
450 if (net->ipv4.mroute_reg_vif_num >= 0) 694 if (mrt->mroute_reg_vif_num >= 0)
451 return -EADDRINUSE; 695 return -EADDRINUSE;
452 dev = ipmr_reg_vif(net); 696 dev = ipmr_reg_vif(net, mrt);
453 if (!dev) 697 if (!dev)
454 return -ENOBUFS; 698 return -ENOBUFS;
455 err = dev_set_allmulti(dev, 1); 699 err = dev_set_allmulti(dev, 1);
@@ -525,49 +769,47 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
525 v->dev = dev; 769 v->dev = dev;
526#ifdef CONFIG_IP_PIMSM 770#ifdef CONFIG_IP_PIMSM
527 if (v->flags&VIFF_REGISTER) 771 if (v->flags&VIFF_REGISTER)
528 net->ipv4.mroute_reg_vif_num = vifi; 772 mrt->mroute_reg_vif_num = vifi;
529#endif 773#endif
530 if (vifi+1 > net->ipv4.maxvif) 774 if (vifi+1 > mrt->maxvif)
531 net->ipv4.maxvif = vifi+1; 775 mrt->maxvif = vifi+1;
532 write_unlock_bh(&mrt_lock); 776 write_unlock_bh(&mrt_lock);
533 return 0; 777 return 0;
534} 778}
535 779
536static struct mfc_cache *ipmr_cache_find(struct net *net, 780static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
537 __be32 origin, 781 __be32 origin,
538 __be32 mcastgrp) 782 __be32 mcastgrp)
539{ 783{
540 int line = MFC_HASH(mcastgrp, origin); 784 int line = MFC_HASH(mcastgrp, origin);
541 struct mfc_cache *c; 785 struct mfc_cache *c;
542 786
543 for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) { 787 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
544 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp) 788 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
545 break; 789 return c;
546 } 790 }
547 return c; 791 return NULL;
548} 792}
549 793
550/* 794/*
551 * Allocate a multicast cache entry 795 * Allocate a multicast cache entry
552 */ 796 */
553static struct mfc_cache *ipmr_cache_alloc(struct net *net) 797static struct mfc_cache *ipmr_cache_alloc(void)
554{ 798{
555 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 799 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
556 if (c == NULL) 800 if (c == NULL)
557 return NULL; 801 return NULL;
558 c->mfc_un.res.minvif = MAXVIFS; 802 c->mfc_un.res.minvif = MAXVIFS;
559 mfc_net_set(c, net);
560 return c; 803 return c;
561} 804}
562 805
563static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net) 806static struct mfc_cache *ipmr_cache_alloc_unres(void)
564{ 807{
565 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 808 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
566 if (c == NULL) 809 if (c == NULL)
567 return NULL; 810 return NULL;
568 skb_queue_head_init(&c->mfc_un.unres.unresolved); 811 skb_queue_head_init(&c->mfc_un.unres.unresolved);
569 c->mfc_un.unres.expires = jiffies + 10*HZ; 812 c->mfc_un.unres.expires = jiffies + 10*HZ;
570 mfc_net_set(c, net);
571 return c; 813 return c;
572} 814}
573 815
@@ -575,7 +817,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
575 * A cache entry has gone into a resolved state from queued 817 * A cache entry has gone into a resolved state from queued
576 */ 818 */
577 819
578static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) 820static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
821 struct mfc_cache *uc, struct mfc_cache *c)
579{ 822{
580 struct sk_buff *skb; 823 struct sk_buff *skb;
581 struct nlmsgerr *e; 824 struct nlmsgerr *e;
@@ -588,7 +831,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
588 if (ip_hdr(skb)->version == 0) { 831 if (ip_hdr(skb)->version == 0) {
589 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 832 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
590 833
591 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { 834 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
592 nlh->nlmsg_len = (skb_tail_pointer(skb) - 835 nlh->nlmsg_len = (skb_tail_pointer(skb) -
593 (u8 *)nlh); 836 (u8 *)nlh);
594 } else { 837 } else {
@@ -600,9 +843,9 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
600 memset(&e->msg, 0, sizeof(e->msg)); 843 memset(&e->msg, 0, sizeof(e->msg));
601 } 844 }
602 845
603 rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid); 846 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
604 } else 847 } else
605 ip_mr_forward(skb, c, 0); 848 ip_mr_forward(net, mrt, skb, c, 0);
606 } 849 }
607} 850}
608 851
@@ -613,7 +856,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
613 * Called under mrt_lock. 856 * Called under mrt_lock.
614 */ 857 */
615 858
616static int ipmr_cache_report(struct net *net, 859static int ipmr_cache_report(struct mr_table *mrt,
617 struct sk_buff *pkt, vifi_t vifi, int assert) 860 struct sk_buff *pkt, vifi_t vifi, int assert)
618{ 861{
619 struct sk_buff *skb; 862 struct sk_buff *skb;
@@ -646,7 +889,7 @@ static int ipmr_cache_report(struct net *net,
646 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); 889 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
647 msg->im_msgtype = IGMPMSG_WHOLEPKT; 890 msg->im_msgtype = IGMPMSG_WHOLEPKT;
648 msg->im_mbz = 0; 891 msg->im_mbz = 0;
649 msg->im_vif = net->ipv4.mroute_reg_vif_num; 892 msg->im_vif = mrt->mroute_reg_vif_num;
650 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; 893 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
651 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + 894 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
652 sizeof(struct iphdr)); 895 sizeof(struct iphdr));
@@ -678,7 +921,7 @@ static int ipmr_cache_report(struct net *net,
678 skb->transport_header = skb->network_header; 921 skb->transport_header = skb->network_header;
679 } 922 }
680 923
681 if (net->ipv4.mroute_sk == NULL) { 924 if (mrt->mroute_sk == NULL) {
682 kfree_skb(skb); 925 kfree_skb(skb);
683 return -EINVAL; 926 return -EINVAL;
684 } 927 }
@@ -686,7 +929,7 @@ static int ipmr_cache_report(struct net *net,
686 /* 929 /*
687 * Deliver to mrouted 930 * Deliver to mrouted
688 */ 931 */
689 ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb); 932 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
690 if (ret < 0) { 933 if (ret < 0) {
691 if (net_ratelimit()) 934 if (net_ratelimit())
692 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); 935 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
@@ -701,27 +944,29 @@ static int ipmr_cache_report(struct net *net,
701 */ 944 */
702 945
703static int 946static int
704ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) 947ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
705{ 948{
949 bool found = false;
706 int err; 950 int err;
707 struct mfc_cache *c; 951 struct mfc_cache *c;
708 const struct iphdr *iph = ip_hdr(skb); 952 const struct iphdr *iph = ip_hdr(skb);
709 953
710 spin_lock_bh(&mfc_unres_lock); 954 spin_lock_bh(&mfc_unres_lock);
711 for (c=mfc_unres_queue; c; c=c->next) { 955 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
712 if (net_eq(mfc_net(c), net) && 956 if (c->mfc_mcastgrp == iph->daddr &&
713 c->mfc_mcastgrp == iph->daddr && 957 c->mfc_origin == iph->saddr) {
714 c->mfc_origin == iph->saddr) 958 found = true;
715 break; 959 break;
960 }
716 } 961 }
717 962
718 if (c == NULL) { 963 if (!found) {
719 /* 964 /*
720 * Create a new entry if allowable 965 * Create a new entry if allowable
721 */ 966 */
722 967
723 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 || 968 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
724 (c = ipmr_cache_alloc_unres(net)) == NULL) { 969 (c = ipmr_cache_alloc_unres()) == NULL) {
725 spin_unlock_bh(&mfc_unres_lock); 970 spin_unlock_bh(&mfc_unres_lock);
726 971
727 kfree_skb(skb); 972 kfree_skb(skb);
@@ -738,7 +983,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
738 /* 983 /*
739 * Reflect first query at mrouted. 984 * Reflect first query at mrouted.
740 */ 985 */
741 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE); 986 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
742 if (err < 0) { 987 if (err < 0) {
743 /* If the report failed throw the cache entry 988 /* If the report failed throw the cache entry
744 out - Brad Parker 989 out - Brad Parker
@@ -750,11 +995,10 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
750 return err; 995 return err;
751 } 996 }
752 997
753 atomic_inc(&net->ipv4.cache_resolve_queue_len); 998 atomic_inc(&mrt->cache_resolve_queue_len);
754 c->next = mfc_unres_queue; 999 list_add(&c->list, &mrt->mfc_unres_queue);
755 mfc_unres_queue = c;
756 1000
757 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires); 1001 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
758 } 1002 }
759 1003
760 /* 1004 /*
@@ -776,19 +1020,18 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
776 * MFC cache manipulation by user space mroute daemon 1020 * MFC cache manipulation by user space mroute daemon
777 */ 1021 */
778 1022
779static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc) 1023static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
780{ 1024{
781 int line; 1025 int line;
782 struct mfc_cache *c, **cp; 1026 struct mfc_cache *c, *next;
783 1027
784 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 1028 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
785 1029
786 for (cp = &net->ipv4.mfc_cache_array[line]; 1030 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
787 (c = *cp) != NULL; cp = &c->next) {
788 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 1031 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
789 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { 1032 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
790 write_lock_bh(&mrt_lock); 1033 write_lock_bh(&mrt_lock);
791 *cp = c->next; 1034 list_del(&c->list);
792 write_unlock_bh(&mrt_lock); 1035 write_unlock_bh(&mrt_lock);
793 1036
794 ipmr_cache_free(c); 1037 ipmr_cache_free(c);
@@ -798,27 +1041,30 @@ static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
798 return -ENOENT; 1041 return -ENOENT;
799} 1042}
800 1043
801static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) 1044static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1045 struct mfcctl *mfc, int mrtsock)
802{ 1046{
1047 bool found = false;
803 int line; 1048 int line;
804 struct mfc_cache *uc, *c, **cp; 1049 struct mfc_cache *uc, *c;
805 1050
806 if (mfc->mfcc_parent >= MAXVIFS) 1051 if (mfc->mfcc_parent >= MAXVIFS)
807 return -ENFILE; 1052 return -ENFILE;
808 1053
809 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 1054 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
810 1055
811 for (cp = &net->ipv4.mfc_cache_array[line]; 1056 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
812 (c = *cp) != NULL; cp = &c->next) {
813 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 1057 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
814 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) 1058 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1059 found = true;
815 break; 1060 break;
1061 }
816 } 1062 }
817 1063
818 if (c != NULL) { 1064 if (found) {
819 write_lock_bh(&mrt_lock); 1065 write_lock_bh(&mrt_lock);
820 c->mfc_parent = mfc->mfcc_parent; 1066 c->mfc_parent = mfc->mfcc_parent;
821 ipmr_update_thresholds(c, mfc->mfcc_ttls); 1067 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
822 if (!mrtsock) 1068 if (!mrtsock)
823 c->mfc_flags |= MFC_STATIC; 1069 c->mfc_flags |= MFC_STATIC;
824 write_unlock_bh(&mrt_lock); 1070 write_unlock_bh(&mrt_lock);
@@ -828,43 +1074,42 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
828 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) 1074 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
829 return -EINVAL; 1075 return -EINVAL;
830 1076
831 c = ipmr_cache_alloc(net); 1077 c = ipmr_cache_alloc();
832 if (c == NULL) 1078 if (c == NULL)
833 return -ENOMEM; 1079 return -ENOMEM;
834 1080
835 c->mfc_origin = mfc->mfcc_origin.s_addr; 1081 c->mfc_origin = mfc->mfcc_origin.s_addr;
836 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; 1082 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
837 c->mfc_parent = mfc->mfcc_parent; 1083 c->mfc_parent = mfc->mfcc_parent;
838 ipmr_update_thresholds(c, mfc->mfcc_ttls); 1084 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
839 if (!mrtsock) 1085 if (!mrtsock)
840 c->mfc_flags |= MFC_STATIC; 1086 c->mfc_flags |= MFC_STATIC;
841 1087
842 write_lock_bh(&mrt_lock); 1088 write_lock_bh(&mrt_lock);
843 c->next = net->ipv4.mfc_cache_array[line]; 1089 list_add(&c->list, &mrt->mfc_cache_array[line]);
844 net->ipv4.mfc_cache_array[line] = c;
845 write_unlock_bh(&mrt_lock); 1090 write_unlock_bh(&mrt_lock);
846 1091
847 /* 1092 /*
848 * Check to see if we resolved a queued list. If so we 1093 * Check to see if we resolved a queued list. If so we
849 * need to send on the frames and tidy up. 1094 * need to send on the frames and tidy up.
850 */ 1095 */
1096 found = false;
851 spin_lock_bh(&mfc_unres_lock); 1097 spin_lock_bh(&mfc_unres_lock);
852 for (cp = &mfc_unres_queue; (uc=*cp) != NULL; 1098 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
853 cp = &uc->next) { 1099 if (uc->mfc_origin == c->mfc_origin &&
854 if (net_eq(mfc_net(uc), net) &&
855 uc->mfc_origin == c->mfc_origin &&
856 uc->mfc_mcastgrp == c->mfc_mcastgrp) { 1100 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
857 *cp = uc->next; 1101 list_del(&uc->list);
858 atomic_dec(&net->ipv4.cache_resolve_queue_len); 1102 atomic_dec(&mrt->cache_resolve_queue_len);
1103 found = true;
859 break; 1104 break;
860 } 1105 }
861 } 1106 }
862 if (mfc_unres_queue == NULL) 1107 if (list_empty(&mrt->mfc_unres_queue))
863 del_timer(&ipmr_expire_timer); 1108 del_timer(&mrt->ipmr_expire_timer);
864 spin_unlock_bh(&mfc_unres_lock); 1109 spin_unlock_bh(&mfc_unres_lock);
865 1110
866 if (uc) { 1111 if (found) {
867 ipmr_cache_resolve(uc, c); 1112 ipmr_cache_resolve(net, mrt, uc, c);
868 ipmr_cache_free(uc); 1113 ipmr_cache_free(uc);
869 } 1114 }
870 return 0; 1115 return 0;
@@ -874,53 +1119,41 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
874 * Close the multicast socket, and clear the vif tables etc 1119 * Close the multicast socket, and clear the vif tables etc
875 */ 1120 */
876 1121
877static void mroute_clean_tables(struct net *net) 1122static void mroute_clean_tables(struct mr_table *mrt)
878{ 1123{
879 int i; 1124 int i;
880 LIST_HEAD(list); 1125 LIST_HEAD(list);
1126 struct mfc_cache *c, *next;
881 1127
882 /* 1128 /*
883 * Shut down all active vif entries 1129 * Shut down all active vif entries
884 */ 1130 */
885 for (i = 0; i < net->ipv4.maxvif; i++) { 1131 for (i = 0; i < mrt->maxvif; i++) {
886 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC)) 1132 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
887 vif_delete(net, i, 0, &list); 1133 vif_delete(mrt, i, 0, &list);
888 } 1134 }
889 unregister_netdevice_many(&list); 1135 unregister_netdevice_many(&list);
890 1136
891 /* 1137 /*
892 * Wipe the cache 1138 * Wipe the cache
893 */ 1139 */
894 for (i=0; i<MFC_LINES; i++) { 1140 for (i = 0; i < MFC_LINES; i++) {
895 struct mfc_cache *c, **cp; 1141 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
896 1142 if (c->mfc_flags&MFC_STATIC)
897 cp = &net->ipv4.mfc_cache_array[i];
898 while ((c = *cp) != NULL) {
899 if (c->mfc_flags&MFC_STATIC) {
900 cp = &c->next;
901 continue; 1143 continue;
902 }
903 write_lock_bh(&mrt_lock); 1144 write_lock_bh(&mrt_lock);
904 *cp = c->next; 1145 list_del(&c->list);
905 write_unlock_bh(&mrt_lock); 1146 write_unlock_bh(&mrt_lock);
906 1147
907 ipmr_cache_free(c); 1148 ipmr_cache_free(c);
908 } 1149 }
909 } 1150 }
910 1151
911 if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) { 1152 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
912 struct mfc_cache *c, **cp;
913
914 spin_lock_bh(&mfc_unres_lock); 1153 spin_lock_bh(&mfc_unres_lock);
915 cp = &mfc_unres_queue; 1154 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
916 while ((c = *cp) != NULL) { 1155 list_del(&c->list);
917 if (!net_eq(mfc_net(c), net)) { 1156 ipmr_destroy_unres(mrt, c);
918 cp = &c->next;
919 continue;
920 }
921 *cp = c->next;
922
923 ipmr_destroy_unres(c);
924 } 1157 }
925 spin_unlock_bh(&mfc_unres_lock); 1158 spin_unlock_bh(&mfc_unres_lock);
926 } 1159 }
@@ -929,16 +1162,19 @@ static void mroute_clean_tables(struct net *net)
929static void mrtsock_destruct(struct sock *sk) 1162static void mrtsock_destruct(struct sock *sk)
930{ 1163{
931 struct net *net = sock_net(sk); 1164 struct net *net = sock_net(sk);
1165 struct mr_table *mrt;
932 1166
933 rtnl_lock(); 1167 rtnl_lock();
934 if (sk == net->ipv4.mroute_sk) { 1168 ipmr_for_each_table(mrt, net) {
935 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1169 if (sk == mrt->mroute_sk) {
1170 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
936 1171
937 write_lock_bh(&mrt_lock); 1172 write_lock_bh(&mrt_lock);
938 net->ipv4.mroute_sk = NULL; 1173 mrt->mroute_sk = NULL;
939 write_unlock_bh(&mrt_lock); 1174 write_unlock_bh(&mrt_lock);
940 1175
941 mroute_clean_tables(net); 1176 mroute_clean_tables(mrt);
1177 }
942 } 1178 }
943 rtnl_unlock(); 1179 rtnl_unlock();
944} 1180}
@@ -956,9 +1192,14 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
956 struct vifctl vif; 1192 struct vifctl vif;
957 struct mfcctl mfc; 1193 struct mfcctl mfc;
958 struct net *net = sock_net(sk); 1194 struct net *net = sock_net(sk);
1195 struct mr_table *mrt;
1196
1197 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1198 if (mrt == NULL)
1199 return -ENOENT;
959 1200
960 if (optname != MRT_INIT) { 1201 if (optname != MRT_INIT) {
961 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN)) 1202 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
962 return -EACCES; 1203 return -EACCES;
963 } 1204 }
964 1205
@@ -971,7 +1212,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
971 return -ENOPROTOOPT; 1212 return -ENOPROTOOPT;
972 1213
973 rtnl_lock(); 1214 rtnl_lock();
974 if (net->ipv4.mroute_sk) { 1215 if (mrt->mroute_sk) {
975 rtnl_unlock(); 1216 rtnl_unlock();
976 return -EADDRINUSE; 1217 return -EADDRINUSE;
977 } 1218 }
@@ -979,7 +1220,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
979 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1220 ret = ip_ra_control(sk, 1, mrtsock_destruct);
980 if (ret == 0) { 1221 if (ret == 0) {
981 write_lock_bh(&mrt_lock); 1222 write_lock_bh(&mrt_lock);
982 net->ipv4.mroute_sk = sk; 1223 mrt->mroute_sk = sk;
983 write_unlock_bh(&mrt_lock); 1224 write_unlock_bh(&mrt_lock);
984 1225
985 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1226 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
@@ -987,7 +1228,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
987 rtnl_unlock(); 1228 rtnl_unlock();
988 return ret; 1229 return ret;
989 case MRT_DONE: 1230 case MRT_DONE:
990 if (sk != net->ipv4.mroute_sk) 1231 if (sk != mrt->mroute_sk)
991 return -EACCES; 1232 return -EACCES;
992 return ip_ra_control(sk, 0, NULL); 1233 return ip_ra_control(sk, 0, NULL);
993 case MRT_ADD_VIF: 1234 case MRT_ADD_VIF:
@@ -1000,9 +1241,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1000 return -ENFILE; 1241 return -ENFILE;
1001 rtnl_lock(); 1242 rtnl_lock();
1002 if (optname == MRT_ADD_VIF) { 1243 if (optname == MRT_ADD_VIF) {
1003 ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk); 1244 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
1004 } else { 1245 } else {
1005 ret = vif_delete(net, vif.vifc_vifi, 0, NULL); 1246 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1006 } 1247 }
1007 rtnl_unlock(); 1248 rtnl_unlock();
1008 return ret; 1249 return ret;
@@ -1019,9 +1260,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1019 return -EFAULT; 1260 return -EFAULT;
1020 rtnl_lock(); 1261 rtnl_lock();
1021 if (optname == MRT_DEL_MFC) 1262 if (optname == MRT_DEL_MFC)
1022 ret = ipmr_mfc_delete(net, &mfc); 1263 ret = ipmr_mfc_delete(mrt, &mfc);
1023 else 1264 else
1024 ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk); 1265 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
1025 rtnl_unlock(); 1266 rtnl_unlock();
1026 return ret; 1267 return ret;
1027 /* 1268 /*
@@ -1032,7 +1273,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1032 int v; 1273 int v;
1033 if (get_user(v,(int __user *)optval)) 1274 if (get_user(v,(int __user *)optval))
1034 return -EFAULT; 1275 return -EFAULT;
1035 net->ipv4.mroute_do_assert = (v) ? 1 : 0; 1276 mrt->mroute_do_assert = (v) ? 1 : 0;
1036 return 0; 1277 return 0;
1037 } 1278 }
1038#ifdef CONFIG_IP_PIMSM 1279#ifdef CONFIG_IP_PIMSM
@@ -1046,14 +1287,35 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1046 1287
1047 rtnl_lock(); 1288 rtnl_lock();
1048 ret = 0; 1289 ret = 0;
1049 if (v != net->ipv4.mroute_do_pim) { 1290 if (v != mrt->mroute_do_pim) {
1050 net->ipv4.mroute_do_pim = v; 1291 mrt->mroute_do_pim = v;
1051 net->ipv4.mroute_do_assert = v; 1292 mrt->mroute_do_assert = v;
1052 } 1293 }
1053 rtnl_unlock(); 1294 rtnl_unlock();
1054 return ret; 1295 return ret;
1055 } 1296 }
1056#endif 1297#endif
1298#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1299 case MRT_TABLE:
1300 {
1301 u32 v;
1302
1303 if (optlen != sizeof(u32))
1304 return -EINVAL;
1305 if (get_user(v, (u32 __user *)optval))
1306 return -EFAULT;
1307 if (sk == mrt->mroute_sk)
1308 return -EBUSY;
1309
1310 rtnl_lock();
1311 ret = 0;
1312 if (!ipmr_new_table(net, v))
1313 ret = -ENOMEM;
1314 raw_sk(sk)->ipmr_table = v;
1315 rtnl_unlock();
1316 return ret;
1317 }
1318#endif
1057 /* 1319 /*
1058 * Spurious command, or MRT_VERSION which you cannot 1320 * Spurious command, or MRT_VERSION which you cannot
1059 * set. 1321 * set.
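
The new MRT_TABLE case is the userspace entry point for multiple tables: it must be issued before MRT_INIT (once the socket has become a table's mroute_sk it fails with -EBUSY), optlen must be exactly sizeof(u32), and the table is created on demand. A hedged userspace sketch; MRT_TABLE comes from linux/mroute.h as extended by this series, and table id 42 is an arbitrary example:

	#include <stdio.h>
	#include <stdint.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <linux/mroute.h>

	int main(void)
	{
		int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
		uint32_t table = 42;	/* arbitrary example table id */
		int one = 1;

		if (fd < 0)
			return 1;
		/* Select (and, if needed, create) the table; must precede MRT_INIT. */
		if (setsockopt(fd, IPPROTO_IP, MRT_TABLE, &table, sizeof(table)) < 0)
			perror("MRT_TABLE");
		/* Register this socket as the chosen table's mroute_sk. */
		if (setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one)) < 0)
			perror("MRT_INIT");
		return 0;
	}
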
@@ -1072,6 +1334,11 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
1072 int olr; 1334 int olr;
1073 int val; 1335 int val;
1074 struct net *net = sock_net(sk); 1336 struct net *net = sock_net(sk);
1337 struct mr_table *mrt;
1338
1339 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1340 if (mrt == NULL)
1341 return -ENOENT;
1075 1342
1076 if (optname != MRT_VERSION && 1343 if (optname != MRT_VERSION &&
1077#ifdef CONFIG_IP_PIMSM 1344#ifdef CONFIG_IP_PIMSM
@@ -1093,10 +1360,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
1093 val = 0x0305; 1360 val = 0x0305;
1094#ifdef CONFIG_IP_PIMSM 1361#ifdef CONFIG_IP_PIMSM
1095 else if (optname == MRT_PIM) 1362 else if (optname == MRT_PIM)
1096 val = net->ipv4.mroute_do_pim; 1363 val = mrt->mroute_do_pim;
1097#endif 1364#endif
1098 else 1365 else
1099 val = net->ipv4.mroute_do_assert; 1366 val = mrt->mroute_do_assert;
1100 if (copy_to_user(optval, &val, olr)) 1367 if (copy_to_user(optval, &val, olr))
1101 return -EFAULT; 1368 return -EFAULT;
1102 return 0; 1369 return 0;
@@ -1113,16 +1380,21 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1113 struct vif_device *vif; 1380 struct vif_device *vif;
1114 struct mfc_cache *c; 1381 struct mfc_cache *c;
1115 struct net *net = sock_net(sk); 1382 struct net *net = sock_net(sk);
1383 struct mr_table *mrt;
1384
1385 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1386 if (mrt == NULL)
1387 return -ENOENT;
1116 1388
1117 switch (cmd) { 1389 switch (cmd) {
1118 case SIOCGETVIFCNT: 1390 case SIOCGETVIFCNT:
1119 if (copy_from_user(&vr, arg, sizeof(vr))) 1391 if (copy_from_user(&vr, arg, sizeof(vr)))
1120 return -EFAULT; 1392 return -EFAULT;
1121 if (vr.vifi >= net->ipv4.maxvif) 1393 if (vr.vifi >= mrt->maxvif)
1122 return -EINVAL; 1394 return -EINVAL;
1123 read_lock(&mrt_lock); 1395 read_lock(&mrt_lock);
1124 vif = &net->ipv4.vif_table[vr.vifi]; 1396 vif = &mrt->vif_table[vr.vifi];
1125 if (VIF_EXISTS(net, vr.vifi)) { 1397 if (VIF_EXISTS(mrt, vr.vifi)) {
1126 vr.icount = vif->pkt_in; 1398 vr.icount = vif->pkt_in;
1127 vr.ocount = vif->pkt_out; 1399 vr.ocount = vif->pkt_out;
1128 vr.ibytes = vif->bytes_in; 1400 vr.ibytes = vif->bytes_in;
@@ -1140,7 +1412,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1140 return -EFAULT; 1412 return -EFAULT;
1141 1413
1142 read_lock(&mrt_lock); 1414 read_lock(&mrt_lock);
1143 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr); 1415 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1144 if (c) { 1416 if (c) {
1145 sr.pktcnt = c->mfc_un.res.pkt; 1417 sr.pktcnt = c->mfc_un.res.pkt;
1146 sr.bytecnt = c->mfc_un.res.bytes; 1418 sr.bytecnt = c->mfc_un.res.bytes;
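
For context, these two ioctls are the counter queries a routing daemon polls; they now resolve the socket's own table before touching vif_table or the MFC. A sketch of the userspace side, assuming an already-initialized mroute socket fd and the request structures from linux/mroute.h:

	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/mroute.h>

	/* Print counters for VIF 0 and one (S,G) pair. */
	static void dump_counters(int fd, struct in_addr src, struct in_addr grp)
	{
		struct sioc_vif_req vr = { .vifi = 0 };
		struct sioc_sg_req sr = { .src = src, .grp = grp };

		if (ioctl(fd, SIOCGETVIFCNT, &vr) == 0)
			printf("vif0: in %lu out %lu\n", vr.icount, vr.ocount);
		if (ioctl(fd, SIOCGETSGCNT, &sr) == 0)
			printf("(S,G): pkt %lu bytes %lu wrong_if %lu\n",
			       sr.pktcnt, sr.bytecnt, sr.wrong_if);
	}
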
@@ -1163,16 +1435,20 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
1163{ 1435{
1164 struct net_device *dev = ptr; 1436 struct net_device *dev = ptr;
1165 struct net *net = dev_net(dev); 1437 struct net *net = dev_net(dev);
1438 struct mr_table *mrt;
1166 struct vif_device *v; 1439 struct vif_device *v;
1167 int ct; 1440 int ct;
1168 LIST_HEAD(list); 1441 LIST_HEAD(list);
1169 1442
1170 if (event != NETDEV_UNREGISTER) 1443 if (event != NETDEV_UNREGISTER)
1171 return NOTIFY_DONE; 1444 return NOTIFY_DONE;
1172 v = &net->ipv4.vif_table[0]; 1445
1173 for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) { 1446 ipmr_for_each_table(mrt, net) {
1174 if (v->dev == dev) 1447 v = &mrt->vif_table[0];
1175 vif_delete(net, ct, 1, &list); 1448 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1449 if (v->dev == dev)
1450 vif_delete(mrt, ct, 1, &list);
1451 }
1176 } 1452 }
1177 unregister_netdevice_many(&list); 1453 unregister_netdevice_many(&list);
1178 return NOTIFY_DONE; 1454 return NOTIFY_DONE;
@@ -1231,11 +1507,11 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
1231 * Processing handlers for ipmr_forward 1507 * Processing handlers for ipmr_forward
1232 */ 1508 */
1233 1509
1234static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) 1510static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1511 struct sk_buff *skb, struct mfc_cache *c, int vifi)
1235{ 1512{
1236 struct net *net = mfc_net(c);
1237 const struct iphdr *iph = ip_hdr(skb); 1513 const struct iphdr *iph = ip_hdr(skb);
1238 struct vif_device *vif = &net->ipv4.vif_table[vifi]; 1514 struct vif_device *vif = &mrt->vif_table[vifi];
1239 struct net_device *dev; 1515 struct net_device *dev;
1240 struct rtable *rt; 1516 struct rtable *rt;
1241 int encap = 0; 1517 int encap = 0;
@@ -1249,7 +1525,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1249 vif->bytes_out += skb->len; 1525 vif->bytes_out += skb->len;
1250 vif->dev->stats.tx_bytes += skb->len; 1526 vif->dev->stats.tx_bytes += skb->len;
1251 vif->dev->stats.tx_packets++; 1527 vif->dev->stats.tx_packets++;
1252 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT); 1528 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1253 goto out_free; 1529 goto out_free;
1254 } 1530 }
1255#endif 1531#endif
@@ -1332,12 +1608,12 @@ out_free:
1332 return; 1608 return;
1333} 1609}
1334 1610
1335static int ipmr_find_vif(struct net_device *dev) 1611static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1336{ 1612{
1337 struct net *net = dev_net(dev);
1338 int ct; 1613 int ct;
1339 for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) { 1614
1340 if (net->ipv4.vif_table[ct].dev == dev) 1615 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1616 if (mrt->vif_table[ct].dev == dev)
1341 break; 1617 break;
1342 } 1618 }
1343 return ct; 1619 return ct;
@@ -1345,11 +1621,12 @@ static int ipmr_find_vif(struct net_device *dev)
1345 1621
1346/* "local" means that we should preserve one skb (for local delivery) */ 1622/* "local" means that we should preserve one skb (for local delivery) */
1347 1623
1348static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local) 1624static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1625 struct sk_buff *skb, struct mfc_cache *cache,
1626 int local)
1349{ 1627{
1350 int psend = -1; 1628 int psend = -1;
1351 int vif, ct; 1629 int vif, ct;
1352 struct net *net = mfc_net(cache);
1353 1630
1354 vif = cache->mfc_parent; 1631 vif = cache->mfc_parent;
1355 cache->mfc_un.res.pkt++; 1632 cache->mfc_un.res.pkt++;
@@ -1358,7 +1635,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1358 /* 1635 /*
1359 * Wrong interface: drop packet and (maybe) send PIM assert. 1636 * Wrong interface: drop packet and (maybe) send PIM assert.
1360 */ 1637 */
1361 if (net->ipv4.vif_table[vif].dev != skb->dev) { 1638 if (mrt->vif_table[vif].dev != skb->dev) {
1362 int true_vifi; 1639 int true_vifi;
1363 1640
1364 if (skb_rtable(skb)->fl.iif == 0) { 1641 if (skb_rtable(skb)->fl.iif == 0) {
@@ -1377,26 +1654,26 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1377 } 1654 }
1378 1655
1379 cache->mfc_un.res.wrong_if++; 1656 cache->mfc_un.res.wrong_if++;
1380 true_vifi = ipmr_find_vif(skb->dev); 1657 true_vifi = ipmr_find_vif(mrt, skb->dev);
1381 1658
1382 if (true_vifi >= 0 && net->ipv4.mroute_do_assert && 1659 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1383 /* pimsm uses asserts, when switching from RPT to SPT, 1660 /* pimsm uses asserts, when switching from RPT to SPT,
1384 so that we cannot check that packet arrived on an oif. 1661 so that we cannot check that packet arrived on an oif.
1385 It is bad, but otherwise we would need to move pretty 1662 It is bad, but otherwise we would need to move pretty
1386 large chunk of pimd to kernel. Ough... --ANK 1663 large chunk of pimd to kernel. Ough... --ANK
1387 */ 1664 */
1388 (net->ipv4.mroute_do_pim || 1665 (mrt->mroute_do_pim ||
1389 cache->mfc_un.res.ttls[true_vifi] < 255) && 1666 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1390 time_after(jiffies, 1667 time_after(jiffies,
1391 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { 1668 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1392 cache->mfc_un.res.last_assert = jiffies; 1669 cache->mfc_un.res.last_assert = jiffies;
1393 ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF); 1670 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1394 } 1671 }
1395 goto dont_forward; 1672 goto dont_forward;
1396 } 1673 }
1397 1674
1398 net->ipv4.vif_table[vif].pkt_in++; 1675 mrt->vif_table[vif].pkt_in++;
1399 net->ipv4.vif_table[vif].bytes_in += skb->len; 1676 mrt->vif_table[vif].bytes_in += skb->len;
1400 1677
1401 /* 1678 /*
1402 * Forward the frame 1679 * Forward the frame
@@ -1406,7 +1683,8 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1406 if (psend != -1) { 1683 if (psend != -1) {
1407 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1684 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1408 if (skb2) 1685 if (skb2)
1409 ipmr_queue_xmit(skb2, cache, psend); 1686 ipmr_queue_xmit(net, mrt, skb2, cache,
1687 psend);
1410 } 1688 }
1411 psend = ct; 1689 psend = ct;
1412 } 1690 }
@@ -1415,9 +1693,9 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1415 if (local) { 1693 if (local) {
1416 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1694 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1417 if (skb2) 1695 if (skb2)
1418 ipmr_queue_xmit(skb2, cache, psend); 1696 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1419 } else { 1697 } else {
1420 ipmr_queue_xmit(skb, cache, psend); 1698 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1421 return 0; 1699 return 0;
1422 } 1700 }
1423 } 1701 }
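
The forwarding loop keeps the long-standing clone-all-but-last idiom: psend trails the scan by one destination so the final VIF can consume the original skb without an extra copy (unless "local" delivery still needs it, in which case the tail clones too). In isolation, with destination_wants_packet() and xmit() as hypothetical stand-ins for the TTL-threshold check and ipmr_queue_xmit():

	/* Sketch of the clone-all-but-last pattern used by ip_mr_forward(). */
	static void forward_to_all(struct sk_buff *skb, int maxvif)
	{
		int psend = -1, ct;

		for (ct = maxvif - 1; ct >= 0; ct--) {
			if (!destination_wants_packet(ct))
				continue;
			if (psend != -1) {
				/* Not the last interested VIF yet: send a clone. */
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					xmit(skb2, psend);
			}
			psend = ct;
		}
		if (psend != -1)
			xmit(skb, psend);	/* last destination gets the original */
	}
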
@@ -1438,6 +1716,8 @@ int ip_mr_input(struct sk_buff *skb)
1438 struct mfc_cache *cache; 1716 struct mfc_cache *cache;
1439 struct net *net = dev_net(skb->dev); 1717 struct net *net = dev_net(skb->dev);
1440 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 1718 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1719 struct mr_table *mrt;
1720 int err;
1441 1721
1442 /* Packet is looped back after forward, it should not be 1722 /* Packet is looped back after forward, it should not be
1443 forwarded second time, but still can be delivered locally. 1723 forwarded second time, but still can be delivered locally.
@@ -1445,6 +1725,10 @@ int ip_mr_input(struct sk_buff *skb)
1445 if (IPCB(skb)->flags&IPSKB_FORWARDED) 1725 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1446 goto dont_forward; 1726 goto dont_forward;
1447 1727
1728 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
1729 if (err < 0)
1730 return err;
1731
1448 if (!local) { 1732 if (!local) {
1449 if (IPCB(skb)->opt.router_alert) { 1733 if (IPCB(skb)->opt.router_alert) {
1450 if (ip_call_ra_chain(skb)) 1734 if (ip_call_ra_chain(skb))
@@ -1457,9 +1741,9 @@ int ip_mr_input(struct sk_buff *skb)
1457 that we can forward NO IGMP messages. 1741 that we can forward NO IGMP messages.
1458 */ 1742 */
1459 read_lock(&mrt_lock); 1743 read_lock(&mrt_lock);
1460 if (net->ipv4.mroute_sk) { 1744 if (mrt->mroute_sk) {
1461 nf_reset(skb); 1745 nf_reset(skb);
1462 raw_rcv(net->ipv4.mroute_sk, skb); 1746 raw_rcv(mrt->mroute_sk, skb);
1463 read_unlock(&mrt_lock); 1747 read_unlock(&mrt_lock);
1464 return 0; 1748 return 0;
1465 } 1749 }
@@ -1468,7 +1752,7 @@ int ip_mr_input(struct sk_buff *skb)
1468 } 1752 }
1469 1753
1470 read_lock(&mrt_lock); 1754 read_lock(&mrt_lock);
1471 cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 1755 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1472 1756
1473 /* 1757 /*
1474 * No usable cache entry 1758 * No usable cache entry
@@ -1486,19 +1770,19 @@ int ip_mr_input(struct sk_buff *skb)
1486 skb = skb2; 1770 skb = skb2;
1487 } 1771 }
1488 1772
1489 vif = ipmr_find_vif(skb->dev); 1773 vif = ipmr_find_vif(mrt, skb->dev);
1490 if (vif >= 0) { 1774 if (vif >= 0) {
1491 int err = ipmr_cache_unresolved(net, vif, skb); 1775 int err2 = ipmr_cache_unresolved(mrt, vif, skb);
1492 read_unlock(&mrt_lock); 1776 read_unlock(&mrt_lock);
1493 1777
1494 return err; 1778 return err2;
1495 } 1779 }
1496 read_unlock(&mrt_lock); 1780 read_unlock(&mrt_lock);
1497 kfree_skb(skb); 1781 kfree_skb(skb);
1498 return -ENODEV; 1782 return -ENODEV;
1499 } 1783 }
1500 1784
1501 ip_mr_forward(skb, cache, local); 1785 ip_mr_forward(net, mrt, skb, cache, local);
1502 1786
1503 read_unlock(&mrt_lock); 1787 read_unlock(&mrt_lock);
1504 1788
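
Before any forwarding decision, ip_mr_input() now asks the FIB-rules engine which table owns the flow. ipmr_fib_lookup() is defined earlier in the patch; under CONFIG_IP_MROUTE_MULTIPLE_TABLES it is expected to be a thin wrapper, roughly as below, with ipmr_result and the per-namespace mr_rules_ops assumed from the same series:

	static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
				   struct mr_table **mrt)
	{
		struct ipmr_result res;
		struct fib_lookup_arg arg = { .result = &res, };
		int err;

		err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
		if (err < 0)
			return err;
		*mrt = res.mrt;
		return 0;
	}
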
@@ -1515,11 +1799,11 @@ dont_forward:
1515} 1799}
1516 1800
1517#ifdef CONFIG_IP_PIMSM 1801#ifdef CONFIG_IP_PIMSM
1518static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) 1802static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1803 unsigned int pimlen)
1519{ 1804{
1520 struct net_device *reg_dev = NULL; 1805 struct net_device *reg_dev = NULL;
1521 struct iphdr *encap; 1806 struct iphdr *encap;
1522 struct net *net = dev_net(skb->dev);
1523 1807
1524 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); 1808 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1525 /* 1809 /*
@@ -1534,8 +1818,8 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1534 return 1; 1818 return 1;
1535 1819
1536 read_lock(&mrt_lock); 1820 read_lock(&mrt_lock);
1537 if (net->ipv4.mroute_reg_vif_num >= 0) 1821 if (mrt->mroute_reg_vif_num >= 0)
1538 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev; 1822 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1539 if (reg_dev) 1823 if (reg_dev)
1540 dev_hold(reg_dev); 1824 dev_hold(reg_dev);
1541 read_unlock(&mrt_lock); 1825 read_unlock(&mrt_lock);
@@ -1570,17 +1854,21 @@ int pim_rcv_v1(struct sk_buff * skb)
1570{ 1854{
1571 struct igmphdr *pim; 1855 struct igmphdr *pim;
1572 struct net *net = dev_net(skb->dev); 1856 struct net *net = dev_net(skb->dev);
1857 struct mr_table *mrt;
1573 1858
1574 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 1859 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1575 goto drop; 1860 goto drop;
1576 1861
1577 pim = igmp_hdr(skb); 1862 pim = igmp_hdr(skb);
1578 1863
1579 if (!net->ipv4.mroute_do_pim || 1864 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1865 goto drop;
1866
1867 if (!mrt->mroute_do_pim ||
1580 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 1868 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1581 goto drop; 1869 goto drop;
1582 1870
1583 if (__pim_rcv(skb, sizeof(*pim))) { 1871 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1584drop: 1872drop:
1585 kfree_skb(skb); 1873 kfree_skb(skb);
1586 } 1874 }
@@ -1592,6 +1880,8 @@ drop:
1592static int pim_rcv(struct sk_buff * skb) 1880static int pim_rcv(struct sk_buff * skb)
1593{ 1881{
1594 struct pimreghdr *pim; 1882 struct pimreghdr *pim;
1883 struct net *net = dev_net(skb->dev);
1884 struct mr_table *mrt;
1595 1885
1596 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 1886 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1597 goto drop; 1887 goto drop;
@@ -1603,7 +1893,10 @@ static int pim_rcv(struct sk_buff * skb)
1603 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 1893 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1604 goto drop; 1894 goto drop;
1605 1895
1606 if (__pim_rcv(skb, sizeof(*pim))) { 1896 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1897 goto drop;
1898
1899 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1607drop: 1900drop:
1608 kfree_skb(skb); 1901 kfree_skb(skb);
1609 } 1902 }
@@ -1611,12 +1904,11 @@ drop:
1611} 1904}
1612#endif 1905#endif
1613 1906
1614static int 1907static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1615ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) 1908 struct mfc_cache *c, struct rtmsg *rtm)
1616{ 1909{
1617 int ct; 1910 int ct;
1618 struct rtnexthop *nhp; 1911 struct rtnexthop *nhp;
1619 struct net *net = mfc_net(c);
1620 u8 *b = skb_tail_pointer(skb); 1912 u8 *b = skb_tail_pointer(skb);
1621 struct rtattr *mp_head; 1913 struct rtattr *mp_head;
1622 1914
@@ -1624,19 +1916,19 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1624 if (c->mfc_parent > MAXVIFS) 1916 if (c->mfc_parent > MAXVIFS)
1625 return -ENOENT; 1917 return -ENOENT;
1626 1918
1627 if (VIF_EXISTS(net, c->mfc_parent)) 1919 if (VIF_EXISTS(mrt, c->mfc_parent))
1628 RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex); 1920 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
1629 1921
1630 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); 1922 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1631 1923
1632 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 1924 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1633 if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) { 1925 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1634 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) 1926 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1635 goto rtattr_failure; 1927 goto rtattr_failure;
1636 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 1928 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1637 nhp->rtnh_flags = 0; 1929 nhp->rtnh_flags = 0;
1638 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 1930 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1639 nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex; 1931 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1640 nhp->rtnh_len = sizeof(*nhp); 1932 nhp->rtnh_len = sizeof(*nhp);
1641 } 1933 }
1642 } 1934 }
@@ -1654,11 +1946,16 @@ int ipmr_get_route(struct net *net,
1654 struct sk_buff *skb, struct rtmsg *rtm, int nowait) 1946 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1655{ 1947{
1656 int err; 1948 int err;
1949 struct mr_table *mrt;
1657 struct mfc_cache *cache; 1950 struct mfc_cache *cache;
1658 struct rtable *rt = skb_rtable(skb); 1951 struct rtable *rt = skb_rtable(skb);
1659 1952
1953 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1954 if (mrt == NULL)
1955 return -ENOENT;
1956
1660 read_lock(&mrt_lock); 1957 read_lock(&mrt_lock);
1661 cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst); 1958 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1662 1959
1663 if (cache == NULL) { 1960 if (cache == NULL) {
1664 struct sk_buff *skb2; 1961 struct sk_buff *skb2;
@@ -1672,7 +1969,7 @@ int ipmr_get_route(struct net *net,
1672 } 1969 }
1673 1970
1674 dev = skb->dev; 1971 dev = skb->dev;
1675 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) { 1972 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
1676 read_unlock(&mrt_lock); 1973 read_unlock(&mrt_lock);
1677 return -ENODEV; 1974 return -ENODEV;
1678 } 1975 }
@@ -1689,24 +1986,107 @@ int ipmr_get_route(struct net *net,
1689 iph->saddr = rt->rt_src; 1986 iph->saddr = rt->rt_src;
1690 iph->daddr = rt->rt_dst; 1987 iph->daddr = rt->rt_dst;
1691 iph->version = 0; 1988 iph->version = 0;
1692 err = ipmr_cache_unresolved(net, vif, skb2); 1989 err = ipmr_cache_unresolved(mrt, vif, skb2);
1693 read_unlock(&mrt_lock); 1990 read_unlock(&mrt_lock);
1694 return err; 1991 return err;
1695 } 1992 }
1696 1993
1697 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) 1994 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1698 cache->mfc_flags |= MFC_NOTIFY; 1995 cache->mfc_flags |= MFC_NOTIFY;
1699 err = ipmr_fill_mroute(skb, cache, rtm); 1996 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
1700 read_unlock(&mrt_lock); 1997 read_unlock(&mrt_lock);
1701 return err; 1998 return err;
1702} 1999}
1703 2000
2001static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2002 u32 pid, u32 seq, struct mfc_cache *c)
2003{
2004 struct nlmsghdr *nlh;
2005 struct rtmsg *rtm;
2006
2007 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2008 if (nlh == NULL)
2009 return -EMSGSIZE;
2010
2011 rtm = nlmsg_data(nlh);
2012 rtm->rtm_family = RTNL_FAMILY_IPMR;
2013 rtm->rtm_dst_len = 32;
2014 rtm->rtm_src_len = 32;
2015 rtm->rtm_tos = 0;
2016 rtm->rtm_table = mrt->id;
2017 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2018 rtm->rtm_type = RTN_MULTICAST;
2019 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2020 rtm->rtm_protocol = RTPROT_UNSPEC;
2021 rtm->rtm_flags = 0;
2022
2023 NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
2024 NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);
2025
2026 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
2027 goto nla_put_failure;
2028
2029 return nlmsg_end(skb, nlh);
2030
2031nla_put_failure:
2032 nlmsg_cancel(skb, nlh);
2033 return -EMSGSIZE;
2034}
2035
2036static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2037{
2038 struct net *net = sock_net(skb->sk);
2039 struct mr_table *mrt;
2040 struct mfc_cache *mfc;
2041 unsigned int t = 0, s_t;
2042 unsigned int h = 0, s_h;
2043 unsigned int e = 0, s_e;
2044
2045 s_t = cb->args[0];
2046 s_h = cb->args[1];
2047 s_e = cb->args[2];
2048
2049 read_lock(&mrt_lock);
2050 ipmr_for_each_table(mrt, net) {
2051 if (t < s_t)
2052 goto next_table;
2053 if (t > s_t)
2054 s_h = 0;
2055 for (h = s_h; h < MFC_LINES; h++) {
2056 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
2057 if (e < s_e)
2058 goto next_entry;
2059 if (ipmr_fill_mroute(mrt, skb,
2060 NETLINK_CB(cb->skb).pid,
2061 cb->nlh->nlmsg_seq,
2062 mfc) < 0)
2063 goto done;
2064next_entry:
2065 e++;
2066 }
2067 e = s_e = 0;
2068 }
2069 s_h = 0;
2070next_table:
2071 t++;
2072 }
2073done:
2074 read_unlock(&mrt_lock);
2075
2076 cb->args[2] = e;
2077 cb->args[1] = h;
2078 cb->args[0] = t;
2079
2080 return skb->len;
2081}
2082
1704#ifdef CONFIG_PROC_FS 2083#ifdef CONFIG_PROC_FS
1705/* 2084/*
1706 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif 2085 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1707 */ 2086 */
1708struct ipmr_vif_iter { 2087struct ipmr_vif_iter {
1709 struct seq_net_private p; 2088 struct seq_net_private p;
2089 struct mr_table *mrt;
1710 int ct; 2090 int ct;
1711}; 2091};
1712 2092
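
ipmr_rtm_dumproute() keeps a three-level cursor (table, hash line, entry) in cb->args, so a dump that overflows one skb resumes exactly where it stopped. The matching request is an RTM_GETROUTE dump with rtm_family set to RTNL_FAMILY_IPMR, the family value registered at the end of this patch; a hedged userspace sketch over a plain netlink socket:

	#include <string.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <linux/netlink.h>
	#include <linux/rtnetlink.h>

	/* Ask the kernel to dump all IPv4 multicast routes. */
	static int request_ipmr_dump(void)
	{
		struct {
			struct nlmsghdr nlh;
			struct rtmsg rtm;
		} req;
		int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

		if (fd < 0)
			return -1;
		memset(&req, 0, sizeof(req));
		req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
		req.nlh.nlmsg_type = RTM_GETROUTE;
		req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
		req.rtm.rtm_family = RTNL_FAMILY_IPMR;	/* added by this series */

		if (send(fd, &req, req.nlh.nlmsg_len, 0) < 0) {
			close(fd);
			return -1;
		}
		return fd;	/* caller recv()s RTM_NEWROUTE parts until NLMSG_DONE */
	}
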
@@ -1714,11 +2094,13 @@ static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1714 struct ipmr_vif_iter *iter, 2094 struct ipmr_vif_iter *iter,
1715 loff_t pos) 2095 loff_t pos)
1716{ 2096{
1717 for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) { 2097 struct mr_table *mrt = iter->mrt;
1718 if (!VIF_EXISTS(net, iter->ct)) 2098
2099 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2100 if (!VIF_EXISTS(mrt, iter->ct))
1719 continue; 2101 continue;
1720 if (pos-- == 0) 2102 if (pos-- == 0)
1721 return &net->ipv4.vif_table[iter->ct]; 2103 return &mrt->vif_table[iter->ct];
1722 } 2104 }
1723 return NULL; 2105 return NULL;
1724} 2106}
@@ -1726,7 +2108,15 @@ static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1726static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 2108static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1727 __acquires(mrt_lock) 2109 __acquires(mrt_lock)
1728{ 2110{
2111 struct ipmr_vif_iter *iter = seq->private;
1729 struct net *net = seq_file_net(seq); 2112 struct net *net = seq_file_net(seq);
2113 struct mr_table *mrt;
2114
2115 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2116 if (mrt == NULL)
2117 return ERR_PTR(-ENOENT);
2118
2119 iter->mrt = mrt;
1730 2120
1731 read_lock(&mrt_lock); 2121 read_lock(&mrt_lock);
1732 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1) 2122 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
@@ -1737,15 +2127,16 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1737{ 2127{
1738 struct ipmr_vif_iter *iter = seq->private; 2128 struct ipmr_vif_iter *iter = seq->private;
1739 struct net *net = seq_file_net(seq); 2129 struct net *net = seq_file_net(seq);
2130 struct mr_table *mrt = iter->mrt;
1740 2131
1741 ++*pos; 2132 ++*pos;
1742 if (v == SEQ_START_TOKEN) 2133 if (v == SEQ_START_TOKEN)
1743 return ipmr_vif_seq_idx(net, iter, 0); 2134 return ipmr_vif_seq_idx(net, iter, 0);
1744 2135
1745 while (++iter->ct < net->ipv4.maxvif) { 2136 while (++iter->ct < mrt->maxvif) {
1746 if (!VIF_EXISTS(net, iter->ct)) 2137 if (!VIF_EXISTS(mrt, iter->ct))
1747 continue; 2138 continue;
1748 return &net->ipv4.vif_table[iter->ct]; 2139 return &mrt->vif_table[iter->ct];
1749 } 2140 }
1750 return NULL; 2141 return NULL;
1751} 2142}
@@ -1758,7 +2149,8 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1758 2149
1759static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 2150static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1760{ 2151{
1761 struct net *net = seq_file_net(seq); 2152 struct ipmr_vif_iter *iter = seq->private;
2153 struct mr_table *mrt = iter->mrt;
1762 2154
1763 if (v == SEQ_START_TOKEN) { 2155 if (v == SEQ_START_TOKEN) {
1764 seq_puts(seq, 2156 seq_puts(seq,
@@ -1769,7 +2161,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1769 2161
1770 seq_printf(seq, 2162 seq_printf(seq,
1771 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 2163 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1772 vif - net->ipv4.vif_table, 2164 vif - mrt->vif_table,
1773 name, vif->bytes_in, vif->pkt_in, 2165 name, vif->bytes_in, vif->pkt_in,
1774 vif->bytes_out, vif->pkt_out, 2166 vif->bytes_out, vif->pkt_out,
1775 vif->flags, vif->local, vif->remote); 2167 vif->flags, vif->local, vif->remote);
@@ -1800,7 +2192,8 @@ static const struct file_operations ipmr_vif_fops = {
1800 2192
1801struct ipmr_mfc_iter { 2193struct ipmr_mfc_iter {
1802 struct seq_net_private p; 2194 struct seq_net_private p;
1803 struct mfc_cache **cache; 2195 struct mr_table *mrt;
2196 struct list_head *cache;
1804 int ct; 2197 int ct;
1805}; 2198};
1806 2199
@@ -1808,22 +2201,22 @@ struct ipmr_mfc_iter {
1808static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, 2201static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1809 struct ipmr_mfc_iter *it, loff_t pos) 2202 struct ipmr_mfc_iter *it, loff_t pos)
1810{ 2203{
2204 struct mr_table *mrt = it->mrt;
1811 struct mfc_cache *mfc; 2205 struct mfc_cache *mfc;
1812 2206
1813 it->cache = net->ipv4.mfc_cache_array;
1814 read_lock(&mrt_lock); 2207 read_lock(&mrt_lock);
1815 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) 2208 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
1816 for (mfc = net->ipv4.mfc_cache_array[it->ct]; 2209 it->cache = &mrt->mfc_cache_array[it->ct];
1817 mfc; mfc = mfc->next) 2210 list_for_each_entry(mfc, it->cache, list)
1818 if (pos-- == 0) 2211 if (pos-- == 0)
1819 return mfc; 2212 return mfc;
2213 }
1820 read_unlock(&mrt_lock); 2214 read_unlock(&mrt_lock);
1821 2215
1822 it->cache = &mfc_unres_queue;
1823 spin_lock_bh(&mfc_unres_lock); 2216 spin_lock_bh(&mfc_unres_lock);
1824 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) 2217 it->cache = &mrt->mfc_unres_queue;
1825 if (net_eq(mfc_net(mfc), net) && 2218 list_for_each_entry(mfc, it->cache, list)
1826 pos-- == 0) 2219 if (pos-- == 0)
1827 return mfc; 2220 return mfc;
1828 spin_unlock_bh(&mfc_unres_lock); 2221 spin_unlock_bh(&mfc_unres_lock);
1829 2222
@@ -1836,7 +2229,13 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1836{ 2229{
1837 struct ipmr_mfc_iter *it = seq->private; 2230 struct ipmr_mfc_iter *it = seq->private;
1838 struct net *net = seq_file_net(seq); 2231 struct net *net = seq_file_net(seq);
2232 struct mr_table *mrt;
1839 2233
2234 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2235 if (mrt == NULL)
2236 return ERR_PTR(-ENOENT);
2237
2238 it->mrt = mrt;
1840 it->cache = NULL; 2239 it->cache = NULL;
1841 it->ct = 0; 2240 it->ct = 0;
1842 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) 2241 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
@@ -1848,37 +2247,36 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1848 struct mfc_cache *mfc = v; 2247 struct mfc_cache *mfc = v;
1849 struct ipmr_mfc_iter *it = seq->private; 2248 struct ipmr_mfc_iter *it = seq->private;
1850 struct net *net = seq_file_net(seq); 2249 struct net *net = seq_file_net(seq);
2250 struct mr_table *mrt = it->mrt;
1851 2251
1852 ++*pos; 2252 ++*pos;
1853 2253
1854 if (v == SEQ_START_TOKEN) 2254 if (v == SEQ_START_TOKEN)
1855 return ipmr_mfc_seq_idx(net, seq->private, 0); 2255 return ipmr_mfc_seq_idx(net, seq->private, 0);
1856 2256
1857 if (mfc->next) 2257 if (mfc->list.next != it->cache)
1858 return mfc->next; 2258 return list_entry(mfc->list.next, struct mfc_cache, list);
1859 2259
1860 if (it->cache == &mfc_unres_queue) 2260 if (it->cache == &mrt->mfc_unres_queue)
1861 goto end_of_list; 2261 goto end_of_list;
1862 2262
1863 BUG_ON(it->cache != net->ipv4.mfc_cache_array); 2263 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
1864 2264
1865 while (++it->ct < MFC_LINES) { 2265 while (++it->ct < MFC_LINES) {
1866 mfc = net->ipv4.mfc_cache_array[it->ct]; 2266 it->cache = &mrt->mfc_cache_array[it->ct];
1867 if (mfc) 2267 if (list_empty(it->cache))
1868 return mfc; 2268 continue;
2269 return list_first_entry(it->cache, struct mfc_cache, list);
1869 } 2270 }
1870 2271
1871 /* exhausted cache_array, show unresolved */ 2272 /* exhausted cache_array, show unresolved */
1872 read_unlock(&mrt_lock); 2273 read_unlock(&mrt_lock);
1873 it->cache = &mfc_unres_queue; 2274 it->cache = &mrt->mfc_unres_queue;
1874 it->ct = 0; 2275 it->ct = 0;
1875 2276
1876 spin_lock_bh(&mfc_unres_lock); 2277 spin_lock_bh(&mfc_unres_lock);
1877 mfc = mfc_unres_queue; 2278 if (!list_empty(it->cache))
1878 while (mfc && !net_eq(mfc_net(mfc), net)) 2279 return list_first_entry(it->cache, struct mfc_cache, list);
1879 mfc = mfc->next;
1880 if (mfc)
1881 return mfc;
1882 2280
1883 end_of_list: 2281 end_of_list:
1884 spin_unlock_bh(&mfc_unres_lock); 2282 spin_unlock_bh(&mfc_unres_lock);
@@ -1890,18 +2288,17 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1890static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) 2288static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1891{ 2289{
1892 struct ipmr_mfc_iter *it = seq->private; 2290 struct ipmr_mfc_iter *it = seq->private;
1893 struct net *net = seq_file_net(seq); 2291 struct mr_table *mrt = it->mrt;
1894 2292
1895 if (it->cache == &mfc_unres_queue) 2293 if (it->cache == &mrt->mfc_unres_queue)
1896 spin_unlock_bh(&mfc_unres_lock); 2294 spin_unlock_bh(&mfc_unres_lock);
1897 else if (it->cache == net->ipv4.mfc_cache_array) 2295 else if (it->cache == &mrt->mfc_cache_array[it->ct])
1898 read_unlock(&mrt_lock); 2296 read_unlock(&mrt_lock);
1899} 2297}
1900 2298
1901static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 2299static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1902{ 2300{
1903 int n; 2301 int n;
1904 struct net *net = seq_file_net(seq);
1905 2302
1906 if (v == SEQ_START_TOKEN) { 2303 if (v == SEQ_START_TOKEN) {
1907 seq_puts(seq, 2304 seq_puts(seq,
@@ -1909,20 +2306,21 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1909 } else { 2306 } else {
1910 const struct mfc_cache *mfc = v; 2307 const struct mfc_cache *mfc = v;
1911 const struct ipmr_mfc_iter *it = seq->private; 2308 const struct ipmr_mfc_iter *it = seq->private;
2309 const struct mr_table *mrt = it->mrt;
1912 2310
1913 seq_printf(seq, "%08lX %08lX %-3hd", 2311 seq_printf(seq, "%08X %08X %-3hd",
1914 (unsigned long) mfc->mfc_mcastgrp, 2312 (__force u32) mfc->mfc_mcastgrp,
1915 (unsigned long) mfc->mfc_origin, 2313 (__force u32) mfc->mfc_origin,
1916 mfc->mfc_parent); 2314 mfc->mfc_parent);
1917 2315
1918 if (it->cache != &mfc_unres_queue) { 2316 if (it->cache != &mrt->mfc_unres_queue) {
1919 seq_printf(seq, " %8lu %8lu %8lu", 2317 seq_printf(seq, " %8lu %8lu %8lu",
1920 mfc->mfc_un.res.pkt, 2318 mfc->mfc_un.res.pkt,
1921 mfc->mfc_un.res.bytes, 2319 mfc->mfc_un.res.bytes,
1922 mfc->mfc_un.res.wrong_if); 2320 mfc->mfc_un.res.wrong_if);
1923 for (n = mfc->mfc_un.res.minvif; 2321 for (n = mfc->mfc_un.res.minvif;
1924 n < mfc->mfc_un.res.maxvif; n++ ) { 2322 n < mfc->mfc_un.res.maxvif; n++ ) {
1925 if (VIF_EXISTS(net, n) && 2323 if (VIF_EXISTS(mrt, n) &&
1926 mfc->mfc_un.res.ttls[n] < 255) 2324 mfc->mfc_un.res.ttls[n] < 255)
1927 seq_printf(seq, 2325 seq_printf(seq,
1928 " %2d:%-3d", 2326 " %2d:%-3d",
@@ -1974,27 +2372,11 @@ static const struct net_protocol pim_protocol = {
1974 */ 2372 */
1975static int __net_init ipmr_net_init(struct net *net) 2373static int __net_init ipmr_net_init(struct net *net)
1976{ 2374{
1977 int err = 0; 2375 int err;
1978 2376
1979 net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device), 2377 err = ipmr_rules_init(net);
1980 GFP_KERNEL); 2378 if (err < 0)
1981 if (!net->ipv4.vif_table) {
1982 err = -ENOMEM;
1983 goto fail; 2379 goto fail;
1984 }
1985
1986 /* Forwarding cache */
1987 net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1988 sizeof(struct mfc_cache *),
1989 GFP_KERNEL);
1990 if (!net->ipv4.mfc_cache_array) {
1991 err = -ENOMEM;
1992 goto fail_mfc_cache;
1993 }
1994
1995#ifdef CONFIG_IP_PIMSM
1996 net->ipv4.mroute_reg_vif_num = -1;
1997#endif
1998 2380
1999#ifdef CONFIG_PROC_FS 2381#ifdef CONFIG_PROC_FS
2000 err = -ENOMEM; 2382 err = -ENOMEM;
@@ -2009,10 +2391,8 @@ static int __net_init ipmr_net_init(struct net *net)
2009proc_cache_fail: 2391proc_cache_fail:
2010 proc_net_remove(net, "ip_mr_vif"); 2392 proc_net_remove(net, "ip_mr_vif");
2011proc_vif_fail: 2393proc_vif_fail:
2012 kfree(net->ipv4.mfc_cache_array); 2394 ipmr_rules_exit(net);
2013#endif 2395#endif
2014fail_mfc_cache:
2015 kfree(net->ipv4.vif_table);
2016fail: 2396fail:
2017 return err; 2397 return err;
2018} 2398}
@@ -2023,8 +2403,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
2023 proc_net_remove(net, "ip_mr_cache"); 2403 proc_net_remove(net, "ip_mr_cache");
2024 proc_net_remove(net, "ip_mr_vif"); 2404 proc_net_remove(net, "ip_mr_vif");
2025#endif 2405#endif
2026 kfree(net->ipv4.mfc_cache_array); 2406 ipmr_rules_exit(net);
2027 kfree(net->ipv4.vif_table);
2028} 2407}
2029 2408
2030static struct pernet_operations ipmr_net_ops = { 2409static struct pernet_operations ipmr_net_ops = {
@@ -2047,7 +2426,6 @@ int __init ip_mr_init(void)
2047 if (err) 2426 if (err)
2048 goto reg_pernet_fail; 2427 goto reg_pernet_fail;
2049 2428
2050 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2051 err = register_netdevice_notifier(&ip_mr_notifier); 2429 err = register_netdevice_notifier(&ip_mr_notifier);
2052 if (err) 2430 if (err)
2053 goto reg_notif_fail; 2431 goto reg_notif_fail;
@@ -2058,6 +2436,7 @@ int __init ip_mr_init(void)
2058 goto add_proto_fail; 2436 goto add_proto_fail;
2059 } 2437 }
2060#endif 2438#endif
2439 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
2061 return 0; 2440 return 0;
2062 2441
2063#ifdef CONFIG_IP_PIMSM_V2 2442#ifdef CONFIG_IP_PIMSM_V2
@@ -2065,7 +2444,6 @@ add_proto_fail:
2065 unregister_netdevice_notifier(&ip_mr_notifier); 2444 unregister_netdevice_notifier(&ip_mr_notifier);
2066#endif 2445#endif
2067reg_notif_fail: 2446reg_notif_fail:
2068 del_timer(&ipmr_expire_timer);
2069 unregister_pernet_subsys(&ipmr_net_ops); 2447 unregister_pernet_subsys(&ipmr_net_ops);
2070reg_pernet_fail: 2448reg_pernet_fail:
2071 kmem_cache_destroy(mrt_cachep); 2449 kmem_cache_destroy(mrt_cachep);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index ab828400ed71..a992dc826f1c 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -88,7 +88,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
88 list_del(&c->list); 88 list_del(&c->list);
89 write_unlock_bh(&clusterip_lock); 89 write_unlock_bh(&clusterip_lock);
90 90
91 dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); 91 dev_mc_del(c->dev, c->clustermac);
92 dev_put(c->dev); 92 dev_put(c->dev);
93 93
94 /* In case anyone still accesses the file, the open/close 94 /* In case anyone still accesses the file, the open/close
@@ -397,7 +397,7 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par)
397 dev_put(dev); 397 dev_put(dev);
398 return false; 398 return false;
399 } 399 }
400 dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); 400 dev_mc_add(config->dev, config->clustermac);
401 } 401 }
402 } 402 }
403 cipinfo->config = config; 403 cipinfo->config = config;
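
The CLUSTERIP hunks track a separate cleanup that landed in the same window: the device multicast helpers dropped their address-length and "global" arguments, taking the length from dev->addr_len instead. The new prototypes, as assumed from that cleanup (the net/core side is not part of this diff):

	/* Old: dev_mc_add(dev, addr, ETH_ALEN, 0) and
	 *      dev_mc_delete(dev, addr, ETH_ALEN, 0). */
	int dev_mc_add(struct net_device *dev, unsigned char *addr);
	int dev_mc_del(struct net_device *dev, unsigned char *addr);
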
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4f1f337f4337..3dc9914c1dce 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -251,6 +251,7 @@ static const struct snmp_mib snmp4_net_list[] = {
251 SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), 251 SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK),
252 SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), 252 SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP),
253 SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), 253 SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
254 SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
254 SNMP_MIB_SENTINEL 255 SNMP_MIB_SENTINEL
255}; 256};
256 257
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index cc6f097fbd5f..52ef5af78a45 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -290,7 +290,7 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
290{ 290{
291 /* Charge it to the socket. */ 291 /* Charge it to the socket. */
292 292
293 if (sock_queue_rcv_skb(sk, skb) < 0) { 293 if (ip_queue_rcv_skb(sk, skb) < 0) {
294 kfree_skb(skb); 294 kfree_skb(skb);
295 return NET_RX_DROP; 295 return NET_RX_DROP;
296 } 296 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cb562fdd9b9a..a947428ef0ae 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -258,10 +258,9 @@ static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
258 (__raw_get_cpu_var(rt_cache_stat).field++) 258 (__raw_get_cpu_var(rt_cache_stat).field++)
259 259
260static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, 260static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
261 int genid) 261 int genid)
262{ 262{
263 return jhash_3words((__force u32)(__be32)(daddr), 263 return jhash_3words((__force u32)daddr, (__force u32)saddr,
264 (__force u32)(__be32)(saddr),
265 idx, genid) 264 idx, genid)
266 & rt_hash_mask; 265 & rt_hash_mask;
267} 266}
@@ -378,12 +377,13 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
378 struct rtable *r = v; 377 struct rtable *r = v;
379 int len; 378 int len;
380 379
381 seq_printf(seq, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t" 380 seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
382 "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", 381 "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
383 r->u.dst.dev ? r->u.dst.dev->name : "*", 382 r->u.dst.dev ? r->u.dst.dev->name : "*",
384 (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway, 383 (__force u32)r->rt_dst,
384 (__force u32)r->rt_gateway,
385 r->rt_flags, atomic_read(&r->u.dst.__refcnt), 385 r->rt_flags, atomic_read(&r->u.dst.__refcnt),
386 r->u.dst.__use, 0, (unsigned long)r->rt_src, 386 r->u.dst.__use, 0, (__force u32)r->rt_src,
387 (dst_metric(&r->u.dst, RTAX_ADVMSS) ? 387 (dst_metric(&r->u.dst, RTAX_ADVMSS) ?
388 (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0), 388 (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0),
389 dst_metric(&r->u.dst, RTAX_WINDOW), 389 dst_metric(&r->u.dst, RTAX_WINDOW),
@@ -685,18 +685,17 @@ static inline bool rt_caching(const struct net *net)
685static inline bool compare_hash_inputs(const struct flowi *fl1, 685static inline bool compare_hash_inputs(const struct flowi *fl1,
686 const struct flowi *fl2) 686 const struct flowi *fl2)
687{ 687{
688 return (__force u32)(((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | 688 return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
689 (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) | 689 ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
690 (fl1->iif ^ fl2->iif)) == 0); 690 (fl1->iif ^ fl2->iif)) == 0);
691} 691}
692 692
693static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) 693static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
694{ 694{
695 return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | 695 return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
696 (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) | 696 ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
697 (fl1->mark ^ fl2->mark) | 697 (fl1->mark ^ fl2->mark) |
698 (*(u16 *)&fl1->nl_u.ip4_u.tos ^ 698 (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
699 *(u16 *)&fl2->nl_u.ip4_u.tos) |
700 (fl1->oif ^ fl2->oif) | 699 (fl1->oif ^ fl2->oif) |
701 (fl1->iif ^ fl2->iif)) == 0; 700 (fl1->iif ^ fl2->iif)) == 0;
702} 701}
@@ -2319,8 +2318,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2319 rcu_read_lock(); 2318 rcu_read_lock();
2320 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2319 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2321 rth = rcu_dereference(rth->u.dst.rt_next)) { 2320 rth = rcu_dereference(rth->u.dst.rt_next)) {
2322 if (((rth->fl.fl4_dst ^ daddr) | 2321 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
2323 (rth->fl.fl4_src ^ saddr) | 2322 ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
2324 (rth->fl.iif ^ iif) | 2323 (rth->fl.iif ^ iif) |
2325 rth->fl.oif | 2324 rth->fl.oif |
2326 (rth->fl.fl4_tos ^ tos)) == 0 && 2325 (rth->fl.fl4_tos ^ tos)) == 0 &&
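
The route.c changes above are sparse-annotation fixes rather than behavioral ones: __be32 values may be XOR-compared for equality because byte order cancels out, but each operand needs its own (__force u32) cast so sparse accepts the mixed-endian expression. The idiom in isolation (ipv4_addr_differs is a made-up name for illustration):

	#include <linux/types.h>

	/* Equality on big-endian values is byte-order neutral, so the
	 * __force casts are semantically safe; arithmetic on the casted
	 * values would not be. */
	static inline bool ipv4_addr_differs(__be32 a, __be32 b)
	{
		return ((__force u32)a ^ (__force u32)b) != 0;
	}
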
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0f8caf64caa3..8ce29747ad9b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -378,7 +378,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
378 struct sock *sk = sock->sk; 378 struct sock *sk = sock->sk;
379 struct tcp_sock *tp = tcp_sk(sk); 379 struct tcp_sock *tp = tcp_sk(sk);
380 380
381 sock_poll_wait(file, sk->sk_sleep, wait); 381 sock_poll_wait(file, sk_sleep(sk), wait);
382 if (sk->sk_state == TCP_LISTEN) 382 if (sk->sk_state == TCP_LISTEN)
383 return inet_csk_listen_poll(sk); 383 return inet_csk_listen_poll(sk);
384 384
@@ -2298,7 +2298,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2298 if (sock_flag(sk, SOCK_KEEPOPEN) && 2298 if (sock_flag(sk, SOCK_KEEPOPEN) &&
2299 !((1 << sk->sk_state) & 2299 !((1 << sk->sk_state) &
2300 (TCPF_CLOSE | TCPF_LISTEN))) { 2300 (TCPF_CLOSE | TCPF_LISTEN))) {
2301 __u32 elapsed = tcp_time_stamp - tp->rcv_tstamp; 2301 u32 elapsed = keepalive_time_elapsed(tp);
2302 if (tp->keepalive_time > elapsed) 2302 if (tp->keepalive_time > elapsed)
2303 elapsed = tp->keepalive_time - elapsed; 2303 elapsed = tp->keepalive_time - elapsed;
2304 else 2304 else
@@ -2721,7 +2721,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2721 struct tcphdr *th2; 2721 struct tcphdr *th2;
2722 unsigned int len; 2722 unsigned int len;
2723 unsigned int thlen; 2723 unsigned int thlen;
2724 unsigned int flags; 2724 __be32 flags;
2725 unsigned int mss = 1; 2725 unsigned int mss = 1;
2726 unsigned int hlen; 2726 unsigned int hlen;
2727 unsigned int off; 2727 unsigned int off;
@@ -2771,10 +2771,10 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2771 2771
2772found: 2772found:
2773 flush = NAPI_GRO_CB(p)->flush; 2773 flush = NAPI_GRO_CB(p)->flush;
2774 flush |= flags & TCP_FLAG_CWR; 2774 flush |= (__force int)(flags & TCP_FLAG_CWR);
2775 flush |= (flags ^ tcp_flag_word(th2)) & 2775 flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
2776 ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); 2776 ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
2777 flush |= th->ack_seq ^ th2->ack_seq; 2777 flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
2778 for (i = sizeof(*th); i < thlen; i += 4) 2778 for (i = sizeof(*th); i < thlen; i += 4)
2779 flush |= *(u32 *)((u8 *)th + i) ^ 2779 flush |= *(u32 *)((u8 *)th + i) ^
2780 *(u32 *)((u8 *)th2 + i); 2780 *(u32 *)((u8 *)th2 + i);
@@ -2795,8 +2795,9 @@ found:
2795 2795
2796out_check_final: 2796out_check_final:
2797 flush = len < mss; 2797 flush = len < mss;
2798 flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | 2798 flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
2799 TCP_FLAG_SYN | TCP_FLAG_FIN); 2799 TCP_FLAG_RST | TCP_FLAG_SYN |
2800 TCP_FLAG_FIN));
2800 2801
2801 if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) 2802 if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
2802 pp = head; 2803 pp = head;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f240f57b2199..e82162c211bf 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3710,7 +3710,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3710 } 3710 }
3711 3711
3712 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) 3712 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3713 dst_confirm(sk->sk_dst_cache); 3713 dst_confirm(__sk_dst_get(sk));
3714 3714
3715 return 1; 3715 return 1;
3716 3716
@@ -4319,7 +4319,7 @@ static void tcp_ofo_queue(struct sock *sk)
4319 } 4319 }
4320 4320
4321 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { 4321 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4322 SOCK_DEBUG(sk, "ofo packet was already received \n"); 4322 SOCK_DEBUG(sk, "ofo packet was already received\n");
4323 __skb_unlink(skb, &tp->out_of_order_queue); 4323 __skb_unlink(skb, &tp->out_of_order_queue);
4324 __kfree_skb(skb); 4324 __kfree_skb(skb);
4325 continue; 4325 continue;
@@ -4367,6 +4367,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4367 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) 4367 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
4368 goto drop; 4368 goto drop;
4369 4369
4370 skb_dst_drop(skb);
4370 __skb_pull(skb, th->doff * 4); 4371 __skb_pull(skb, th->doff * 4);
4371 4372
4372 TCP_ECN_accept_cwr(tp, skb); 4373 TCP_ECN_accept_cwr(tp, skb);
@@ -5833,7 +5834,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5833 if (tp->snd_una == tp->write_seq) { 5834 if (tp->snd_una == tp->write_seq) {
5834 tcp_set_state(sk, TCP_FIN_WAIT2); 5835 tcp_set_state(sk, TCP_FIN_WAIT2);
5835 sk->sk_shutdown |= SEND_SHUTDOWN; 5836 sk->sk_shutdown |= SEND_SHUTDOWN;
5836 dst_confirm(sk->sk_dst_cache); 5837 dst_confirm(__sk_dst_get(sk));
5837 5838
5838 if (!sock_flag(sk, SOCK_DEAD)) 5839 if (!sock_flag(sk, SOCK_DEAD))
5839 /* Wake up lingering close() */ 5840 /* Wake up lingering close() */
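
These dst_confirm() conversions replace direct sk->sk_dst_cache reads with __sk_dst_get(), part of annotating the socket dst cache for RCU. The accessor is expected to look roughly like this; the exact lockdep conditions are an assumption from the same series:

	static inline struct dst_entry *__sk_dst_get(struct sock *sk)
	{
		return rcu_dereference_check(sk->sk_dst_cache,
					     rcu_read_lock_held() ||
					     sock_owned_by_user(sk) ||
					     lockdep_is_held(&sk->sk_lock.slock));
	}
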
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3c23e70885f4..771f8146a2e5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -519,26 +519,31 @@ out:
519 sock_put(sk); 519 sock_put(sk);
520} 520}
521 521
522/* This routine computes an IPv4 TCP checksum. */ 522static void __tcp_v4_send_check(struct sk_buff *skb,
523void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) 523 __be32 saddr, __be32 daddr)
524{ 524{
525 struct inet_sock *inet = inet_sk(sk);
526 struct tcphdr *th = tcp_hdr(skb); 525 struct tcphdr *th = tcp_hdr(skb);
527 526
528 if (skb->ip_summed == CHECKSUM_PARTIAL) { 527 if (skb->ip_summed == CHECKSUM_PARTIAL) {
529 th->check = ~tcp_v4_check(len, inet->inet_saddr, 528 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
530 inet->inet_daddr, 0);
531 skb->csum_start = skb_transport_header(skb) - skb->head; 529 skb->csum_start = skb_transport_header(skb) - skb->head;
532 skb->csum_offset = offsetof(struct tcphdr, check); 530 skb->csum_offset = offsetof(struct tcphdr, check);
533 } else { 531 } else {
534 th->check = tcp_v4_check(len, inet->inet_saddr, 532 th->check = tcp_v4_check(skb->len, saddr, daddr,
535 inet->inet_daddr,
536 csum_partial(th, 533 csum_partial(th,
537 th->doff << 2, 534 th->doff << 2,
538 skb->csum)); 535 skb->csum));
539 } 536 }
540} 537}
541 538
539/* This routine computes an IPv4 TCP checksum. */
540void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
541{
542 struct inet_sock *inet = inet_sk(sk);
543
544 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
545}
546
542int tcp_v4_gso_send_check(struct sk_buff *skb) 547int tcp_v4_gso_send_check(struct sk_buff *skb)
543{ 548{
544 const struct iphdr *iph; 549 const struct iphdr *iph;
@@ -551,10 +556,8 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
551 th = tcp_hdr(skb); 556 th = tcp_hdr(skb);
552 557
553 th->check = 0; 558 th->check = 0;
554 th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
555 skb->csum_start = skb_transport_header(skb) - skb->head;
556 skb->csum_offset = offsetof(struct tcphdr, check);
557 skb->ip_summed = CHECKSUM_PARTIAL; 559 skb->ip_summed = CHECKSUM_PARTIAL;
560 __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
558 return 0; 561 return 0;
559} 562}
560 563
@@ -763,13 +766,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
763 skb = tcp_make_synack(sk, dst, req, rvp); 766 skb = tcp_make_synack(sk, dst, req, rvp);
764 767
765 if (skb) { 768 if (skb) {
766 struct tcphdr *th = tcp_hdr(skb); 769 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
767
768 th->check = tcp_v4_check(skb->len,
769 ireq->loc_addr,
770 ireq->rmt_addr,
771 csum_partial(th, skb->len,
772 skb->csum));
773 770
774 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, 771 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
775 ireq->rmt_addr, 772 ireq->rmt_addr,
@@ -1289,8 +1286,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1289 goto drop_and_release; 1286 goto drop_and_release;
1290 1287
1291 /* Secret recipe starts with IP addresses */ 1288 /* Secret recipe starts with IP addresses */
1292 *mess++ ^= daddr; 1289 *mess++ ^= (__force u32)daddr;
1293 *mess++ ^= saddr; 1290 *mess++ ^= (__force u32)saddr;
1294 1291
1295 /* plus variable length Initiator Cookie */ 1292 /* plus variable length Initiator Cookie */
1296 c = (u8 *)mess; 1293 c = (u8 *)mess;
@@ -1675,6 +1672,8 @@ process:
1675 1672
1676 skb->dev = NULL; 1673 skb->dev = NULL;
1677 1674
1675 sock_rps_save_rxhash(sk, skb->rxhash);
1676
1678 bh_lock_sock_nested(sk); 1677 bh_lock_sock_nested(sk);
1679 ret = 0; 1678 ret = 0;
1680 if (!sock_owned_by_user(sk)) { 1679 if (!sock_owned_by_user(sk)) {
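
The added sock_rps_save_rxhash() call feeds the Receive Packet Steering flow table from the established-socket fast path. A sketch of the helper as introduced by the RPS series (assumed; neither it nor sock_rps_reset_flow() appears in this diff):

	static inline void sock_rps_save_rxhash(struct sock *sk, u32 rxhash)
	{
	#ifdef CONFIG_RPS
		/* Only touch the flow table when the hash actually changes. */
		if (unlikely(sk->sk_rxhash != rxhash)) {
			sock_rps_reset_flow(sk);
			sk->sk_rxhash = rxhash;
		}
	#endif
	}
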
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 5fabff9ac6d6..794c2e122a41 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -672,6 +672,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
672 if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && 672 if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
673 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { 673 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
674 inet_rsk(req)->acked = 1; 674 inet_rsk(req)->acked = 1;
675 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
675 return NULL; 676 return NULL;
676 } 677 }
677 678
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0dda86e72ad8..5db3a2c6cb33 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -350,6 +350,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
350 */ 350 */
351static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) 351static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
352{ 352{
353 skb->ip_summed = CHECKSUM_PARTIAL;
353 skb->csum = 0; 354 skb->csum = 0;
354 355
355 TCP_SKB_CB(skb)->flags = flags; 356 TCP_SKB_CB(skb)->flags = flags;
@@ -860,7 +861,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
860 th->urg_ptr = htons(tp->snd_up - tcb->seq); 861 th->urg_ptr = htons(tp->snd_up - tcb->seq);
861 th->urg = 1; 862 th->urg = 1;
862 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { 863 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
863 th->urg_ptr = 0xFFFF; 864 th->urg_ptr = htons(0xFFFF);
864 th->urg = 1; 865 th->urg = 1;
865 } 866 }
866 } 867 }
@@ -878,7 +879,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
878 } 879 }
879#endif 880#endif
880 881
881 icsk->icsk_af_ops->send_check(sk, skb->len, skb); 882 icsk->icsk_af_ops->send_check(sk, skb);
882 883
883 if (likely(tcb->flags & TCPCB_FLAG_ACK)) 884 if (likely(tcb->flags & TCPCB_FLAG_ACK))
884 tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); 885 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
@@ -887,9 +888,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
887 tcp_event_data_sent(tp, skb, sk); 888 tcp_event_data_sent(tp, skb, sk);
888 889
889 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) 890 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
890 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); 891 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
892 tcp_skb_pcount(skb));
891 893
892 err = icsk->icsk_af_ops->queue_xmit(skb, 0); 894 err = icsk->icsk_af_ops->queue_xmit(skb);
893 if (likely(err <= 0)) 895 if (likely(err <= 0))
894 return err; 896 return err;
895 897
@@ -2484,7 +2486,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2484 *tail-- ^= TCP_SKB_CB(skb)->seq + 1; 2486 *tail-- ^= TCP_SKB_CB(skb)->seq + 1;
2485 2487
2486 /* recommended */ 2488 /* recommended */
2487 *tail-- ^= ((th->dest << 16) | th->source); 2489 *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
2488 *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ 2490 *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */
2489 2491
2490 sha_transform((__u32 *)&xvp->cookie_bakery[0], 2492 sha_transform((__u32 *)&xvp->cookie_bakery[0],
@@ -2502,7 +2504,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2502 th->window = htons(min(req->rcv_wnd, 65535U)); 2504 th->window = htons(min(req->rcv_wnd, 65535U));
2503 tcp_options_write((__be32 *)(th + 1), tp, &opts); 2505 tcp_options_write((__be32 *)(th + 1), tp, &opts);
2504 th->doff = (tcp_header_size >> 2); 2506 th->doff = (tcp_header_size >> 2);
2505 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); 2507 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));
2506 2508
2507#ifdef CONFIG_TCP_MD5SIG 2509#ifdef CONFIG_TCP_MD5SIG
2508 /* Okay, we have all we need - do the md5 hash if needed */ 2510 /* Okay, we have all we need - do the md5 hash if needed */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 8a0ab2977f1f..440a5c6004f6 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -172,14 +172,14 @@ static int tcp_write_timeout(struct sock *sk)
172 172
173 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 173 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
174 if (icsk->icsk_retransmits) 174 if (icsk->icsk_retransmits)
175 dst_negative_advice(&sk->sk_dst_cache, sk); 175 dst_negative_advice(sk);
176 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 176 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
177 } else { 177 } else {
178 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { 178 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
179 /* Black hole detection */ 179 /* Black hole detection */
180 tcp_mtu_probing(icsk, sk); 180 tcp_mtu_probing(icsk, sk);
181 181
182 dst_negative_advice(&sk->sk_dst_cache, sk); 182 dst_negative_advice(sk);
183 } 183 }
184 184
185 retry_until = sysctl_tcp_retries2; 185 retry_until = sysctl_tcp_retries2;
@@ -517,7 +517,7 @@ static void tcp_keepalive_timer (unsigned long data)
517 struct sock *sk = (struct sock *) data; 517 struct sock *sk = (struct sock *) data;
518 struct inet_connection_sock *icsk = inet_csk(sk); 518 struct inet_connection_sock *icsk = inet_csk(sk);
519 struct tcp_sock *tp = tcp_sk(sk); 519 struct tcp_sock *tp = tcp_sk(sk);
520 __u32 elapsed; 520 u32 elapsed;
521 521
522 /* Only process if socket is not in use. */ 522 /* Only process if socket is not in use. */
523 bh_lock_sock(sk); 523 bh_lock_sock(sk);
@@ -554,7 +554,7 @@ static void tcp_keepalive_timer (unsigned long data)
554 if (tp->packets_out || tcp_send_head(sk)) 554 if (tp->packets_out || tcp_send_head(sk))
555 goto resched; 555 goto resched;
556 556
557 elapsed = tcp_time_stamp - tp->rcv_tstamp; 557 elapsed = keepalive_time_elapsed(tp);
558 558
559 if (elapsed >= keepalive_time_when(tp)) { 559 if (elapsed >= keepalive_time_when(tp)) {
560 if (icsk->icsk_probes_out >= keepalive_probes(tp)) { 560 if (icsk->icsk_probes_out >= keepalive_probes(tp)) {
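
The open-coded tcp_time_stamp - tp->rcv_tstamp only looked at the last acceptable ACK, so a socket steadily receiving data could still be probed. The shared helper also folds in icsk_ack.lrcvtime, the time the last segment of any kind arrived. Approximately (a sketch; treat the exact body as an assumption and check include/net/tcp.h):

static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
{
	const struct inet_connection_sock *icsk = &tp->inet_conn;

	/* idle time is the smaller of "since last ACK" and
	 * "since last received segment" */
	return min_t(u32, tcp_time_stamp - icsk->icsk_ack.lrcvtime,
			  tcp_time_stamp - tp->rcv_tstamp);
}
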
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8fef859db35d..4560b291180b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -307,13 +307,13 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
307static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, 307static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr,
308 unsigned int port) 308 unsigned int port)
309{ 309{
310 return jhash_1word(saddr, net_hash_mix(net)) ^ port; 310 return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
311} 311}
312 312
313int udp_v4_get_port(struct sock *sk, unsigned short snum) 313int udp_v4_get_port(struct sock *sk, unsigned short snum)
314{ 314{
315 unsigned int hash2_nulladdr = 315 unsigned int hash2_nulladdr =
316 udp4_portaddr_hash(sock_net(sk), INADDR_ANY, snum); 316 udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
317 unsigned int hash2_partial = 317 unsigned int hash2_partial =
318 udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); 318 udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
319 319
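
INADDR_ANY is a host-order constant, while every real address reaching udp4_portaddr_hash() is a __be32. Since 0.0.0.0 is all zeroes the htonl() is a numeric no-op, but it keeps both hash inputs in one byte-order domain (and keeps sparse quiet). The point, as a sketch (wildcard_hash is a hypothetical wrapper for illustration):

/* A wildcard-bound socket must land in the same hash2 chain that the
 * receive path computes when it falls back to (0.0.0.0, port), so both
 * sides must feed jhash the identical bit pattern: */
static inline unsigned int wildcard_hash(struct net *net, unsigned int port)
{
	return udp4_portaddr_hash(net, htonl(INADDR_ANY), port);
}
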
@@ -466,14 +466,14 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
466 daddr, hnum, dif, 466 daddr, hnum, dif,
467 hslot2, slot2); 467 hslot2, slot2);
468 if (!result) { 468 if (!result) {
469 hash2 = udp4_portaddr_hash(net, INADDR_ANY, hnum); 469 hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
470 slot2 = hash2 & udptable->mask; 470 slot2 = hash2 & udptable->mask;
471 hslot2 = &udptable->hash2[slot2]; 471 hslot2 = &udptable->hash2[slot2];
472 if (hslot->count < hslot2->count) 472 if (hslot->count < hslot2->count)
473 goto begin; 473 goto begin;
474 474
475 result = udp4_lib_lookup2(net, saddr, sport, 475 result = udp4_lib_lookup2(net, saddr, sport,
476 INADDR_ANY, hnum, dif, 476 htonl(INADDR_ANY), hnum, dif,
477 hslot2, slot2); 477 hslot2, slot2);
478 } 478 }
479 rcu_read_unlock(); 479 rcu_read_unlock();
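
The lookup strategy: walk the chain hashed by (daddr, port); if nothing matches, retry with the wildcard key (0.0.0.0, port). In either pass, if the secondary chain is longer than the classic port-only chain, goto begin rescans the old-style chain instead, so the walk never does worse than before. The heuristic in isolation (use_portonly_chain is a hypothetical name):

static inline bool use_portonly_chain(const struct udp_hslot *hslot,
				      const struct udp_hslot *hslot2)
{
	/* Walk whichever chain is shorter: the classic port-only chain
	 * or the new (addr, port) chain; ties favour the new one. */
	return hslot->count < hslot2->count;
}
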
@@ -1062,10 +1062,10 @@ static unsigned int first_packet_length(struct sock *sk)
1062 spin_unlock_bh(&rcvq->lock); 1062 spin_unlock_bh(&rcvq->lock);
1063 1063
1064 if (!skb_queue_empty(&list_kill)) { 1064 if (!skb_queue_empty(&list_kill)) {
1065 lock_sock(sk); 1065 lock_sock_bh(sk);
1066 __skb_queue_purge(&list_kill); 1066 __skb_queue_purge(&list_kill);
1067 sk_mem_reclaim_partial(sk); 1067 sk_mem_reclaim_partial(sk);
1068 release_sock(sk); 1068 unlock_sock_bh(sk);
1069 } 1069 }
1070 return res; 1070 return res;
1071} 1071}
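
These paths run in process context but only touch queues that the BH receive path already protects with the socket spinlock, so the heavyweight lock_sock() (which may sleep and replays the backlog) is replaced with a plain spinlock section. At this point in the tree the pair is roughly the following sketch (the helpers were later generalized into lock_sock_fast()/unlock_sock_fast()):

static inline void lock_sock_bh(struct sock *sk)
{
	/* excludes the softirq receive path; caller must not sleep */
	spin_lock_bh(&sk->sk_lock.slock);
}

static inline void unlock_sock_bh(struct sock *sk)
{
	spin_unlock_bh(&sk->sk_lock.slock);
}
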
@@ -1196,10 +1196,10 @@ out:
1196 return err; 1196 return err;
1197 1197
1198csum_copy_err: 1198csum_copy_err:
1199 lock_sock(sk); 1199 lock_sock_bh(sk);
1200 if (!skb_kill_datagram(sk, skb, flags)) 1200 if (!skb_kill_datagram(sk, skb, flags))
1201 UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 1201 UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1202 release_sock(sk); 1202 unlock_sock_bh(sk);
1203 1203
1204 if (noblock) 1204 if (noblock)
1205 return -EAGAIN; 1205 return -EAGAIN;
@@ -1217,6 +1217,7 @@ int udp_disconnect(struct sock *sk, int flags)
1217 sk->sk_state = TCP_CLOSE; 1217 sk->sk_state = TCP_CLOSE;
1218 inet->inet_daddr = 0; 1218 inet->inet_daddr = 0;
1219 inet->inet_dport = 0; 1219 inet->inet_dport = 0;
1220 sock_rps_save_rxhash(sk, 0);
1220 sk->sk_bound_dev_if = 0; 1221 sk->sk_bound_dev_if = 0;
1221 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 1222 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
1222 inet_reset_saddr(sk); 1223 inet_reset_saddr(sk);
@@ -1258,8 +1259,12 @@ EXPORT_SYMBOL(udp_lib_unhash);
1258 1259
1259static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 1260static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1260{ 1261{
1261 int rc = sock_queue_rcv_skb(sk, skb); 1262 int rc;
1263
1264 if (inet_sk(sk)->inet_daddr)
1265 sock_rps_save_rxhash(sk, skb->rxhash);
1262 1266
1267 rc = ip_queue_rcv_skb(sk, skb);
1263 if (rc < 0) { 1268 if (rc < 0) {
1264 int is_udplite = IS_UDPLITE(sk); 1269 int is_udplite = IS_UDPLITE(sk);
1265 1270
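
Two things change in __udp_queue_rcv_skb(): for connected sockets the flow hash is recorded so Receive Flow Steering can steer follow-up packets toward the CPU the application last ran on (udp_disconnect() above clears it again with rxhash 0), and queueing goes through ip_queue_rcv_skb(), which releases the route reference early when no cmsg will need it. The latter, approximately (a sketch of the helper added by this series):

int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	/* the dst is only consulted later if IP_PKTINFO is requested */
	if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO))
		skb_dst_drop(skb);
	return sock_queue_rcv_skb(sk, skb);
}
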
@@ -1367,6 +1372,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1367 goto drop; 1372 goto drop;
1368 } 1373 }
1369 1374
1375
1376 if (sk_rcvqueues_full(sk, skb))
1377 goto drop;
1378
1370 rc = 0; 1379 rc = 0;
1371 1380
1372 bh_lock_sock(sk); 1381 bh_lock_sock(sk);
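
sk_rcvqueues_full() closes a flooding window: previously only sk_rmem_alloc was charged against sk_rcvbuf, so skbs parked on the backlog while the socket was owned by the user grew without bound. The new check sums both. Roughly as introduced (a sketch; later kernels also add skb->truesize to the sum):

static inline bool sk_rcvqueues_full(const struct sock *sk,
				     const struct sk_buff *skb)
{
	unsigned int qsize = sk->sk_backlog.len +
			     atomic_read(&sk->sk_rmem_alloc);

	return qsize > sk->sk_rcvbuf;
}
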
@@ -1615,9 +1624,9 @@ int udp_rcv(struct sk_buff *skb)
1615 1624
1616void udp_destroy_sock(struct sock *sk) 1625void udp_destroy_sock(struct sock *sk)
1617{ 1626{
1618 lock_sock(sk); 1627 lock_sock_bh(sk);
1619 udp_flush_pending_frames(sk); 1628 udp_flush_pending_frames(sk);
1620 release_sock(sk); 1629 unlock_sock_bh(sk);
1621} 1630}
1622 1631
1623/* 1632/*
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index e4a1483fba77..1705476670ef 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -59,27 +59,6 @@ static int xfrm4_get_saddr(struct net *net,
59 return 0; 59 return 0;
60} 60}
61 61
62static struct dst_entry *
63__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
64{
65 struct dst_entry *dst;
66
67 read_lock_bh(&policy->lock);
68 for (dst = policy->bundles; dst; dst = dst->next) {
69 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
70 if (xdst->u.rt.fl.oif == fl->oif && /*XXX*/
71 xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
72 xdst->u.rt.fl.fl4_src == fl->fl4_src &&
73 xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
74 xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) {
75 dst_clone(dst);
76 break;
77 }
78 }
79 read_unlock_bh(&policy->lock);
80 return dst;
81}
82
83static int xfrm4_get_tos(struct flowi *fl) 62static int xfrm4_get_tos(struct flowi *fl)
84{ 63{
85 return fl->fl4_tos; 64 return fl->fl4_tos;
@@ -259,7 +238,6 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
259 .dst_ops = &xfrm4_dst_ops, 238 .dst_ops = &xfrm4_dst_ops,
260 .dst_lookup = xfrm4_dst_lookup, 239 .dst_lookup = xfrm4_dst_lookup,
261 .get_saddr = xfrm4_get_saddr, 240 .get_saddr = xfrm4_get_saddr,
262 .find_bundle = __xfrm4_find_bundle,
263 .decode_session = _decode_session4, 241 .decode_session = _decode_session4,
264 .get_tos = xfrm4_get_tos, 242 .get_tos = xfrm4_get_tos,
265 .init_path = xfrm4_init_path, 243 .init_path = xfrm4_init_path,
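
__xfrm4_find_bundle() walked the policy's private bundle list under the policy lock, doing a hand-rolled flow-key comparison per address family. This series caches bundles in the generic flow cache instead, so the per-family .find_bundle hook disappears. The comparison the walk used to perform, isolated for clarity (flow4_key_eq is a hypothetical name; the fl4_* fields are the old struct flowi aliases visible in the removed code above):

static bool flow4_key_eq(const struct flowi *a, const struct flowi *b)
{
	return a->oif == b->oif &&
	       a->fl4_dst == b->fl4_dst &&
	       a->fl4_src == b->fl4_src &&
	       a->fl4_tos == b->fl4_tos;
}
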