aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/ipmr.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ipmr.c')
-rw-r--r--net/ipv4/ipmr.c764
1 files changed, 400 insertions, 364 deletions
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 92dd4b74d513..395e2814a46d 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -66,28 +66,7 @@
66#include <net/netlink.h> 66#include <net/netlink.h>
67#include <net/fib_rules.h> 67#include <net/fib_rules.h>
68#include <linux/netconf.h> 68#include <linux/netconf.h>
69 69#include <net/nexthop.h>
70#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
71#define CONFIG_IP_PIMSM 1
72#endif
73
74struct mr_table {
75 struct list_head list;
76 possible_net_t net;
77 u32 id;
78 struct sock __rcu *mroute_sk;
79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES];
82 struct vif_device vif_table[MAXVIFS];
83 int maxvif;
84 atomic_t cache_resolve_queue_len;
85 bool mroute_do_assert;
86 bool mroute_do_pim;
87#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88 int mroute_reg_vif_num;
89#endif
90};
91 70
92struct ipmr_rule { 71struct ipmr_rule {
93 struct fib_rule common; 72 struct fib_rule common;
@@ -103,11 +82,7 @@ struct ipmr_result {
103 82
104static DEFINE_RWLOCK(mrt_lock); 83static DEFINE_RWLOCK(mrt_lock);
105 84
106/* 85/* Multicast router control variables */
107 * Multicast router control variables
108 */
109
110#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
111 86
112/* Special spinlock for queue of unresolved entries */ 87/* Special spinlock for queue of unresolved entries */
113static DEFINE_SPINLOCK(mfc_unres_lock); 88static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -134,7 +109,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
134 struct mfc_cache *c, struct rtmsg *rtm); 109 struct mfc_cache *c, struct rtmsg *rtm);
135static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, 110static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
136 int cmd); 111 int cmd);
137static void mroute_clean_tables(struct mr_table *mrt); 112static void mroute_clean_tables(struct mr_table *mrt, bool all);
138static void ipmr_expire_process(unsigned long arg); 113static void ipmr_expire_process(unsigned long arg);
139 114
140#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 115#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -252,8 +227,8 @@ static int __net_init ipmr_rules_init(struct net *net)
252 INIT_LIST_HEAD(&net->ipv4.mr_tables); 227 INIT_LIST_HEAD(&net->ipv4.mr_tables);
253 228
254 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 229 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
255 if (!mrt) { 230 if (IS_ERR(mrt)) {
256 err = -ENOMEM; 231 err = PTR_ERR(mrt);
257 goto err1; 232 goto err1;
258 } 233 }
259 234
@@ -301,8 +276,13 @@ static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
301 276
302static int __net_init ipmr_rules_init(struct net *net) 277static int __net_init ipmr_rules_init(struct net *net)
303{ 278{
304 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT); 279 struct mr_table *mrt;
305 return net->ipv4.mrt ? 0 : -ENOMEM; 280
281 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
282 if (IS_ERR(mrt))
283 return PTR_ERR(mrt);
284 net->ipv4.mrt = mrt;
285 return 0;
306} 286}
307 287
308static void __net_exit ipmr_rules_exit(struct net *net) 288static void __net_exit ipmr_rules_exit(struct net *net)
@@ -319,13 +299,17 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
319 struct mr_table *mrt; 299 struct mr_table *mrt;
320 unsigned int i; 300 unsigned int i;
321 301
302 /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */
303 if (id != RT_TABLE_DEFAULT && id >= 1000000000)
304 return ERR_PTR(-EINVAL);
305
322 mrt = ipmr_get_table(net, id); 306 mrt = ipmr_get_table(net, id);
323 if (mrt) 307 if (mrt)
324 return mrt; 308 return mrt;
325 309
326 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); 310 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
327 if (!mrt) 311 if (!mrt)
328 return NULL; 312 return ERR_PTR(-ENOMEM);
329 write_pnet(&mrt->net, net); 313 write_pnet(&mrt->net, net);
330 mrt->id = id; 314 mrt->id = id;
331 315
@@ -338,9 +322,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
338 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, 322 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
339 (unsigned long)mrt); 323 (unsigned long)mrt);
340 324
341#ifdef CONFIG_IP_PIMSM
342 mrt->mroute_reg_vif_num = -1; 325 mrt->mroute_reg_vif_num = -1;
343#endif
344#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES 326#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
345 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); 327 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
346#endif 328#endif
@@ -350,7 +332,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
350static void ipmr_free_table(struct mr_table *mrt) 332static void ipmr_free_table(struct mr_table *mrt)
351{ 333{
352 del_timer_sync(&mrt->ipmr_expire_timer); 334 del_timer_sync(&mrt->ipmr_expire_timer);
353 mroute_clean_tables(mrt); 335 mroute_clean_tables(mrt, true);
354 kfree(mrt); 336 kfree(mrt);
355} 337}
356 338
@@ -387,8 +369,24 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
387 } 369 }
388} 370}
389 371
390static 372/* Initialize ipmr pimreg/tunnel in_device */
391struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) 373static bool ipmr_init_vif_indev(const struct net_device *dev)
374{
375 struct in_device *in_dev;
376
377 ASSERT_RTNL();
378
379 in_dev = __in_dev_get_rtnl(dev);
380 if (!in_dev)
381 return false;
382 ipv4_devconf_setall(in_dev);
383 neigh_parms_data_state_setall(in_dev->arp_parms);
384 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
385
386 return true;
387}
388
389static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
392{ 390{
393 struct net_device *dev; 391 struct net_device *dev;
394 392
@@ -399,7 +397,6 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
399 int err; 397 int err;
400 struct ifreq ifr; 398 struct ifreq ifr;
401 struct ip_tunnel_parm p; 399 struct ip_tunnel_parm p;
402 struct in_device *in_dev;
403 400
404 memset(&p, 0, sizeof(p)); 401 memset(&p, 0, sizeof(p));
405 p.iph.daddr = v->vifc_rmt_addr.s_addr; 402 p.iph.daddr = v->vifc_rmt_addr.s_addr;
@@ -424,15 +421,8 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
424 if (err == 0 && 421 if (err == 0 &&
425 (dev = __dev_get_by_name(net, p.name)) != NULL) { 422 (dev = __dev_get_by_name(net, p.name)) != NULL) {
426 dev->flags |= IFF_MULTICAST; 423 dev->flags |= IFF_MULTICAST;
427 424 if (!ipmr_init_vif_indev(dev))
428 in_dev = __in_dev_get_rtnl(dev);
429 if (!in_dev)
430 goto failure; 425 goto failure;
431
432 ipv4_devconf_setall(in_dev);
433 neigh_parms_data_state_setall(in_dev->arp_parms);
434 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
435
436 if (dev_open(dev)) 426 if (dev_open(dev))
437 goto failure; 427 goto failure;
438 dev_hold(dev); 428 dev_hold(dev);
@@ -441,16 +431,11 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
441 return dev; 431 return dev;
442 432
443failure: 433failure:
444 /* allow the register to be completed before unregistering. */
445 rtnl_unlock();
446 rtnl_lock();
447
448 unregister_netdevice(dev); 434 unregister_netdevice(dev);
449 return NULL; 435 return NULL;
450} 436}
451 437
452#ifdef CONFIG_IP_PIMSM 438#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
453
454static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 439static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
455{ 440{
456 struct net *net = dev_net(dev); 441 struct net *net = dev_net(dev);
@@ -500,7 +485,6 @@ static void reg_vif_setup(struct net_device *dev)
500static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) 485static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
501{ 486{
502 struct net_device *dev; 487 struct net_device *dev;
503 struct in_device *in_dev;
504 char name[IFNAMSIZ]; 488 char name[IFNAMSIZ];
505 489
506 if (mrt->id == RT_TABLE_DEFAULT) 490 if (mrt->id == RT_TABLE_DEFAULT)
@@ -520,18 +504,8 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
520 return NULL; 504 return NULL;
521 } 505 }
522 506
523 rcu_read_lock(); 507 if (!ipmr_init_vif_indev(dev))
524 in_dev = __in_dev_get_rcu(dev);
525 if (!in_dev) {
526 rcu_read_unlock();
527 goto failure; 508 goto failure;
528 }
529
530 ipv4_devconf_setall(in_dev);
531 neigh_parms_data_state_setall(in_dev->arp_parms);
532 IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
533 rcu_read_unlock();
534
535 if (dev_open(dev)) 509 if (dev_open(dev))
536 goto failure; 510 goto failure;
537 511
@@ -540,20 +514,59 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
540 return dev; 514 return dev;
541 515
542failure: 516failure:
543 /* allow the register to be completed before unregistering. */
544 rtnl_unlock();
545 rtnl_lock();
546
547 unregister_netdevice(dev); 517 unregister_netdevice(dev);
548 return NULL; 518 return NULL;
549} 519}
520
521/* called with rcu_read_lock() */
522static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
523 unsigned int pimlen)
524{
525 struct net_device *reg_dev = NULL;
526 struct iphdr *encap;
527
528 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
529 /* Check that:
530 * a. packet is really sent to a multicast group
531 * b. packet is not a NULL-REGISTER
532 * c. packet is not truncated
533 */
534 if (!ipv4_is_multicast(encap->daddr) ||
535 encap->tot_len == 0 ||
536 ntohs(encap->tot_len) + pimlen > skb->len)
537 return 1;
538
539 read_lock(&mrt_lock);
540 if (mrt->mroute_reg_vif_num >= 0)
541 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
542 read_unlock(&mrt_lock);
543
544 if (!reg_dev)
545 return 1;
546
547 skb->mac_header = skb->network_header;
548 skb_pull(skb, (u8 *)encap - skb->data);
549 skb_reset_network_header(skb);
550 skb->protocol = htons(ETH_P_IP);
551 skb->ip_summed = CHECKSUM_NONE;
552
553 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
554
555 netif_rx(skb);
556
557 return NET_RX_SUCCESS;
558}
559#else
560static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
561{
562 return NULL;
563}
550#endif 564#endif
551 565
552/** 566/**
553 * vif_delete - Delete a VIF entry 567 * vif_delete - Delete a VIF entry
554 * @notify: Set to 1, if the caller is a notifier_call 568 * @notify: Set to 1, if the caller is a notifier_call
555 */ 569 */
556
557static int vif_delete(struct mr_table *mrt, int vifi, int notify, 570static int vif_delete(struct mr_table *mrt, int vifi, int notify,
558 struct list_head *head) 571 struct list_head *head)
559{ 572{
@@ -575,10 +588,8 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
575 return -EADDRNOTAVAIL; 588 return -EADDRNOTAVAIL;
576 } 589 }
577 590
578#ifdef CONFIG_IP_PIMSM
579 if (vifi == mrt->mroute_reg_vif_num) 591 if (vifi == mrt->mroute_reg_vif_num)
580 mrt->mroute_reg_vif_num = -1; 592 mrt->mroute_reg_vif_num = -1;
581#endif
582 593
583 if (vifi + 1 == mrt->maxvif) { 594 if (vifi + 1 == mrt->maxvif) {
584 int tmp; 595 int tmp;
@@ -625,7 +636,6 @@ static inline void ipmr_cache_free(struct mfc_cache *c)
625/* Destroy an unresolved cache entry, killing queued skbs 636/* Destroy an unresolved cache entry, killing queued skbs
626 * and reporting error to netlink readers. 637 * and reporting error to netlink readers.
627 */ 638 */
628
629static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) 639static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
630{ 640{
631 struct net *net = read_pnet(&mrt->net); 641 struct net *net = read_pnet(&mrt->net);
@@ -653,9 +663,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
653 ipmr_cache_free(c); 663 ipmr_cache_free(c);
654} 664}
655 665
656
657/* Timer process for the unresolved queue. */ 666/* Timer process for the unresolved queue. */
658
659static void ipmr_expire_process(unsigned long arg) 667static void ipmr_expire_process(unsigned long arg)
660{ 668{
661 struct mr_table *mrt = (struct mr_table *)arg; 669 struct mr_table *mrt = (struct mr_table *)arg;
@@ -695,7 +703,6 @@ out:
695} 703}
696 704
697/* Fill oifs list. It is called under write locked mrt_lock. */ 705/* Fill oifs list. It is called under write locked mrt_lock. */
698
699static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, 706static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
700 unsigned char *ttls) 707 unsigned char *ttls)
701{ 708{
@@ -731,10 +738,10 @@ static int vif_add(struct net *net, struct mr_table *mrt,
731 return -EADDRINUSE; 738 return -EADDRINUSE;
732 739
733 switch (vifc->vifc_flags) { 740 switch (vifc->vifc_flags) {
734#ifdef CONFIG_IP_PIMSM
735 case VIFF_REGISTER: 741 case VIFF_REGISTER:
736 /* 742 if (!ipmr_pimsm_enabled())
737 * Special Purpose VIF in PIM 743 return -EINVAL;
744 /* Special Purpose VIF in PIM
738 * All the packets will be sent to the daemon 745 * All the packets will be sent to the daemon
739 */ 746 */
740 if (mrt->mroute_reg_vif_num >= 0) 747 if (mrt->mroute_reg_vif_num >= 0)
@@ -749,7 +756,6 @@ static int vif_add(struct net *net, struct mr_table *mrt,
749 return err; 756 return err;
750 } 757 }
751 break; 758 break;
752#endif
753 case VIFF_TUNNEL: 759 case VIFF_TUNNEL:
754 dev = ipmr_new_tunnel(net, vifc); 760 dev = ipmr_new_tunnel(net, vifc);
755 if (!dev) 761 if (!dev)
@@ -761,7 +767,6 @@ static int vif_add(struct net *net, struct mr_table *mrt,
761 return err; 767 return err;
762 } 768 }
763 break; 769 break;
764
765 case VIFF_USE_IFINDEX: 770 case VIFF_USE_IFINDEX:
766 case 0: 771 case 0:
767 if (vifc->vifc_flags == VIFF_USE_IFINDEX) { 772 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
@@ -815,10 +820,8 @@ static int vif_add(struct net *net, struct mr_table *mrt,
815 /* And finish update writing critical data */ 820 /* And finish update writing critical data */
816 write_lock_bh(&mrt_lock); 821 write_lock_bh(&mrt_lock);
817 v->dev = dev; 822 v->dev = dev;
818#ifdef CONFIG_IP_PIMSM
819 if (v->flags & VIFF_REGISTER) 823 if (v->flags & VIFF_REGISTER)
820 mrt->mroute_reg_vif_num = vifi; 824 mrt->mroute_reg_vif_num = vifi;
821#endif
822 if (vifi+1 > mrt->maxvif) 825 if (vifi+1 > mrt->maxvif)
823 mrt->maxvif = vifi+1; 826 mrt->maxvif = vifi+1;
824 write_unlock_bh(&mrt_lock); 827 write_unlock_bh(&mrt_lock);
@@ -883,9 +886,7 @@ skip:
883 return ipmr_cache_find_any_parent(mrt, vifi); 886 return ipmr_cache_find_any_parent(mrt, vifi);
884} 887}
885 888
886/* 889/* Allocate a multicast cache entry */
887 * Allocate a multicast cache entry
888 */
889static struct mfc_cache *ipmr_cache_alloc(void) 890static struct mfc_cache *ipmr_cache_alloc(void)
890{ 891{
891 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 892 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
@@ -906,10 +907,7 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void)
906 return c; 907 return c;
907} 908}
908 909
909/* 910/* A cache entry has gone into a resolved state from queued */
910 * A cache entry has gone into a resolved state from queued
911 */
912
913static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, 911static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
914 struct mfc_cache *uc, struct mfc_cache *c) 912 struct mfc_cache *uc, struct mfc_cache *c)
915{ 913{
@@ -917,7 +915,6 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
917 struct nlmsgerr *e; 915 struct nlmsgerr *e;
918 916
919 /* Play the pending entries through our router */ 917 /* Play the pending entries through our router */
920
921 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { 918 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
922 if (ip_hdr(skb)->version == 0) { 919 if (ip_hdr(skb)->version == 0) {
923 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 920 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
@@ -941,34 +938,29 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
941 } 938 }
942} 939}
943 940
944/* 941/* Bounce a cache query up to mrouted. We could use netlink for this but mrouted
945 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted 942 * expects the following bizarre scheme.
946 * expects the following bizarre scheme.
947 * 943 *
948 * Called under mrt_lock. 944 * Called under mrt_lock.
949 */ 945 */
950
951static int ipmr_cache_report(struct mr_table *mrt, 946static int ipmr_cache_report(struct mr_table *mrt,
952 struct sk_buff *pkt, vifi_t vifi, int assert) 947 struct sk_buff *pkt, vifi_t vifi, int assert)
953{ 948{
954 struct sk_buff *skb;
955 const int ihl = ip_hdrlen(pkt); 949 const int ihl = ip_hdrlen(pkt);
950 struct sock *mroute_sk;
956 struct igmphdr *igmp; 951 struct igmphdr *igmp;
957 struct igmpmsg *msg; 952 struct igmpmsg *msg;
958 struct sock *mroute_sk; 953 struct sk_buff *skb;
959 int ret; 954 int ret;
960 955
961#ifdef CONFIG_IP_PIMSM
962 if (assert == IGMPMSG_WHOLEPKT) 956 if (assert == IGMPMSG_WHOLEPKT)
963 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); 957 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
964 else 958 else
965#endif
966 skb = alloc_skb(128, GFP_ATOMIC); 959 skb = alloc_skb(128, GFP_ATOMIC);
967 960
968 if (!skb) 961 if (!skb)
969 return -ENOBUFS; 962 return -ENOBUFS;
970 963
971#ifdef CONFIG_IP_PIMSM
972 if (assert == IGMPMSG_WHOLEPKT) { 964 if (assert == IGMPMSG_WHOLEPKT) {
973 /* Ugly, but we have no choice with this interface. 965 /* Ugly, but we have no choice with this interface.
974 * Duplicate old header, fix ihl, length etc. 966 * Duplicate old header, fix ihl, length etc.
@@ -986,28 +978,23 @@ static int ipmr_cache_report(struct mr_table *mrt,
986 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; 978 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
987 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + 979 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
988 sizeof(struct iphdr)); 980 sizeof(struct iphdr));
989 } else 981 } else {
990#endif 982 /* Copy the IP header */
991 { 983 skb_set_network_header(skb, skb->len);
992 984 skb_put(skb, ihl);
993 /* Copy the IP header */ 985 skb_copy_to_linear_data(skb, pkt->data, ihl);
994 986 /* Flag to the kernel this is a route add */
995 skb_set_network_header(skb, skb->len); 987 ip_hdr(skb)->protocol = 0;
996 skb_put(skb, ihl); 988 msg = (struct igmpmsg *)skb_network_header(skb);
997 skb_copy_to_linear_data(skb, pkt->data, ihl); 989 msg->im_vif = vifi;
998 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ 990 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
999 msg = (struct igmpmsg *)skb_network_header(skb); 991 /* Add our header */
1000 msg->im_vif = vifi; 992 igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
1001 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 993 igmp->type = assert;
1002 994 msg->im_msgtype = assert;
1003 /* Add our header */ 995 igmp->code = 0;
1004 996 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
1005 igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); 997 skb->transport_header = skb->network_header;
1006 igmp->type =
1007 msg->im_msgtype = assert;
1008 igmp->code = 0;
1009 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
1010 skb->transport_header = skb->network_header;
1011 } 998 }
1012 999
1013 rcu_read_lock(); 1000 rcu_read_lock();
@@ -1019,7 +1006,6 @@ static int ipmr_cache_report(struct mr_table *mrt,
1019 } 1006 }
1020 1007
1021 /* Deliver to mrouted */ 1008 /* Deliver to mrouted */
1022
1023 ret = sock_queue_rcv_skb(mroute_sk, skb); 1009 ret = sock_queue_rcv_skb(mroute_sk, skb);
1024 rcu_read_unlock(); 1010 rcu_read_unlock();
1025 if (ret < 0) { 1011 if (ret < 0) {
@@ -1030,12 +1016,9 @@ static int ipmr_cache_report(struct mr_table *mrt,
1030 return ret; 1016 return ret;
1031} 1017}
1032 1018
1033/* 1019/* Queue a packet for resolution. It gets locked cache entry! */
1034 * Queue a packet for resolution. It gets locked cache entry! 1020static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
1035 */ 1021 struct sk_buff *skb)
1036
1037static int
1038ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
1039{ 1022{
1040 bool found = false; 1023 bool found = false;
1041 int err; 1024 int err;
@@ -1053,7 +1036,6 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
1053 1036
1054 if (!found) { 1037 if (!found) {
1055 /* Create a new entry if allowable */ 1038 /* Create a new entry if allowable */
1056
1057 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || 1039 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1058 (c = ipmr_cache_alloc_unres()) == NULL) { 1040 (c = ipmr_cache_alloc_unres()) == NULL) {
1059 spin_unlock_bh(&mfc_unres_lock); 1041 spin_unlock_bh(&mfc_unres_lock);
@@ -1063,13 +1045,11 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
1063 } 1045 }
1064 1046
1065 /* Fill in the new cache entry */ 1047 /* Fill in the new cache entry */
1066
1067 c->mfc_parent = -1; 1048 c->mfc_parent = -1;
1068 c->mfc_origin = iph->saddr; 1049 c->mfc_origin = iph->saddr;
1069 c->mfc_mcastgrp = iph->daddr; 1050 c->mfc_mcastgrp = iph->daddr;
1070 1051
1071 /* Reflect first query at mrouted. */ 1052 /* Reflect first query at mrouted. */
1072
1073 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); 1053 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
1074 if (err < 0) { 1054 if (err < 0) {
1075 /* If the report failed throw the cache entry 1055 /* If the report failed throw the cache entry
@@ -1091,7 +1071,6 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
1091 } 1071 }
1092 1072
1093 /* See if we can append the packet */ 1073 /* See if we can append the packet */
1094
1095 if (c->mfc_un.unres.unresolved.qlen > 3) { 1074 if (c->mfc_un.unres.unresolved.qlen > 3) {
1096 kfree_skb(skb); 1075 kfree_skb(skb);
1097 err = -ENOBUFS; 1076 err = -ENOBUFS;
@@ -1104,9 +1083,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
1104 return err; 1083 return err;
1105} 1084}
1106 1085
1107/* 1086/* MFC cache manipulation by user space mroute daemon */
1108 * MFC cache manipulation by user space mroute daemon
1109 */
1110 1087
1111static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) 1088static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
1112{ 1089{
@@ -1177,9 +1154,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1177 1154
1178 list_add_rcu(&c->list, &mrt->mfc_cache_array[line]); 1155 list_add_rcu(&c->list, &mrt->mfc_cache_array[line]);
1179 1156
1180 /* 1157 /* Check to see if we resolved a queued list. If so we
1181 * Check to see if we resolved a queued list. If so we 1158 * need to send on the frames and tidy up.
1182 * need to send on the frames and tidy up.
1183 */ 1159 */
1184 found = false; 1160 found = false;
1185 spin_lock_bh(&mfc_unres_lock); 1161 spin_lock_bh(&mfc_unres_lock);
@@ -1204,29 +1180,25 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1204 return 0; 1180 return 0;
1205} 1181}
1206 1182
1207/* 1183/* Close the multicast socket, and clear the vif tables etc */
1208 * Close the multicast socket, and clear the vif tables etc 1184static void mroute_clean_tables(struct mr_table *mrt, bool all)
1209 */
1210
1211static void mroute_clean_tables(struct mr_table *mrt)
1212{ 1185{
1213 int i; 1186 int i;
1214 LIST_HEAD(list); 1187 LIST_HEAD(list);
1215 struct mfc_cache *c, *next; 1188 struct mfc_cache *c, *next;
1216 1189
1217 /* Shut down all active vif entries */ 1190 /* Shut down all active vif entries */
1218
1219 for (i = 0; i < mrt->maxvif; i++) { 1191 for (i = 0; i < mrt->maxvif; i++) {
1220 if (!(mrt->vif_table[i].flags & VIFF_STATIC)) 1192 if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
1221 vif_delete(mrt, i, 0, &list); 1193 continue;
1194 vif_delete(mrt, i, 0, &list);
1222 } 1195 }
1223 unregister_netdevice_many(&list); 1196 unregister_netdevice_many(&list);
1224 1197
1225 /* Wipe the cache */ 1198 /* Wipe the cache */
1226
1227 for (i = 0; i < MFC_LINES; i++) { 1199 for (i = 0; i < MFC_LINES; i++) {
1228 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { 1200 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
1229 if (c->mfc_flags & MFC_STATIC) 1201 if (!all && (c->mfc_flags & MFC_STATIC))
1230 continue; 1202 continue;
1231 list_del_rcu(&c->list); 1203 list_del_rcu(&c->list);
1232 mroute_netlink_event(mrt, c, RTM_DELROUTE); 1204 mroute_netlink_event(mrt, c, RTM_DELROUTE);
@@ -1261,50 +1233,58 @@ static void mrtsock_destruct(struct sock *sk)
1261 NETCONFA_IFINDEX_ALL, 1233 NETCONFA_IFINDEX_ALL,
1262 net->ipv4.devconf_all); 1234 net->ipv4.devconf_all);
1263 RCU_INIT_POINTER(mrt->mroute_sk, NULL); 1235 RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1264 mroute_clean_tables(mrt); 1236 mroute_clean_tables(mrt, false);
1265 } 1237 }
1266 } 1238 }
1267 rtnl_unlock(); 1239 rtnl_unlock();
1268} 1240}
1269 1241
1270/* 1242/* Socket options and virtual interface manipulation. The whole
1271 * Socket options and virtual interface manipulation. The whole 1243 * virtual interface system is a complete heap, but unfortunately
1272 * virtual interface system is a complete heap, but unfortunately 1244 * that's how BSD mrouted happens to think. Maybe one day with a proper
1273 * that's how BSD mrouted happens to think. Maybe one day with a proper 1245 * MOSPF/PIM router set up we can clean this up.
1274 * MOSPF/PIM router set up we can clean this up.
1275 */ 1246 */
1276 1247
1277int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) 1248int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
1249 unsigned int optlen)
1278{ 1250{
1279 int ret, parent = 0;
1280 struct vifctl vif;
1281 struct mfcctl mfc;
1282 struct net *net = sock_net(sk); 1251 struct net *net = sock_net(sk);
1252 int val, ret = 0, parent = 0;
1283 struct mr_table *mrt; 1253 struct mr_table *mrt;
1254 struct vifctl vif;
1255 struct mfcctl mfc;
1256 u32 uval;
1284 1257
1258 /* There's one exception to the lock - MRT_DONE which needs to unlock */
1259 rtnl_lock();
1285 if (sk->sk_type != SOCK_RAW || 1260 if (sk->sk_type != SOCK_RAW ||
1286 inet_sk(sk)->inet_num != IPPROTO_IGMP) 1261 inet_sk(sk)->inet_num != IPPROTO_IGMP) {
1287 return -EOPNOTSUPP; 1262 ret = -EOPNOTSUPP;
1263 goto out_unlock;
1264 }
1288 1265
1289 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); 1266 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1290 if (!mrt) 1267 if (!mrt) {
1291 return -ENOENT; 1268 ret = -ENOENT;
1292 1269 goto out_unlock;
1270 }
1293 if (optname != MRT_INIT) { 1271 if (optname != MRT_INIT) {
1294 if (sk != rcu_access_pointer(mrt->mroute_sk) && 1272 if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1295 !ns_capable(net->user_ns, CAP_NET_ADMIN)) 1273 !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
1296 return -EACCES; 1274 ret = -EACCES;
1275 goto out_unlock;
1276 }
1297 } 1277 }
1298 1278
1299 switch (optname) { 1279 switch (optname) {
1300 case MRT_INIT: 1280 case MRT_INIT:
1301 if (optlen != sizeof(int)) 1281 if (optlen != sizeof(int)) {
1302 return -EINVAL; 1282 ret = -EINVAL;
1303 1283 break;
1304 rtnl_lock(); 1284 }
1305 if (rtnl_dereference(mrt->mroute_sk)) { 1285 if (rtnl_dereference(mrt->mroute_sk)) {
1306 rtnl_unlock(); 1286 ret = -EADDRINUSE;
1307 return -EADDRINUSE; 1287 break;
1308 } 1288 }
1309 1289
1310 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1290 ret = ip_ra_control(sk, 1, mrtsock_destruct);
@@ -1315,129 +1295,133 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1315 NETCONFA_IFINDEX_ALL, 1295 NETCONFA_IFINDEX_ALL,
1316 net->ipv4.devconf_all); 1296 net->ipv4.devconf_all);
1317 } 1297 }
1318 rtnl_unlock(); 1298 break;
1319 return ret;
1320 case MRT_DONE: 1299 case MRT_DONE:
1321 if (sk != rcu_access_pointer(mrt->mroute_sk)) 1300 if (sk != rcu_access_pointer(mrt->mroute_sk)) {
1322 return -EACCES; 1301 ret = -EACCES;
1323 return ip_ra_control(sk, 0, NULL); 1302 } else {
1303 /* We need to unlock here because mrtsock_destruct takes
1304 * care of rtnl itself and we can't change that due to
1305 * the IP_ROUTER_ALERT setsockopt which runs without it.
1306 */
1307 rtnl_unlock();
1308 ret = ip_ra_control(sk, 0, NULL);
1309 goto out;
1310 }
1311 break;
1324 case MRT_ADD_VIF: 1312 case MRT_ADD_VIF:
1325 case MRT_DEL_VIF: 1313 case MRT_DEL_VIF:
1326 if (optlen != sizeof(vif)) 1314 if (optlen != sizeof(vif)) {
1327 return -EINVAL; 1315 ret = -EINVAL;
1328 if (copy_from_user(&vif, optval, sizeof(vif))) 1316 break;
1329 return -EFAULT; 1317 }
1330 if (vif.vifc_vifi >= MAXVIFS) 1318 if (copy_from_user(&vif, optval, sizeof(vif))) {
1331 return -ENFILE; 1319 ret = -EFAULT;
1332 rtnl_lock(); 1320 break;
1321 }
1322 if (vif.vifc_vifi >= MAXVIFS) {
1323 ret = -ENFILE;
1324 break;
1325 }
1333 if (optname == MRT_ADD_VIF) { 1326 if (optname == MRT_ADD_VIF) {
1334 ret = vif_add(net, mrt, &vif, 1327 ret = vif_add(net, mrt, &vif,
1335 sk == rtnl_dereference(mrt->mroute_sk)); 1328 sk == rtnl_dereference(mrt->mroute_sk));
1336 } else { 1329 } else {
1337 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); 1330 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1338 } 1331 }
1339 rtnl_unlock(); 1332 break;
1340 return ret; 1333 /* Manipulate the forwarding caches. These live
1341 1334 * in a sort of kernel/user symbiosis.
1342 /* 1335 */
1343 * Manipulate the forwarding caches. These live
1344 * in a sort of kernel/user symbiosis.
1345 */
1346 case MRT_ADD_MFC: 1336 case MRT_ADD_MFC:
1347 case MRT_DEL_MFC: 1337 case MRT_DEL_MFC:
1348 parent = -1; 1338 parent = -1;
1349 case MRT_ADD_MFC_PROXY: 1339 case MRT_ADD_MFC_PROXY:
1350 case MRT_DEL_MFC_PROXY: 1340 case MRT_DEL_MFC_PROXY:
1351 if (optlen != sizeof(mfc)) 1341 if (optlen != sizeof(mfc)) {
1352 return -EINVAL; 1342 ret = -EINVAL;
1353 if (copy_from_user(&mfc, optval, sizeof(mfc))) 1343 break;
1354 return -EFAULT; 1344 }
1345 if (copy_from_user(&mfc, optval, sizeof(mfc))) {
1346 ret = -EFAULT;
1347 break;
1348 }
1355 if (parent == 0) 1349 if (parent == 0)
1356 parent = mfc.mfcc_parent; 1350 parent = mfc.mfcc_parent;
1357 rtnl_lock();
1358 if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) 1351 if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY)
1359 ret = ipmr_mfc_delete(mrt, &mfc, parent); 1352 ret = ipmr_mfc_delete(mrt, &mfc, parent);
1360 else 1353 else
1361 ret = ipmr_mfc_add(net, mrt, &mfc, 1354 ret = ipmr_mfc_add(net, mrt, &mfc,
1362 sk == rtnl_dereference(mrt->mroute_sk), 1355 sk == rtnl_dereference(mrt->mroute_sk),
1363 parent); 1356 parent);
1364 rtnl_unlock(); 1357 break;
1365 return ret; 1358 /* Control PIM assert. */
1366 /*
1367 * Control PIM assert.
1368 */
1369 case MRT_ASSERT: 1359 case MRT_ASSERT:
1370 { 1360 if (optlen != sizeof(val)) {
1371 int v; 1361 ret = -EINVAL;
1372 if (optlen != sizeof(v)) 1362 break;
1373 return -EINVAL; 1363 }
1374 if (get_user(v, (int __user *)optval)) 1364 if (get_user(val, (int __user *)optval)) {
1375 return -EFAULT; 1365 ret = -EFAULT;
1376 mrt->mroute_do_assert = v; 1366 break;
1377 return 0; 1367 }
1378 } 1368 mrt->mroute_do_assert = val;
1379#ifdef CONFIG_IP_PIMSM 1369 break;
1380 case MRT_PIM: 1370 case MRT_PIM:
1381 { 1371 if (!ipmr_pimsm_enabled()) {
1382 int v; 1372 ret = -ENOPROTOOPT;
1383 1373 break;
1384 if (optlen != sizeof(v)) 1374 }
1385 return -EINVAL; 1375 if (optlen != sizeof(val)) {
1386 if (get_user(v, (int __user *)optval)) 1376 ret = -EINVAL;
1387 return -EFAULT; 1377 break;
1388 v = !!v; 1378 }
1379 if (get_user(val, (int __user *)optval)) {
1380 ret = -EFAULT;
1381 break;
1382 }
1389 1383
1390 rtnl_lock(); 1384 val = !!val;
1391 ret = 0; 1385 if (val != mrt->mroute_do_pim) {
1392 if (v != mrt->mroute_do_pim) { 1386 mrt->mroute_do_pim = val;
1393 mrt->mroute_do_pim = v; 1387 mrt->mroute_do_assert = val;
1394 mrt->mroute_do_assert = v;
1395 } 1388 }
1396 rtnl_unlock(); 1389 break;
1397 return ret;
1398 }
1399#endif
1400#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1401 case MRT_TABLE: 1390 case MRT_TABLE:
1402 { 1391 if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) {
1403 u32 v; 1392 ret = -ENOPROTOOPT;
1404 1393 break;
1405 if (optlen != sizeof(u32)) 1394 }
1406 return -EINVAL; 1395 if (optlen != sizeof(uval)) {
1407 if (get_user(v, (u32 __user *)optval)) 1396 ret = -EINVAL;
1408 return -EFAULT; 1397 break;
1409 1398 }
1410 /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ 1399 if (get_user(uval, (u32 __user *)optval)) {
1411 if (v != RT_TABLE_DEFAULT && v >= 1000000000) 1400 ret = -EFAULT;
1412 return -EINVAL; 1401 break;
1402 }
1413 1403
1414 rtnl_lock();
1415 ret = 0;
1416 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1404 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1417 ret = -EBUSY; 1405 ret = -EBUSY;
1418 } else { 1406 } else {
1419 if (!ipmr_new_table(net, v)) 1407 mrt = ipmr_new_table(net, uval);
1420 ret = -ENOMEM; 1408 if (IS_ERR(mrt))
1409 ret = PTR_ERR(mrt);
1421 else 1410 else
1422 raw_sk(sk)->ipmr_table = v; 1411 raw_sk(sk)->ipmr_table = uval;
1423 } 1412 }
1424 rtnl_unlock(); 1413 break;
1425 return ret; 1414 /* Spurious command, or MRT_VERSION which you cannot set. */
1426 }
1427#endif
1428 /*
1429 * Spurious command, or MRT_VERSION which you cannot
1430 * set.
1431 */
1432 default: 1415 default:
1433 return -ENOPROTOOPT; 1416 ret = -ENOPROTOOPT;
1434 } 1417 }
1418out_unlock:
1419 rtnl_unlock();
1420out:
1421 return ret;
1435} 1422}
1436 1423
1437/* 1424/* Getsock opt support for the multicast routing system. */
1438 * Getsock opt support for the multicast routing system.
1439 */
1440
1441int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) 1425int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1442{ 1426{
1443 int olr; 1427 int olr;
@@ -1453,39 +1437,35 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
1453 if (!mrt) 1437 if (!mrt)
1454 return -ENOENT; 1438 return -ENOENT;
1455 1439
1456 if (optname != MRT_VERSION && 1440 switch (optname) {
1457#ifdef CONFIG_IP_PIMSM 1441 case MRT_VERSION:
1458 optname != MRT_PIM && 1442 val = 0x0305;
1459#endif 1443 break;
1460 optname != MRT_ASSERT) 1444 case MRT_PIM:
1445 if (!ipmr_pimsm_enabled())
1446 return -ENOPROTOOPT;
1447 val = mrt->mroute_do_pim;
1448 break;
1449 case MRT_ASSERT:
1450 val = mrt->mroute_do_assert;
1451 break;
1452 default:
1461 return -ENOPROTOOPT; 1453 return -ENOPROTOOPT;
1454 }
1462 1455
1463 if (get_user(olr, optlen)) 1456 if (get_user(olr, optlen))
1464 return -EFAULT; 1457 return -EFAULT;
1465
1466 olr = min_t(unsigned int, olr, sizeof(int)); 1458 olr = min_t(unsigned int, olr, sizeof(int));
1467 if (olr < 0) 1459 if (olr < 0)
1468 return -EINVAL; 1460 return -EINVAL;
1469
1470 if (put_user(olr, optlen)) 1461 if (put_user(olr, optlen))
1471 return -EFAULT; 1462 return -EFAULT;
1472 if (optname == MRT_VERSION)
1473 val = 0x0305;
1474#ifdef CONFIG_IP_PIMSM
1475 else if (optname == MRT_PIM)
1476 val = mrt->mroute_do_pim;
1477#endif
1478 else
1479 val = mrt->mroute_do_assert;
1480 if (copy_to_user(optval, &val, olr)) 1463 if (copy_to_user(optval, &val, olr))
1481 return -EFAULT; 1464 return -EFAULT;
1482 return 0; 1465 return 0;
1483} 1466}
1484 1467
1485/* 1468/* The IP multicast ioctl support routines. */
1486 * The IP multicast ioctl support routines.
1487 */
1488
1489int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) 1469int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1490{ 1470{
1491 struct sioc_sg_req sr; 1471 struct sioc_sg_req sr;
@@ -1618,7 +1598,6 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1618} 1598}
1619#endif 1599#endif
1620 1600
1621
1622static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 1601static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1623{ 1602{
1624 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1603 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
@@ -1640,17 +1619,14 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
1640 return NOTIFY_DONE; 1619 return NOTIFY_DONE;
1641} 1620}
1642 1621
1643
1644static struct notifier_block ip_mr_notifier = { 1622static struct notifier_block ip_mr_notifier = {
1645 .notifier_call = ipmr_device_event, 1623 .notifier_call = ipmr_device_event,
1646}; 1624};
1647 1625
1648/* 1626/* Encapsulate a packet by attaching a valid IPIP header to it.
1649 * Encapsulate a packet by attaching a valid IPIP header to it. 1627 * This avoids tunnel drivers and other mess and gives us the speed so
1650 * This avoids tunnel drivers and other mess and gives us the speed so 1628 * important for multicast video.
1651 * important for multicast video.
1652 */ 1629 */
1653
1654static void ip_encap(struct net *net, struct sk_buff *skb, 1630static void ip_encap(struct net *net, struct sk_buff *skb,
1655 __be32 saddr, __be32 daddr) 1631 __be32 saddr, __be32 daddr)
1656{ 1632{
@@ -1692,9 +1668,7 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
1692 return dst_output(net, sk, skb); 1668 return dst_output(net, sk, skb);
1693} 1669}
1694 1670
1695/* 1671/* Processing handlers for ipmr_forward */
1696 * Processing handlers for ipmr_forward
1697 */
1698 1672
1699static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, 1673static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1700 struct sk_buff *skb, struct mfc_cache *c, int vifi) 1674 struct sk_buff *skb, struct mfc_cache *c, int vifi)
@@ -1709,7 +1683,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1709 if (!vif->dev) 1683 if (!vif->dev)
1710 goto out_free; 1684 goto out_free;
1711 1685
1712#ifdef CONFIG_IP_PIMSM
1713 if (vif->flags & VIFF_REGISTER) { 1686 if (vif->flags & VIFF_REGISTER) {
1714 vif->pkt_out++; 1687 vif->pkt_out++;
1715 vif->bytes_out += skb->len; 1688 vif->bytes_out += skb->len;
@@ -1718,7 +1691,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1718 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); 1691 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1719 goto out_free; 1692 goto out_free;
1720 } 1693 }
1721#endif
1722 1694
1723 if (vif->flags & VIFF_TUNNEL) { 1695 if (vif->flags & VIFF_TUNNEL) {
1724 rt = ip_route_output_ports(net, &fl4, NULL, 1696 rt = ip_route_output_ports(net, &fl4, NULL,
@@ -1745,7 +1717,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1745 * allow to send ICMP, so that packets will disappear 1717 * allow to send ICMP, so that packets will disappear
1746 * to blackhole. 1718 * to blackhole.
1747 */ 1719 */
1748
1749 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); 1720 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
1750 ip_rt_put(rt); 1721 ip_rt_put(rt);
1751 goto out_free; 1722 goto out_free;
@@ -1777,8 +1748,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1777 1748
1778 IPCB(skb)->flags |= IPSKB_FORWARDED; 1749 IPCB(skb)->flags |= IPSKB_FORWARDED;
1779 1750
1780 /* 1751 /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1781 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1782 * not only before forwarding, but after forwarding on all output 1752 * not only before forwarding, but after forwarding on all output
1783 * interfaces. It is clear, if mrouter runs a multicasting 1753 * interfaces. It is clear, if mrouter runs a multicasting
1784 * program, it should receive packets not depending to what interface 1754 * program, it should receive packets not depending to what interface
@@ -1809,7 +1779,6 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1809} 1779}
1810 1780
1811/* "local" means that we should preserve one skb (for local delivery) */ 1781/* "local" means that we should preserve one skb (for local delivery) */
1812
1813static void ip_mr_forward(struct net *net, struct mr_table *mrt, 1782static void ip_mr_forward(struct net *net, struct mr_table *mrt,
1814 struct sk_buff *skb, struct mfc_cache *cache, 1783 struct sk_buff *skb, struct mfc_cache *cache,
1815 int local) 1784 int local)
@@ -1834,9 +1803,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
1834 goto forward; 1803 goto forward;
1835 } 1804 }
1836 1805
1837 /* 1806 /* Wrong interface: drop packet and (maybe) send PIM assert. */
1838 * Wrong interface: drop packet and (maybe) send PIM assert.
1839 */
1840 if (mrt->vif_table[vif].dev != skb->dev) { 1807 if (mrt->vif_table[vif].dev != skb->dev) {
1841 if (rt_is_output_route(skb_rtable(skb))) { 1808 if (rt_is_output_route(skb_rtable(skb))) {
1842 /* It is our own packet, looped back. 1809 /* It is our own packet, looped back.
@@ -1875,9 +1842,7 @@ forward:
1875 mrt->vif_table[vif].pkt_in++; 1842 mrt->vif_table[vif].pkt_in++;
1876 mrt->vif_table[vif].bytes_in += skb->len; 1843 mrt->vif_table[vif].bytes_in += skb->len;
1877 1844
1878 /* 1845 /* Forward the frame */
1879 * Forward the frame
1880 */
1881 if (cache->mfc_origin == htonl(INADDR_ANY) && 1846 if (cache->mfc_origin == htonl(INADDR_ANY) &&
1882 cache->mfc_mcastgrp == htonl(INADDR_ANY)) { 1847 cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
1883 if (true_vifi >= 0 && 1848 if (true_vifi >= 0 &&
@@ -1951,11 +1916,9 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
1951 return mrt; 1916 return mrt;
1952} 1917}
1953 1918
1954/* 1919/* Multicast packets for forwarding arrive here
1955 * Multicast packets for forwarding arrive here 1920 * Called with rcu_read_lock();
1956 * Called with rcu_read_lock();
1957 */ 1921 */
1958
1959int ip_mr_input(struct sk_buff *skb) 1922int ip_mr_input(struct sk_buff *skb)
1960{ 1923{
1961 struct mfc_cache *cache; 1924 struct mfc_cache *cache;
@@ -2006,9 +1969,7 @@ int ip_mr_input(struct sk_buff *skb)
2006 vif); 1969 vif);
2007 } 1970 }
2008 1971
2009 /* 1972 /* No usable cache entry */
2010 * No usable cache entry
2011 */
2012 if (!cache) { 1973 if (!cache) {
2013 int vif; 1974 int vif;
2014 1975
@@ -2049,53 +2010,8 @@ dont_forward:
2049 return 0; 2010 return 0;
2050} 2011}
2051 2012
2052#ifdef CONFIG_IP_PIMSM
2053/* called with rcu_read_lock() */
2054static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
2055 unsigned int pimlen)
2056{
2057 struct net_device *reg_dev = NULL;
2058 struct iphdr *encap;
2059
2060 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
2061 /*
2062 * Check that:
2063 * a. packet is really sent to a multicast group
2064 * b. packet is not a NULL-REGISTER
2065 * c. packet is not truncated
2066 */
2067 if (!ipv4_is_multicast(encap->daddr) ||
2068 encap->tot_len == 0 ||
2069 ntohs(encap->tot_len) + pimlen > skb->len)
2070 return 1;
2071
2072 read_lock(&mrt_lock);
2073 if (mrt->mroute_reg_vif_num >= 0)
2074 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
2075 read_unlock(&mrt_lock);
2076
2077 if (!reg_dev)
2078 return 1;
2079
2080 skb->mac_header = skb->network_header;
2081 skb_pull(skb, (u8 *)encap - skb->data);
2082 skb_reset_network_header(skb);
2083 skb->protocol = htons(ETH_P_IP);
2084 skb->ip_summed = CHECKSUM_NONE;
2085
2086 skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
2087
2088 netif_rx(skb);
2089
2090 return NET_RX_SUCCESS;
2091}
2092#endif
2093
2094#ifdef CONFIG_IP_PIMSM_V1 2013#ifdef CONFIG_IP_PIMSM_V1
2095/* 2014/* Handle IGMP messages of PIMv1 */
2096 * Handle IGMP messages of PIMv1
2097 */
2098
2099int pim_rcv_v1(struct sk_buff *skb) 2015int pim_rcv_v1(struct sk_buff *skb)
2100{ 2016{
2101 struct igmphdr *pim; 2017 struct igmphdr *pim;
@@ -2256,8 +2172,6 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
2256 } 2172 }
2257 2173
2258 read_lock(&mrt_lock); 2174 read_lock(&mrt_lock);
2259 if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
2260 cache->mfc_flags |= MFC_NOTIFY;
2261 err = __ipmr_fill_mroute(mrt, skb, cache, rtm); 2175 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
2262 read_unlock(&mrt_lock); 2176 read_unlock(&mrt_lock);
2263 rcu_read_unlock(); 2177 rcu_read_unlock();
@@ -2419,10 +2333,133 @@ done:
2419 return skb->len; 2333 return skb->len;
2420} 2334}
2421 2335
2336static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
2337 [RTA_SRC] = { .type = NLA_U32 },
2338 [RTA_DST] = { .type = NLA_U32 },
2339 [RTA_IIF] = { .type = NLA_U32 },
2340 [RTA_TABLE] = { .type = NLA_U32 },
2341 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2342};
2343
2344static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol)
2345{
2346 switch (rtm_protocol) {
2347 case RTPROT_STATIC:
2348 case RTPROT_MROUTED:
2349 return true;
2350 }
2351 return false;
2352}
2353
2354static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc)
2355{
2356 struct rtnexthop *rtnh = nla_data(nla);
2357 int remaining = nla_len(nla), vifi = 0;
2358
2359 while (rtnh_ok(rtnh, remaining)) {
2360 mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops;
2361 if (++vifi == MAXVIFS)
2362 break;
2363 rtnh = rtnh_next(rtnh, &remaining);
2364 }
2365
2366 return remaining > 0 ? -EINVAL : vifi;
2367}
2368
2369/* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */
2370static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
2371 struct mfcctl *mfcc, int *mrtsock,
2372 struct mr_table **mrtret)
2373{
2374 struct net_device *dev = NULL;
2375 u32 tblid = RT_TABLE_DEFAULT;
2376 struct mr_table *mrt;
2377 struct nlattr *attr;
2378 struct rtmsg *rtm;
2379 int ret, rem;
2380
2381 ret = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy);
2382 if (ret < 0)
2383 goto out;
2384 rtm = nlmsg_data(nlh);
2385
2386 ret = -EINVAL;
2387 if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 ||
2388 rtm->rtm_type != RTN_MULTICAST ||
2389 rtm->rtm_scope != RT_SCOPE_UNIVERSE ||
2390 !ipmr_rtm_validate_proto(rtm->rtm_protocol))
2391 goto out;
2392
2393 memset(mfcc, 0, sizeof(*mfcc));
2394 mfcc->mfcc_parent = -1;
2395 ret = 0;
2396 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) {
2397 switch (nla_type(attr)) {
2398 case RTA_SRC:
2399 mfcc->mfcc_origin.s_addr = nla_get_be32(attr);
2400 break;
2401 case RTA_DST:
2402 mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr);
2403 break;
2404 case RTA_IIF:
2405 dev = __dev_get_by_index(net, nla_get_u32(attr));
2406 if (!dev) {
2407 ret = -ENODEV;
2408 goto out;
2409 }
2410 break;
2411 case RTA_MULTIPATH:
2412 if (ipmr_nla_get_ttls(attr, mfcc) < 0) {
2413 ret = -EINVAL;
2414 goto out;
2415 }
2416 break;
2417 case RTA_PREFSRC:
2418 ret = 1;
2419 break;
2420 case RTA_TABLE:
2421 tblid = nla_get_u32(attr);
2422 break;
2423 }
2424 }
2425 mrt = ipmr_get_table(net, tblid);
2426 if (!mrt) {
2427 ret = -ENOENT;
2428 goto out;
2429 }
2430 *mrtret = mrt;
2431 *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0;
2432 if (dev)
2433 mfcc->mfcc_parent = ipmr_find_vif(mrt, dev);
2434
2435out:
2436 return ret;
2437}
2438
2439/* takes care of both newroute and delroute */
2440static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh)
2441{
2442 struct net *net = sock_net(skb->sk);
2443 int ret, mrtsock, parent;
2444 struct mr_table *tbl;
2445 struct mfcctl mfcc;
2446
2447 mrtsock = 0;
2448 tbl = NULL;
2449 ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl);
2450 if (ret < 0)
2451 return ret;
2452
2453 parent = ret ? mfcc.mfcc_parent : -1;
2454 if (nlh->nlmsg_type == RTM_NEWROUTE)
2455 return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent);
2456 else
2457 return ipmr_mfc_delete(tbl, &mfcc, parent);
2458}
2459
2422#ifdef CONFIG_PROC_FS 2460#ifdef CONFIG_PROC_FS
2423/* 2461/* The /proc interfaces to multicast routing :
2424 * The /proc interfaces to multicast routing : 2462 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
2425 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
2426 */ 2463 */
2427struct ipmr_vif_iter { 2464struct ipmr_vif_iter {
2428 struct seq_net_private p; 2465 struct seq_net_private p;
@@ -2706,10 +2743,7 @@ static const struct net_protocol pim_protocol = {
2706}; 2743};
2707#endif 2744#endif
2708 2745
2709 2746/* Setup for IP multicast routing */
2710/*
2711 * Setup for IP multicast routing
2712 */
2713static int __net_init ipmr_net_init(struct net *net) 2747static int __net_init ipmr_net_init(struct net *net)
2714{ 2748{
2715 int err; 2749 int err;
@@ -2759,8 +2793,6 @@ int __init ip_mr_init(void)
2759 sizeof(struct mfc_cache), 2793 sizeof(struct mfc_cache),
2760 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, 2794 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
2761 NULL); 2795 NULL);
2762 if (!mrt_cachep)
2763 return -ENOMEM;
2764 2796
2765 err = register_pernet_subsys(&ipmr_net_ops); 2797 err = register_pernet_subsys(&ipmr_net_ops);
2766 if (err) 2798 if (err)
@@ -2778,6 +2810,10 @@ int __init ip_mr_init(void)
2778#endif 2810#endif
2779 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, 2811 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
2780 NULL, ipmr_rtm_dumproute, NULL); 2812 NULL, ipmr_rtm_dumproute, NULL);
2813 rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
2814 ipmr_rtm_route, NULL, NULL);
2815 rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
2816 ipmr_rtm_route, NULL, NULL);
2781 return 0; 2817 return 0;
2782 2818
2783#ifdef CONFIG_IP_PIMSM_V2 2819#ifdef CONFIG_IP_PIMSM_V2