diff options
author | Madhu Challa <challa@noironetworks.com> | 2015-02-25 12:58:35 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-02-27 16:25:25 -0500 |
commit | 93a714d6b53d87872e552dbb273544bdeaaf6e12 (patch) | |
tree | 3cc79521cb6dbcd600384507329ecef93ae618ab | |
parent | 46a4dee074b58c4256dbf6c2dbf199c372f85b04 (diff) |
multicast: Extend ip address command to enable multicast group join/leave on IP level.
Joining a multicast group at the Ethernet level via the "ip maddr" command does
not work if there is an Ethernet switch that does IGMP snooping, since
the switch will not replicate multicast packets on ports that have not
sent IGMP reports for the multicast addresses.
Linux vxlan interfaces created via "ip link add vxlan" have the group option
that enables them to do the required join.
By extending ip address command with option "autojoin" we can get similar
functionality for openvswitch vxlan interfaces as well as other tunneling
mechanisms that need to receive multicast traffic. The kernel code is
structured similarly to how the vxlan driver does a group join / leave.
example:
ip address add 224.1.1.10/24 dev eth5 autojoin
ip address del 224.1.1.10/24 dev eth5
Signed-off-by: Madhu Challa <challa@noironetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/netns/ipv4.h | 1 | ||||
-rw-r--r-- | include/net/netns/ipv6.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/if_addr.h | 1 | ||||
-rw-r--r-- | net/ipv4/devinet.c | 31 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 13 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 38 | ||||
-rw-r--r-- | net/ipv6/mcast.c | 20 |
7 files changed, 98 insertions, 7 deletions
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index dbe225478adb..1b26c6c3fd7c 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h | |||
@@ -49,6 +49,7 @@ struct netns_ipv4 { | |||
49 | struct sock *fibnl; | 49 | struct sock *fibnl; |
50 | 50 | ||
51 | struct sock * __percpu *icmp_sk; | 51 | struct sock * __percpu *icmp_sk; |
52 | struct sock *mc_autojoin_sk; | ||
52 | 53 | ||
53 | struct inet_peer_base *peers; | 54 | struct inet_peer_base *peers; |
54 | struct tcpm_hash_bucket *tcp_metrics_hash; | 55 | struct tcpm_hash_bucket *tcp_metrics_hash; |
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 69ae41f2098c..ca0db12cd089 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h | |||
@@ -67,6 +67,7 @@ struct netns_ipv6 { | |||
67 | struct sock *ndisc_sk; | 67 | struct sock *ndisc_sk; |
68 | struct sock *tcp_sk; | 68 | struct sock *tcp_sk; |
69 | struct sock *igmp_sk; | 69 | struct sock *igmp_sk; |
70 | struct sock *mc_autojoin_sk; | ||
70 | #ifdef CONFIG_IPV6_MROUTE | 71 | #ifdef CONFIG_IPV6_MROUTE |
71 | #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES | 72 | #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES |
72 | struct mr6_table *mrt6; | 73 | struct mr6_table *mrt6; |
diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h index dea10a87dfd1..40fdfea39714 100644 --- a/include/uapi/linux/if_addr.h +++ b/include/uapi/linux/if_addr.h | |||
@@ -50,6 +50,7 @@ enum { | |||
50 | #define IFA_F_PERMANENT 0x80 | 50 | #define IFA_F_PERMANENT 0x80 |
51 | #define IFA_F_MANAGETEMPADDR 0x100 | 51 | #define IFA_F_MANAGETEMPADDR 0x100 |
52 | #define IFA_F_NOPREFIXROUTE 0x200 | 52 | #define IFA_F_NOPREFIXROUTE 0x200 |
53 | #define IFA_F_MCAUTOJOIN 0x400 | ||
53 | 54 | ||
54 | struct ifa_cacheinfo { | 55 | struct ifa_cacheinfo { |
55 | __u32 ifa_prefered; | 56 | __u32 ifa_prefered; |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 3a8985c94581..5105759e4e00 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -548,6 +548,26 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, | |||
548 | return NULL; | 548 | return NULL; |
549 | } | 549 | } |
550 | 550 | ||
551 | static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa) | ||
552 | { | ||
553 | struct ip_mreqn mreq = { | ||
554 | .imr_multiaddr.s_addr = ifa->ifa_address, | ||
555 | .imr_ifindex = ifa->ifa_dev->dev->ifindex, | ||
556 | }; | ||
557 | int ret; | ||
558 | |||
559 | ASSERT_RTNL(); | ||
560 | |||
561 | lock_sock(sk); | ||
562 | if (join) | ||
563 | ret = __ip_mc_join_group(sk, &mreq); | ||
564 | else | ||
565 | ret = __ip_mc_leave_group(sk, &mreq); | ||
566 | release_sock(sk); | ||
567 | |||
568 | return ret; | ||
569 | } | ||
570 | |||
551 | static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) | 571 | static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) |
552 | { | 572 | { |
553 | struct net *net = sock_net(skb->sk); | 573 | struct net *net = sock_net(skb->sk); |
@@ -584,6 +604,8 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
584 | !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) | 604 | !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) |
585 | continue; | 605 | continue; |
586 | 606 | ||
607 | if (ipv4_is_multicast(ifa->ifa_address)) | ||
608 | ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa); | ||
587 | __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); | 609 | __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); |
588 | return 0; | 610 | return 0; |
589 | } | 611 | } |
@@ -838,6 +860,15 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
838 | * userspace already relies on not having to provide this. | 860 | * userspace already relies on not having to provide this. |
839 | */ | 861 | */ |
840 | set_ifa_lifetime(ifa, valid_lft, prefered_lft); | 862 | set_ifa_lifetime(ifa, valid_lft, prefered_lft); |
863 | if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) { | ||
864 | int ret = ip_mc_config(net->ipv4.mc_autojoin_sk, | ||
865 | true, ifa); | ||
866 | |||
867 | if (ret < 0) { | ||
868 | inet_free_ifa(ifa); | ||
869 | return ret; | ||
870 | } | ||
871 | } | ||
841 | return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); | 872 | return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); |
842 | } else { | 873 | } else { |
843 | inet_free_ifa(ifa); | 874 | inet_free_ifa(ifa); |
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 4b1172d73e03..5cb1ef4ce292 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -97,6 +97,7 @@ | |||
97 | #include <net/route.h> | 97 | #include <net/route.h> |
98 | #include <net/sock.h> | 98 | #include <net/sock.h> |
99 | #include <net/checksum.h> | 99 | #include <net/checksum.h> |
100 | #include <net/inet_common.h> | ||
100 | #include <linux/netfilter_ipv4.h> | 101 | #include <linux/netfilter_ipv4.h> |
101 | #ifdef CONFIG_IP_MROUTE | 102 | #ifdef CONFIG_IP_MROUTE |
102 | #include <linux/mroute.h> | 103 | #include <linux/mroute.h> |
@@ -2740,6 +2741,7 @@ static const struct file_operations igmp_mcf_seq_fops = { | |||
2740 | static int __net_init igmp_net_init(struct net *net) | 2741 | static int __net_init igmp_net_init(struct net *net) |
2741 | { | 2742 | { |
2742 | struct proc_dir_entry *pde; | 2743 | struct proc_dir_entry *pde; |
2744 | int err; | ||
2743 | 2745 | ||
2744 | pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops); | 2746 | pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops); |
2745 | if (!pde) | 2747 | if (!pde) |
@@ -2748,8 +2750,18 @@ static int __net_init igmp_net_init(struct net *net) | |||
2748 | &igmp_mcf_seq_fops); | 2750 | &igmp_mcf_seq_fops); |
2749 | if (!pde) | 2751 | if (!pde) |
2750 | goto out_mcfilter; | 2752 | goto out_mcfilter; |
2753 | err = inet_ctl_sock_create(&net->ipv4.mc_autojoin_sk, AF_INET, | ||
2754 | SOCK_DGRAM, 0, net); | ||
2755 | if (err < 0) { | ||
2756 | pr_err("Failed to initialize the IGMP autojoin socket (err %d)\n", | ||
2757 | err); | ||
2758 | goto out_sock; | ||
2759 | } | ||
2760 | |||
2751 | return 0; | 2761 | return 0; |
2752 | 2762 | ||
2763 | out_sock: | ||
2764 | remove_proc_entry("mcfilter", net->proc_net); | ||
2753 | out_mcfilter: | 2765 | out_mcfilter: |
2754 | remove_proc_entry("igmp", net->proc_net); | 2766 | remove_proc_entry("igmp", net->proc_net); |
2755 | out_igmp: | 2767 | out_igmp: |
@@ -2760,6 +2772,7 @@ static void __net_exit igmp_net_exit(struct net *net) | |||
2760 | { | 2772 | { |
2761 | remove_proc_entry("mcfilter", net->proc_net); | 2773 | remove_proc_entry("mcfilter", net->proc_net); |
2762 | remove_proc_entry("igmp", net->proc_net); | 2774 | remove_proc_entry("igmp", net->proc_net); |
2775 | inet_ctl_sock_destroy(net->ipv4.mc_autojoin_sk); | ||
2763 | } | 2776 | } |
2764 | 2777 | ||
2765 | static struct pernet_operations igmp_net_ops = { | 2778 | static struct pernet_operations igmp_net_ops = { |
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 98e4a63d72bb..783bccfcc060 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c | |||
@@ -2464,6 +2464,23 @@ err_exit: | |||
2464 | return err; | 2464 | return err; |
2465 | } | 2465 | } |
2466 | 2466 | ||
2467 | static int ipv6_mc_config(struct sock *sk, bool join, | ||
2468 | const struct in6_addr *addr, int ifindex) | ||
2469 | { | ||
2470 | int ret; | ||
2471 | |||
2472 | ASSERT_RTNL(); | ||
2473 | |||
2474 | lock_sock(sk); | ||
2475 | if (join) | ||
2476 | ret = __ipv6_sock_mc_join(sk, ifindex, addr); | ||
2477 | else | ||
2478 | ret = __ipv6_sock_mc_drop(sk, ifindex, addr); | ||
2479 | release_sock(sk); | ||
2480 | |||
2481 | return ret; | ||
2482 | } | ||
2483 | |||
2467 | /* | 2484 | /* |
2468 | * Manual configuration of address on an interface | 2485 | * Manual configuration of address on an interface |
2469 | */ | 2486 | */ |
@@ -2476,10 +2493,10 @@ static int inet6_addr_add(struct net *net, int ifindex, | |||
2476 | struct inet6_ifaddr *ifp; | 2493 | struct inet6_ifaddr *ifp; |
2477 | struct inet6_dev *idev; | 2494 | struct inet6_dev *idev; |
2478 | struct net_device *dev; | 2495 | struct net_device *dev; |
2496 | unsigned long timeout; | ||
2497 | clock_t expires; | ||
2479 | int scope; | 2498 | int scope; |
2480 | u32 flags; | 2499 | u32 flags; |
2481 | clock_t expires; | ||
2482 | unsigned long timeout; | ||
2483 | 2500 | ||
2484 | ASSERT_RTNL(); | 2501 | ASSERT_RTNL(); |
2485 | 2502 | ||
@@ -2501,6 +2518,14 @@ static int inet6_addr_add(struct net *net, int ifindex, | |||
2501 | if (IS_ERR(idev)) | 2518 | if (IS_ERR(idev)) |
2502 | return PTR_ERR(idev); | 2519 | return PTR_ERR(idev); |
2503 | 2520 | ||
2521 | if (ifa_flags & IFA_F_MCAUTOJOIN) { | ||
2522 | int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk, | ||
2523 | true, pfx, ifindex); | ||
2524 | |||
2525 | if (ret < 0) | ||
2526 | return ret; | ||
2527 | } | ||
2528 | |||
2504 | scope = ipv6_addr_scope(pfx); | 2529 | scope = ipv6_addr_scope(pfx); |
2505 | 2530 | ||
2506 | timeout = addrconf_timeout_fixup(valid_lft, HZ); | 2531 | timeout = addrconf_timeout_fixup(valid_lft, HZ); |
@@ -2542,6 +2567,9 @@ static int inet6_addr_add(struct net *net, int ifindex, | |||
2542 | in6_ifa_put(ifp); | 2567 | in6_ifa_put(ifp); |
2543 | addrconf_verify_rtnl(); | 2568 | addrconf_verify_rtnl(); |
2544 | return 0; | 2569 | return 0; |
2570 | } else if (ifa_flags & IFA_F_MCAUTOJOIN) { | ||
2571 | ipv6_mc_config(net->ipv6.mc_autojoin_sk, | ||
2572 | false, pfx, ifindex); | ||
2545 | } | 2573 | } |
2546 | 2574 | ||
2547 | return PTR_ERR(ifp); | 2575 | return PTR_ERR(ifp); |
@@ -2578,6 +2606,10 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, | |||
2578 | jiffies); | 2606 | jiffies); |
2579 | ipv6_del_addr(ifp); | 2607 | ipv6_del_addr(ifp); |
2580 | addrconf_verify_rtnl(); | 2608 | addrconf_verify_rtnl(); |
2609 | if (ipv6_addr_is_multicast(pfx)) { | ||
2610 | ipv6_mc_config(net->ipv6.mc_autojoin_sk, | ||
2611 | false, pfx, dev->ifindex); | ||
2612 | } | ||
2581 | return 0; | 2613 | return 0; |
2582 | } | 2614 | } |
2583 | } | 2615 | } |
@@ -3945,7 +3977,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
3945 | 3977 | ||
3946 | /* We ignore other flags so far. */ | 3978 | /* We ignore other flags so far. */ |
3947 | ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | | 3979 | ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | |
3948 | IFA_F_NOPREFIXROUTE; | 3980 | IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN; |
3949 | 3981 | ||
3950 | ifa = ipv6_get_ifaddr(net, pfx, dev, 1); | 3982 | ifa = ipv6_get_ifaddr(net, pfx, dev, 1); |
3951 | if (ifa == NULL) { | 3983 | if (ifa == NULL) { |
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index e4955d019734..1dd1fedff9f4 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c | |||
@@ -2929,20 +2929,32 @@ static int __net_init igmp6_net_init(struct net *net) | |||
2929 | 2929 | ||
2930 | inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1; | 2930 | inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1; |
2931 | 2931 | ||
2932 | err = inet_ctl_sock_create(&net->ipv6.mc_autojoin_sk, PF_INET6, | ||
2933 | SOCK_RAW, IPPROTO_ICMPV6, net); | ||
2934 | if (err < 0) { | ||
2935 | pr_err("Failed to initialize the IGMP6 autojoin socket (err %d)\n", | ||
2936 | err); | ||
2937 | goto out_sock_create; | ||
2938 | } | ||
2939 | |||
2932 | err = igmp6_proc_init(net); | 2940 | err = igmp6_proc_init(net); |
2933 | if (err) | 2941 | if (err) |
2934 | goto out_sock_create; | 2942 | goto out_sock_create_autojoin; |
2935 | out: | 2943 | |
2936 | return err; | 2944 | return 0; |
2937 | 2945 | ||
2946 | out_sock_create_autojoin: | ||
2947 | inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk); | ||
2938 | out_sock_create: | 2948 | out_sock_create: |
2939 | inet_ctl_sock_destroy(net->ipv6.igmp_sk); | 2949 | inet_ctl_sock_destroy(net->ipv6.igmp_sk); |
2940 | goto out; | 2950 | out: |
2951 | return err; | ||
2941 | } | 2952 | } |
2942 | 2953 | ||
2943 | static void __net_exit igmp6_net_exit(struct net *net) | 2954 | static void __net_exit igmp6_net_exit(struct net *net) |
2944 | { | 2955 | { |
2945 | inet_ctl_sock_destroy(net->ipv6.igmp_sk); | 2956 | inet_ctl_sock_destroy(net->ipv6.igmp_sk); |
2957 | inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk); | ||
2946 | igmp6_proc_exit(net); | 2958 | igmp6_proc_exit(net); |
2947 | } | 2959 | } |
2948 | 2960 | ||