/* * Bridge multicast support. * * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. * */ #include <linux/err.h> #include <linux/if_ether.h> #include <linux/igmp.h> #include <linux/jhash.h> #include <linux/kernel.h> #include <linux/log2.h> #include <linux/netdevice.h> #include <linux/netfilter_bridge.h> #include <linux/random.h> #include <linux/rculist.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/timer.h> #include <net/ip.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/mld.h> #include <net/addrconf.h> #include <net/ip6_checksum.h> #endif #include "br_private.h" #define mlock_dereference(X, br) \ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) #if IS_ENABLED(CONFIG_IPV6) static inline int ipv6_is_transient_multicast(const struct in6_addr *addr) { if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr)) return 1; return 0; } #endif static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) { if (a->proto != b->proto) return 0; switch (a->proto) { case htons(ETH_P_IP): return a->u.ip4 == b->u.ip4; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): return ipv6_addr_equal(&a->u.ip6, &b->u.ip6); #endif } return 0; } static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip) { return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1); } #if IS_ENABLED(CONFIG_IPV6) static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb, const struct in6_addr *ip) { return jhash2((__force u32 *)ip->s6_addr32, 4, mdb->secret) & (mdb->max - 1); } #endif static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, struct br_ip *ip) { switch (ip->proto) { case htons(ETH_P_IP): return __br_ip4_hash(mdb, ip->u.ip4); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): return __br_ip6_hash(mdb, &ip->u.ip6); #endif } return 0; } static struct net_bridge_mdb_entry *__br_mdb_ip_get( struct net_bridge_mdb_htable *mdb, struct br_ip *dst, int hash) { struct net_bridge_mdb_entry *mp; struct hlist_node *p; hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { if (br_ip_equal(&mp->addr, dst)) return mp; } return NULL; } static struct net_bridge_mdb_entry *br_mdb_ip_get( struct net_bridge_mdb_htable *mdb, struct br_ip *dst) { if (!mdb) return NULL; return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst)); } static struct net_bridge_mdb_entry *br_mdb_ip4_get( struct net_bridge_mdb_htable *mdb, __be32 dst) { struct br_ip br_dst; br_dst.u.ip4 = dst; br_dst.proto = htons(ETH_P_IP); return br_mdb_ip_get(mdb, &br_dst); } #if IS_ENABLED(CONFIG_IPV6) static struct net_bridge_mdb_entry *br_mdb_ip6_get( struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst) { struct br_ip br_dst; br_dst.u.ip6 = *dst; br_dst.proto = htons(ETH_P_IPV6); return br_mdb_ip_get(mdb, &br_dst); } #endif struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, struct sk_buff *skb) { struct net_bridge_mdb_htable *mdb = rcu_dereference(br->mdb); struct br_ip ip; if (br->multicast_disabled) return NULL; if (BR_INPUT_SKB_CB(skb)->igmp) return NULL; ip.proto = skb->protocol; switch (skb->protocol) { case htons(ETH_P_IP): ip.u.ip4 = ip_hdr(skb)->daddr; break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): ip.u.ip6 = ipv6_hdr(skb)->daddr; break; #endif default: return NULL; } return br_mdb_ip_get(mdb, &ip); } static void br_mdb_free(struct rcu_head *head) { struct net_bridge_mdb_htable *mdb = container_of(head, struct net_bridge_mdb_htable, rcu); struct net_bridge_mdb_htable *old = mdb->old; mdb->old = NULL; kfree(old->mhash); kfree(old); } static int br_mdb_copy(struct net_bridge_mdb_htable *new, struct net_bridge_mdb_htable *old, int elasticity) { struct net_bridge_mdb_entry *mp; struct hlist_node *p; int maxlen; int len; int i; for (i = 0; i < old->max; i++) hlist_for_each_entry(mp, p, &old->mhash[i], hlist[old->ver]) hlist_add_head(&mp->hlist[new->ver], &new->mhash[br_ip_hash(new, &mp->addr)]); if (!elasticity) return 0; maxlen = 0; for (i = 0; i < new->max; i++) { len = 0; hlist_for_each_entry(mp, p, &new->mhash[i], hlist[new->ver]) len++; if (len > maxlen) maxlen = len; } return maxlen > elasticity ? -EINVAL : 0; } static void br_multicast_free_pg(struct rcu_head *head) { struct net_bridge_port_group *p = container_of(head, struct net_bridge_port_group, rcu); kfree(p); } static void br_multicast_free_group(struct rcu_head *head) { struct net_bridge_mdb_entry *mp = container_of(head, struct net_bridge_mdb_entry, rcu); kfree(mp); } static void br_multicast_group_expired(unsigned long data) { struct net_bridge_mdb_entry *mp = (void *)data; struct net_bridge *br = mp->br; struct net_bridge_mdb_htable *mdb; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || timer_pending(&mp->timer)) goto out; mp->mglist = false; if (mp->ports) goto out; mdb = mlock_dereference(br->mdb, br); hlist_del_rcu(&mp->hlist[mdb->ver]); mdb->size--; del_timer(&mp->query_timer); call_rcu_bh(&mp->rcu, br_multicast_free_group); out: spin_unlock(&br->multicast_lock); } static void br_multicast_del_pg(struct net_bridge *br, struct net_bridge_port_group *pg) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; mdb = mlock_dereference(br->mdb, br); mp = br_mdb_ip_get(mdb, &pg->addr); if (WARN_ON(!mp)) return; for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p != pg) continue; rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); del_timer(&p->query_timer); call_rcu_bh(&p->rcu, br_multicast_free_pg); if (!mp->ports && !mp->mglist && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); return; } WARN_ON(1); } static void br_multicast_port_group_expired(unsigned long data) { struct net_bridge_port_group *pg = (void *)data; struct net_bridge *br = pg->port->br; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || timer_pending(&pg->timer) || hlist_unhashed(&pg->mglist)) goto out; br_multicast_del_pg(br, pg); out: spin_unlock(&br->multicast_lock); } static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max, int elasticity) { struct net_bridge_mdb_htable *old = rcu_dereference_protected(*mdbp, 1); struct net_bridge_mdb_htable *mdb; int err; mdb = kmalloc(sizeof(*mdb), GFP_ATOMIC); if (!mdb) return -ENOMEM; mdb->max = max; mdb->old = old; mdb->mhash = kzalloc(max * sizeof(*mdb->mhash), GFP_ATOMIC); if (!mdb->mhash) { kfree(mdb); return -ENOMEM; } mdb->size = old ? old->size : 0; mdb->ver = old ? old->ver ^ 1 : 0; if (!old || elasticity) get_random_bytes(&mdb->secret, sizeof(mdb->secret)); else mdb->secret = old->secret; if (!old) goto out; err = br_mdb_copy(mdb, old, elasticity); if (err) { kfree(mdb->mhash); kfree(mdb); return err; } call_rcu_bh(&mdb->rcu, br_mdb_free); out: rcu_assign_pointer(*mdbp, mdb); return 0; } static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, __be32 group) { struct sk_buff *skb; struct igmphdr *ih; struct ethhdr *eth; struct iphdr *iph; skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*iph) + sizeof(*ih) + 4); if (!skb) goto out; skb->protocol = htons(ETH_P_IP); skb_reset_mac_header(skb); eth = eth_hdr(skb); memcpy(eth->h_source, br->dev->dev_addr, 6); eth->h_dest[0] = 1; eth->h_dest[1] = 0; eth->h_dest[2] = 0x5e; eth->h_dest[3] = 0; eth->h_dest[4] = 0; eth->h_dest[5] = 1; eth->h_proto = htons(ETH_P_IP); skb_put(skb, sizeof(*eth)); skb_set_network_header(skb, skb->len); iph = ip_hdr(skb); iph->version = 4; iph->ihl = 6; iph->tos = 0xc0; iph->tot_len = htons(sizeof(*iph) + sizeof(*ih) + 4); iph->id = 0; iph->frag_off = htons(IP_DF); iph->ttl = 1; iph->protocol = IPPROTO_IGMP; iph->saddr = 0; iph->daddr = htonl(INADDR_ALLHOSTS_GROUP); ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; ((u8 *)&iph[1])[2] = 0; ((u8 *)&iph[1])[3] = 0; ip_send_check(iph); skb_put(skb, 24); skb_set_transport_header(skb, skb->len); ih = igmp_hdr(skb); ih->type = IGMP_HOST_MEMBERSHIP_QUERY; ih->code = (group ? br->multicast_last_member_interval : br->multicast_query_response_interval) / (HZ / IGMP_TIMER_SCALE); ih->group = group; ih->csum = 0; ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr)); skb_put(skb, sizeof(*ih)); __skb_pull(skb, sizeof(*eth)); out: return skb; } #if IS_ENABLED(CONFIG_IPV6) static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, const struct in6_addr *group) { struct sk_buff *skb; struct ipv6hdr *ip6h; struct mld_msg *mldq; struct ethhdr *eth; u8 *hopopt; unsigned long interval; skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) + 8 + sizeof(*mldq)); if (!skb) goto out; skb->protocol = htons(ETH_P_IPV6); /* Ethernet header */ skb_reset_mac_header(skb); eth = eth_hdr(skb); memcpy(eth->h_source, br->dev->dev_addr, 6); eth->h_proto = htons(ETH_P_IPV6); skb_put(skb, sizeof(*eth)); /* IPv6 header + HbH option */ skb_set_network_header(skb, skb->len); ip6h = ipv6_hdr(skb); *(__force __be32 *)ip6h = htonl(0x60000000); ip6h->payload_len = htons(8 + sizeof(*mldq)); ip6h->nexthdr = IPPROTO_HOPOPTS; ip6h->hop_limit = 1; ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, &ip6h->saddr)) { kfree_skb(skb); return NULL; } ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); hopopt[0] = IPPROTO_ICMPV6; /* next hdr */ hopopt[1] = 0; /* length of HbH */ hopopt[2] = IPV6_TLV_ROUTERALERT; /* Router Alert */ hopopt[3] = 2; /* Length of RA Option */ hopopt[4] = 0; /* Type = 0x0000 (MLD) */ hopopt[5] = 0; hopopt[6] = IPV6_TLV_PAD0; /* Pad0 */ hopopt[7] = IPV6_TLV_PAD0; /* Pad0 */ skb_put(skb, sizeof(*ip6h) + 8); /* ICMPv6 */ skb_set_transport_header(skb, skb->len); mldq = (struct mld_msg *) icmp6_hdr(skb); interval = ipv6_addr_any(group) ? br->multicast_last_member_interval : br->multicast_query_response_interval; mldq->mld_type = ICMPV6_MGM_QUERY; mldq->mld_code = 0; mldq->mld_cksum = 0; mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval)); mldq->mld_reserved = 0; mldq->mld_mca = *group; /* checksum */ mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, sizeof(*mldq), IPPROTO_ICMPV6, csum_partial(mldq, sizeof(*mldq), 0)); skb_put(skb, sizeof(*mldq)); __skb_pull(skb, sizeof(*eth)); out: return skb; } #endif static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, struct br_ip *addr) { switch (addr->proto) { case htons(ETH_P_IP): return br_ip4_multicast_alloc_query(br, addr->u.ip4); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): return br_ip6_multicast_alloc_query(br, &addr->u.ip6); #endif } return NULL; } static void br_multicast_send_group_query(struct net_bridge_mdb_entry *mp) { struct net_bridge *br = mp->br; struct sk_buff *skb; skb = br_multicast_alloc_query(br, &mp->addr); if (!skb) goto timer; netif_rx(skb); timer: if (++mp->queries_sent < br->multicast_last_member_count) mod_timer(&mp->query_timer, jiffies + br->multicast_last_member_interval); } static void br_multicast_group_query_expired(unsigned long data) { struct net_bridge_mdb_entry *mp = (void *)data; struct net_bridge *br = mp->br; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || !mp->mglist || mp->queries_sent >= br->multicast_last_member_count) goto out; br_multicast_send_group_query(mp); out: spin_unlock(&br->multicast_lock); } static void br_multicast_send_port_group_query(struct net_bridge_port_group *pg) { struct net_bridge_port *port = pg->port; struct net_bridge *br = port->br; struct sk_buff *skb; skb = br_multicast_alloc_query(br, &pg->addr); if (!skb) goto timer; br_deliver(port, skb); timer: if (++pg->queries_sent < br->multicast_last_member_count) mod_timer(&pg->query_timer, jiffies + br->multicast_last_member_interval); } static void br_multicast_port_group_query_expired(unsigned long data) { struct net_bridge_port_group *pg = (void *)data; struct net_bridge_port *port = pg->port; struct net_bridge *br = port->br; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || hlist_unhashed(&pg->mglist) || pg->queries_sent >= br->multicast_last_member_count) goto out; br_multicast_send_port_group_query(pg); out: spin_unlock(&br->multicast_lock); } static struct net_bridge_mdb_entry *br_multicast_get_group( struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group, int hash) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; struct hlist_node *p; unsigned count = 0; unsigned max; int elasticity; int err; mdb = rcu_dereference_protected(br->mdb, 1); hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { count++; if (unlikely(br_ip_equal(group, &mp->addr))) return mp; } elasticity = 0; max = mdb->max; if (unlikely(count > br->hash_elasticity && count)) { if (net_ratelimit()) br_info(br, "Multicast hash table " "chain limit reached: %s\n", port ? port->dev->name : br->dev->name); elasticity = br->hash_elasticity; } if (mdb->size >= max) { max *= 2; if (unlikely(max >= br->hash_max)) { br_warn(br, "Multicast hash table maximum " "reached, disabling snooping: %s, %d\n", port ? port->dev->name : br->dev->name, max); err = -E2BIG; disable: br->multicast_disabled = 1; goto err; } } if (max > mdb->max || elasticity) { if (mdb->old) { if (net_ratelimit()) br_info(br, "Multicast hash table " "on fire: %s\n", port ? port->dev->name : br->dev->name); err = -EEXIST; goto err; } err = br_mdb_rehash(&br->mdb, max, elasticity); if (err) { br_warn(br, "Cannot rehash multicast " "hash table, disabling snooping: %s, %d, %d\n", port ? port->dev->name : br->dev->name, mdb->size, err); goto disable; } err = -EAGAIN; goto err; } return NULL; err: mp = ERR_PTR(err); return mp; } static struct net_bridge_mdb_entry *br_multicast_new_group( struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; int hash; int err; mdb = rcu_dereference_protected(br->mdb, 1); if (!mdb) { err = br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0); if (err) return ERR_PTR(err); goto rehash; } hash = br_ip_hash(mdb, group); mp = br_multicast_get_group(br, port, group, hash); switch (PTR_ERR(mp)) { case 0: break; case -EAGAIN: rehash: mdb = rcu_dereference_protected(br->mdb, 1); hash = br_ip_hash(mdb, group); break; default: goto out; } mp = kzalloc(sizeof(*mp), GFP_ATOMIC); if (unlikely(!mp)) return ERR_PTR(-ENOMEM); mp->br = br; mp->addr = *group; setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); setup_timer(&mp->query_timer, br_multicast_group_query_expired, (unsigned long)mp); hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]); mdb->size++; out: return mp; } static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group) { struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; unsigned long now = jiffies; int err; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || (port && port->state == BR_STATE_DISABLED)) goto out; mp = br_multicast_new_group(br, port, group); err = PTR_ERR(mp); if (IS_ERR(mp)) goto err; if (!port) { mp->mglist = true; mod_timer(&mp->timer, now + br->multicast_membership_interval); goto out; } for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->port == port) goto found; if ((unsigned long)p->port < (unsigned long)port) break; } p = kzalloc(sizeof(*p), GFP_ATOMIC); err = -ENOMEM; if (unlikely(!p)) goto err; p->addr = *group; p->port = port; p->next = *pp; hlist_add_head(&p->mglist, &port->mglist); setup_timer(&p->timer, br_multicast_port_group_expired, (unsigned long)p); setup_timer(&p->query_timer, br_multicast_port_group_query_expired, (unsigned long)p); rcu_assign_pointer(*pp, p); found: mod_timer(&p->timer, now + br->multicast_membership_interval); out: err = 0; err: spin_unlock(&br->multicast_lock); return err; } static int br_ip4_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, __be32 group) { struct br_ip br_group; if (ipv4_is_local_multicast(group)) return 0; br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); return br_multicast_add_group(br, port, &br_group); } #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, const struct in6_addr *group) { struct br_ip br_group; if (!ipv6_is_transient_multicast(group)) return 0; br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); return br_multicast_add_group(br, port, &br_group); } #endif static void br_multicast_router_expired(unsigned long data) { struct net_bridge_port *port = (void *)data; struct net_bridge *br = port->br; spin_lock(&br->multicast_lock); if (port->multicast_router != 1 || timer_pending(&port->multicast_router_timer) || hlist_unhashed(&port->rlist)) goto out; hlist_del_init_rcu(&port->rlist); out: spin_unlock(&br->multicast_lock); } static void br_multicast_local_router_expired(unsigned long data) { } static void __br_multicast_send_query(struct net_bridge *br, struct net_bridge_port *port, struct br_ip *ip) { struct sk_buff *skb; skb = br_multicast_alloc_query(br, ip); if (!skb) return; if (port) { __skb_push(skb, sizeof(struct ethhdr)); skb->dev = port->dev; NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, dev_queue_xmit); } else netif_rx(skb); } static void br_multicast_send_query(struct net_bridge *br, struct net_bridge_port *port, u32 sent) { unsigned long time; struct br_ip br_group; if (!netif_running(br->dev) || br->multicast_disabled || timer_pending(&br->multicast_querier_timer)) return; memset(&br_group.u, 0, sizeof(br_group.u)); br_group.proto = htons(ETH_P_IP); __br_multicast_send_query(br, port, &br_group); #if IS_ENABLED(CONFIG_IPV6) br_group.proto = htons(ETH_P_IPV6); __br_multicast_send_query(br, port, &br_group); #endif time = jiffies; time += sent < br->multicast_startup_query_count ? br->multicast_startup_query_interval : br->multicast_query_interval; mod_timer(port ? &port->multicast_query_timer : &br->multicast_query_timer, time); } static void br_multicast_port_query_expired(unsigned long data) { struct net_bridge_port *port = (void *)data; struct net_bridge *br = port->br; spin_lock(&br->multicast_lock); if (port->state == BR_STATE_DISABLED || port->state == BR_STATE_BLOCKING) goto out; if (port->multicast_startup_queries_sent < br->multicast_startup_query_count) port->multicast_startup_queries_sent++; br_multicast_send_query(port->br, port, port->multicast_startup_queries_sent); out: spin_unlock(&br->multicast_lock); } void br_multicast_add_port(struct net_bridge_port *port) { port->multicast_router = 1; setup_timer(&port->multicast_router_timer, br_multicast_router_expired, (unsigned long)port); setup_timer(&port->multicast_query_timer, br_multicast_port_query_expired, (unsigned long)port); } void br_multicast_del_port(struct net_bridge_port *port) { del_timer_sync(&port->multicast_router_timer); } static void __br_multicast_enable_port(struct net_bridge_port *port) { port->multicast_startup_queries_sent = 0; if (try_to_del_timer_sync(&port->multicast_query_timer) >= 0 || del_timer(&port->multicast_query_timer)) mod_timer(&port->multicast_query_timer, jiffies); } void br_multicast_enable_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; spin_lock(&br->multicast_lock); if (br->multicast_disabled || !netif_running(br->dev)) goto out; __br_multicast_enable_port(port); out: spin_unlock(&br->multicast_lock); } void br_multicast_disable_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; struct net_bridge_port_group *pg; struct hlist_node *p, *n; spin_lock(&br->multicast_lock); hlist_for_each_entry_safe(pg, p, n, &port->mglist, mglist) br_multicast_del_pg(br, pg); if (!hlist_unhashed(&port->rlist)) hlist_del_init_rcu(&port->rlist); del_timer(&port->multicast_router_timer); del_timer(&port->multicast_query_timer); spin_unlock(&br->multicast_lock); } static int br_ip4_multicast_igmp3_report(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) { struct igmpv3_report *ih; struct igmpv3_grec *grec; int i; int len; int num; int type; int err = 0; __be32 group; if (!pskb_may_pull(skb, sizeof(*ih))) return -EINVAL; ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); len = sizeof(*ih); for (i = 0; i < num; i++) { len += sizeof(*grec); if (!pskb_may_pull(skb, len)) return -EINVAL; grec = (void *)(skb->data + len - sizeof(*grec)); group = grec->grec_mca; type = grec->grec_type; len += ntohs(grec->grec_nsrcs) * 4; if (!pskb_may_pull(skb, len)) return -EINVAL; /* We treat this as an IGMPv2 report for now. */ switch (type) { case IGMPV3_MODE_IS_INCLUDE: case IGMPV3_MODE_IS_EXCLUDE: case IGMPV3_CHANGE_TO_INCLUDE: case IGMPV3_CHANGE_TO_EXCLUDE: case IGMPV3_ALLOW_NEW_SOURCES: case IGMPV3_BLOCK_OLD_SOURCES: break; default: continue; } err = br_ip4_multicast_add_group(br, port, group); if (err) break; } return err; } #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_mld2_report(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) { struct icmp6hdr *icmp6h; struct mld2_grec *grec; int i; int len; int num; int err = 0; if (!pskb_may_pull(skb, sizeof(*icmp6h))) return -EINVAL; icmp6h = icmp6_hdr(skb); num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); len = sizeof(*icmp6h); for (i = 0; i < num; i++) { __be16 *nsrcs, _nsrcs; nsrcs = skb_header_pointer(skb, len + offsetof(struct mld2_grec, grec_nsrcs), sizeof(_nsrcs), &_nsrcs); if (!nsrcs) return -EINVAL; if (!pskb_may_pull(skb, len + sizeof(*grec) + sizeof(struct in6_addr) * ntohs(*nsrcs))) return -EINVAL; grec = (struct mld2_grec *)(skb->data + len); len += sizeof(*grec) + sizeof(struct in6_addr) * ntohs(*nsrcs); /* We treat these as MLDv1 reports for now. */ switch (grec->grec_type) { case MLD2_MODE_IS_INCLUDE: case MLD2_MODE_IS_EXCLUDE: case MLD2_CHANGE_TO_INCLUDE: case MLD2_CHANGE_TO_EXCLUDE: case MLD2_ALLOW_NEW_SOURCES: case MLD2_BLOCK_OLD_SOURCES: break; default: continue; } err = br_ip6_multicast_add_group(br, port, &grec->grec_mca); if (!err) break; } return err; } #endif /* * Add port to rotuer_list * list is maintained ordered by pointer value * and locked by br->multicast_lock and RCU */ static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *port) { struct net_bridge_port *p; struct hlist_node *n, *slot = NULL; hlist_for_each_entry(p, n, &br->router_list, rlist) { if ((unsigned long) port >= (unsigned long) p) break; slot = n; } if (slot) hlist_add_after_rcu(slot, &port->rlist); else hlist_add_head_rcu(&port->rlist, &br->router_list); } static void br_multicast_mark_router(struct net_bridge *br, struct net_bridge_port *port) { unsigned long now = jiffies; if (!port) { if (br->multicast_router == 1) mod_timer(&br->multicast_router_timer, now + br->multicast_querier_interval); return; } if (port->multicast_router != 1) return; if (!hlist_unhashed(&port->rlist)) goto timer; br_multicast_add_router(br, port); timer: mod_timer(&port->multicast_router_timer, now + br->multicast_querier_interval); } static void br_multicast_query_received(struct net_bridge *br, struct net_bridge_port *port, int saddr) { if (saddr) mod_timer(&br->multicast_querier_timer, jiffies + br->multicast_querier_interval); else if (timer_pending(&br->multicast_querier_timer)) return; br_multicast_mark_router(br, port); } static int br_ip4_multicast_query(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct igmphdr *ih = igmp_hdr(skb); struct net_bridge_mdb_entry *mp; struct igmpv3_query *ih3; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; unsigned long max_delay; unsigned long now = jiffies; __be32 group; int err = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || (port && port->state == BR_STATE_DISABLED)) goto out; br_multicast_query_received(br, port, !!iph->saddr); group = ih->group; if (skb->len == sizeof(*ih)) { max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); if (!max_delay) { max_delay = 10 * HZ; group = 0; } } else { if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) { err = -EINVAL; goto out; } ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) goto out; max_delay = ih3->code ? IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; } if (!group) goto out; mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group); if (!mp) goto out; max_delay *= br->multicast_last_member_count; if (mp->mglist && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, now + max_delay) : try_to_del_timer_sync(&mp->timer) >= 0)) mod_timer(&mp->timer, now + max_delay); for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (timer_pending(&p->timer) ? time_after(p->timer.expires, now + max_delay) : try_to_del_timer_sync(&p->timer) >= 0) mod_timer(&p->timer, now + max_delay); } out: spin_unlock(&br->multicast_lock); return err; } #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_query(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct mld_msg *mld = (struct mld_msg *) icmp6_hdr(skb); struct net_bridge_mdb_entry *mp; struct mld2_query *mld2q; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; unsigned long max_delay; unsigned long now = jiffies; const struct in6_addr *group = NULL; int err = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || (port && port->state == BR_STATE_DISABLED)) goto out; br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr)); if (skb->len == sizeof(*mld)) { if (!pskb_may_pull(skb, sizeof(*mld))) { err = -EINVAL; goto out; } mld = (struct mld_msg *) icmp6_hdr(skb); max_delay = msecs_to_jiffies(htons(mld->mld_maxdelay)); if (max_delay) group = &mld->mld_mca; } else if (skb->len >= sizeof(*mld2q)) { if (!pskb_may_pull(skb, sizeof(*mld2q))) { err = -EINVAL; goto out; } mld2q = (struct mld2_query *)icmp6_hdr(skb); if (!mld2q->mld2q_nsrcs) group = &mld2q->mld2q_mca; max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(mld2q->mld2q_mrc) : 1; } if (!group) goto out; mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group); if (!mp) goto out; max_delay *= br->multicast_last_member_count; if (mp->mglist && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, now + max_delay) : try_to_del_timer_sync(&mp->timer) >= 0)) mod_timer(&mp->timer, now + max_delay); for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (timer_pending(&p->timer) ? time_after(p->timer.expires, now + max_delay) : try_to_del_timer_sync(&p->timer) >= 0) mod_timer(&p->timer, now + max_delay); } out: spin_unlock(&br->multicast_lock); return err; } #endif static void br_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; unsigned long now; unsigned long time; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || (port && port->state == BR_STATE_DISABLED) || timer_pending(&br->multicast_querier_timer)) goto out; mdb = mlock_dereference(br->mdb, br); mp = br_mdb_ip_get(mdb, group); if (!mp) goto out; now = jiffies; time = now + br->multicast_last_member_count * br->multicast_last_member_interval; if (!port) { if (mp->mglist && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, time) : try_to_del_timer_sync(&mp->timer) >= 0)) { mod_timer(&mp->timer, time); mp->queries_sent = 0; mod_timer(&mp->query_timer, now); } goto out; } for (p = mlock_dereference(mp->ports, br); p != NULL; p = mlock_dereference(p->next, br)) { if (p->port != port) continue; if (!hlist_unhashed(&p->mglist) && (timer_pending(&p->timer) ? time_after(p->timer.expires, time) : try_to_del_timer_sync(&p->timer) >= 0)) { mod_timer(&p->timer, time); p->queries_sent = 0; mod_timer(&p->query_timer, now); } break; } out: spin_unlock(&br->multicast_lock); } static void br_ip4_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, __be32 group) { struct br_ip br_group; if (ipv4_is_local_multicast(group)) return; br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); br_multicast_leave_group(br, port, &br_group); } #if IS_ENABLED(CONFIG_IPV6) static void br_ip6_multicast_leave_group(struct net_bridge *br, struct net_bridge_port *port, const struct in6_addr *group) { struct br_ip br_group; if (!ipv6_is_transient_multicast(group)) return; br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); br_multicast_leave_group(br, port, &br_group); } #endif static int br_multicast_ipv4_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) { struct sk_buff *skb2 = skb; const struct iphdr *iph; struct igmphdr *ih; unsigned len; unsigned offset; int err; /* We treat OOM as packet loss for now. */ if (!pskb_may_pull(skb, sizeof(*iph))) return -EINVAL; iph = ip_hdr(skb); if (iph->ihl < 5 || iph->version != 4) return -EINVAL; if (!pskb_may_pull(skb, ip_hdrlen(skb))) return -EINVAL; iph = ip_hdr(skb); if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) return -EINVAL; if (iph->protocol != IPPROTO_IGMP) { if ((iph->daddr & IGMP_LOCAL_GROUP_MASK) != IGMP_LOCAL_GROUP) BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; } len = ntohs(iph->tot_len); if (skb->len < len || len < ip_hdrlen(skb)) return -EINVAL; if (skb->len > len) { skb2 = skb_clone(skb, GFP_ATOMIC); if (!skb2) return -ENOMEM; err = pskb_trim_rcsum(skb2, len); if (err) goto err_out; } len -= ip_hdrlen(skb2); offset = skb_network_offset(skb2) + ip_hdrlen(skb2); __skb_pull(skb2, offset); skb_reset_transport_header(skb2); err = -EINVAL; if (!pskb_may_pull(skb2, sizeof(*ih))) goto out; switch (skb2->ip_summed) { case CHECKSUM_COMPLETE: if (!csum_fold(skb2->csum)) break; /* fall through */ case CHECKSUM_NONE: skb2->csum = 0; if (skb_checksum_complete(skb2)) goto out; } err = 0; BR_INPUT_SKB_CB(skb)->igmp = 1; ih = igmp_hdr(skb2); switch (ih->type) { case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip4_multicast_add_group(br, port, ih->group); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: err = br_ip4_multicast_igmp3_report(br, port, skb2); break; case IGMP_HOST_MEMBERSHIP_QUERY: err = br_ip4_multicast_query(br, port, skb2); break; case IGMP_HOST_LEAVE_MESSAGE: br_ip4_multicast_leave_group(br, port, ih->group); break; } out: __skb_push(skb2, offset); err_out: if (skb2 != skb) kfree_skb(skb2); return err; } #if IS_ENABLED(CONFIG_IPV6) static int br_multicast_ipv6_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) { struct sk_buff *skb2; const struct ipv6hdr *ip6h; u8 icmp6_type; u8 nexthdr; __be16 frag_off; unsigned len; int offset; int err; if (!pskb_may_pull(skb, sizeof(*ip6h))) return -EINVAL; ip6h = ipv6_hdr(skb); /* * We're interested in MLD messages only. * - Version is 6 * - MLD has always Router Alert hop-by-hop option * - But we do not support jumbrograms. */ if (ip6h->version != 6 || ip6h->nexthdr != IPPROTO_HOPOPTS || ip6h->payload_len == 0) return 0; len = ntohs(ip6h->payload_len) + sizeof(*ip6h); if (skb->len < len) return -EINVAL; nexthdr = ip6h->nexthdr; offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr, &frag_off); if (offset < 0 || nexthdr != IPPROTO_ICMPV6) return 0; /* Okay, we found ICMPv6 header */ skb2 = skb_clone(skb, GFP_ATOMIC); if (!skb2) return -ENOMEM; err = -EINVAL; if (!pskb_may_pull(skb2, offset + sizeof(struct icmp6hdr))) goto out; len -= offset - skb_network_offset(skb2); __skb_pull(skb2, offset); skb_reset_transport_header(skb2); skb_postpull_rcsum(skb2, skb_network_header(skb2), skb_network_header_len(skb2)); icmp6_type = icmp6_hdr(skb2)->icmp6_type; switch (icmp6_type) { case ICMPV6_MGM_QUERY: case ICMPV6_MGM_REPORT: case ICMPV6_MGM_REDUCTION: case ICMPV6_MLD2_REPORT: break; default: err = 0; goto out; } /* Okay, we found MLD message. Check further. */ if (skb2->len > len) { err = pskb_trim_rcsum(skb2, len); if (err) goto out; err = -EINVAL; } ip6h = ipv6_hdr(skb2); switch (skb2->ip_summed) { case CHECKSUM_COMPLETE: if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len, IPPROTO_ICMPV6, skb2->csum)) break; /*FALLTHROUGH*/ case CHECKSUM_NONE: skb2->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len, IPPROTO_ICMPV6, 0)); if (__skb_checksum_complete(skb2)) goto out; } err = 0; BR_INPUT_SKB_CB(skb)->igmp = 1; switch (icmp6_type) { case ICMPV6_MGM_REPORT: { struct mld_msg *mld; if (!pskb_may_pull(skb2, sizeof(*mld))) { err = -EINVAL; goto out; } mld = (struct mld_msg *)skb_transport_header(skb2); BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); break; } case ICMPV6_MLD2_REPORT: err = br_ip6_multicast_mld2_report(br, port, skb2); break; case ICMPV6_MGM_QUERY: err = br_ip6_multicast_query(br, port, skb2); break; case ICMPV6_MGM_REDUCTION: { struct mld_msg *mld; if (!pskb_may_pull(skb2, sizeof(*mld))) { err = -EINVAL; goto out; } mld = (struct mld_msg *)skb_transport_header(skb2); br_ip6_multicast_leave_group(br, port, &mld->mld_mca); } } out: kfree_skb(skb2); return err; } #endif int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) { BR_INPUT_SKB_CB(skb)->igmp = 0; BR_INPUT_SKB_CB(skb)->mrouters_only = 0; if (br->multicast_disabled) return 0; switch (skb->protocol) { case htons(ETH_P_IP): return br_multicast_ipv4_rcv(br, port, skb); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): return br_multicast_ipv6_rcv(br, port, skb); #endif } return 0; } static void br_multicast_query_expired(unsigned long data) { struct net_bridge *br = (void *)data; spin_lock(&br->multicast_lock); if (br->multicast_startup_queries_sent < br->multicast_startup_query_count) br->multicast_startup_queries_sent++; br_multicast_send_query(br, NULL, br->multicast_startup_queries_sent); spin_unlock(&br->multicast_lock); } void br_multicast_init(struct net_bridge *br) { br->hash_elasticity = 4; br->hash_max = 512; br->multicast_router = 1; br->multicast_last_member_count = 2; br->multicast_startup_query_count = 2; br->multicast_last_member_interval = HZ; br->multicast_query_response_interval = 10 * HZ; br->multicast_startup_query_interval = 125 * HZ / 4; br->multicast_query_interval = 125 * HZ; br->multicast_querier_interval = 255 * HZ; br->multicast_membership_interval = 260 * HZ; spin_lock_init(&br->multicast_lock); setup_timer(&br->multicast_router_timer, br_multicast_local_router_expired, 0); setup_timer(&br->multicast_querier_timer, br_multicast_local_router_expired, 0); setup_timer(&br->multicast_query_timer, br_multicast_query_expired, (unsigned long)br); } void br_multicast_open(struct net_bridge *br) { br->multicast_startup_queries_sent = 0; if (br->multicast_disabled) return; mod_timer(&br->multicast_query_timer, jiffies); } void br_multicast_stop(struct net_bridge *br) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; struct hlist_node *p, *n; u32 ver; int i; del_timer_sync(&br->multicast_router_timer); del_timer_sync(&br->multicast_querier_timer); del_timer_sync(&br->multicast_query_timer); spin_lock_bh(&br->multicast_lock); mdb = mlock_dereference(br->mdb, br); if (!mdb) goto out; br->mdb = NULL; ver = mdb->ver; for (i = 0; i < mdb->max; i++) { hlist_for_each_entry_safe(mp, p, n, &mdb->mhash[i], hlist[ver]) { del_timer(&mp->timer); del_timer(&mp->query_timer); call_rcu_bh(&mp->rcu, br_multicast_free_group); } } if (mdb->old) { spin_unlock_bh(&br->multicast_lock); rcu_barrier_bh(); spin_lock_bh(&br->multicast_lock); WARN_ON(mdb->old); } mdb->old = mdb; call_rcu_bh(&mdb->rcu, br_mdb_free); out: spin_unlock_bh(&br->multicast_lock); } int br_multicast_set_router(struct net_bridge *br, unsigned long val) { int err = -ENOENT; spin_lock_bh(&br->multicast_lock); if (!netif_running(br->dev)) goto unlock; switch (val) { case 0: case 2: del_timer(&br->multicast_router_timer); /* fall through */ case 1: br->multicast_router = val; err = 0; break; default: err = -EINVAL; break; } unlock: spin_unlock_bh(&br->multicast_lock); return err; } int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val) { struct net_bridge *br = p->br; int err = -ENOENT; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || p->state == BR_STATE_DISABLED) goto unlock; switch (val) { case 0: case 1: case 2: p->multicast_router = val; err = 0; if (val < 2 && !hlist_unhashed(&p->rlist)) hlist_del_init_rcu(&p->rlist); if (val == 1) break; del_timer(&p->multicast_router_timer); if (val == 0) break; br_multicast_add_router(br, p); break; default: err = -EINVAL; break; } unlock: spin_unlock(&br->multicast_lock); return err; } int br_multicast_toggle(struct net_bridge *br, unsigned long val) { struct net_bridge_port *port; int err = 0; struct net_bridge_mdb_htable *mdb; spin_lock_bh(&br->multicast_lock); if (br->multicast_disabled == !val) goto unlock; br->multicast_disabled = !val; if (br->multicast_disabled) goto unlock; if (!netif_running(br->dev)) goto unlock; mdb = mlock_dereference(br->mdb, br); if (mdb) { if (mdb->old) { err = -EEXIST; rollback: br->multicast_disabled = !!val; goto unlock; } err = br_mdb_rehash(&br->mdb, mdb->max, br->hash_elasticity); if (err) goto rollback; } br_multicast_open(br); list_for_each_entry(port, &br->port_list, list) { if (port->state == BR_STATE_DISABLED || port->state == BR_STATE_BLOCKING) continue; __br_multicast_enable_port(port); } unlock: spin_unlock_bh(&br->multicast_lock); return err; } int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val) { int err = -ENOENT; u32 old; struct net_bridge_mdb_htable *mdb; spin_lock(&br->multicast_lock); if (!netif_running(br->dev)) goto unlock; err = -EINVAL; if (!is_power_of_2(val)) goto unlock; mdb = mlock_dereference(br->mdb, br); if (mdb && val < mdb->size) goto unlock; err = 0; old = br->hash_max; br->hash_max = val; if (mdb) { if (mdb->old) { err = -EEXIST; rollback: br->hash_max = old; goto unlock; } err = br_mdb_rehash(&br->mdb, br->hash_max, br->hash_elasticity); if (err) goto rollback; } unlock: spin_unlock(&br->multicast_lock); return err; }