aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Or Gerlitz <ogerlitz@voltaire.com>, 2007-10-08 04:13:00 -0400
committer: Roland Dreier <rolandd@cisco.com>, 2007-10-10 16:02:30 -0400
commit: 335a64a5a958002bc238c90de695e120c3c8c120 (patch)
tree: e75f6aba0b89516e4100b5b9d6e77be86ce4d79d
parent: 55a98e955caab78a5959933a4a3a0136e2491d6c (diff)
IPoIB: Allow setting policy to ignore multicast groups
The kernel IB stack allows (through the RDMA CM) userspace applications to join and use multicast groups from the IPoIB MGID range. This allows multicast traffic to be handled directly from userspace QPs, without going through the kernel stack, which gives better performance for some applications.

However, to fully interoperate with IP multicast, such userspace applications need to participate in IGMP reports and queries, or else routers may not forward the multicast traffic to the system where the application is running. The simplest way to do this is to share the kernel IGMP implementation by using the IP_ADD_MEMBERSHIP option to join multicast groups that are being handled directly in userspace.

However, in such cases, the actual multicast traffic should not also be handled by the IPoIB interface, because that would burn resources handling multicast packets that will just be discarded in the kernel.

To handle this, this patch adds lookup on the database used for IB multicast group reference counting when IPoIB is joining multicast groups, and if a multicast group is already handled by user space, then the IPoIB kernel driver ignores the group. This is controlled by a per-interface policy flag. When the flag is set, IPoIB will not join and attach its QP to a multicast group which already has an entry in the database; when the flag is cleared, IPoIB will behave as before this change.

For each IPoIB interface, the /sys/class/net/$intf/umcast attribute controls the policy flag. The default value is off/0.

Signed-off-by: Or Gerlitz <ogerlitz@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib.h 2
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_main.c 33
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_multicast.c 9
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_vlan.c 2
4 files changed, 46 insertions, 0 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index fc16bced8e54..a198ce8371db 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -86,6 +86,7 @@ enum {
86 IPOIB_MCAST_STARTED = 8, 86 IPOIB_MCAST_STARTED = 8,
87 IPOIB_FLAG_NETIF_STOPPED = 9, 87 IPOIB_FLAG_NETIF_STOPPED = 9,
88 IPOIB_FLAG_ADMIN_CM = 10, 88 IPOIB_FLAG_ADMIN_CM = 10,
89 IPOIB_FLAG_UMCAST = 11,
89 90
90 IPOIB_MAX_BACKOFF_SECONDS = 16, 91 IPOIB_MAX_BACKOFF_SECONDS = 16,
91 92
@@ -384,6 +385,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
384 385
385int ipoib_open(struct net_device *dev); 386int ipoib_open(struct net_device *dev);
386int ipoib_add_pkey_attr(struct net_device *dev); 387int ipoib_add_pkey_attr(struct net_device *dev);
388int ipoib_add_umcast_attr(struct net_device *dev);
387 389
388void ipoib_send(struct net_device *dev, struct sk_buff *skb, 390void ipoib_send(struct net_device *dev, struct sk_buff *skb,
389 struct ipoib_ah *address, u32 qpn); 391 struct ipoib_ah *address, u32 qpn);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 900335a36e45..ff17fe3c765b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1019,6 +1019,37 @@ static ssize_t show_pkey(struct device *dev,
1019} 1019}
1020static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); 1020static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
1021 1021
/*
 * Read handler for the "umcast" device attribute.
 *
 * Looks up the IPoIB private data of the net_device behind @dev and writes
 * the result of testing the IPOIB_FLAG_UMCAST bit in priv->flags into @buf
 * as a decimal number followed by a newline.
 *
 * Returns the value returned by sprintf().
 *
 * Note: the leading number on each line below is the line-number column of
 * the diff view this text was captured from; it is not part of the C code.
 */
1022static ssize_t show_umcast(struct device *dev,
1023 struct device_attribute *attr, char *buf)
1024{
1025 struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
1026
1027 return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
1028}
1029
/*
 * Write handler for the "umcast" device attribute.
 *
 * Parses @buf as an unsigned long with simple_strtoul(). A value greater
 * than zero sets the IPOIB_FLAG_UMCAST bit in priv->flags and emits a
 * warning-level log message; a value of zero clears the bit.
 *
 * Always returns @count, i.e. the whole write is reported as consumed.
 *
 * NOTE(review): simple_strtoul() is called with a NULL end pointer, so this
 * function has no way to detect trailing garbage or a non-numeric string.
 * Presumably such input parses as 0 and silently clears the flag -- confirm,
 * and consider rejecting malformed input with an error.
 *
 * Note: the leading number on each line below is the line-number column of
 * the diff view this text was captured from; it is not part of the C code.
 */
1030static ssize_t set_umcast(struct device *dev,
1031 struct device_attribute *attr,
1032 const char *buf, size_t count)
1033{
1034 struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
/* Base argument is 0; the end pointer is NULL, so parse errors are not checked. */
1035 unsigned long umcast_val = simple_strtoul(buf, NULL, 0);
1036
/* Any non-zero value enables the policy; zero disables it. */
1037 if (umcast_val > 0) {
1038 set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
1039 ipoib_warn(priv, "ignoring multicast groups joined directly "
1040 "by userspace\n");
1041 } else
1042 clear_bit(IPOIB_FLAG_UMCAST, &priv->flags);
1043
1044 return count;
1045}
/*
 * Declares the "umcast" device attribute with show_umcast() as its read
 * handler and set_umcast() as its write handler. The mode S_IWUSR | S_IRUGO
 * requests owner-write plus read permission for user, group and others.
 * The resulting object is referenced as dev_attr_umcast by
 * ipoib_add_umcast_attr().
 */
1046static DEVICE_ATTR(umcast, S_IWUSR | S_IRUGO, show_umcast, set_umcast);
1047
/*
 * Registers the "umcast" attribute file on the device embedded in @dev.
 *
 * Returns the result of device_create_file(). The callers visible in this
 * patch (ipoib_add_port() and ipoib_vlan_add()) treat a non-zero return as
 * failure and jump to their sysfs_failed label.
 */
1048int ipoib_add_umcast_attr(struct net_device *dev)
1049{
1050 return device_create_file(&dev->dev, &dev_attr_umcast);
1051}
1052
1022static ssize_t create_child(struct device *dev, 1053static ssize_t create_child(struct device *dev,
1023 struct device_attribute *attr, 1054 struct device_attribute *attr,
1024 const char *buf, size_t count) 1055 const char *buf, size_t count)
@@ -1136,6 +1167,8 @@ static struct net_device *ipoib_add_port(const char *format,
1136 goto sysfs_failed; 1167 goto sysfs_failed;
1137 if (ipoib_add_pkey_attr(priv->dev)) 1168 if (ipoib_add_pkey_attr(priv->dev))
1138 goto sysfs_failed; 1169 goto sysfs_failed;
1170 if (ipoib_add_umcast_attr(priv->dev))
1171 goto sysfs_failed;
1139 if (device_create_file(&priv->dev->dev, &dev_attr_create_child)) 1172 if (device_create_file(&priv->dev->dev, &dev_attr_create_child))
1140 goto sysfs_failed; 1173 goto sysfs_failed;
1141 if (device_create_file(&priv->dev->dev, &dev_attr_delete_child)) 1174 if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 94a57097e2b4..62abfb6f35c1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -761,6 +761,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
761 struct ipoib_mcast *mcast, *tmcast; 761 struct ipoib_mcast *mcast, *tmcast;
762 LIST_HEAD(remove_list); 762 LIST_HEAD(remove_list);
763 unsigned long flags; 763 unsigned long flags;
764 struct ib_sa_mcmember_rec rec;
764 765
765 ipoib_dbg_mcast(priv, "restarting multicast task\n"); 766 ipoib_dbg_mcast(priv, "restarting multicast task\n");
766 767
@@ -794,6 +795,14 @@ void ipoib_mcast_restart_task(struct work_struct *work)
794 if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 795 if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
795 struct ipoib_mcast *nmcast; 796 struct ipoib_mcast *nmcast;
796 797
798 /* ignore group which is directly joined by userspace */
799 if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
800 !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
801 ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid "
802 IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid));
803 continue;
804 }
805
797 /* Not found or send-only group, let's add a new entry */ 806 /* Not found or send-only group, let's add a new entry */
798 ipoib_dbg_mcast(priv, "adding multicast entry for mgid " 807 ipoib_dbg_mcast(priv, "adding multicast entry for mgid "
799 IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid)); 808 IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid));
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 6762988439d1..293f5b892e3f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -119,6 +119,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
119 goto sysfs_failed; 119 goto sysfs_failed;
120 if (ipoib_add_pkey_attr(priv->dev)) 120 if (ipoib_add_pkey_attr(priv->dev))
121 goto sysfs_failed; 121 goto sysfs_failed;
122 if (ipoib_add_umcast_attr(priv->dev))
123 goto sysfs_failed;
122 124
123 if (device_create_file(&priv->dev->dev, &dev_attr_parent)) 125 if (device_create_file(&priv->dev->dev, &dev_attr_parent))
124 goto sysfs_failed; 126 goto sysfs_failed;