summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
author: Mark Bloch <markb@mellanox.com> 2016-05-18 09:42:43 -0400
committer: Doug Ledford <dledford@redhat.com> 2016-05-25 15:39:03 -0400
commit: 492a7e67ff83fc59adb768de25ccaecd33d46beb (patch)
tree: 80ffff405af42de77fa5accc97513933e9e5a611 /drivers/infiniband
parent: 3b56113016400a4e2b9870c368ebb8080cb5739b (diff)
IB/IPoIB: Allow setting the device address
In IB networks, and specifically in IPoIB/rdmacm traffic, the device address of an IPoIB interface is used as a means to exchange information between nodes needed for communication. Currently an IPoIB interface will always be created with a device address based on its node GUID without a way to change that. This change adds the ability to set the device address of an IPoIB interface by value. We use the set mac address ndo to do that. The flow should be broken down into two cases: 1) The GID value is already in the GID table; in this case the interface will be able to set carrier up. 2) The GID value is not yet in the GID table; in this case the interface won't try to join the multicast group and will wait (listen on GID_CHANGE event) until the GID is inserted. In order to track those changes, we add a new flag: * IPOIB_FLAG_DEV_ADDR_SET. When set, it means the dev_addr is based on a value in the GID table. This bit will be cleared upon a dev_addr change triggered by the user and set after validation. Per IB spec the port GUID can't change if the module is loaded. The port GUID is the basis for the GID at index 0, which is the basis for the default device address of an IPoIB interface. The issue is that there are devices that don't follow the spec: they change the port GUID while the HCA is powered on. So, in order not to break userspace applications, we need to check if the user wanted to control the device address, and we assume that if he sets the device address back to be based on GID index 0, he no longer wishes to control it. In order to track this, we add an additional flag: * IPOIB_FLAG_DEV_ADDR_CTRL. When setting the device address, there is no validation of the upper twelve bytes of the device address (flags, qpn, subnet prefix) as those bytes are not under the control of the user. Signed-off-by: Mark Bloch <markb@mellanox.com> Reviewed-by: Leon Romanovsky <leonro@mellanox.com> Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib.h | 2
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 109
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 67
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 10
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 3
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 2
6 files changed, 187 insertions, 6 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index c51f618f6120..bab7db6fa9ab 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -92,6 +92,8 @@ enum {
92 IPOIB_FLAG_UMCAST = 10, 92 IPOIB_FLAG_UMCAST = 10,
93 IPOIB_STOP_NEIGH_GC = 11, 93 IPOIB_STOP_NEIGH_GC = 11,
94 IPOIB_NEIGH_TBL_FLUSH = 12, 94 IPOIB_NEIGH_TBL_FLUSH = 12,
95 IPOIB_FLAG_DEV_ADDR_SET = 13,
96 IPOIB_FLAG_DEV_ADDR_CTRL = 14,
95 97
96 IPOIB_MAX_BACKOFF_SECONDS = 16, 98 IPOIB_MAX_BACKOFF_SECONDS = 16,
97 99
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index da5f28c892ca..7e9a77040a24 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -997,6 +997,106 @@ static inline int update_child_pkey(struct ipoib_dev_priv *priv)
997 return 0; 997 return 0;
998} 998}
999 999
1000/*
1001 * Returns true if the device address of the ipoib interface is valid, i.e. its
1002 * GID is in the gid table on priv->port, or it was reset to GID index 0 because
 * the user does not control it. Returns false otherwise, including when GID
 * index 0 cannot be queried or when the interface id changed while the gid
 * lookup was running.
 *
 * Side effects: the subnet prefix of priv->local_gid and of the device address
 * is refreshed from GID index 0, and IPOIB_FLAG_DEV_ADDR_SET /
 * IPOIB_FLAG_DEV_ADDR_CTRL may be updated.
1003 */
1004static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
1005{
1006 union ib_gid search_gid;
1007 union ib_gid gid0;
1008 union ib_gid *netdev_gid;
1009 int err;
1010 u16 index;
1011 u8 port;
1012 bool ret = false;
1013
/* The GID is read from the device address starting 4 bytes in */
1014 netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4);
1015 if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL))
1016 return false;
1017
/* NOTE(review): netif_addr_lock() does not disable bottom halves; confirm
 * no softirq path takes the same lock, otherwise the _bh variants are needed.
 */
1018 netif_addr_lock(priv->dev);
1019
1020 /* The subnet prefix may have changed, update it now so we won't have
1021 * to do it later
1022 */
1023 priv->local_gid.global.subnet_prefix = gid0.global.subnet_prefix;
1024 netdev_gid->global.subnet_prefix = gid0.global.subnet_prefix;
1025 search_gid.global.subnet_prefix = gid0.global.subnet_prefix;
1026
/* Snapshot the interface id; it is compared again after the lookup */
1027 search_gid.global.interface_id = priv->local_gid.global.interface_id;
1028
1029 netif_addr_unlock(priv->dev);
1030
/* The lookup runs with the address lock dropped. port and index are only
 * read below when err is 0.
 */
1031 err = ib_find_gid(priv->ca, &search_gid, IB_GID_TYPE_IB,
1032 priv->dev, &port, &index);
1033
1034 netif_addr_lock(priv->dev);
1035
1036 if (search_gid.global.interface_id !=
1037 priv->local_gid.global.interface_id)
1038 /* There was a change while we were looking up the gid, bail
1039 * here and let the next work sort this out
1040 */
1041 goto out;
1042
1043 /* The next section of code needs some background:
1044 * Per IB spec the port GUID can't change if the HCA is powered on.
1045 * The port GUID is the basis for GID at index 0 which is the basis for
1046 * the default device address of an ipoib interface.
1047 *
1048 * So it seems the flow should be:
1049 * if user_changed_dev_addr && gid in gid tbl
1050 * set bit dev_addr_set
1051 * return true
1052 * else
1053 * return false
1054 *
1055 * The issue is that there are devices that don't follow the spec,
1056 * they change the port GUID when the HCA is powered, so in order
1057 * not to break userspace applications, we need to check if the
1058 * user wanted to control the device address and we assume that
1059 * if he sets the device address back to be based on GID index 0,
1060 * he no longer wishes to control it.
1061 *
1062 * If the user doesn't control the device address,
1063 * IPOIB_FLAG_DEV_ADDR_SET is set and ib_find_gid failed, it means
1064 * the port GUID has changed and GID at index 0 has changed
1065 * so we need to change priv->local_gid and priv->dev->dev_addr
1066 * to reflect the new GID.
1067 */
1068 if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
/* Address not validated yet: accept it only if found on our port */
1069 if (!err && port == priv->port) {
1070 set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
/* A match at index 0 means the user no longer controls the address */
1071 if (index == 0)
1072 clear_bit(IPOIB_FLAG_DEV_ADDR_CTRL,
1073 &priv->flags);
1074 else
1075 set_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags);
1076 ret = true;
1077 } else {
1078 ret = false;
1079 }
1080 } else {
/* Address was validated earlier: check it is still in the table */
1081 if (!err && port == priv->port) {
1082 ret = true;
1083 } else {
/* Not found: follow GID index 0 unless the user controls the
 * address, in which case ret stays false.
 */
1084 if (!test_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags)) {
1085 memcpy(&priv->local_gid, &gid0,
1086 sizeof(priv->local_gid));
1087 memcpy(priv->dev->dev_addr + 4, &gid0,
1088 sizeof(priv->local_gid));
1089 ret = true;
1090 }
1091 }
1092 }
1093
1094out:
1095 netif_addr_unlock(priv->dev);
1096
1097 return ret;
1098}
1099
1000static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, 1100static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
1001 enum ipoib_flush_level level, 1101 enum ipoib_flush_level level,
1002 int nesting) 1102 int nesting)
@@ -1018,6 +1118,9 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
1018 1118
1019 if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) && 1119 if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) &&
1020 level != IPOIB_FLUSH_HEAVY) { 1120 level != IPOIB_FLUSH_HEAVY) {
1121 /* Make sure the dev_addr is set even if not flushing */
1122 if (level == IPOIB_FLUSH_LIGHT)
1123 ipoib_dev_addr_changed_valid(priv);
1021 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n"); 1124 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
1022 return; 1125 return;
1023 } 1126 }
@@ -1029,7 +1132,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
1029 update_parent_pkey(priv); 1132 update_parent_pkey(priv);
1030 else 1133 else
1031 update_child_pkey(priv); 1134 update_child_pkey(priv);
1032 } 1135 } else if (level == IPOIB_FLUSH_LIGHT)
1136 ipoib_dev_addr_changed_valid(priv);
1033 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n"); 1137 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
1034 return; 1138 return;
1035 } 1139 }
@@ -1081,7 +1185,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
1081 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { 1185 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
1082 if (level >= IPOIB_FLUSH_NORMAL) 1186 if (level >= IPOIB_FLUSH_NORMAL)
1083 ipoib_ib_dev_up(dev); 1187 ipoib_ib_dev_up(dev);
1084 ipoib_mcast_restart_task(&priv->restart_task); 1188 if (ipoib_dev_addr_changed_valid(priv))
1189 ipoib_mcast_restart_task(&priv->restart_task);
1085 } 1190 }
1086} 1191}
1087 1192
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 8bf1859ebed4..2c3fb5337bc1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -99,6 +99,7 @@ static struct net_device *ipoib_get_net_dev_by_params(
99 struct ib_device *dev, u8 port, u16 pkey, 99 struct ib_device *dev, u8 port, u16 pkey,
100 const union ib_gid *gid, const struct sockaddr *addr, 100 const union ib_gid *gid, const struct sockaddr *addr,
101 void *client_data); 101 void *client_data);
102static int ipoib_set_mac(struct net_device *dev, void *addr);
102 103
103static struct ib_client ipoib_client = { 104static struct ib_client ipoib_client = {
104 .name = "ipoib", 105 .name = "ipoib",
@@ -1722,6 +1723,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
1722 .ndo_get_vf_config = ipoib_get_vf_config, 1723 .ndo_get_vf_config = ipoib_get_vf_config,
1723 .ndo_get_vf_stats = ipoib_get_vf_stats, 1724 .ndo_get_vf_stats = ipoib_get_vf_stats,
1724 .ndo_set_vf_guid = ipoib_set_vf_guid, 1725 .ndo_set_vf_guid = ipoib_set_vf_guid,
1726 .ndo_set_mac_address = ipoib_set_mac,
1725}; 1727};
1726 1728
1727static const struct net_device_ops ipoib_netdev_ops_vf = { 1729static const struct net_device_ops ipoib_netdev_ops_vf = {
@@ -1844,6 +1846,70 @@ int ipoib_add_umcast_attr(struct net_device *dev)
1844 return device_create_file(&dev->dev, &dev_attr_umcast); 1846 return device_create_file(&dev->dev, &dev_attr_umcast);
1845} 1847}
1846 1848
/*
 * Install the interface id of @gid as the interface id of priv->local_gid and
 * copy the resulting local GID into the device address, 4 bytes in.
 * IPOIB_FLAG_DEV_ADDR_SET is cleared by this update. When
 * IPOIB_FLAG_SUBINTERFACE is not set on @priv, the same update is applied to
 * every entry on priv->child_intfs.
 */
1849static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid)
1850{
1851 struct ipoib_dev_priv *child_priv;
1852 struct net_device *netdev = priv->dev;
1853
/* NOTE(review): netif_addr_lock() does not disable bottom halves; confirm
 * no softirq path takes the same lock, otherwise the _bh variants are needed.
 */
1854 netif_addr_lock(netdev);
1855
/* Only the interface id is taken from @gid; the subnet prefix is kept */
1856 memcpy(&priv->local_gid.global.interface_id,
1857 &gid->global.interface_id,
1858 sizeof(gid->global.interface_id));
1859 memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid));
1860 clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
1861
1862 netif_addr_unlock(netdev);
1863
/* Apply the same interface id to the child interfaces, under vlan_rwsem */
1864 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
1865 down_read(&priv->vlan_rwsem);
1866 list_for_each_entry(child_priv, &priv->child_intfs, list)
1867 set_base_guid(child_priv, gid);
1868 up_read(&priv->vlan_rwsem);
1869 }
1870}
1871
/*
 * Check a requested link-layer address in @ss against the current address of
 * @dev. Returns 0 when the first 4 bytes plus the subnet prefix of ss->__data
 * equal those of dev->dev_addr and the requested interface id is non-zero,
 * -EINVAL otherwise. The comparison is done under the device address lock.
 */
1872static int ipoib_check_lladdr(struct net_device *dev,
1873 struct sockaddr_storage *ss)
1874{
/* The requested GID starts 4 bytes into the address data */
1875 union ib_gid *gid = (union ib_gid *)(ss->__data + 4);
1876 int ret = 0;
1877
1878 netif_addr_lock(dev);
1879
1880 /* Make sure the QPN, reserved and subnet prefix match the current
1881 * lladdr, it also makes sure the lladdr is unicast.
1882 */
1883 if (memcmp(dev->dev_addr, ss->__data,
1884 4 + sizeof(gid->global.subnet_prefix)) ||
1885 gid->global.interface_id == 0)
1886 ret = -EINVAL;
1887
1888 netif_addr_unlock(dev);
1889
1890 return ret;
1891}
1892
/*
 * Set-mac-address handler, installed as .ndo_set_mac_address in
 * ipoib_netdev_ops_pf. @addr is interpreted as a struct sockaddr_storage.
 *
 * Returns -EBUSY when the device is running and IFF_LIVE_ADDR_CHANGE is not
 * set, the error from ipoib_check_lladdr() when the address is rejected, and
 * 0 after the new interface id has been installed and the light flush work
 * has been queued.
 */
1893static int ipoib_set_mac(struct net_device *dev, void *addr)
1894{
1895 struct ipoib_dev_priv *priv = netdev_priv(dev);
1896 struct sockaddr_storage *ss = addr;
1897 int ret;
1898
1899 if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev))
1900 return -EBUSY;
1901
1902 ret = ipoib_check_lladdr(dev, ss);
1903 if (ret)
1904 return ret;
1905
/* The GID starts 4 bytes into the address data */
1906 set_base_guid(priv, (union ib_gid *)(ss->__data + 4));
1907
/* Presumably the light flush is what validates the new address against
 * the gid table (see ipoib_dev_addr_changed_valid) - confirm.
 */
1908 queue_work(ipoib_workqueue, &priv->flush_light);
1909
1910 return 0;
1911}
1912
1847static ssize_t create_child(struct device *dev, 1913static ssize_t create_child(struct device *dev,
1848 struct device_attribute *attr, 1914 struct device_attribute *attr,
1849 const char *buf, size_t count) 1915 const char *buf, size_t count)
@@ -1967,6 +2033,7 @@ static struct net_device *ipoib_add_port(const char *format,
1967 goto device_init_failed; 2033 goto device_init_failed;
1968 } else 2034 } else
1969 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); 2035 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
2036 set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
1970 2037
1971 result = ipoib_dev_init(priv->dev, hca, port); 2038 result = ipoib_dev_init(priv->dev, hca, port);
1972 if (result < 0) { 2039 if (result < 0) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index fc3e50e8e391..82fbc9442608 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -582,11 +582,13 @@ void ipoib_mcast_join_task(struct work_struct *work)
582 return; 582 return;
583 } 583 }
584 priv->local_lid = port_attr.lid; 584 priv->local_lid = port_attr.lid;
585 netif_addr_lock(dev);
585 586
586 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL)) 587 if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
587 ipoib_warn(priv, "ib_query_gid() failed\n"); 588 netif_addr_unlock(dev);
588 else 589 return;
589 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); 590 }
591 netif_addr_unlock(dev);
590 592
591 spin_lock_irq(&priv->lock); 593 spin_lock_irq(&priv->lock);
592 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) 594 if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index b809c373e40e..1e7cbbaa15bd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -307,5 +307,8 @@ void ipoib_event(struct ib_event_handler *handler,
307 queue_work(ipoib_workqueue, &priv->flush_normal); 307 queue_work(ipoib_workqueue, &priv->flush_normal);
308 } else if (record->event == IB_EVENT_PKEY_CHANGE) { 308 } else if (record->event == IB_EVENT_PKEY_CHANGE) {
309 queue_work(ipoib_workqueue, &priv->flush_heavy); 309 queue_work(ipoib_workqueue, &priv->flush_heavy);
310 } else if (record->event == IB_EVENT_GID_CHANGE &&
311 !test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
312 queue_work(ipoib_workqueue, &priv->flush_light);
310 } 313 }
311} 314}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index fca1a882de27..64a35595eab8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -68,6 +68,8 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
68 priv->pkey = pkey; 68 priv->pkey = pkey;
69 69
70 memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); 70 memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
71 memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid));
72 set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
71 priv->dev->broadcast[8] = pkey >> 8; 73 priv->dev->broadcast[8] = pkey >> 8;
72 priv->dev->broadcast[9] = pkey & 0xff; 74 priv->dev->broadcast[9] = pkey & 0xff;
73 75