aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@mellanox.com>2019-02-12 23:12:50 -0500
committerJason Gunthorpe <jgg@mellanox.com>2019-02-19 22:52:18 -0500
commitc2261dd76b549754c14c8ac7cadadd0993b182d6 (patch)
tree1633597dbb212404d2542ea368a872e90d75d50a
parent8faea9fd4a3914f12cd343e10810ec5f4215ddd6 (diff)
RDMA/device: Add ib_device_set_netdev() as an alternative to get_netdev
The associated netdev should not actually be very dynamic, so for most drivers there is no reason for a callback like this. Provide an API to inform the core code about the net dev affiliation and use a core-maintained data structure instead. This allows the core code to be more aware of the ndev relationship, which will enable some new APIs based around this. This also uses locking that makes sense; many drivers had a confusing RCU lock or were missing locking entirely, which isn't right. Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
-rw-r--r--drivers/infiniband/core/cache.c24
-rw-r--r--drivers/infiniband/core/core_priv.h3
-rw-r--r--drivers/infiniband/core/device.c166
-rw-r--r--drivers/infiniband/core/nldev.c4
-rw-r--r--drivers/infiniband/core/verbs.c5
-rw-r--r--include/rdma/ib_verbs.h7
6 files changed, 171 insertions, 38 deletions
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index a28dc1901c80..43c67e5f43c6 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -547,21 +547,19 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
547 unsigned long mask; 547 unsigned long mask;
548 int ret; 548 int ret;
549 549
550 if (ib_dev->ops.get_netdev) { 550 idev = ib_device_get_netdev(ib_dev, port);
551 idev = ib_dev->ops.get_netdev(ib_dev, port); 551 if (idev && attr->ndev != idev) {
552 if (idev && attr->ndev != idev) { 552 union ib_gid default_gid;
553 union ib_gid default_gid; 553
554 554 /* Adding default GIDs is not permitted */
555 /* Adding default GIDs in not permitted */ 555 make_default_gid(idev, &default_gid);
556 make_default_gid(idev, &default_gid); 556 if (!memcmp(gid, &default_gid, sizeof(*gid))) {
557 if (!memcmp(gid, &default_gid, sizeof(*gid))) {
558 dev_put(idev);
559 return -EPERM;
560 }
561 }
562 if (idev)
563 dev_put(idev); 557 dev_put(idev);
558 return -EPERM;
559 }
564 } 560 }
561 if (idev)
562 dev_put(idev);
565 563
566 mask = GID_ATTR_FIND_MASK_GID | 564 mask = GID_ATTR_FIND_MASK_GID |
567 GID_ATTR_FIND_MASK_GID_TYPE | 565 GID_ATTR_FIND_MASK_GID_TYPE |
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index eeabe9ca8427..08c690249594 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -66,6 +66,9 @@ typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
66typedef bool (*roce_netdev_filter)(struct ib_device *device, u8 port, 66typedef bool (*roce_netdev_filter)(struct ib_device *device, u8 port,
67 struct net_device *idev, void *cookie); 67 struct net_device *idev, void *cookie);
68 68
69struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
70 unsigned int port);
71
69void ib_enum_roce_netdev(struct ib_device *ib_dev, 72void ib_enum_roce_netdev(struct ib_device *ib_dev,
70 roce_netdev_filter filter, 73 roce_netdev_filter filter,
71 void *filter_cookie, 74 void *filter_cookie,
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 8d7d63a60ef5..7680a64a98bc 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -134,6 +134,7 @@ static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
134 !xa_is_err(entry); \ 134 !xa_is_err(entry); \
135 (index)++, entry = xan_find_marked(xa, &(index), filter)) 135 (index)++, entry = xan_find_marked(xa, &(index), filter))
136 136
137static void free_netdevs(struct ib_device *ib_dev);
137static int ib_security_change(struct notifier_block *nb, unsigned long event, 138static int ib_security_change(struct notifier_block *nb, unsigned long event,
138 void *lsm_data); 139 void *lsm_data);
139static void ib_policy_change_task(struct work_struct *work); 140static void ib_policy_change_task(struct work_struct *work);
@@ -290,6 +291,7 @@ static void ib_device_release(struct device *device)
290{ 291{
291 struct ib_device *dev = container_of(device, struct ib_device, dev); 292 struct ib_device *dev = container_of(device, struct ib_device, dev);
292 293
294 free_netdevs(dev);
293 WARN_ON(refcount_read(&dev->refcount)); 295 WARN_ON(refcount_read(&dev->refcount));
294 ib_cache_release_one(dev); 296 ib_cache_release_one(dev);
295 ib_security_release_port_pkey_list(dev); 297 ib_security_release_port_pkey_list(dev);
@@ -371,6 +373,9 @@ EXPORT_SYMBOL(_ib_alloc_device);
371 */ 373 */
372void ib_dealloc_device(struct ib_device *device) 374void ib_dealloc_device(struct ib_device *device)
373{ 375{
376 /* Expedite releasing netdev references */
377 free_netdevs(device);
378
374 WARN_ON(!xa_empty(&device->client_data)); 379 WARN_ON(!xa_empty(&device->client_data));
375 WARN_ON(refcount_read(&device->refcount)); 380 WARN_ON(refcount_read(&device->refcount));
376 rdma_restrack_clean(device); 381 rdma_restrack_clean(device);
@@ -461,16 +466,16 @@ static void remove_client_context(struct ib_device *device,
461 up_read(&device->client_data_rwsem); 466 up_read(&device->client_data_rwsem);
462} 467}
463 468
464static int verify_immutable(const struct ib_device *dev, u8 port) 469static int alloc_port_data(struct ib_device *device)
465{
466 return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
467 rdma_max_mad_size(dev, port) != 0);
468}
469
470static int setup_port_data(struct ib_device *device)
471{ 470{
472 unsigned int port; 471 unsigned int port;
473 int ret; 472
473 if (device->port_data)
474 return 0;
475
476 /* This can only be called once the physical port range is defined */
477 if (WARN_ON(!device->phys_port_cnt))
478 return -EINVAL;
474 479
475 /* 480 /*
476 * device->port_data is indexed directly by the port number to make 481 * device->port_data is indexed directly by the port number to make
@@ -489,6 +494,28 @@ static int setup_port_data(struct ib_device *device)
489 494
490 spin_lock_init(&pdata->pkey_list_lock); 495 spin_lock_init(&pdata->pkey_list_lock);
491 INIT_LIST_HEAD(&pdata->pkey_list); 496 INIT_LIST_HEAD(&pdata->pkey_list);
497 spin_lock_init(&pdata->netdev_lock);
498 }
499 return 0;
500}
501
502static int verify_immutable(const struct ib_device *dev, u8 port)
503{
504 return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
505 rdma_max_mad_size(dev, port) != 0);
506}
507
508static int setup_port_data(struct ib_device *device)
509{
510 unsigned int port;
511 int ret;
512
513 ret = alloc_port_data(device);
514 if (ret)
515 return ret;
516
517 rdma_for_each_port (device, port) {
518 struct ib_port_data *pdata = &device->port_data[port];
492 519
493 ret = device->ops.get_port_immutable(device, port, 520 ret = device->ops.get_port_immutable(device, port,
494 &pdata->immutable); 521 &pdata->immutable);
@@ -682,6 +709,9 @@ static void disable_device(struct ib_device *device)
682 /* Pairs with refcount_set in enable_device */ 709 /* Pairs with refcount_set in enable_device */
683 ib_device_put(device); 710 ib_device_put(device);
684 wait_for_completion(&device->unreg_completion); 711 wait_for_completion(&device->unreg_completion);
712
713 /* Expedite removing unregistered pointers from the hash table */
714 free_netdevs(device);
685} 715}
686 716
687/* 717/*
@@ -1013,6 +1043,114 @@ int ib_query_port(struct ib_device *device,
1013EXPORT_SYMBOL(ib_query_port); 1043EXPORT_SYMBOL(ib_query_port);
1014 1044
1015/** 1045/**
1046 * ib_device_set_netdev - Associate the ib_dev with an underlying net_device
1047 * @ib_dev: Device to modify
1048 * @ndev: net_device to affiliate, may be NULL
1049 * @port: IB port the net_device is connected to
1050 *
1051 * Drivers should use this to link the ib_device to a netdev so the netdev
1052 * shows up in interfaces like ib_enum_roce_netdev. Only one netdev may be
1053 * affiliated with any port.
1054 *
1055 * The caller must ensure that the given ndev is not unregistered or
1056 * unregistering, and that either the ib_device is unregistered or
1057 * ib_device_set_netdev() is called with NULL when the ndev sends a
1058 * NETDEV_UNREGISTER event.
1059 */
1060int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
1061 unsigned int port)
1062{
1063 struct net_device *old_ndev;
1064 struct ib_port_data *pdata;
1065 unsigned long flags;
1066 int ret;
1067
1068 /*
1069 * Drivers wish to call this before ib_register_driver, so we have to
1070 * setup the port data early.
1071 */
1072 ret = alloc_port_data(ib_dev);
1073 if (ret)
1074 return ret;
1075
1076 if (!rdma_is_port_valid(ib_dev, port))
1077 return -EINVAL;
1078
1079 pdata = &ib_dev->port_data[port];
1080 spin_lock_irqsave(&pdata->netdev_lock, flags);
1081 if (pdata->netdev == ndev) {
1082 spin_unlock_irqrestore(&pdata->netdev_lock, flags);
1083 return 0;
1084 }
1085 old_ndev = pdata->netdev;
1086
1087 if (ndev)
1088 dev_hold(ndev);
1089 pdata->netdev = ndev;
1090 spin_unlock_irqrestore(&pdata->netdev_lock, flags);
1091
1092 if (old_ndev)
1093 dev_put(old_ndev);
1094
1095 return 0;
1096}
1097EXPORT_SYMBOL(ib_device_set_netdev);
1098
1099static void free_netdevs(struct ib_device *ib_dev)
1100{
1101 unsigned long flags;
1102 unsigned int port;
1103
1104 rdma_for_each_port (ib_dev, port) {
1105 struct ib_port_data *pdata = &ib_dev->port_data[port];
1106
1107 spin_lock_irqsave(&pdata->netdev_lock, flags);
1108 if (pdata->netdev) {
1109 dev_put(pdata->netdev);
1110 pdata->netdev = NULL;
1111 }
1112 spin_unlock_irqrestore(&pdata->netdev_lock, flags);
1113 }
1114}
1115
1116struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
1117 unsigned int port)
1118{
1119 struct ib_port_data *pdata;
1120 struct net_device *res;
1121
1122 if (!rdma_is_port_valid(ib_dev, port))
1123 return NULL;
1124
1125 pdata = &ib_dev->port_data[port];
1126
1127 /*
1128 * New drivers should use ib_device_set_netdev() not the legacy
1129 * get_netdev().
1130 */
1131 if (ib_dev->ops.get_netdev)
1132 res = ib_dev->ops.get_netdev(ib_dev, port);
1133 else {
1134 spin_lock(&pdata->netdev_lock);
1135 res = pdata->netdev;
1136 if (res)
1137 dev_hold(res);
1138 spin_unlock(&pdata->netdev_lock);
1139 }
1140
1141 /*
1142 * If we are starting to unregister expedite things by preventing
1143 * propagation of an unregistering netdev.
1144 */
1145 if (res && res->reg_state != NETREG_REGISTERED) {
1146 dev_put(res);
1147 return NULL;
1148 }
1149
1150 return res;
1151}
1152
1153/**
1016 * ib_enum_roce_netdev - enumerate all RoCE ports 1154 * ib_enum_roce_netdev - enumerate all RoCE ports
1017 * @ib_dev : IB device we want to query 1155 * @ib_dev : IB device we want to query
1018 * @filter: Should we call the callback? 1156 * @filter: Should we call the callback?
@@ -1034,16 +1172,8 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
1034 1172
1035 rdma_for_each_port (ib_dev, port) 1173 rdma_for_each_port (ib_dev, port)
1036 if (rdma_protocol_roce(ib_dev, port)) { 1174 if (rdma_protocol_roce(ib_dev, port)) {
1037 struct net_device *idev = NULL; 1175 struct net_device *idev =
1038 1176 ib_device_get_netdev(ib_dev, port);
1039 if (ib_dev->ops.get_netdev)
1040 idev = ib_dev->ops.get_netdev(ib_dev, port);
1041
1042 if (idev &&
1043 idev->reg_state >= NETREG_UNREGISTERED) {
1044 dev_put(idev);
1045 idev = NULL;
1046 }
1047 1177
1048 if (filter(ib_dev, port, idev, filter_cookie)) 1178 if (filter(ib_dev, port, idev, filter_cookie))
1049 cb(ib_dev, port, idev, cookie); 1179 cb(ib_dev, port, idev, cookie);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 85f6f2bcce40..1980ddc5f7bc 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -268,9 +268,7 @@ static int fill_port_info(struct sk_buff *msg,
268 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state)) 268 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
269 return -EMSGSIZE; 269 return -EMSGSIZE;
270 270
271 if (device->ops.get_netdev) 271 netdev = ib_device_get_netdev(device, port);
272 netdev = device->ops.get_netdev(device, port);
273
274 if (netdev && net_eq(dev_net(netdev), net)) { 272 if (netdev && net_eq(dev_net(netdev), net)) {
275 ret = nla_put_u32(msg, 273 ret = nla_put_u32(msg,
276 RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); 274 RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index de5d895a5054..5a5e83f5f0fc 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1723,10 +1723,7 @@ int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width)
1723 if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET) 1723 if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
1724 return -EINVAL; 1724 return -EINVAL;
1725 1725
1726 if (!dev->ops.get_netdev) 1726 netdev = ib_device_get_netdev(dev, port_num);
1727 return -EOPNOTSUPP;
1728
1729 netdev = dev->ops.get_netdev(dev, port_num);
1730 if (!netdev) 1727 if (!netdev)
1731 return -ENODEV; 1728 return -ENODEV;
1732 1729
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 50b7ebc2885e..7f81a313c01b 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2204,6 +2204,9 @@ struct ib_port_data {
2204 struct list_head pkey_list; 2204 struct list_head pkey_list;
2205 2205
2206 struct ib_port_cache cache; 2206 struct ib_port_cache cache;
2207
2208 spinlock_t netdev_lock;
2209 struct net_device *netdev;
2207}; 2210};
2208 2211
2209/* rdma netdev type - specifies protocol type */ 2212/* rdma netdev type - specifies protocol type */
@@ -3996,6 +3999,10 @@ void ib_device_put(struct ib_device *device);
3996struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, 3999struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
3997 u16 pkey, const union ib_gid *gid, 4000 u16 pkey, const union ib_gid *gid,
3998 const struct sockaddr *addr); 4001 const struct sockaddr *addr);
4002int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
4003 unsigned int port);
 4004struct net_device *ib_device_get_netdev(struct ib_device *ib_dev, unsigned int port);
4005
3999struct ib_wq *ib_create_wq(struct ib_pd *pd, 4006struct ib_wq *ib_create_wq(struct ib_pd *pd,
4000 struct ib_wq_init_attr *init_attr); 4007 struct ib_wq_init_attr *init_attr);
4001int ib_destroy_wq(struct ib_wq *wq); 4008int ib_destroy_wq(struct ib_wq *wq);