aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core
diff options
context:
space:
mode:
authorDoug Ledford <dledford@redhat.com>2013-09-24 17:16:27 -0400
committerRoland Dreier <roland@purestorage.com>2013-11-08 17:42:24 -0500
commit29f27e8477aee4e619889add998a1976d67b60a5 (patch)
tree16fd7eab71eca64f0ecb1996dbd1e54c23adc537 /drivers/infiniband/core
parent959f58544b7f20c92d5eb43d1232c96c15c01bfb (diff)
IB/cma: Use cached gids
The cma_acquire_dev function was changed by commit 3c86aa70bf67 ("RDMA/cm: Add RDMA CM support for IBoE devices") to use find_gid_port() because multiport devices might have either IB or IBoE formatted gids. The old function assumed that all ports on the same device used the same GID format. However, when it was changed to use find_gid_port(), we inadvertently lost usage of the GID cache. This turned out to be a very costly change. In our testing, each iteration through each index of the GID table takes roughly 35us. When you have multiple devices in a system, and the GID you are looking for is on one of the later devices, the code loops through all of the GID indexes on all of the early devices before it finally succeeds on the target device. This pathological search behavior combined with 35us per GID table index retrieval results in results such as the following from the cmtime application that's part of the latest librdmacm git repo: ib1: step total ms max ms min us us / conn create id : 29.42 0.04 1.00 2.94 bind addr : 186705.66 19.00 18556.00 18670.57 resolve addr : 41.93 9.68 619.00 4.19 resolve route: 486.93 0.48 101.00 48.69 create qp : 4021.95 6.18 330.00 402.20 connect : 68350.39 68588.17 24632.00 6835.04 disconnect : 1460.43 252.65-1862269.00 146.04 destroy : 41.16 0.04 2.00 4.12 ib0: step total ms max ms min us us / conn create id : 28.61 0.68 1.00 2.86 bind addr : 2178.86 2.95 201.00 217.89 resolve addr : 51.26 16.85 845.00 5.13 resolve route: 620.08 0.43 92.00 62.01 create qp : 3344.40 6.36 273.00 334.44 connect : 6435.99 6368.53 7844.00 643.60 disconnect : 5095.38 321.90 757.00 509.54 destroy : 37.13 0.02 2.00 3.71 Clearly, both the bind address and connect operations suffer a huge penalty for being anything other than the default GID on the first port in the system. After applying this patch, the numbers now look like this: ib1: step total ms max ms min us us / conn create id : 30.15 0.03 1.00 3.01 bind addr : 80.27 0.04 7.00 8.03 resolve addr : 43.02 13.53 589.00 4.30 resolve route: 482.90 0.45 100.00 48.29 create qp : 3986.55 5.80 330.00 398.66 connect : 7141.53 7051.29 5005.00 714.15 disconnect : 5038.85 193.63 918.00 503.88 destroy : 37.02 0.04 2.00 3.70 ib0: step total ms max ms min us us / conn create id : 34.27 0.05 1.00 3.43 bind addr : 26.45 0.04 1.00 2.64 resolve addr : 38.25 10.54 760.00 3.82 resolve route: 604.79 0.43 97.00 60.48 create qp : 3314.95 6.34 273.00 331.49 connect : 12399.26 12351.10 8609.00 1239.93 disconnect : 5096.76 270.72 1015.00 509.68 destroy : 37.10 0.03 2.00 3.71 It's worth noting that we still suffer a bit of a penalty on connect to the wrong device, but the penalty is much less than it used to be. Follow on patches deal with this penalty. Many thanks to Neil Horman for helping to track the source of slow function that allowed us to track down the fact that the original patch I mentioned above backed out cache usage and identify just how much that impacted the system. Signed-off-by: Doug Ledford <dledford@redhat.com> Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--drivers/infiniband/core/cma.c35
1 files changed, 6 insertions, 29 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index dab4b41f1715..c62ff9e7f8c6 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -328,28 +328,6 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
328 return ret; 328 return ret;
329} 329}
330 330
331static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
332{
333 int i;
334 int err;
335 struct ib_port_attr props;
336 union ib_gid tmp;
337
338 err = ib_query_port(device, port_num, &props);
339 if (err)
340 return err;
341
342 for (i = 0; i < props.gid_tbl_len; ++i) {
343 err = ib_query_gid(device, port_num, i, &tmp);
344 if (err)
345 return err;
346 if (!memcmp(&tmp, gid, sizeof tmp))
347 return 0;
348 }
349
350 return -EADDRNOTAVAIL;
351}
352
353static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) 331static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
354{ 332{
355 dev_addr->dev_type = ARPHRD_INFINIBAND; 333 dev_addr->dev_type = ARPHRD_INFINIBAND;
@@ -377,7 +355,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
377 struct cma_device *cma_dev; 355 struct cma_device *cma_dev;
378 union ib_gid gid, iboe_gid; 356 union ib_gid gid, iboe_gid;
379 int ret = -ENODEV; 357 int ret = -ENODEV;
380 u8 port; 358 u8 port, found_port;
381 enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ? 359 enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
382 IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; 360 IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
383 361
@@ -390,20 +368,19 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
390 memcpy(&gid, dev_addr->src_dev_addr + 368 memcpy(&gid, dev_addr->src_dev_addr +
391 rdma_addr_gid_offset(dev_addr), sizeof gid); 369 rdma_addr_gid_offset(dev_addr), sizeof gid);
392 list_for_each_entry(cma_dev, &dev_list, list) { 370 list_for_each_entry(cma_dev, &dev_list, list) {
393 for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { 371 for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port)
394 if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) { 372 if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
395 if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && 373 if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
396 rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) 374 rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
397 ret = find_gid_port(cma_dev->device, &iboe_gid, port); 375 ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL);
398 else 376 else
399 ret = find_gid_port(cma_dev->device, &gid, port); 377 ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL);
400 378
401 if (!ret) { 379 if (!ret && (port == found_port)) {
402 id_priv->id.port_num = port; 380 id_priv->id.port_num = found_port;
403 goto out; 381 goto out;
404 } 382 }
405 } 383 }
406 }
407 } 384 }
408 385
409out: 386out: