aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-03-18 12:39:22 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-03-18 12:39:22 -0400
commit9ea446352047d8350553250db51da2c73a610688 (patch)
treef75712fb5bfb5d2a83685023b2838f3c9ea40320
parent9dffdb38d864ae89e16ff7b3a09451270736e35b (diff)
parent082eaa50838c6b70a8244f8b01d7ed7d686f84db (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford: "Initial roundup of 4.6 merge window patches. This is the first of two pull requests. It is the smaller request, but touches for more different things (this is everything but what is in or going into staging). The pull request for the code in staging/rdma is on hold until after we decide what to do on the write/writev API issue and may be partially deferred until 4.7 as a result. Summary: - cxgb4 updates - nes updates - unification of iwarp portmapper code to core - add drain_cq API - various ib_core updates - minor ipoib updates - minor mlx4 updates - more significant mlx5 updates (including a minor merge conflict with net-next tree...merge is simple to resolve and Stephen's resolution was confirmed by Mellanox) - trivial net/9p rdma conversion - ocrdma RoCEv2 update - srpt updates" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (85 commits) iwpm: crash fix for large connections test iw_cxgb3: support for iWARP port mapping iw_cxgb4: remove port mapper related code iw_nes: remove port mapper related code iwcm: common code for port mapper net/9p: convert to new CQ API IB/mlx5: Add support for don't trap rules net/mlx5_core: Introduce forward to next priority action net/mlx5_core: Create anchor of last flow table iser: Accept arbitrary sg lists mapping if the device supports it mlx5: Add arbitrary sg list support IB/core: Add arbitrary sg_list support IB/mlx5: Expose correct max_fast_reg_page_list_len IB/mlx5: Make coding style more consistent IB/mlx5: Convert UMR CQ to new CQ API IB/ocrdma: Skip using unneeded intermediate variable IB/ocrdma: Skip using unneeded intermediate variable IB/ocrdma: Delete unnecessary variable initialisations in 11 functions IB/core: Documentation fix in the MAD header file IB/core: trivial prink cleanup. ...
-rw-r--r--drivers/infiniband/core/cache.c15
-rw-r--r--drivers/infiniband/core/cma.c22
-rw-r--r--drivers/infiniband/core/device.c28
-rw-r--r--drivers/infiniband/core/fmr_pool.c37
-rw-r--r--drivers/infiniband/core/iwcm.c190
-rw-r--r--drivers/infiniband/core/iwpm_msg.c12
-rw-r--r--drivers/infiniband/core/iwpm_util.c14
-rw-r--r--drivers/infiniband/core/iwpm_util.h2
-rw-r--r--drivers/infiniband/core/packer.c14
-rw-r--r--drivers/infiniband/core/sa_query.c13
-rw-r--r--drivers/infiniband/core/ucm.c8
-rw-r--r--drivers/infiniband/core/ucma.c6
-rw-r--r--drivers/infiniband/core/ud_header.c23
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c16
-rw-r--r--drivers/infiniband/core/uverbs_main.c80
-rw-r--r--drivers/infiniband/core/verbs.c166
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c16
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c274
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c9
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c72
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h49
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c12
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c5
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c107
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c6
-rw-r--r--drivers/infiniband/hw/mlx4/main.c72
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h3
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c4
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile2
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c104
-rw-r--r--drivers/infiniband/hw/mlx5/gsi.c548
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c166
-rw-r--r--drivers/infiniband/hw/mlx5/main.c119
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h108
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c601
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c10
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c271
-rw-r--r--drivers/infiniband/hw/mlx5/user.h7
-rw-r--r--drivers/infiniband/hw/nes/Kconfig1
-rw-r--r--drivers/infiniband/hw/nes/nes.c25
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c361
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.h11
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c44
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.h7
-rw-r--r--drivers/infiniband/hw/nes/nes_nic.c7
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c5
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma.h8
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c77
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.h5
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c33
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_sli.h16
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.c4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c38
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c23
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c18
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c5
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c11
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h7
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c7
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c38
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c40
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c912
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h31
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_msg.h2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mcg.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c225
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mr.c54
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c40
-rw-r--r--include/linux/mlx4/device.h3
-rw-r--r--include/linux/mlx5/device.h33
-rw-r--r--include/linux/mlx5/driver.h26
-rw-r--r--include/linux/mlx5/fs.h5
-rw-r--r--include/linux/mlx5/mlx5_ifc.h51
-rw-r--r--include/linux/mlx5/qp.h7
-rw-r--r--include/linux/mlx5/vport.h2
-rw-r--r--include/rdma/ib_mad.h4
-rw-r--r--include/rdma/ib_verbs.h19
-rw-r--r--include/rdma/iw_cm.h6
-rw-r--r--include/uapi/rdma/rdma_netlink.h4
-rw-r--r--net/9p/trans_rdma.c86
90 files changed, 3618 insertions, 1973 deletions
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 53343ffbff7a..cb00d59da456 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1043,8 +1043,8 @@ static void ib_cache_update(struct ib_device *device,
1043 1043
1044 ret = ib_query_port(device, port, tprops); 1044 ret = ib_query_port(device, port, tprops);
1045 if (ret) { 1045 if (ret) {
1046 printk(KERN_WARNING "ib_query_port failed (%d) for %s\n", 1046 pr_warn("ib_query_port failed (%d) for %s\n",
1047 ret, device->name); 1047 ret, device->name);
1048 goto err; 1048 goto err;
1049 } 1049 }
1050 1050
@@ -1067,8 +1067,8 @@ static void ib_cache_update(struct ib_device *device,
1067 for (i = 0; i < pkey_cache->table_len; ++i) { 1067 for (i = 0; i < pkey_cache->table_len; ++i) {
1068 ret = ib_query_pkey(device, port, i, pkey_cache->table + i); 1068 ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
1069 if (ret) { 1069 if (ret) {
1070 printk(KERN_WARNING "ib_query_pkey failed (%d) for %s (index %d)\n", 1070 pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
1071 ret, device->name, i); 1071 ret, device->name, i);
1072 goto err; 1072 goto err;
1073 } 1073 }
1074 } 1074 }
@@ -1078,8 +1078,8 @@ static void ib_cache_update(struct ib_device *device,
1078 ret = ib_query_gid(device, port, i, 1078 ret = ib_query_gid(device, port, i,
1079 gid_cache->table + i, NULL); 1079 gid_cache->table + i, NULL);
1080 if (ret) { 1080 if (ret) {
1081 printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n", 1081 pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
1082 ret, device->name, i); 1082 ret, device->name, i);
1083 goto err; 1083 goto err;
1084 } 1084 }
1085 } 1085 }
@@ -1161,8 +1161,7 @@ int ib_cache_setup_one(struct ib_device *device)
1161 GFP_KERNEL); 1161 GFP_KERNEL);
1162 if (!device->cache.pkey_cache || 1162 if (!device->cache.pkey_cache ||
1163 !device->cache.lmc_cache) { 1163 !device->cache.lmc_cache) {
1164 printk(KERN_WARNING "Couldn't allocate cache " 1164 pr_warn("Couldn't allocate cache for %s\n", device->name);
1165 "for %s\n", device->name);
1166 return -ENOMEM; 1165 return -ENOMEM;
1167 } 1166 }
1168 1167
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 9729639df407..93ab0ae97208 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1206,6 +1206,10 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event,
1206 req->has_gid = true; 1206 req->has_gid = true;
1207 req->service_id = req_param->primary_path->service_id; 1207 req->service_id = req_param->primary_path->service_id;
1208 req->pkey = be16_to_cpu(req_param->primary_path->pkey); 1208 req->pkey = be16_to_cpu(req_param->primary_path->pkey);
1209 if (req->pkey != req_param->bth_pkey)
1210 pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
1211 "RDMA CMA: in the future this may cause the request to be dropped\n",
1212 req_param->bth_pkey, req->pkey);
1209 break; 1213 break;
1210 case IB_CM_SIDR_REQ_RECEIVED: 1214 case IB_CM_SIDR_REQ_RECEIVED:
1211 req->device = sidr_param->listen_id->device; 1215 req->device = sidr_param->listen_id->device;
@@ -1213,6 +1217,10 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event,
1213 req->has_gid = false; 1217 req->has_gid = false;
1214 req->service_id = sidr_param->service_id; 1218 req->service_id = sidr_param->service_id;
1215 req->pkey = sidr_param->pkey; 1219 req->pkey = sidr_param->pkey;
1220 if (req->pkey != sidr_param->bth_pkey)
1221 pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n"
1222 "RDMA CMA: in the future this may cause the request to be dropped\n",
1223 sidr_param->bth_pkey, req->pkey);
1216 break; 1224 break;
1217 default: 1225 default:
1218 return -EINVAL; 1226 return -EINVAL;
@@ -1713,7 +1721,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1713 event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; 1721 event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
1714 break; 1722 break;
1715 default: 1723 default:
1716 printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n", 1724 pr_err("RDMA CMA: unexpected IB CM event: %d\n",
1717 ib_event->event); 1725 ib_event->event);
1718 goto out; 1726 goto out;
1719 } 1727 }
@@ -2186,8 +2194,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
2186 2194
2187 ret = rdma_listen(id, id_priv->backlog); 2195 ret = rdma_listen(id, id_priv->backlog);
2188 if (ret) 2196 if (ret)
2189 printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, " 2197 pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n",
2190 "listening on device %s\n", ret, cma_dev->device->name); 2198 ret, cma_dev->device->name);
2191} 2199}
2192 2200
2193static void cma_listen_on_all(struct rdma_id_private *id_priv) 2201static void cma_listen_on_all(struct rdma_id_private *id_priv)
@@ -3239,7 +3247,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
3239 event.status = 0; 3247 event.status = 0;
3240 break; 3248 break;
3241 default: 3249 default:
3242 printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n", 3250 pr_err("RDMA CMA: unexpected IB CM event: %d\n",
3243 ib_event->event); 3251 ib_event->event);
3244 goto out; 3252 goto out;
3245 } 3253 }
@@ -4003,8 +4011,8 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
4003 if ((dev_addr->bound_dev_if == ndev->ifindex) && 4011 if ((dev_addr->bound_dev_if == ndev->ifindex) &&
4004 (net_eq(dev_net(ndev), dev_addr->net)) && 4012 (net_eq(dev_net(ndev), dev_addr->net)) &&
4005 memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { 4013 memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
4006 printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", 4014 pr_info("RDMA CM addr change for ndev %s used by id %p\n",
4007 ndev->name, &id_priv->id); 4015 ndev->name, &id_priv->id);
4008 work = kzalloc(sizeof *work, GFP_KERNEL); 4016 work = kzalloc(sizeof *work, GFP_KERNEL);
4009 if (!work) 4017 if (!work)
4010 return -ENOMEM; 4018 return -ENOMEM;
@@ -4287,7 +4295,7 @@ static int __init cma_init(void)
4287 goto err; 4295 goto err;
4288 4296
4289 if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table)) 4297 if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
4290 printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n"); 4298 pr_warn("RDMA CMA: failed to add netlink callback\n");
4291 cma_configfs_init(); 4299 cma_configfs_init();
4292 4300
4293 return 0; 4301 return 0;
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 94b80a51ab68..270c7ff6cba7 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -115,8 +115,8 @@ static int ib_device_check_mandatory(struct ib_device *device)
115 115
116 for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { 116 for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
117 if (!*(void **) ((void *) device + mandatory_table[i].offset)) { 117 if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
118 printk(KERN_WARNING "Device %s is missing mandatory function %s\n", 118 pr_warn("Device %s is missing mandatory function %s\n",
119 device->name, mandatory_table[i].name); 119 device->name, mandatory_table[i].name);
120 return -EINVAL; 120 return -EINVAL;
121 } 121 }
122 } 122 }
@@ -255,8 +255,8 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
255 255
256 context = kmalloc(sizeof *context, GFP_KERNEL); 256 context = kmalloc(sizeof *context, GFP_KERNEL);
257 if (!context) { 257 if (!context) {
258 printk(KERN_WARNING "Couldn't allocate client context for %s/%s\n", 258 pr_warn("Couldn't allocate client context for %s/%s\n",
259 device->name, client->name); 259 device->name, client->name);
260 return -ENOMEM; 260 return -ENOMEM;
261 } 261 }
262 262
@@ -343,29 +343,29 @@ int ib_register_device(struct ib_device *device,
343 343
344 ret = read_port_immutable(device); 344 ret = read_port_immutable(device);
345 if (ret) { 345 if (ret) {
346 printk(KERN_WARNING "Couldn't create per port immutable data %s\n", 346 pr_warn("Couldn't create per port immutable data %s\n",
347 device->name); 347 device->name);
348 goto out; 348 goto out;
349 } 349 }
350 350
351 ret = ib_cache_setup_one(device); 351 ret = ib_cache_setup_one(device);
352 if (ret) { 352 if (ret) {
353 printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n"); 353 pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
354 goto out; 354 goto out;
355 } 355 }
356 356
357 memset(&device->attrs, 0, sizeof(device->attrs)); 357 memset(&device->attrs, 0, sizeof(device->attrs));
358 ret = device->query_device(device, &device->attrs, &uhw); 358 ret = device->query_device(device, &device->attrs, &uhw);
359 if (ret) { 359 if (ret) {
360 printk(KERN_WARNING "Couldn't query the device attributes\n"); 360 pr_warn("Couldn't query the device attributes\n");
361 ib_cache_cleanup_one(device); 361 ib_cache_cleanup_one(device);
362 goto out; 362 goto out;
363 } 363 }
364 364
365 ret = ib_device_register_sysfs(device, port_callback); 365 ret = ib_device_register_sysfs(device, port_callback);
366 if (ret) { 366 if (ret) {
367 printk(KERN_WARNING "Couldn't register device %s with driver model\n", 367 pr_warn("Couldn't register device %s with driver model\n",
368 device->name); 368 device->name);
369 ib_cache_cleanup_one(device); 369 ib_cache_cleanup_one(device);
370 goto out; 370 goto out;
371 } 371 }
@@ -566,8 +566,8 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client,
566 goto out; 566 goto out;
567 } 567 }
568 568
569 printk(KERN_WARNING "No client context found for %s/%s\n", 569 pr_warn("No client context found for %s/%s\n",
570 device->name, client->name); 570 device->name, client->name);
571 571
572out: 572out:
573 spin_unlock_irqrestore(&device->client_data_lock, flags); 573 spin_unlock_irqrestore(&device->client_data_lock, flags);
@@ -960,13 +960,13 @@ static int __init ib_core_init(void)
960 960
961 ret = class_register(&ib_class); 961 ret = class_register(&ib_class);
962 if (ret) { 962 if (ret) {
963 printk(KERN_WARNING "Couldn't create InfiniBand device class\n"); 963 pr_warn("Couldn't create InfiniBand device class\n");
964 goto err_comp; 964 goto err_comp;
965 } 965 }
966 966
967 ret = ibnl_init(); 967 ret = ibnl_init();
968 if (ret) { 968 if (ret) {
969 printk(KERN_WARNING "Couldn't init IB netlink interface\n"); 969 pr_warn("Couldn't init IB netlink interface\n");
970 goto err_sysfs; 970 goto err_sysfs;
971 } 971 }
972 972
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 6ac3683c144b..cdbb1f1a6d97 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -150,8 +150,8 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
150 150
151#ifdef DEBUG 151#ifdef DEBUG
152 if (fmr->ref_count !=0) { 152 if (fmr->ref_count !=0) {
153 printk(KERN_WARNING PFX "Unmapping FMR 0x%08x with ref count %d\n", 153 pr_warn(PFX "Unmapping FMR 0x%08x with ref count %d\n",
154 fmr, fmr->ref_count); 154 fmr, fmr->ref_count);
155 } 155 }
156#endif 156#endif
157 } 157 }
@@ -167,7 +167,7 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
167 167
168 ret = ib_unmap_fmr(&fmr_list); 168 ret = ib_unmap_fmr(&fmr_list);
169 if (ret) 169 if (ret)
170 printk(KERN_WARNING PFX "ib_unmap_fmr returned %d\n", ret); 170 pr_warn(PFX "ib_unmap_fmr returned %d\n", ret);
171 171
172 spin_lock_irq(&pool->pool_lock); 172 spin_lock_irq(&pool->pool_lock);
173 list_splice(&unmap_list, &pool->free_list); 173 list_splice(&unmap_list, &pool->free_list);
@@ -222,8 +222,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
222 device = pd->device; 222 device = pd->device;
223 if (!device->alloc_fmr || !device->dealloc_fmr || 223 if (!device->alloc_fmr || !device->dealloc_fmr ||
224 !device->map_phys_fmr || !device->unmap_fmr) { 224 !device->map_phys_fmr || !device->unmap_fmr) {
225 printk(KERN_INFO PFX "Device %s does not support FMRs\n", 225 pr_info(PFX "Device %s does not support FMRs\n", device->name);
226 device->name);
227 return ERR_PTR(-ENOSYS); 226 return ERR_PTR(-ENOSYS);
228 } 227 }
229 228
@@ -233,13 +232,10 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
233 max_remaps = device->attrs.max_map_per_fmr; 232 max_remaps = device->attrs.max_map_per_fmr;
234 233
235 pool = kmalloc(sizeof *pool, GFP_KERNEL); 234 pool = kmalloc(sizeof *pool, GFP_KERNEL);
236 if (!pool) { 235 if (!pool)
237 printk(KERN_WARNING PFX "couldn't allocate pool struct\n");
238 return ERR_PTR(-ENOMEM); 236 return ERR_PTR(-ENOMEM);
239 }
240 237
241 pool->cache_bucket = NULL; 238 pool->cache_bucket = NULL;
242
243 pool->flush_function = params->flush_function; 239 pool->flush_function = params->flush_function;
244 pool->flush_arg = params->flush_arg; 240 pool->flush_arg = params->flush_arg;
245 241
@@ -251,7 +247,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
251 kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket, 247 kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
252 GFP_KERNEL); 248 GFP_KERNEL);
253 if (!pool->cache_bucket) { 249 if (!pool->cache_bucket) {
254 printk(KERN_WARNING PFX "Failed to allocate cache in pool\n"); 250 pr_warn(PFX "Failed to allocate cache in pool\n");
255 ret = -ENOMEM; 251 ret = -ENOMEM;
256 goto out_free_pool; 252 goto out_free_pool;
257 } 253 }
@@ -275,7 +271,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
275 "ib_fmr(%s)", 271 "ib_fmr(%s)",
276 device->name); 272 device->name);
277 if (IS_ERR(pool->thread)) { 273 if (IS_ERR(pool->thread)) {
278 printk(KERN_WARNING PFX "couldn't start cleanup thread\n"); 274 pr_warn(PFX "couldn't start cleanup thread\n");
279 ret = PTR_ERR(pool->thread); 275 ret = PTR_ERR(pool->thread);
280 goto out_free_pool; 276 goto out_free_pool;
281 } 277 }
@@ -294,11 +290,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
294 290
295 for (i = 0; i < params->pool_size; ++i) { 291 for (i = 0; i < params->pool_size; ++i) {
296 fmr = kmalloc(bytes_per_fmr, GFP_KERNEL); 292 fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);
297 if (!fmr) { 293 if (!fmr)
298 printk(KERN_WARNING PFX "failed to allocate fmr "
299 "struct for FMR %d\n", i);
300 goto out_fail; 294 goto out_fail;
301 }
302 295
303 fmr->pool = pool; 296 fmr->pool = pool;
304 fmr->remap_count = 0; 297 fmr->remap_count = 0;
@@ -307,8 +300,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
307 300
308 fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr); 301 fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
309 if (IS_ERR(fmr->fmr)) { 302 if (IS_ERR(fmr->fmr)) {
310 printk(KERN_WARNING PFX "fmr_create failed " 303 pr_warn(PFX "fmr_create failed for FMR %d\n",
311 "for FMR %d\n", i); 304 i);
312 kfree(fmr); 305 kfree(fmr);
313 goto out_fail; 306 goto out_fail;
314 } 307 }
@@ -363,8 +356,8 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
363 } 356 }
364 357
365 if (i < pool->pool_size) 358 if (i < pool->pool_size)
366 printk(KERN_WARNING PFX "pool still has %d regions registered\n", 359 pr_warn(PFX "pool still has %d regions registered\n",
367 pool->pool_size - i); 360 pool->pool_size - i);
368 361
369 kfree(pool->cache_bucket); 362 kfree(pool->cache_bucket);
370 kfree(pool); 363 kfree(pool);
@@ -463,7 +456,7 @@ struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
463 list_add(&fmr->list, &pool->free_list); 456 list_add(&fmr->list, &pool->free_list);
464 spin_unlock_irqrestore(&pool->pool_lock, flags); 457 spin_unlock_irqrestore(&pool->pool_lock, flags);
465 458
466 printk(KERN_WARNING PFX "fmr_map returns %d\n", result); 459 pr_warn(PFX "fmr_map returns %d\n", result);
467 460
468 return ERR_PTR(result); 461 return ERR_PTR(result);
469 } 462 }
@@ -517,8 +510,8 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
517 510
518#ifdef DEBUG 511#ifdef DEBUG
519 if (fmr->ref_count < 0) 512 if (fmr->ref_count < 0)
520 printk(KERN_WARNING PFX "FMR %p has ref count %d < 0\n", 513 pr_warn(PFX "FMR %p has ref count %d < 0\n",
521 fmr, fmr->ref_count); 514 fmr, fmr->ref_count);
522#endif 515#endif
523 516
524 spin_unlock_irqrestore(&pool->pool_lock, flags); 517 spin_unlock_irqrestore(&pool->pool_lock, flags);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index ff9163dc1596..e28a160cdab0 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -50,6 +50,8 @@
50 50
51#include <rdma/iw_cm.h> 51#include <rdma/iw_cm.h>
52#include <rdma/ib_addr.h> 52#include <rdma/ib_addr.h>
53#include <rdma/iw_portmap.h>
54#include <rdma/rdma_netlink.h>
53 55
54#include "iwcm.h" 56#include "iwcm.h"
55 57
@@ -57,6 +59,16 @@ MODULE_AUTHOR("Tom Tucker");
57MODULE_DESCRIPTION("iWARP CM"); 59MODULE_DESCRIPTION("iWARP CM");
58MODULE_LICENSE("Dual BSD/GPL"); 60MODULE_LICENSE("Dual BSD/GPL");
59 61
62static struct ibnl_client_cbs iwcm_nl_cb_table[] = {
63 [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
64 [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
65 [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
66 [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
67 [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
68 [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
69 [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
70};
71
60static struct workqueue_struct *iwcm_wq; 72static struct workqueue_struct *iwcm_wq;
61struct iwcm_work { 73struct iwcm_work {
62 struct work_struct work; 74 struct work_struct work;
@@ -402,6 +414,11 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
402 } 414 }
403 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 415 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
404 416
417 if (cm_id->mapped) {
418 iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr);
419 iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM);
420 }
421
405 (void)iwcm_deref_id(cm_id_priv); 422 (void)iwcm_deref_id(cm_id_priv);
406} 423}
407 424
@@ -426,6 +443,97 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id)
426} 443}
427EXPORT_SYMBOL(iw_destroy_cm_id); 444EXPORT_SYMBOL(iw_destroy_cm_id);
428 445
446/**
447 * iw_cm_check_wildcard - If IP address is 0 then use original
448 * @pm_addr: sockaddr containing the ip to check for wildcard
449 * @cm_addr: sockaddr containing the actual IP address
450 * @cm_outaddr: sockaddr to set IP addr which leaving port
451 *
452 * Checks the pm_addr for wildcard and then sets cm_outaddr's
453 * IP to the actual (cm_addr).
454 */
455static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr,
456 struct sockaddr_storage *cm_addr,
457 struct sockaddr_storage *cm_outaddr)
458{
459 if (pm_addr->ss_family == AF_INET) {
460 struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr;
461
462 if (pm4_addr->sin_addr.s_addr == INADDR_ANY) {
463 struct sockaddr_in *cm4_addr =
464 (struct sockaddr_in *)cm_addr;
465 struct sockaddr_in *cm4_outaddr =
466 (struct sockaddr_in *)cm_outaddr;
467
468 cm4_outaddr->sin_addr = cm4_addr->sin_addr;
469 }
470 } else {
471 struct sockaddr_in6 *pm6_addr = (struct sockaddr_in6 *)pm_addr;
472
473 if (ipv6_addr_type(&pm6_addr->sin6_addr) == IPV6_ADDR_ANY) {
474 struct sockaddr_in6 *cm6_addr =
475 (struct sockaddr_in6 *)cm_addr;
476 struct sockaddr_in6 *cm6_outaddr =
477 (struct sockaddr_in6 *)cm_outaddr;
478
479 cm6_outaddr->sin6_addr = cm6_addr->sin6_addr;
480 }
481 }
482}
483
484/**
485 * iw_cm_map - Use portmapper to map the ports
486 * @cm_id: connection manager pointer
487 * @active: Indicates the active side when true
488 * returns nonzero for error only if iwpm_create_mapinfo() fails
489 *
490 * Tries to add a mapping for a port using the Portmapper. If
491 * successful in mapping the IP/Port it will check the remote
492 * mapped IP address for a wildcard IP address and replace the
493 * zero IP address with the remote_addr.
494 */
495static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
496{
497 struct iwpm_dev_data pm_reg_msg;
498 struct iwpm_sa_data pm_msg;
499 int status;
500
501 cm_id->m_local_addr = cm_id->local_addr;
502 cm_id->m_remote_addr = cm_id->remote_addr;
503
504 memcpy(pm_reg_msg.dev_name, cm_id->device->name,
505 sizeof(pm_reg_msg.dev_name));
506 memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname,
507 sizeof(pm_reg_msg.if_name));
508
509 if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) ||
510 !iwpm_valid_pid())
511 return 0;
512
513 cm_id->mapped = true;
514 pm_msg.loc_addr = cm_id->local_addr;
515 pm_msg.rem_addr = cm_id->remote_addr;
516 if (active)
517 status = iwpm_add_and_query_mapping(&pm_msg,
518 RDMA_NL_IWCM);
519 else
520 status = iwpm_add_mapping(&pm_msg, RDMA_NL_IWCM);
521
522 if (!status) {
523 cm_id->m_local_addr = pm_msg.mapped_loc_addr;
524 if (active) {
525 cm_id->m_remote_addr = pm_msg.mapped_rem_addr;
526 iw_cm_check_wildcard(&pm_msg.mapped_rem_addr,
527 &cm_id->remote_addr,
528 &cm_id->m_remote_addr);
529 }
530 }
531
532 return iwpm_create_mapinfo(&cm_id->local_addr,
533 &cm_id->m_local_addr,
534 RDMA_NL_IWCM);
535}
536
429/* 537/*
430 * CM_ID <-- LISTEN 538 * CM_ID <-- LISTEN
431 * 539 *
@@ -452,7 +560,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
452 case IW_CM_STATE_IDLE: 560 case IW_CM_STATE_IDLE:
453 cm_id_priv->state = IW_CM_STATE_LISTEN; 561 cm_id_priv->state = IW_CM_STATE_LISTEN;
454 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 562 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
455 ret = cm_id->device->iwcm->create_listen(cm_id, backlog); 563 ret = iw_cm_map(cm_id, false);
564 if (!ret)
565 ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
456 if (ret) 566 if (ret)
457 cm_id_priv->state = IW_CM_STATE_IDLE; 567 cm_id_priv->state = IW_CM_STATE_IDLE;
458 spin_lock_irqsave(&cm_id_priv->lock, flags); 568 spin_lock_irqsave(&cm_id_priv->lock, flags);
@@ -582,39 +692,37 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
582 spin_lock_irqsave(&cm_id_priv->lock, flags); 692 spin_lock_irqsave(&cm_id_priv->lock, flags);
583 693
584 if (cm_id_priv->state != IW_CM_STATE_IDLE) { 694 if (cm_id_priv->state != IW_CM_STATE_IDLE) {
585 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 695 ret = -EINVAL;
586 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 696 goto err;
587 wake_up_all(&cm_id_priv->connect_wait);
588 return -EINVAL;
589 } 697 }
590 698
591 /* Get the ib_qp given the QPN */ 699 /* Get the ib_qp given the QPN */
592 qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); 700 qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
593 if (!qp) { 701 if (!qp) {
594 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 702 ret = -EINVAL;
595 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 703 goto err;
596 wake_up_all(&cm_id_priv->connect_wait);
597 return -EINVAL;
598 } 704 }
599 cm_id->device->iwcm->add_ref(qp); 705 cm_id->device->iwcm->add_ref(qp);
600 cm_id_priv->qp = qp; 706 cm_id_priv->qp = qp;
601 cm_id_priv->state = IW_CM_STATE_CONN_SENT; 707 cm_id_priv->state = IW_CM_STATE_CONN_SENT;
602 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 708 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
603 709
604 ret = cm_id->device->iwcm->connect(cm_id, iw_param); 710 ret = iw_cm_map(cm_id, true);
605 if (ret) { 711 if (!ret)
606 spin_lock_irqsave(&cm_id_priv->lock, flags); 712 ret = cm_id->device->iwcm->connect(cm_id, iw_param);
607 if (cm_id_priv->qp) { 713 if (!ret)
608 cm_id->device->iwcm->rem_ref(qp); 714 return 0; /* success */
609 cm_id_priv->qp = NULL;
610 }
611 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
612 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
613 cm_id_priv->state = IW_CM_STATE_IDLE;
614 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
615 wake_up_all(&cm_id_priv->connect_wait);
616 }
617 715
716 spin_lock_irqsave(&cm_id_priv->lock, flags);
717 if (cm_id_priv->qp) {
718 cm_id->device->iwcm->rem_ref(qp);
719 cm_id_priv->qp = NULL;
720 }
721 cm_id_priv->state = IW_CM_STATE_IDLE;
722err:
723 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
724 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
725 wake_up_all(&cm_id_priv->connect_wait);
618 return ret; 726 return ret;
619} 727}
620EXPORT_SYMBOL(iw_cm_connect); 728EXPORT_SYMBOL(iw_cm_connect);
@@ -656,8 +764,23 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
656 goto out; 764 goto out;
657 765
658 cm_id->provider_data = iw_event->provider_data; 766 cm_id->provider_data = iw_event->provider_data;
659 cm_id->local_addr = iw_event->local_addr; 767 cm_id->m_local_addr = iw_event->local_addr;
660 cm_id->remote_addr = iw_event->remote_addr; 768 cm_id->m_remote_addr = iw_event->remote_addr;
769 cm_id->local_addr = listen_id_priv->id.local_addr;
770
771 ret = iwpm_get_remote_info(&listen_id_priv->id.m_local_addr,
772 &iw_event->remote_addr,
773 &cm_id->remote_addr,
774 RDMA_NL_IWCM);
775 if (ret) {
776 cm_id->remote_addr = iw_event->remote_addr;
777 } else {
778 iw_cm_check_wildcard(&listen_id_priv->id.m_local_addr,
779 &iw_event->local_addr,
780 &cm_id->local_addr);
781 iw_event->local_addr = cm_id->local_addr;
782 iw_event->remote_addr = cm_id->remote_addr;
783 }
661 784
662 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 785 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
663 cm_id_priv->state = IW_CM_STATE_CONN_RECV; 786 cm_id_priv->state = IW_CM_STATE_CONN_RECV;
@@ -753,8 +876,10 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
753 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 876 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
754 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); 877 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
755 if (iw_event->status == 0) { 878 if (iw_event->status == 0) {
756 cm_id_priv->id.local_addr = iw_event->local_addr; 879 cm_id_priv->id.m_local_addr = iw_event->local_addr;
757 cm_id_priv->id.remote_addr = iw_event->remote_addr; 880 cm_id_priv->id.m_remote_addr = iw_event->remote_addr;
881 iw_event->local_addr = cm_id_priv->id.local_addr;
882 iw_event->remote_addr = cm_id_priv->id.remote_addr;
758 cm_id_priv->state = IW_CM_STATE_ESTABLISHED; 883 cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
759 } else { 884 } else {
760 /* REJECTED or RESET */ 885 /* REJECTED or RESET */
@@ -1044,6 +1169,17 @@ EXPORT_SYMBOL(iw_cm_init_qp_attr);
1044 1169
1045static int __init iw_cm_init(void) 1170static int __init iw_cm_init(void)
1046{ 1171{
1172 int ret;
1173
1174 ret = iwpm_init(RDMA_NL_IWCM);
1175 if (ret)
1176 pr_err("iw_cm: couldn't init iwpm\n");
1177
1178 ret = ibnl_add_client(RDMA_NL_IWCM, RDMA_NL_IWPM_NUM_OPS,
1179 iwcm_nl_cb_table);
1180 if (ret)
1181 pr_err("iw_cm: couldn't register netlink callbacks\n");
1182
1047 iwcm_wq = create_singlethread_workqueue("iw_cm_wq"); 1183 iwcm_wq = create_singlethread_workqueue("iw_cm_wq");
1048 if (!iwcm_wq) 1184 if (!iwcm_wq)
1049 return -ENOMEM; 1185 return -ENOMEM;
@@ -1063,6 +1199,8 @@ static void __exit iw_cm_cleanup(void)
1063{ 1199{
1064 unregister_net_sysctl_table(iwcm_ctl_table_hdr); 1200 unregister_net_sysctl_table(iwcm_ctl_table_hdr);
1065 destroy_workqueue(iwcm_wq); 1201 destroy_workqueue(iwcm_wq);
1202 ibnl_remove_client(RDMA_NL_IWCM);
1203 iwpm_exit(RDMA_NL_IWCM);
1066} 1204}
1067 1205
1068module_init(iw_cm_init); 1206module_init(iw_cm_init);
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 22a3abee2a54..43e3fa27102b 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -88,8 +88,8 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
88 ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_REG_PID_SEQ); 88 ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_REG_PID_SEQ);
89 if (ret) 89 if (ret)
90 goto pid_query_error; 90 goto pid_query_error;
91 ret = ibnl_put_attr(skb, nlh, IWPM_IFNAME_SIZE, 91 ret = ibnl_put_attr(skb, nlh, IFNAMSIZ,
92 pm_msg->if_name, IWPM_NLA_REG_IF_NAME); 92 pm_msg->if_name, IWPM_NLA_REG_IF_NAME);
93 if (ret) 93 if (ret)
94 goto pid_query_error; 94 goto pid_query_error;
95 ret = ibnl_put_attr(skb, nlh, IWPM_DEVNAME_SIZE, 95 ret = ibnl_put_attr(skb, nlh, IWPM_DEVNAME_SIZE,
@@ -394,7 +394,7 @@ register_pid_response_exit:
394 /* always for found nlmsg_request */ 394 /* always for found nlmsg_request */
395 kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); 395 kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
396 barrier(); 396 barrier();
397 wake_up(&nlmsg_request->waitq); 397 up(&nlmsg_request->sem);
398 return 0; 398 return 0;
399} 399}
400EXPORT_SYMBOL(iwpm_register_pid_cb); 400EXPORT_SYMBOL(iwpm_register_pid_cb);
@@ -463,7 +463,7 @@ add_mapping_response_exit:
463 /* always for found request */ 463 /* always for found request */
464 kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); 464 kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
465 barrier(); 465 barrier();
466 wake_up(&nlmsg_request->waitq); 466 up(&nlmsg_request->sem);
467 return 0; 467 return 0;
468} 468}
469EXPORT_SYMBOL(iwpm_add_mapping_cb); 469EXPORT_SYMBOL(iwpm_add_mapping_cb);
@@ -555,7 +555,7 @@ query_mapping_response_exit:
555 /* always for found request */ 555 /* always for found request */
556 kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); 556 kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
557 barrier(); 557 barrier();
558 wake_up(&nlmsg_request->waitq); 558 up(&nlmsg_request->sem);
559 return 0; 559 return 0;
560} 560}
561EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb); 561EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb);
@@ -749,7 +749,7 @@ int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
749 /* always for found request */ 749 /* always for found request */
750 kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); 750 kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
751 barrier(); 751 barrier();
752 wake_up(&nlmsg_request->waitq); 752 up(&nlmsg_request->sem);
753 return 0; 753 return 0;
754} 754}
755EXPORT_SYMBOL(iwpm_mapping_error_cb); 755EXPORT_SYMBOL(iwpm_mapping_error_cb);
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 5fb089e91353..9b2bf2fb2b00 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -254,9 +254,9 @@ void iwpm_add_remote_info(struct iwpm_remote_info *rem_info)
254} 254}
255 255
256int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr, 256int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
257 struct sockaddr_storage *mapped_rem_addr, 257 struct sockaddr_storage *mapped_rem_addr,
258 struct sockaddr_storage *remote_addr, 258 struct sockaddr_storage *remote_addr,
259 u8 nl_client) 259 u8 nl_client)
260{ 260{
261 struct hlist_node *tmp_hlist_node; 261 struct hlist_node *tmp_hlist_node;
262 struct hlist_head *hash_bucket_head; 262 struct hlist_head *hash_bucket_head;
@@ -322,6 +322,8 @@ struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
322 nlmsg_request->nl_client = nl_client; 322 nlmsg_request->nl_client = nl_client;
323 nlmsg_request->request_done = 0; 323 nlmsg_request->request_done = 0;
324 nlmsg_request->err_code = 0; 324 nlmsg_request->err_code = 0;
325 sema_init(&nlmsg_request->sem, 1);
326 down(&nlmsg_request->sem);
325 return nlmsg_request; 327 return nlmsg_request;
326} 328}
327 329
@@ -364,11 +366,9 @@ struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq)
364int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request) 366int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request)
365{ 367{
366 int ret; 368 int ret;
367 init_waitqueue_head(&nlmsg_request->waitq);
368 369
369 ret = wait_event_timeout(nlmsg_request->waitq, 370 ret = down_timeout(&nlmsg_request->sem, IWPM_NL_TIMEOUT);
370 (nlmsg_request->request_done != 0), IWPM_NL_TIMEOUT); 371 if (ret) {
371 if (!ret) {
372 ret = -EINVAL; 372 ret = -EINVAL;
373 pr_info("%s: Timeout %d sec for netlink request (seq = %u)\n", 373 pr_info("%s: Timeout %d sec for netlink request (seq = %u)\n",
374 __func__, (IWPM_NL_TIMEOUT/HZ), nlmsg_request->nlmsg_seq); 374 __func__, (IWPM_NL_TIMEOUT/HZ), nlmsg_request->nlmsg_seq);
diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
index b7b9e194ce81..af1fc14a0d3d 100644
--- a/drivers/infiniband/core/iwpm_util.h
+++ b/drivers/infiniband/core/iwpm_util.h
@@ -69,7 +69,7 @@ struct iwpm_nlmsg_request {
69 u8 nl_client; 69 u8 nl_client;
70 u8 request_done; 70 u8 request_done;
71 u16 err_code; 71 u16 err_code;
72 wait_queue_head_t waitq; 72 struct semaphore sem;
73 struct kref kref; 73 struct kref kref;
74}; 74};
75 75
diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c
index 1b65986c0be3..19b1ee3279b4 100644
--- a/drivers/infiniband/core/packer.c
+++ b/drivers/infiniband/core/packer.c
@@ -44,7 +44,7 @@ static u64 value_read(int offset, int size, void *structure)
44 case 4: return be32_to_cpup((__be32 *) (structure + offset)); 44 case 4: return be32_to_cpup((__be32 *) (structure + offset));
45 case 8: return be64_to_cpup((__be64 *) (structure + offset)); 45 case 8: return be64_to_cpup((__be64 *) (structure + offset));
46 default: 46 default:
47 printk(KERN_WARNING "Field size %d bits not handled\n", size * 8); 47 pr_warn("Field size %d bits not handled\n", size * 8);
48 return 0; 48 return 0;
49 } 49 }
50} 50}
@@ -104,9 +104,8 @@ void ib_pack(const struct ib_field *desc,
104 } else { 104 } else {
105 if (desc[i].offset_bits % 8 || 105 if (desc[i].offset_bits % 8 ||
106 desc[i].size_bits % 8) { 106 desc[i].size_bits % 8) {
107 printk(KERN_WARNING "Structure field %s of size %d " 107 pr_warn("Structure field %s of size %d bits is not byte-aligned\n",
108 "bits is not byte-aligned\n", 108 desc[i].field_name, desc[i].size_bits);
109 desc[i].field_name, desc[i].size_bits);
110 } 109 }
111 110
112 if (desc[i].struct_size_bytes) 111 if (desc[i].struct_size_bytes)
@@ -132,7 +131,7 @@ static void value_write(int offset, int size, u64 val, void *structure)
132 case 32: *(__be32 *) (structure + offset) = cpu_to_be32(val); break; 131 case 32: *(__be32 *) (structure + offset) = cpu_to_be32(val); break;
133 case 64: *(__be64 *) (structure + offset) = cpu_to_be64(val); break; 132 case 64: *(__be64 *) (structure + offset) = cpu_to_be64(val); break;
134 default: 133 default:
135 printk(KERN_WARNING "Field size %d bits not handled\n", size * 8); 134 pr_warn("Field size %d bits not handled\n", size * 8);
136 } 135 }
137} 136}
138 137
@@ -188,9 +187,8 @@ void ib_unpack(const struct ib_field *desc,
188 } else { 187 } else {
189 if (desc[i].offset_bits % 8 || 188 if (desc[i].offset_bits % 8 ||
190 desc[i].size_bits % 8) { 189 desc[i].size_bits % 8) {
191 printk(KERN_WARNING "Structure field %s of size %d " 190 pr_warn("Structure field %s of size %d bits is not byte-aligned\n",
192 "bits is not byte-aligned\n", 191 desc[i].field_name, desc[i].size_bits);
193 desc[i].field_name, desc[i].size_bits);
194 } 192 }
195 193
196 memcpy(structure + desc[i].struct_offset_bytes, 194 memcpy(structure + desc[i].struct_offset_bytes,
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 1e37f3515d98..b5656a2298ee 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -864,13 +864,12 @@ static void update_sm_ah(struct work_struct *work)
864 struct ib_ah_attr ah_attr; 864 struct ib_ah_attr ah_attr;
865 865
866 if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { 866 if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
867 printk(KERN_WARNING "Couldn't query port\n"); 867 pr_warn("Couldn't query port\n");
868 return; 868 return;
869 } 869 }
870 870
871 new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL); 871 new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
872 if (!new_ah) { 872 if (!new_ah) {
873 printk(KERN_WARNING "Couldn't allocate new SM AH\n");
874 return; 873 return;
875 } 874 }
876 875
@@ -880,7 +879,7 @@ static void update_sm_ah(struct work_struct *work)
880 new_ah->pkey_index = 0; 879 new_ah->pkey_index = 0;
881 if (ib_find_pkey(port->agent->device, port->port_num, 880 if (ib_find_pkey(port->agent->device, port->port_num,
882 IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index)) 881 IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
883 printk(KERN_ERR "Couldn't find index for default PKey\n"); 882 pr_err("Couldn't find index for default PKey\n");
884 883
885 memset(&ah_attr, 0, sizeof ah_attr); 884 memset(&ah_attr, 0, sizeof ah_attr);
886 ah_attr.dlid = port_attr.sm_lid; 885 ah_attr.dlid = port_attr.sm_lid;
@@ -889,7 +888,7 @@ static void update_sm_ah(struct work_struct *work)
889 888
890 new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr); 889 new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
891 if (IS_ERR(new_ah->ah)) { 890 if (IS_ERR(new_ah->ah)) {
892 printk(KERN_WARNING "Couldn't create new SM AH\n"); 891 pr_warn("Couldn't create new SM AH\n");
893 kfree(new_ah); 892 kfree(new_ah);
894 return; 893 return;
895 } 894 }
@@ -1221,7 +1220,7 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
1221 rec.net = NULL; 1220 rec.net = NULL;
1222 rec.ifindex = 0; 1221 rec.ifindex = 0;
1223 rec.gid_type = IB_GID_TYPE_IB; 1222 rec.gid_type = IB_GID_TYPE_IB;
1224 memset(rec.dmac, 0, ETH_ALEN); 1223 eth_zero_addr(rec.dmac);
1225 query->callback(status, &rec, query->context); 1224 query->callback(status, &rec, query->context);
1226 } else 1225 } else
1227 query->callback(status, NULL, query->context); 1226 query->callback(status, NULL, query->context);
@@ -1800,13 +1799,13 @@ static int __init ib_sa_init(void)
1800 1799
1801 ret = ib_register_client(&sa_client); 1800 ret = ib_register_client(&sa_client);
1802 if (ret) { 1801 if (ret) {
1803 printk(KERN_ERR "Couldn't register ib_sa client\n"); 1802 pr_err("Couldn't register ib_sa client\n");
1804 goto err1; 1803 goto err1;
1805 } 1804 }
1806 1805
1807 ret = mcast_init(); 1806 ret = mcast_init();
1808 if (ret) { 1807 if (ret) {
1809 printk(KERN_ERR "Couldn't initialize multicast handling\n"); 1808 pr_err("Couldn't initialize multicast handling\n");
1810 goto err2; 1809 goto err2;
1811 } 1810 }
1812 1811
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 6b4e8a008bc0..4a9aa0433b07 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1234,7 +1234,7 @@ static int find_overflow_devnum(void)
1234 ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES, 1234 ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES,
1235 "infiniband_cm"); 1235 "infiniband_cm");
1236 if (ret) { 1236 if (ret) {
1237 printk(KERN_ERR "ucm: couldn't register dynamic device number\n"); 1237 pr_err("ucm: couldn't register dynamic device number\n");
1238 return ret; 1238 return ret;
1239 } 1239 }
1240 } 1240 }
@@ -1329,19 +1329,19 @@ static int __init ib_ucm_init(void)
1329 ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES, 1329 ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES,
1330 "infiniband_cm"); 1330 "infiniband_cm");
1331 if (ret) { 1331 if (ret) {
1332 printk(KERN_ERR "ucm: couldn't register device number\n"); 1332 pr_err("ucm: couldn't register device number\n");
1333 goto error1; 1333 goto error1;
1334 } 1334 }
1335 1335
1336 ret = class_create_file(&cm_class, &class_attr_abi_version.attr); 1336 ret = class_create_file(&cm_class, &class_attr_abi_version.attr);
1337 if (ret) { 1337 if (ret) {
1338 printk(KERN_ERR "ucm: couldn't create abi_version attribute\n"); 1338 pr_err("ucm: couldn't create abi_version attribute\n");
1339 goto error2; 1339 goto error2;
1340 } 1340 }
1341 1341
1342 ret = ib_register_client(&ucm_client); 1342 ret = ib_register_client(&ucm_client);
1343 if (ret) { 1343 if (ret) {
1344 printk(KERN_ERR "ucm: couldn't register client\n"); 1344 pr_err("ucm: couldn't register client\n");
1345 goto error3; 1345 goto error3;
1346 } 1346 }
1347 return 0; 1347 return 0;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 8b5a934e1133..dd3bcceadfde 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -314,7 +314,7 @@ static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
314 } 314 }
315 } 315 }
316 if (!event_found) 316 if (!event_found)
317 printk(KERN_ERR "ucma_removal_event_handler: warning: connect request event wasn't found\n"); 317 pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
318} 318}
319 319
320static int ucma_event_handler(struct rdma_cm_id *cm_id, 320static int ucma_event_handler(struct rdma_cm_id *cm_id,
@@ -1716,13 +1716,13 @@ static int __init ucma_init(void)
1716 1716
1717 ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); 1717 ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
1718 if (ret) { 1718 if (ret) {
1719 printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n"); 1719 pr_err("rdma_ucm: couldn't create abi_version attr\n");
1720 goto err1; 1720 goto err1;
1721 } 1721 }
1722 1722
1723 ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table); 1723 ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
1724 if (!ucma_ctl_table_hdr) { 1724 if (!ucma_ctl_table_hdr) {
1725 printk(KERN_ERR "rdma_ucm: couldn't register sysctl paths\n"); 1725 pr_err("rdma_ucm: couldn't register sysctl paths\n");
1726 ret = -ENOMEM; 1726 ret = -ENOMEM;
1727 goto err2; 1727 goto err2;
1728 } 1728 }
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 2116132568e7..29a45d2f8898 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -479,8 +479,8 @@ int ib_ud_header_unpack(void *buf,
479 buf += IB_LRH_BYTES; 479 buf += IB_LRH_BYTES;
480 480
481 if (header->lrh.link_version != 0) { 481 if (header->lrh.link_version != 0) {
482 printk(KERN_WARNING "Invalid LRH.link_version %d\n", 482 pr_warn("Invalid LRH.link_version %d\n",
483 header->lrh.link_version); 483 header->lrh.link_version);
484 return -EINVAL; 484 return -EINVAL;
485 } 485 }
486 486
@@ -496,20 +496,20 @@ int ib_ud_header_unpack(void *buf,
496 buf += IB_GRH_BYTES; 496 buf += IB_GRH_BYTES;
497 497
498 if (header->grh.ip_version != 6) { 498 if (header->grh.ip_version != 6) {
499 printk(KERN_WARNING "Invalid GRH.ip_version %d\n", 499 pr_warn("Invalid GRH.ip_version %d\n",
500 header->grh.ip_version); 500 header->grh.ip_version);
501 return -EINVAL; 501 return -EINVAL;
502 } 502 }
503 if (header->grh.next_header != 0x1b) { 503 if (header->grh.next_header != 0x1b) {
504 printk(KERN_WARNING "Invalid GRH.next_header 0x%02x\n", 504 pr_warn("Invalid GRH.next_header 0x%02x\n",
505 header->grh.next_header); 505 header->grh.next_header);
506 return -EINVAL; 506 return -EINVAL;
507 } 507 }
508 break; 508 break;
509 509
510 default: 510 default:
511 printk(KERN_WARNING "Invalid LRH.link_next_header %d\n", 511 pr_warn("Invalid LRH.link_next_header %d\n",
512 header->lrh.link_next_header); 512 header->lrh.link_next_header);
513 return -EINVAL; 513 return -EINVAL;
514 } 514 }
515 515
@@ -525,14 +525,13 @@ int ib_ud_header_unpack(void *buf,
525 header->immediate_present = 1; 525 header->immediate_present = 1;
526 break; 526 break;
527 default: 527 default:
528 printk(KERN_WARNING "Invalid BTH.opcode 0x%02x\n", 528 pr_warn("Invalid BTH.opcode 0x%02x\n", header->bth.opcode);
529 header->bth.opcode);
530 return -EINVAL; 529 return -EINVAL;
531 } 530 }
532 531
533 if (header->bth.transport_header_version != 0) { 532 if (header->bth.transport_header_version != 0) {
534 printk(KERN_WARNING "Invalid BTH.transport_header_version %d\n", 533 pr_warn("Invalid BTH.transport_header_version %d\n",
535 header->bth.transport_header_version); 534 header->bth.transport_header_version);
536 return -EINVAL; 535 return -EINVAL;
537 } 536 }
538 537
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 6c6fbff19752..3638c787cb7c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1174,6 +1174,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
1174 struct ib_uobject *uobj; 1174 struct ib_uobject *uobj;
1175 struct ib_pd *pd; 1175 struct ib_pd *pd;
1176 struct ib_mw *mw; 1176 struct ib_mw *mw;
1177 struct ib_udata udata;
1177 int ret; 1178 int ret;
1178 1179
1179 if (out_len < sizeof(resp)) 1180 if (out_len < sizeof(resp))
@@ -1195,7 +1196,12 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
1195 goto err_free; 1196 goto err_free;
1196 } 1197 }
1197 1198
1198 mw = pd->device->alloc_mw(pd, cmd.mw_type); 1199 INIT_UDATA(&udata, buf + sizeof(cmd),
1200 (unsigned long)cmd.response + sizeof(resp),
1201 in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
1202 out_len - sizeof(resp));
1203
1204 mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata);
1199 if (IS_ERR(mw)) { 1205 if (IS_ERR(mw)) {
1200 ret = PTR_ERR(mw); 1206 ret = PTR_ERR(mw);
1201 goto err_put; 1207 goto err_put;
@@ -3086,6 +3092,14 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
3086 !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW)) 3092 !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
3087 return -EPERM; 3093 return -EPERM;
3088 3094
3095 if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED)
3096 return -EINVAL;
3097
3098 if ((cmd.flow_attr.flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
3099 ((cmd.flow_attr.type == IB_FLOW_ATTR_ALL_DEFAULT) ||
3100 (cmd.flow_attr.type == IB_FLOW_ATTR_MC_DEFAULT)))
3101 return -EINVAL;
3102
3089 if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS) 3103 if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
3090 return -EINVAL; 3104 return -EINVAL;
3091 3105
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 39680aed99dd..28ba2cc81535 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -683,12 +683,28 @@ out:
683 return ev_file; 683 return ev_file;
684} 684}
685 685
686static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
687{
688 u64 mask;
689
690 if (command <= IB_USER_VERBS_CMD_OPEN_QP)
691 mask = ib_dev->uverbs_cmd_mask;
692 else
693 mask = ib_dev->uverbs_ex_cmd_mask;
694
695 if (mask & ((u64)1 << command))
696 return 0;
697
698 return -1;
699}
700
686static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, 701static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
687 size_t count, loff_t *pos) 702 size_t count, loff_t *pos)
688{ 703{
689 struct ib_uverbs_file *file = filp->private_data; 704 struct ib_uverbs_file *file = filp->private_data;
690 struct ib_device *ib_dev; 705 struct ib_device *ib_dev;
691 struct ib_uverbs_cmd_hdr hdr; 706 struct ib_uverbs_cmd_hdr hdr;
707 __u32 command;
692 __u32 flags; 708 __u32 flags;
693 int srcu_key; 709 int srcu_key;
694 ssize_t ret; 710 ssize_t ret;
@@ -707,37 +723,34 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
707 goto out; 723 goto out;
708 } 724 }
709 725
710 flags = (hdr.command & 726 if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
711 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; 727 IB_USER_VERBS_CMD_COMMAND_MASK)) {
728 ret = -EINVAL;
729 goto out;
730 }
712 731
713 if (!flags) { 732 command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
714 __u32 command; 733 if (verify_command_mask(ib_dev, command)) {
734 ret = -EOPNOTSUPP;
735 goto out;
736 }
715 737
716 if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | 738 if (!file->ucontext &&
717 IB_USER_VERBS_CMD_COMMAND_MASK)) { 739 command != IB_USER_VERBS_CMD_GET_CONTEXT) {
718 ret = -EINVAL; 740 ret = -EINVAL;
719 goto out; 741 goto out;
720 } 742 }
721 743
722 command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; 744 flags = (hdr.command &
745 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
723 746
747 if (!flags) {
724 if (command >= ARRAY_SIZE(uverbs_cmd_table) || 748 if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
725 !uverbs_cmd_table[command]) { 749 !uverbs_cmd_table[command]) {
726 ret = -EINVAL; 750 ret = -EINVAL;
727 goto out; 751 goto out;
728 } 752 }
729 753
730 if (!file->ucontext &&
731 command != IB_USER_VERBS_CMD_GET_CONTEXT) {
732 ret = -EINVAL;
733 goto out;
734 }
735
736 if (!(ib_dev->uverbs_cmd_mask & (1ull << command))) {
737 ret = -ENOSYS;
738 goto out;
739 }
740
741 if (hdr.in_words * 4 != count) { 754 if (hdr.in_words * 4 != count) {
742 ret = -EINVAL; 755 ret = -EINVAL;
743 goto out; 756 goto out;
@@ -749,21 +762,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
749 hdr.out_words * 4); 762 hdr.out_words * 4);
750 763
751 } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) { 764 } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
752 __u32 command;
753
754 struct ib_uverbs_ex_cmd_hdr ex_hdr; 765 struct ib_uverbs_ex_cmd_hdr ex_hdr;
755 struct ib_udata ucore; 766 struct ib_udata ucore;
756 struct ib_udata uhw; 767 struct ib_udata uhw;
757 size_t written_count = count; 768 size_t written_count = count;
758 769
759 if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
760 IB_USER_VERBS_CMD_COMMAND_MASK)) {
761 ret = -EINVAL;
762 goto out;
763 }
764
765 command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
766
767 if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) || 770 if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
768 !uverbs_ex_cmd_table[command]) { 771 !uverbs_ex_cmd_table[command]) {
769 ret = -ENOSYS; 772 ret = -ENOSYS;
@@ -775,11 +778,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
775 goto out; 778 goto out;
776 } 779 }
777 780
778 if (!(ib_dev->uverbs_ex_cmd_mask & (1ull << command))) {
779 ret = -ENOSYS;
780 goto out;
781 }
782
783 if (count < (sizeof(hdr) + sizeof(ex_hdr))) { 781 if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
784 ret = -EINVAL; 782 ret = -EINVAL;
785 goto out; 783 goto out;
@@ -1058,7 +1056,7 @@ static int find_overflow_devnum(void)
1058 ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES, 1056 ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
1059 "infiniband_verbs"); 1057 "infiniband_verbs");
1060 if (ret) { 1058 if (ret) {
1061 printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n"); 1059 pr_err("user_verbs: couldn't register dynamic device number\n");
1062 return ret; 1060 return ret;
1063 } 1061 }
1064 } 1062 }
@@ -1279,14 +1277,14 @@ static int __init ib_uverbs_init(void)
1279 ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, 1277 ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
1280 "infiniband_verbs"); 1278 "infiniband_verbs");
1281 if (ret) { 1279 if (ret) {
1282 printk(KERN_ERR "user_verbs: couldn't register device number\n"); 1280 pr_err("user_verbs: couldn't register device number\n");
1283 goto out; 1281 goto out;
1284 } 1282 }
1285 1283
1286 uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); 1284 uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
1287 if (IS_ERR(uverbs_class)) { 1285 if (IS_ERR(uverbs_class)) {
1288 ret = PTR_ERR(uverbs_class); 1286 ret = PTR_ERR(uverbs_class);
1289 printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n"); 1287 pr_err("user_verbs: couldn't create class infiniband_verbs\n");
1290 goto out_chrdev; 1288 goto out_chrdev;
1291 } 1289 }
1292 1290
@@ -1294,13 +1292,13 @@ static int __init ib_uverbs_init(void)
1294 1292
1295 ret = class_create_file(uverbs_class, &class_attr_abi_version.attr); 1293 ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
1296 if (ret) { 1294 if (ret) {
1297 printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); 1295 pr_err("user_verbs: couldn't create abi_version attribute\n");
1298 goto out_class; 1296 goto out_class;
1299 } 1297 }
1300 1298
1301 ret = ib_register_client(&uverbs_client); 1299 ret = ib_register_client(&uverbs_client);
1302 if (ret) { 1300 if (ret) {
1303 printk(KERN_ERR "user_verbs: couldn't register client\n"); 1301 pr_err("user_verbs: couldn't register client\n");
1304 goto out_class; 1302 goto out_class;
1305 } 1303 }
1306 1304
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 5af6d024e053..5cd1e3987f2b 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1567,6 +1567,8 @@ EXPORT_SYMBOL(ib_check_mr_status);
1567 * - The last sg element is allowed to have length less than page_size. 1567 * - The last sg element is allowed to have length less than page_size.
1568 * - If sg_nents total byte length exceeds the mr max_num_sge * page_size 1568 * - If sg_nents total byte length exceeds the mr max_num_sge * page_size
1569 * then only max_num_sg entries will be mapped. 1569 * then only max_num_sg entries will be mapped.
1570 * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS_REG, non of these
1571 * constraints holds and the page_size argument is ignored.
1570 * 1572 *
1571 * Returns the number of sg elements that were mapped to the memory region. 1573 * Returns the number of sg elements that were mapped to the memory region.
1572 * 1574 *
@@ -1657,3 +1659,167 @@ next_page:
1657 return i; 1659 return i;
1658} 1660}
1659EXPORT_SYMBOL(ib_sg_to_pages); 1661EXPORT_SYMBOL(ib_sg_to_pages);
1662
1663struct ib_drain_cqe {
1664 struct ib_cqe cqe;
1665 struct completion done;
1666};
1667
1668static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
1669{
1670 struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe,
1671 cqe);
1672
1673 complete(&cqe->done);
1674}
1675
1676/*
1677 * Post a WR and block until its completion is reaped for the SQ.
1678 */
1679static void __ib_drain_sq(struct ib_qp *qp)
1680{
1681 struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
1682 struct ib_drain_cqe sdrain;
1683 struct ib_send_wr swr = {}, *bad_swr;
1684 int ret;
1685
1686 if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) {
1687 WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT,
1688 "IB_POLL_DIRECT poll_ctx not supported for drain\n");
1689 return;
1690 }
1691
1692 swr.wr_cqe = &sdrain.cqe;
1693 sdrain.cqe.done = ib_drain_qp_done;
1694 init_completion(&sdrain.done);
1695
1696 ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
1697 if (ret) {
1698 WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
1699 return;
1700 }
1701
1702 ret = ib_post_send(qp, &swr, &bad_swr);
1703 if (ret) {
1704 WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
1705 return;
1706 }
1707
1708 wait_for_completion(&sdrain.done);
1709}
1710
1711/*
1712 * Post a WR and block until its completion is reaped for the RQ.
1713 */
1714static void __ib_drain_rq(struct ib_qp *qp)
1715{
1716 struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
1717 struct ib_drain_cqe rdrain;
1718 struct ib_recv_wr rwr = {}, *bad_rwr;
1719 int ret;
1720
1721 if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) {
1722 WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT,
1723 "IB_POLL_DIRECT poll_ctx not supported for drain\n");
1724 return;
1725 }
1726
1727 rwr.wr_cqe = &rdrain.cqe;
1728 rdrain.cqe.done = ib_drain_qp_done;
1729 init_completion(&rdrain.done);
1730
1731 ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
1732 if (ret) {
1733 WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
1734 return;
1735 }
1736
1737 ret = ib_post_recv(qp, &rwr, &bad_rwr);
1738 if (ret) {
1739 WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
1740 return;
1741 }
1742
1743 wait_for_completion(&rdrain.done);
1744}
1745
1746/**
1747 * ib_drain_sq() - Block until all SQ CQEs have been consumed by the
1748 * application.
1749 * @qp: queue pair to drain
1750 *
1751 * If the device has a provider-specific drain function, then
1752 * call that. Otherwise call the generic drain function
1753 * __ib_drain_sq().
1754 *
1755 * The caller must:
1756 *
1757 * ensure there is room in the CQ and SQ for the drain work request and
1758 * completion.
1759 *
1760 * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
1761 * IB_POLL_DIRECT.
1762 *
1763 * ensure that there are no other contexts that are posting WRs concurrently.
1764 * Otherwise the drain is not guaranteed.
1765 */
1766void ib_drain_sq(struct ib_qp *qp)
1767{
1768 if (qp->device->drain_sq)
1769 qp->device->drain_sq(qp);
1770 else
1771 __ib_drain_sq(qp);
1772}
1773EXPORT_SYMBOL(ib_drain_sq);
1774
1775/**
1776 * ib_drain_rq() - Block until all RQ CQEs have been consumed by the
1777 * application.
1778 * @qp: queue pair to drain
1779 *
1780 * If the device has a provider-specific drain function, then
1781 * call that. Otherwise call the generic drain function
1782 * __ib_drain_rq().
1783 *
1784 * The caller must:
1785 *
1786 * ensure there is room in the CQ and RQ for the drain work request and
1787 * completion.
1788 *
1789 * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
1790 * IB_POLL_DIRECT.
1791 *
1792 * ensure that there are no other contexts that are posting WRs concurrently.
1793 * Otherwise the drain is not guaranteed.
1794 */
1795void ib_drain_rq(struct ib_qp *qp)
1796{
1797 if (qp->device->drain_rq)
1798 qp->device->drain_rq(qp);
1799 else
1800 __ib_drain_rq(qp);
1801}
1802EXPORT_SYMBOL(ib_drain_rq);
1803
1804/**
1805 * ib_drain_qp() - Block until all CQEs have been consumed by the
1806 * application on both the RQ and SQ.
1807 * @qp: queue pair to drain
1808 *
1809 * The caller must:
1810 *
1811 * ensure there is room in the CQ(s), SQ, and RQ for drain work requests
1812 * and completions.
1813 *
1814 * allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be
1815 * IB_POLL_DIRECT.
1816 *
1817 * ensure that there are no other contexts that are posting WRs concurrently.
1818 * Otherwise the drain is not guaranteed.
1819 */
1820void ib_drain_qp(struct ib_qp *qp)
1821{
1822 ib_drain_sq(qp);
1823 ib_drain_rq(qp);
1824}
1825EXPORT_SYMBOL(ib_drain_qp);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index f504ba73e5dc..d403231a4aff 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1877,7 +1877,7 @@ err:
1877static int is_loopback_dst(struct iw_cm_id *cm_id) 1877static int is_loopback_dst(struct iw_cm_id *cm_id)
1878{ 1878{
1879 struct net_device *dev; 1879 struct net_device *dev;
1880 struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; 1880 struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
1881 1881
1882 dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr); 1882 dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr);
1883 if (!dev) 1883 if (!dev)
@@ -1892,10 +1892,10 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1892 struct iwch_ep *ep; 1892 struct iwch_ep *ep;
1893 struct rtable *rt; 1893 struct rtable *rt;
1894 int err = 0; 1894 int err = 0;
1895 struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; 1895 struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
1896 struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; 1896 struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
1897 1897
1898 if (cm_id->remote_addr.ss_family != PF_INET) { 1898 if (cm_id->m_remote_addr.ss_family != PF_INET) {
1899 err = -ENOSYS; 1899 err = -ENOSYS;
1900 goto out; 1900 goto out;
1901 } 1901 }
@@ -1961,9 +1961,9 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1961 1961
1962 state_set(&ep->com, CONNECTING); 1962 state_set(&ep->com, CONNECTING);
1963 ep->tos = IPTOS_LOWDELAY; 1963 ep->tos = IPTOS_LOWDELAY;
1964 memcpy(&ep->com.local_addr, &cm_id->local_addr, 1964 memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
1965 sizeof(ep->com.local_addr)); 1965 sizeof(ep->com.local_addr));
1966 memcpy(&ep->com.remote_addr, &cm_id->remote_addr, 1966 memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
1967 sizeof(ep->com.remote_addr)); 1967 sizeof(ep->com.remote_addr));
1968 1968
1969 /* send connect request to rnic */ 1969 /* send connect request to rnic */
@@ -1992,7 +1992,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
1992 1992
1993 might_sleep(); 1993 might_sleep();
1994 1994
1995 if (cm_id->local_addr.ss_family != PF_INET) { 1995 if (cm_id->m_local_addr.ss_family != PF_INET) {
1996 err = -ENOSYS; 1996 err = -ENOSYS;
1997 goto fail1; 1997 goto fail1;
1998 } 1998 }
@@ -2008,7 +2008,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
2008 cm_id->add_ref(cm_id); 2008 cm_id->add_ref(cm_id);
2009 ep->com.cm_id = cm_id; 2009 ep->com.cm_id = cm_id;
2010 ep->backlog = backlog; 2010 ep->backlog = backlog;
2011 memcpy(&ep->com.local_addr, &cm_id->local_addr, 2011 memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
2012 sizeof(ep->com.local_addr)); 2012 sizeof(ep->com.local_addr));
2013 2013
2014 /* 2014 /*
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 2734820d291b..42a7b8952d13 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -657,7 +657,8 @@ err:
657 return ERR_PTR(err); 657 return ERR_PTR(err);
658} 658}
659 659
660static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) 660static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
661 struct ib_udata *udata)
661{ 662{
662 struct iwch_dev *rhp; 663 struct iwch_dev *rhp;
663 struct iwch_pd *php; 664 struct iwch_pd *php;
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index cd2ff5f9518a..651711370d55 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -302,7 +302,7 @@ void _c4iw_free_ep(struct kref *kref)
302 if (ep->com.remote_addr.ss_family == AF_INET6) { 302 if (ep->com.remote_addr.ss_family == AF_INET6) {
303 struct sockaddr_in6 *sin6 = 303 struct sockaddr_in6 *sin6 =
304 (struct sockaddr_in6 *) 304 (struct sockaddr_in6 *)
305 &ep->com.mapped_local_addr; 305 &ep->com.local_addr;
306 306
307 cxgb4_clip_release( 307 cxgb4_clip_release(
308 ep->com.dev->rdev.lldi.ports[0], 308 ep->com.dev->rdev.lldi.ports[0],
@@ -314,12 +314,6 @@ void _c4iw_free_ep(struct kref *kref)
314 dst_release(ep->dst); 314 dst_release(ep->dst);
315 cxgb4_l2t_release(ep->l2t); 315 cxgb4_l2t_release(ep->l2t);
316 } 316 }
317 if (test_bit(RELEASE_MAPINFO, &ep->com.flags)) {
318 print_addr(&ep->com, __func__, "remove_mapinfo/mapping");
319 iwpm_remove_mapinfo(&ep->com.local_addr,
320 &ep->com.mapped_local_addr);
321 iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW);
322 }
323 kfree(ep); 317 kfree(ep);
324} 318}
325 319
@@ -455,7 +449,7 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
455 state_set(&ep->com, DEAD); 449 state_set(&ep->com, DEAD);
456 if (ep->com.remote_addr.ss_family == AF_INET6) { 450 if (ep->com.remote_addr.ss_family == AF_INET6) {
457 struct sockaddr_in6 *sin6 = 451 struct sockaddr_in6 *sin6 =
458 (struct sockaddr_in6 *)&ep->com.mapped_local_addr; 452 (struct sockaddr_in6 *)&ep->com.local_addr;
459 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 453 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
460 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 454 (const u32 *)&sin6->sin6_addr.s6_addr, 1);
461 } 455 }
@@ -485,12 +479,19 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
485 unsigned int flowclen = 80; 479 unsigned int flowclen = 80;
486 struct fw_flowc_wr *flowc; 480 struct fw_flowc_wr *flowc;
487 int i; 481 int i;
482 u16 vlan = ep->l2t->vlan;
483 int nparams;
484
485 if (vlan == CPL_L2T_VLAN_NONE)
486 nparams = 8;
487 else
488 nparams = 9;
488 489
489 skb = get_skb(skb, flowclen, GFP_KERNEL); 490 skb = get_skb(skb, flowclen, GFP_KERNEL);
490 flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen); 491 flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen);
491 492
492 flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | 493 flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
493 FW_FLOWC_WR_NPARAMS_V(8)); 494 FW_FLOWC_WR_NPARAMS_V(nparams));
494 flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen, 495 flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen,
495 16)) | FW_WR_FLOWID_V(ep->hwtid)); 496 16)) | FW_WR_FLOWID_V(ep->hwtid));
496 497
@@ -511,9 +512,17 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
511 flowc->mnemval[6].val = cpu_to_be32(ep->snd_win); 512 flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
512 flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; 513 flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
513 flowc->mnemval[7].val = cpu_to_be32(ep->emss); 514 flowc->mnemval[7].val = cpu_to_be32(ep->emss);
514 /* Pad WR to 16 byte boundary */ 515 if (nparams == 9) {
515 flowc->mnemval[8].mnemonic = 0; 516 u16 pri;
516 flowc->mnemval[8].val = 0; 517
518 pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
519 flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
520 flowc->mnemval[8].val = cpu_to_be32(pri);
521 } else {
522 /* Pad WR to 16 byte boundary */
523 flowc->mnemval[8].mnemonic = 0;
524 flowc->mnemval[8].val = 0;
525 }
517 for (i = 0; i < 9; i++) { 526 for (i = 0; i < 9; i++) {
518 flowc->mnemval[i].r4[0] = 0; 527 flowc->mnemval[i].r4[0] = 0;
519 flowc->mnemval[i].r4[1] = 0; 528 flowc->mnemval[i].r4[1] = 0;
@@ -568,54 +577,6 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
568 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 577 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
569} 578}
570 579
571/*
572 * c4iw_form_pm_msg - Form a port mapper message with mapping info
573 */
574static void c4iw_form_pm_msg(struct c4iw_ep *ep,
575 struct iwpm_sa_data *pm_msg)
576{
577 memcpy(&pm_msg->loc_addr, &ep->com.local_addr,
578 sizeof(ep->com.local_addr));
579 memcpy(&pm_msg->rem_addr, &ep->com.remote_addr,
580 sizeof(ep->com.remote_addr));
581}
582
583/*
584 * c4iw_form_reg_msg - Form a port mapper message with dev info
585 */
586static void c4iw_form_reg_msg(struct c4iw_dev *dev,
587 struct iwpm_dev_data *pm_msg)
588{
589 memcpy(pm_msg->dev_name, dev->ibdev.name, IWPM_DEVNAME_SIZE);
590 memcpy(pm_msg->if_name, dev->rdev.lldi.ports[0]->name,
591 IWPM_IFNAME_SIZE);
592}
593
594static void c4iw_record_pm_msg(struct c4iw_ep *ep,
595 struct iwpm_sa_data *pm_msg)
596{
597 memcpy(&ep->com.mapped_local_addr, &pm_msg->mapped_loc_addr,
598 sizeof(ep->com.mapped_local_addr));
599 memcpy(&ep->com.mapped_remote_addr, &pm_msg->mapped_rem_addr,
600 sizeof(ep->com.mapped_remote_addr));
601}
602
603static int get_remote_addr(struct c4iw_ep *parent_ep, struct c4iw_ep *child_ep)
604{
605 int ret;
606
607 print_addr(&parent_ep->com, __func__, "get_remote_addr parent_ep ");
608 print_addr(&child_ep->com, __func__, "get_remote_addr child_ep ");
609
610 ret = iwpm_get_remote_info(&parent_ep->com.mapped_local_addr,
611 &child_ep->com.mapped_remote_addr,
612 &child_ep->com.remote_addr, RDMA_NL_C4IW);
613 if (ret)
614 PDBG("Unable to find remote peer addr info - err %d\n", ret);
615
616 return ret;
617}
618
619static void best_mtu(const unsigned short *mtus, unsigned short mtu, 580static void best_mtu(const unsigned short *mtus, unsigned short mtu,
620 unsigned int *idx, int use_ts, int ipv6) 581 unsigned int *idx, int use_ts, int ipv6)
621{ 582{
@@ -645,13 +606,13 @@ static int send_connect(struct c4iw_ep *ep)
645 int wscale; 606 int wscale;
646 int win, sizev4, sizev6, wrlen; 607 int win, sizev4, sizev6, wrlen;
647 struct sockaddr_in *la = (struct sockaddr_in *) 608 struct sockaddr_in *la = (struct sockaddr_in *)
648 &ep->com.mapped_local_addr; 609 &ep->com.local_addr;
649 struct sockaddr_in *ra = (struct sockaddr_in *) 610 struct sockaddr_in *ra = (struct sockaddr_in *)
650 &ep->com.mapped_remote_addr; 611 &ep->com.remote_addr;
651 struct sockaddr_in6 *la6 = (struct sockaddr_in6 *) 612 struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)
652 &ep->com.mapped_local_addr; 613 &ep->com.local_addr;
653 struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *) 614 struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
654 &ep->com.mapped_remote_addr; 615 &ep->com.remote_addr;
655 int ret; 616 int ret;
656 enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; 617 enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
657 u32 isn = (prandom_u32() & ~7UL) - 1; 618 u32 isn = (prandom_u32() & ~7UL) - 1;
@@ -710,7 +671,7 @@ static int send_connect(struct c4iw_ep *ep)
710 L2T_IDX_V(ep->l2t->idx) | 671 L2T_IDX_V(ep->l2t->idx) |
711 TX_CHAN_V(ep->tx_chan) | 672 TX_CHAN_V(ep->tx_chan) |
712 SMAC_SEL_V(ep->smac_idx) | 673 SMAC_SEL_V(ep->smac_idx) |
713 DSCP_V(ep->tos) | 674 DSCP_V(ep->tos >> 2) |
714 ULP_MODE_V(ULP_MODE_TCPDDP) | 675 ULP_MODE_V(ULP_MODE_TCPDDP) |
715 RCV_BUFSIZ_V(win); 676 RCV_BUFSIZ_V(win);
716 opt2 = RX_CHANNEL_V(0) | 677 opt2 = RX_CHANNEL_V(0) |
@@ -1829,10 +1790,10 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
1829 req->le.filter = cpu_to_be32(cxgb4_select_ntuple( 1790 req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
1830 ep->com.dev->rdev.lldi.ports[0], 1791 ep->com.dev->rdev.lldi.ports[0],
1831 ep->l2t)); 1792 ep->l2t));
1832 sin = (struct sockaddr_in *)&ep->com.mapped_local_addr; 1793 sin = (struct sockaddr_in *)&ep->com.local_addr;
1833 req->le.lport = sin->sin_port; 1794 req->le.lport = sin->sin_port;
1834 req->le.u.ipv4.lip = sin->sin_addr.s_addr; 1795 req->le.u.ipv4.lip = sin->sin_addr.s_addr;
1835 sin = (struct sockaddr_in *)&ep->com.mapped_remote_addr; 1796 sin = (struct sockaddr_in *)&ep->com.remote_addr;
1836 req->le.pport = sin->sin_port; 1797 req->le.pport = sin->sin_port;
1837 req->le.u.ipv4.pip = sin->sin_addr.s_addr; 1798 req->le.u.ipv4.pip = sin->sin_addr.s_addr;
1838 req->tcb.t_state_to_astid = 1799 req->tcb.t_state_to_astid =
@@ -1864,7 +1825,7 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
1864 L2T_IDX_V(ep->l2t->idx) | 1825 L2T_IDX_V(ep->l2t->idx) |
1865 TX_CHAN_V(ep->tx_chan) | 1826 TX_CHAN_V(ep->tx_chan) |
1866 SMAC_SEL_V(ep->smac_idx) | 1827 SMAC_SEL_V(ep->smac_idx) |
1867 DSCP_V(ep->tos) | 1828 DSCP_V(ep->tos >> 2) |
1868 ULP_MODE_V(ULP_MODE_TCPDDP) | 1829 ULP_MODE_V(ULP_MODE_TCPDDP) |
1869 RCV_BUFSIZ_V(win)); 1830 RCV_BUFSIZ_V(win));
1870 req->tcb.opt2 = (__force __be32) (PACE_V(1) | 1831 req->tcb.opt2 = (__force __be32) (PACE_V(1) |
@@ -1928,7 +1889,7 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
1928 1889
1929static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, 1890static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
1930 struct dst_entry *dst, struct c4iw_dev *cdev, 1891 struct dst_entry *dst, struct c4iw_dev *cdev,
1931 bool clear_mpa_v1, enum chip_type adapter_type) 1892 bool clear_mpa_v1, enum chip_type adapter_type, u8 tos)
1932{ 1893{
1933 struct neighbour *n; 1894 struct neighbour *n;
1934 int err, step; 1895 int err, step;
@@ -1958,7 +1919,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
1958 goto out; 1919 goto out;
1959 } 1920 }
1960 ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, 1921 ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
1961 n, pdev, 0); 1922 n, pdev, rt_tos2priority(tos));
1962 if (!ep->l2t) 1923 if (!ep->l2t)
1963 goto out; 1924 goto out;
1964 ep->mtu = pdev->mtu; 1925 ep->mtu = pdev->mtu;
@@ -2013,13 +1974,13 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
2013{ 1974{
2014 int err = 0; 1975 int err = 0;
2015 struct sockaddr_in *laddr = (struct sockaddr_in *) 1976 struct sockaddr_in *laddr = (struct sockaddr_in *)
2016 &ep->com.cm_id->local_addr; 1977 &ep->com.cm_id->m_local_addr;
2017 struct sockaddr_in *raddr = (struct sockaddr_in *) 1978 struct sockaddr_in *raddr = (struct sockaddr_in *)
2018 &ep->com.cm_id->remote_addr; 1979 &ep->com.cm_id->m_remote_addr;
2019 struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *) 1980 struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)
2020 &ep->com.cm_id->local_addr; 1981 &ep->com.cm_id->m_local_addr;
2021 struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *) 1982 struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
2022 &ep->com.cm_id->remote_addr; 1983 &ep->com.cm_id->m_remote_addr;
2023 int iptype; 1984 int iptype;
2024 __u8 *ra; 1985 __u8 *ra;
2025 1986
@@ -2038,10 +1999,10 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
2038 insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid); 1999 insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid);
2039 2000
2040 /* find a route */ 2001 /* find a route */
2041 if (ep->com.cm_id->local_addr.ss_family == AF_INET) { 2002 if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
2042 ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr, 2003 ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr,
2043 raddr->sin_addr.s_addr, laddr->sin_port, 2004 raddr->sin_addr.s_addr, laddr->sin_port,
2044 raddr->sin_port, 0); 2005 raddr->sin_port, ep->com.cm_id->tos);
2045 iptype = 4; 2006 iptype = 4;
2046 ra = (__u8 *)&raddr->sin_addr; 2007 ra = (__u8 *)&raddr->sin_addr;
2047 } else { 2008 } else {
@@ -2058,7 +2019,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
2058 goto fail3; 2019 goto fail3;
2059 } 2020 }
2060 err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false, 2021 err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false,
2061 ep->com.dev->rdev.lldi.adapter_type); 2022 ep->com.dev->rdev.lldi.adapter_type,
2023 ep->com.cm_id->tos);
2062 if (err) { 2024 if (err) {
2063 pr_err("%s - cannot alloc l2e.\n", __func__); 2025 pr_err("%s - cannot alloc l2e.\n", __func__);
2064 goto fail4; 2026 goto fail4;
@@ -2069,7 +2031,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
2069 ep->l2t->idx); 2031 ep->l2t->idx);
2070 2032
2071 state_set(&ep->com, CONNECTING); 2033 state_set(&ep->com, CONNECTING);
2072 ep->tos = 0; 2034 ep->tos = ep->com.cm_id->tos;
2073 2035
2074 /* send connect request to rnic */ 2036 /* send connect request to rnic */
2075 err = send_connect(ep); 2037 err = send_connect(ep);
@@ -2109,10 +2071,10 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2109 struct sockaddr_in6 *ra6; 2071 struct sockaddr_in6 *ra6;
2110 2072
2111 ep = lookup_atid(t, atid); 2073 ep = lookup_atid(t, atid);
2112 la = (struct sockaddr_in *)&ep->com.mapped_local_addr; 2074 la = (struct sockaddr_in *)&ep->com.local_addr;
2113 ra = (struct sockaddr_in *)&ep->com.mapped_remote_addr; 2075 ra = (struct sockaddr_in *)&ep->com.remote_addr;
2114 la6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr; 2076 la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
2115 ra6 = (struct sockaddr_in6 *)&ep->com.mapped_remote_addr; 2077 ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
2116 2078
2117 PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid, 2079 PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
2118 status, status2errno(status)); 2080 status, status2errno(status));
@@ -2154,7 +2116,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2154 if (ep->com.remote_addr.ss_family == AF_INET6) { 2116 if (ep->com.remote_addr.ss_family == AF_INET6) {
2155 struct sockaddr_in6 *sin6 = 2117 struct sockaddr_in6 *sin6 =
2156 (struct sockaddr_in6 *) 2118 (struct sockaddr_in6 *)
2157 &ep->com.mapped_local_addr; 2119 &ep->com.local_addr;
2158 cxgb4_clip_release( 2120 cxgb4_clip_release(
2159 ep->com.dev->rdev.lldi.ports[0], 2121 ep->com.dev->rdev.lldi.ports[0],
2160 (const u32 *) 2122 (const u32 *)
@@ -2189,7 +2151,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2189 2151
2190 if (ep->com.remote_addr.ss_family == AF_INET6) { 2152 if (ep->com.remote_addr.ss_family == AF_INET6) {
2191 struct sockaddr_in6 *sin6 = 2153 struct sockaddr_in6 *sin6 =
2192 (struct sockaddr_in6 *)&ep->com.mapped_local_addr; 2154 (struct sockaddr_in6 *)&ep->com.local_addr;
2193 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 2155 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
2194 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 2156 (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2195 } 2157 }
@@ -2391,6 +2353,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2391 u16 peer_mss = ntohs(req->tcpopt.mss); 2353 u16 peer_mss = ntohs(req->tcpopt.mss);
2392 int iptype; 2354 int iptype;
2393 unsigned short hdrs; 2355 unsigned short hdrs;
2356 u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
2394 2357
2395 parent_ep = lookup_stid(t, stid); 2358 parent_ep = lookup_stid(t, stid);
2396 if (!parent_ep) { 2359 if (!parent_ep) {
@@ -2399,8 +2362,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2399 } 2362 }
2400 2363
2401 if (state_read(&parent_ep->com) != LISTEN) { 2364 if (state_read(&parent_ep->com) != LISTEN) {
2402 printk(KERN_ERR "%s - listening ep not in LISTEN\n", 2365 PDBG("%s - listening ep not in LISTEN\n", __func__);
2403 __func__);
2404 goto reject; 2366 goto reject;
2405 } 2367 }
2406 2368
@@ -2415,7 +2377,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2415 ntohs(peer_port), peer_mss); 2377 ntohs(peer_port), peer_mss);
2416 dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip, 2378 dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip,
2417 local_port, peer_port, 2379 local_port, peer_port,
2418 PASS_OPEN_TOS_G(ntohl(req->tos_stid))); 2380 tos);
2419 } else { 2381 } else {
2420 PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" 2382 PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
2421 , __func__, parent_ep, hwtid, 2383 , __func__, parent_ep, hwtid,
@@ -2441,7 +2403,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2441 } 2403 }
2442 2404
2443 err = import_ep(child_ep, iptype, peer_ip, dst, dev, false, 2405 err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
2444 parent_ep->com.dev->rdev.lldi.adapter_type); 2406 parent_ep->com.dev->rdev.lldi.adapter_type, tos);
2445 if (err) { 2407 if (err) {
2446 printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", 2408 printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
2447 __func__); 2409 __func__);
@@ -2459,18 +2421,9 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2459 child_ep->com.dev = dev; 2421 child_ep->com.dev = dev;
2460 child_ep->com.cm_id = NULL; 2422 child_ep->com.cm_id = NULL;
2461 2423
2462 /*
2463 * The mapped_local and mapped_remote addresses get setup with
2464 * the actual 4-tuple. The local address will be based on the
2465 * actual local address of the connection, but on the port number
2466 * of the parent listening endpoint. The remote address is
2467 * setup based on a query to the IWPM since we don't know what it
2468 * originally was before mapping. If no mapping was done, then
2469 * mapped_remote == remote, and mapped_local == local.
2470 */
2471 if (iptype == 4) { 2424 if (iptype == 4) {
2472 struct sockaddr_in *sin = (struct sockaddr_in *) 2425 struct sockaddr_in *sin = (struct sockaddr_in *)
2473 &child_ep->com.mapped_local_addr; 2426 &child_ep->com.local_addr;
2474 2427
2475 sin->sin_family = PF_INET; 2428 sin->sin_family = PF_INET;
2476 sin->sin_port = local_port; 2429 sin->sin_port = local_port;
@@ -2482,12 +2435,12 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2482 &parent_ep->com.local_addr)->sin_port; 2435 &parent_ep->com.local_addr)->sin_port;
2483 sin->sin_addr.s_addr = *(__be32 *)local_ip; 2436 sin->sin_addr.s_addr = *(__be32 *)local_ip;
2484 2437
2485 sin = (struct sockaddr_in *)&child_ep->com.mapped_remote_addr; 2438 sin = (struct sockaddr_in *)&child_ep->com.remote_addr;
2486 sin->sin_family = PF_INET; 2439 sin->sin_family = PF_INET;
2487 sin->sin_port = peer_port; 2440 sin->sin_port = peer_port;
2488 sin->sin_addr.s_addr = *(__be32 *)peer_ip; 2441 sin->sin_addr.s_addr = *(__be32 *)peer_ip;
2489 } else { 2442 } else {
2490 sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_local_addr; 2443 sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2491 sin6->sin6_family = PF_INET6; 2444 sin6->sin6_family = PF_INET6;
2492 sin6->sin6_port = local_port; 2445 sin6->sin6_port = local_port;
2493 memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); 2446 memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
@@ -2498,18 +2451,15 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2498 &parent_ep->com.local_addr)->sin6_port; 2451 &parent_ep->com.local_addr)->sin6_port;
2499 memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); 2452 memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2500 2453
2501 sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_remote_addr; 2454 sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr;
2502 sin6->sin6_family = PF_INET6; 2455 sin6->sin6_family = PF_INET6;
2503 sin6->sin6_port = peer_port; 2456 sin6->sin6_port = peer_port;
2504 memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16); 2457 memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16);
2505 } 2458 }
2506 memcpy(&child_ep->com.remote_addr, &child_ep->com.mapped_remote_addr,
2507 sizeof(child_ep->com.remote_addr));
2508 get_remote_addr(parent_ep, child_ep);
2509 2459
2510 c4iw_get_ep(&parent_ep->com); 2460 c4iw_get_ep(&parent_ep->com);
2511 child_ep->parent_ep = parent_ep; 2461 child_ep->parent_ep = parent_ep;
2512 child_ep->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid)); 2462 child_ep->tos = tos;
2513 child_ep->dst = dst; 2463 child_ep->dst = dst;
2514 child_ep->hwtid = hwtid; 2464 child_ep->hwtid = hwtid;
2515 2465
@@ -2522,7 +2472,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2522 accept_cr(child_ep, skb, req); 2472 accept_cr(child_ep, skb, req);
2523 set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); 2473 set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
2524 if (iptype == 6) { 2474 if (iptype == 6) {
2525 sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_local_addr; 2475 sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2526 cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0], 2476 cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0],
2527 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 2477 (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2528 } 2478 }
@@ -2765,7 +2715,7 @@ out:
2765 if (ep->com.remote_addr.ss_family == AF_INET6) { 2715 if (ep->com.remote_addr.ss_family == AF_INET6) {
2766 struct sockaddr_in6 *sin6 = 2716 struct sockaddr_in6 *sin6 =
2767 (struct sockaddr_in6 *) 2717 (struct sockaddr_in6 *)
2768 &ep->com.mapped_local_addr; 2718 &ep->com.local_addr;
2769 cxgb4_clip_release( 2719 cxgb4_clip_release(
2770 ep->com.dev->rdev.lldi.ports[0], 2720 ep->com.dev->rdev.lldi.ports[0],
2771 (const u32 *)&sin6->sin6_addr.s6_addr, 2721 (const u32 *)&sin6->sin6_addr.s6_addr,
@@ -3026,8 +2976,8 @@ static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
3026{ 2976{
3027 struct in_device *ind; 2977 struct in_device *ind;
3028 int found = 0; 2978 int found = 0;
3029 struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; 2979 struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
3030 struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; 2980 struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
3031 2981
3032 ind = in_dev_get(dev->rdev.lldi.ports[0]); 2982 ind = in_dev_get(dev->rdev.lldi.ports[0]);
3033 if (!ind) 2983 if (!ind)
@@ -3072,8 +3022,8 @@ static int get_lladdr(struct net_device *dev, struct in6_addr *addr,
3072static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id) 3022static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
3073{ 3023{
3074 struct in6_addr uninitialized_var(addr); 3024 struct in6_addr uninitialized_var(addr);
3075 struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->local_addr; 3025 struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
3076 struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->remote_addr; 3026 struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
3077 3027
3078 if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) { 3028 if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) {
3079 memcpy(la6->sin6_addr.s6_addr, &addr, 16); 3029 memcpy(la6->sin6_addr.s6_addr, &addr, 16);
@@ -3092,11 +3042,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3092 struct sockaddr_in *raddr; 3042 struct sockaddr_in *raddr;
3093 struct sockaddr_in6 *laddr6; 3043 struct sockaddr_in6 *laddr6;
3094 struct sockaddr_in6 *raddr6; 3044 struct sockaddr_in6 *raddr6;
3095 struct iwpm_dev_data pm_reg_msg;
3096 struct iwpm_sa_data pm_msg;
3097 __u8 *ra; 3045 __u8 *ra;
3098 int iptype; 3046 int iptype;
3099 int iwpm_err = 0;
3100 3047
3101 if ((conn_param->ord > cur_max_read_depth(dev)) || 3048 if ((conn_param->ord > cur_max_read_depth(dev)) ||
3102 (conn_param->ird > cur_max_read_depth(dev))) { 3049 (conn_param->ird > cur_max_read_depth(dev))) {
@@ -3144,47 +3091,17 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3144 } 3091 }
3145 insert_handle(dev, &dev->atid_idr, ep, ep->atid); 3092 insert_handle(dev, &dev->atid_idr, ep, ep->atid);
3146 3093
3147 memcpy(&ep->com.local_addr, &cm_id->local_addr, 3094 memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3148 sizeof(ep->com.local_addr)); 3095 sizeof(ep->com.local_addr));
3149 memcpy(&ep->com.remote_addr, &cm_id->remote_addr, 3096 memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
3150 sizeof(ep->com.remote_addr)); 3097 sizeof(ep->com.remote_addr));
3151 3098
3152 /* No port mapper available, go with the specified peer information */ 3099 laddr = (struct sockaddr_in *)&ep->com.local_addr;
3153 memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr, 3100 raddr = (struct sockaddr_in *)&ep->com.remote_addr;
3154 sizeof(ep->com.mapped_local_addr)); 3101 laddr6 = (struct sockaddr_in6 *)&ep->com.local_addr;
3155 memcpy(&ep->com.mapped_remote_addr, &cm_id->remote_addr, 3102 raddr6 = (struct sockaddr_in6 *) &ep->com.remote_addr;
3156 sizeof(ep->com.mapped_remote_addr));
3157
3158 c4iw_form_reg_msg(dev, &pm_reg_msg);
3159 iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
3160 if (iwpm_err) {
3161 PDBG("%s: Port Mapper reg pid fail (err = %d).\n",
3162 __func__, iwpm_err);
3163 }
3164 if (iwpm_valid_pid() && !iwpm_err) {
3165 c4iw_form_pm_msg(ep, &pm_msg);
3166 iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_C4IW);
3167 if (iwpm_err)
3168 PDBG("%s: Port Mapper query fail (err = %d).\n",
3169 __func__, iwpm_err);
3170 else
3171 c4iw_record_pm_msg(ep, &pm_msg);
3172 }
3173 if (iwpm_create_mapinfo(&ep->com.local_addr,
3174 &ep->com.mapped_local_addr, RDMA_NL_C4IW)) {
3175 iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW);
3176 err = -ENOMEM;
3177 goto fail1;
3178 }
3179 print_addr(&ep->com, __func__, "add_query/create_mapinfo");
3180 set_bit(RELEASE_MAPINFO, &ep->com.flags);
3181
3182 laddr = (struct sockaddr_in *)&ep->com.mapped_local_addr;
3183 raddr = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
3184 laddr6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
3185 raddr6 = (struct sockaddr_in6 *) &ep->com.mapped_remote_addr;
3186 3103
3187 if (cm_id->remote_addr.ss_family == AF_INET) { 3104 if (cm_id->m_remote_addr.ss_family == AF_INET) {
3188 iptype = 4; 3105 iptype = 4;
3189 ra = (__u8 *)&raddr->sin_addr; 3106 ra = (__u8 *)&raddr->sin_addr;
3190 3107
@@ -3203,7 +3120,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3203 ra, ntohs(raddr->sin_port)); 3120 ra, ntohs(raddr->sin_port));
3204 ep->dst = find_route(dev, laddr->sin_addr.s_addr, 3121 ep->dst = find_route(dev, laddr->sin_addr.s_addr,
3205 raddr->sin_addr.s_addr, laddr->sin_port, 3122 raddr->sin_addr.s_addr, laddr->sin_port,
3206 raddr->sin_port, 0); 3123 raddr->sin_port, cm_id->tos);
3207 } else { 3124 } else {
3208 iptype = 6; 3125 iptype = 6;
3209 ra = (__u8 *)&raddr6->sin6_addr; 3126 ra = (__u8 *)&raddr6->sin6_addr;
@@ -3234,7 +3151,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3234 } 3151 }
3235 3152
3236 err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true, 3153 err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
3237 ep->com.dev->rdev.lldi.adapter_type); 3154 ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
3238 if (err) { 3155 if (err) {
3239 printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); 3156 printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
3240 goto fail3; 3157 goto fail3;
@@ -3245,7 +3162,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3245 ep->l2t->idx); 3162 ep->l2t->idx);
3246 3163
3247 state_set(&ep->com, CONNECTING); 3164 state_set(&ep->com, CONNECTING);
3248 ep->tos = 0; 3165 ep->tos = cm_id->tos;
3249 3166
3250 /* send connect request to rnic */ 3167 /* send connect request to rnic */
3251 err = send_connect(ep); 3168 err = send_connect(ep);
@@ -3269,7 +3186,7 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3269{ 3186{
3270 int err; 3187 int err;
3271 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) 3188 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
3272 &ep->com.mapped_local_addr; 3189 &ep->com.local_addr;
3273 3190
3274 if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) { 3191 if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
3275 err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0], 3192 err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
@@ -3302,7 +3219,7 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3302{ 3219{
3303 int err; 3220 int err;
3304 struct sockaddr_in *sin = (struct sockaddr_in *) 3221 struct sockaddr_in *sin = (struct sockaddr_in *)
3305 &ep->com.mapped_local_addr; 3222 &ep->com.local_addr;
3306 3223
3307 if (dev->rdev.lldi.enable_fw_ofld_conn) { 3224 if (dev->rdev.lldi.enable_fw_ofld_conn) {
3308 do { 3225 do {
@@ -3343,9 +3260,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3343 int err = 0; 3260 int err = 0;
3344 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); 3261 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
3345 struct c4iw_listen_ep *ep; 3262 struct c4iw_listen_ep *ep;
3346 struct iwpm_dev_data pm_reg_msg;
3347 struct iwpm_sa_data pm_msg;
3348 int iwpm_err = 0;
3349 3263
3350 might_sleep(); 3264 might_sleep();
3351 3265
@@ -3360,7 +3274,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3360 ep->com.cm_id = cm_id; 3274 ep->com.cm_id = cm_id;
3361 ep->com.dev = dev; 3275 ep->com.dev = dev;
3362 ep->backlog = backlog; 3276 ep->backlog = backlog;
3363 memcpy(&ep->com.local_addr, &cm_id->local_addr, 3277 memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3364 sizeof(ep->com.local_addr)); 3278 sizeof(ep->com.local_addr));
3365 3279
3366 /* 3280 /*
@@ -3369,10 +3283,10 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3369 if (dev->rdev.lldi.enable_fw_ofld_conn && 3283 if (dev->rdev.lldi.enable_fw_ofld_conn &&
3370 ep->com.local_addr.ss_family == AF_INET) 3284 ep->com.local_addr.ss_family == AF_INET)
3371 ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, 3285 ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids,
3372 cm_id->local_addr.ss_family, ep); 3286 cm_id->m_local_addr.ss_family, ep);
3373 else 3287 else
3374 ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, 3288 ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids,
3375 cm_id->local_addr.ss_family, ep); 3289 cm_id->m_local_addr.ss_family, ep);
3376 3290
3377 if (ep->stid == -1) { 3291 if (ep->stid == -1) {
3378 printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__); 3292 printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
@@ -3381,36 +3295,9 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3381 } 3295 }
3382 insert_handle(dev, &dev->stid_idr, ep, ep->stid); 3296 insert_handle(dev, &dev->stid_idr, ep, ep->stid);
3383 3297
3384 /* No port mapper available, go with the specified info */ 3298 memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3385 memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr, 3299 sizeof(ep->com.local_addr));
3386 sizeof(ep->com.mapped_local_addr));
3387
3388 c4iw_form_reg_msg(dev, &pm_reg_msg);
3389 iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
3390 if (iwpm_err) {
3391 PDBG("%s: Port Mapper reg pid fail (err = %d).\n",
3392 __func__, iwpm_err);
3393 }
3394 if (iwpm_valid_pid() && !iwpm_err) {
3395 memcpy(&pm_msg.loc_addr, &ep->com.local_addr,
3396 sizeof(ep->com.local_addr));
3397 iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_C4IW);
3398 if (iwpm_err)
3399 PDBG("%s: Port Mapper query fail (err = %d).\n",
3400 __func__, iwpm_err);
3401 else
3402 memcpy(&ep->com.mapped_local_addr,
3403 &pm_msg.mapped_loc_addr,
3404 sizeof(ep->com.mapped_local_addr));
3405 }
3406 if (iwpm_create_mapinfo(&ep->com.local_addr,
3407 &ep->com.mapped_local_addr, RDMA_NL_C4IW)) {
3408 err = -ENOMEM;
3409 goto fail3;
3410 }
3411 print_addr(&ep->com, __func__, "add_mapping/create_mapinfo");
3412 3300
3413 set_bit(RELEASE_MAPINFO, &ep->com.flags);
3414 state_set(&ep->com, LISTEN); 3301 state_set(&ep->com, LISTEN);
3415 if (ep->com.local_addr.ss_family == AF_INET) 3302 if (ep->com.local_addr.ss_family == AF_INET)
3416 err = create_server4(dev, ep); 3303 err = create_server4(dev, ep);
@@ -3421,7 +3308,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3421 goto out; 3308 goto out;
3422 } 3309 }
3423 3310
3424fail3:
3425 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, 3311 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3426 ep->com.local_addr.ss_family); 3312 ep->com.local_addr.ss_family);
3427fail2: 3313fail2:
@@ -3456,7 +3342,7 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
3456 goto done; 3342 goto done;
3457 err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 3343 err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait,
3458 0, 0, __func__); 3344 0, 0, __func__);
3459 sin6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr; 3345 sin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
3460 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 3346 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3461 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 3347 (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3462 } 3348 }
@@ -3580,7 +3466,7 @@ static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3580 state_set(&ep->com, DEAD); 3466 state_set(&ep->com, DEAD);
3581 if (ep->com.remote_addr.ss_family == AF_INET6) { 3467 if (ep->com.remote_addr.ss_family == AF_INET6) {
3582 struct sockaddr_in6 *sin6 = 3468 struct sockaddr_in6 *sin6 =
3583 (struct sockaddr_in6 *)&ep->com.mapped_local_addr; 3469 (struct sockaddr_in6 *)&ep->com.local_addr;
3584 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 3470 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3585 (const u32 *)&sin6->sin6_addr.s6_addr, 1); 3471 (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3586 } 3472 }
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index cf21df4a8bf5..b4eeb783573c 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -815,8 +815,15 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
815 } 815 }
816 } 816 }
817out: 817out:
818 if (wq) 818 if (wq) {
819 if (unlikely(qhp->attr.state != C4IW_QP_STATE_RTS)) {
820 if (t4_sq_empty(wq))
821 complete(&qhp->sq_drained);
822 if (t4_rq_empty(wq))
823 complete(&qhp->rq_drained);
824 }
819 spin_unlock(&qhp->lock); 825 spin_unlock(&qhp->lock);
826 }
820 return ret; 827 return ret;
821} 828}
822 829
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 8024ea4417b8..ae2e8b23d2dd 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -87,17 +87,6 @@ struct c4iw_debugfs_data {
87 int pos; 87 int pos;
88}; 88};
89 89
90/* registered cxgb4 netlink callbacks */
91static struct ibnl_client_cbs c4iw_nl_cb_table[] = {
92 [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
93 [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
94 [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
95 [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
96 [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
97 [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
98 [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
99};
100
101static int count_idrs(int id, void *p, void *data) 90static int count_idrs(int id, void *p, void *data)
102{ 91{
103 int *countp = data; 92 int *countp = data;
@@ -242,13 +231,13 @@ static int dump_qp(int id, void *p, void *data)
242 if (qp->ep) { 231 if (qp->ep) {
243 if (qp->ep->com.local_addr.ss_family == AF_INET) { 232 if (qp->ep->com.local_addr.ss_family == AF_INET) {
244 struct sockaddr_in *lsin = (struct sockaddr_in *) 233 struct sockaddr_in *lsin = (struct sockaddr_in *)
245 &qp->ep->com.local_addr; 234 &qp->ep->com.cm_id->local_addr;
246 struct sockaddr_in *rsin = (struct sockaddr_in *) 235 struct sockaddr_in *rsin = (struct sockaddr_in *)
247 &qp->ep->com.remote_addr; 236 &qp->ep->com.cm_id->remote_addr;
248 struct sockaddr_in *mapped_lsin = (struct sockaddr_in *) 237 struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
249 &qp->ep->com.mapped_local_addr; 238 &qp->ep->com.cm_id->m_local_addr;
250 struct sockaddr_in *mapped_rsin = (struct sockaddr_in *) 239 struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
251 &qp->ep->com.mapped_remote_addr; 240 &qp->ep->com.cm_id->m_remote_addr;
252 241
253 cc = snprintf(qpd->buf + qpd->pos, space, 242 cc = snprintf(qpd->buf + qpd->pos, space,
254 "rc qp sq id %u rq id %u state %u " 243 "rc qp sq id %u rq id %u state %u "
@@ -264,15 +253,15 @@ static int dump_qp(int id, void *p, void *data)
264 ntohs(mapped_rsin->sin_port)); 253 ntohs(mapped_rsin->sin_port));
265 } else { 254 } else {
266 struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) 255 struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
267 &qp->ep->com.local_addr; 256 &qp->ep->com.cm_id->local_addr;
268 struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *) 257 struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
269 &qp->ep->com.remote_addr; 258 &qp->ep->com.cm_id->remote_addr;
270 struct sockaddr_in6 *mapped_lsin6 = 259 struct sockaddr_in6 *mapped_lsin6 =
271 (struct sockaddr_in6 *) 260 (struct sockaddr_in6 *)
272 &qp->ep->com.mapped_local_addr; 261 &qp->ep->com.cm_id->m_local_addr;
273 struct sockaddr_in6 *mapped_rsin6 = 262 struct sockaddr_in6 *mapped_rsin6 =
274 (struct sockaddr_in6 *) 263 (struct sockaddr_in6 *)
275 &qp->ep->com.mapped_remote_addr; 264 &qp->ep->com.cm_id->m_remote_addr;
276 265
277 cc = snprintf(qpd->buf + qpd->pos, space, 266 cc = snprintf(qpd->buf + qpd->pos, space,
278 "rc qp sq id %u rq id %u state %u " 267 "rc qp sq id %u rq id %u state %u "
@@ -545,13 +534,13 @@ static int dump_ep(int id, void *p, void *data)
545 534
546 if (ep->com.local_addr.ss_family == AF_INET) { 535 if (ep->com.local_addr.ss_family == AF_INET) {
547 struct sockaddr_in *lsin = (struct sockaddr_in *) 536 struct sockaddr_in *lsin = (struct sockaddr_in *)
548 &ep->com.local_addr; 537 &ep->com.cm_id->local_addr;
549 struct sockaddr_in *rsin = (struct sockaddr_in *) 538 struct sockaddr_in *rsin = (struct sockaddr_in *)
550 &ep->com.remote_addr; 539 &ep->com.cm_id->remote_addr;
551 struct sockaddr_in *mapped_lsin = (struct sockaddr_in *) 540 struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
552 &ep->com.mapped_local_addr; 541 &ep->com.cm_id->m_local_addr;
553 struct sockaddr_in *mapped_rsin = (struct sockaddr_in *) 542 struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
554 &ep->com.mapped_remote_addr; 543 &ep->com.cm_id->m_remote_addr;
555 544
556 cc = snprintf(epd->buf + epd->pos, space, 545 cc = snprintf(epd->buf + epd->pos, space,
557 "ep %p cm_id %p qp %p state %d flags 0x%lx " 546 "ep %p cm_id %p qp %p state %d flags 0x%lx "
@@ -569,13 +558,13 @@ static int dump_ep(int id, void *p, void *data)
569 ntohs(mapped_rsin->sin_port)); 558 ntohs(mapped_rsin->sin_port));
570 } else { 559 } else {
571 struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) 560 struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
572 &ep->com.local_addr; 561 &ep->com.cm_id->local_addr;
573 struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *) 562 struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
574 &ep->com.remote_addr; 563 &ep->com.cm_id->remote_addr;
575 struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *) 564 struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
576 &ep->com.mapped_local_addr; 565 &ep->com.cm_id->m_local_addr;
577 struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *) 566 struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *)
578 &ep->com.mapped_remote_addr; 567 &ep->com.cm_id->m_remote_addr;
579 568
580 cc = snprintf(epd->buf + epd->pos, space, 569 cc = snprintf(epd->buf + epd->pos, space,
581 "ep %p cm_id %p qp %p state %d flags 0x%lx " 570 "ep %p cm_id %p qp %p state %d flags 0x%lx "
@@ -610,9 +599,9 @@ static int dump_listen_ep(int id, void *p, void *data)
610 599
611 if (ep->com.local_addr.ss_family == AF_INET) { 600 if (ep->com.local_addr.ss_family == AF_INET) {
612 struct sockaddr_in *lsin = (struct sockaddr_in *) 601 struct sockaddr_in *lsin = (struct sockaddr_in *)
613 &ep->com.local_addr; 602 &ep->com.cm_id->local_addr;
614 struct sockaddr_in *mapped_lsin = (struct sockaddr_in *) 603 struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
615 &ep->com.mapped_local_addr; 604 &ep->com.cm_id->m_local_addr;
616 605
617 cc = snprintf(epd->buf + epd->pos, space, 606 cc = snprintf(epd->buf + epd->pos, space,
618 "ep %p cm_id %p state %d flags 0x%lx stid %d " 607 "ep %p cm_id %p state %d flags 0x%lx stid %d "
@@ -623,9 +612,9 @@ static int dump_listen_ep(int id, void *p, void *data)
623 ntohs(mapped_lsin->sin_port)); 612 ntohs(mapped_lsin->sin_port));
624 } else { 613 } else {
625 struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) 614 struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
626 &ep->com.local_addr; 615 &ep->com.cm_id->local_addr;
627 struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *) 616 struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
628 &ep->com.mapped_local_addr; 617 &ep->com.cm_id->m_local_addr;
629 618
630 cc = snprintf(epd->buf + epd->pos, space, 619 cc = snprintf(epd->buf + epd->pos, space,
631 "ep %p cm_id %p state %d flags 0x%lx stid %d " 620 "ep %p cm_id %p state %d flags 0x%lx stid %d "
@@ -801,10 +790,9 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
801 rdev->lldi.vr->qp.size, 790 rdev->lldi.vr->qp.size,
802 rdev->lldi.vr->cq.start, 791 rdev->lldi.vr->cq.start,
803 rdev->lldi.vr->cq.size); 792 rdev->lldi.vr->cq.size);
804 PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p " 793 PDBG("udb %pR db_reg %p gts_reg %p "
805 "qpmask 0x%x cqmask 0x%x\n", 794 "qpmask 0x%x cqmask 0x%x\n",
806 (unsigned)pci_resource_len(rdev->lldi.pdev, 2), 795 &rdev->lldi.pdev->resource[2],
807 (void *)pci_resource_start(rdev->lldi.pdev, 2),
808 rdev->lldi.db_reg, rdev->lldi.gts_reg, 796 rdev->lldi.db_reg, rdev->lldi.gts_reg,
809 rdev->qpmask, rdev->cqmask); 797 rdev->qpmask, rdev->cqmask);
810 798
@@ -1506,20 +1494,6 @@ static int __init c4iw_init_module(void)
1506 printk(KERN_WARNING MOD 1494 printk(KERN_WARNING MOD
1507 "could not create debugfs entry, continuing\n"); 1495 "could not create debugfs entry, continuing\n");
1508 1496
1509 if (ibnl_add_client(RDMA_NL_C4IW, RDMA_NL_IWPM_NUM_OPS,
1510 c4iw_nl_cb_table))
1511 pr_err("%s[%u]: Failed to add netlink callback\n"
1512 , __func__, __LINE__);
1513
1514 err = iwpm_init(RDMA_NL_C4IW);
1515 if (err) {
1516 pr_err("port mapper initialization failed with %d\n", err);
1517 ibnl_remove_client(RDMA_NL_C4IW);
1518 c4iw_cm_term();
1519 debugfs_remove_recursive(c4iw_debugfs_root);
1520 return err;
1521 }
1522
1523 cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info); 1497 cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
1524 1498
1525 return 0; 1499 return 0;
@@ -1537,8 +1511,6 @@ static void __exit c4iw_exit_module(void)
1537 } 1511 }
1538 mutex_unlock(&dev_mutex); 1512 mutex_unlock(&dev_mutex);
1539 cxgb4_unregister_uld(CXGB4_ULD_RDMA); 1513 cxgb4_unregister_uld(CXGB4_ULD_RDMA);
1540 iwpm_exit(RDMA_NL_C4IW);
1541 ibnl_remove_client(RDMA_NL_C4IW);
1542 c4iw_cm_term(); 1514 c4iw_cm_term();
1543 debugfs_remove_recursive(c4iw_debugfs_root); 1515 debugfs_remove_recursive(c4iw_debugfs_root);
1544} 1516}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index fb2de75a0392..df43f871ab61 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -476,6 +476,8 @@ struct c4iw_qp {
476 wait_queue_head_t wait; 476 wait_queue_head_t wait;
477 struct timer_list timer; 477 struct timer_list timer;
478 int sq_sig_all; 478 int sq_sig_all;
479 struct completion rq_drained;
480 struct completion sq_drained;
479}; 481};
480 482
481static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) 483static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
@@ -753,7 +755,6 @@ enum c4iw_ep_flags {
753 CLOSE_SENT = 3, 755 CLOSE_SENT = 3,
754 TIMEOUT = 4, 756 TIMEOUT = 4,
755 QP_REFERENCED = 5, 757 QP_REFERENCED = 5,
756 RELEASE_MAPINFO = 6,
757}; 758};
758 759
759enum c4iw_ep_history { 760enum c4iw_ep_history {
@@ -790,8 +791,6 @@ struct c4iw_ep_common {
790 struct mutex mutex; 791 struct mutex mutex;
791 struct sockaddr_storage local_addr; 792 struct sockaddr_storage local_addr;
792 struct sockaddr_storage remote_addr; 793 struct sockaddr_storage remote_addr;
793 struct sockaddr_storage mapped_local_addr;
794 struct sockaddr_storage mapped_remote_addr;
795 struct c4iw_wr_wait wr_wait; 794 struct c4iw_wr_wait wr_wait;
796 unsigned long flags; 795 unsigned long flags;
797 unsigned long history; 796 unsigned long history;
@@ -843,45 +842,6 @@ struct c4iw_ep {
843 struct c4iw_ep_stats stats; 842 struct c4iw_ep_stats stats;
844}; 843};
845 844
846static inline void print_addr(struct c4iw_ep_common *epc, const char *func,
847 const char *msg)
848{
849
850#define SINA(a) (&(((struct sockaddr_in *)(a))->sin_addr.s_addr))
851#define SINP(a) ntohs(((struct sockaddr_in *)(a))->sin_port)
852#define SIN6A(a) (&(((struct sockaddr_in6 *)(a))->sin6_addr))
853#define SIN6P(a) ntohs(((struct sockaddr_in6 *)(a))->sin6_port)
854
855 if (c4iw_debug) {
856 switch (epc->local_addr.ss_family) {
857 case AF_INET:
858 PDBG("%s %s %pI4:%u/%u <-> %pI4:%u/%u\n",
859 func, msg, SINA(&epc->local_addr),
860 SINP(&epc->local_addr),
861 SINP(&epc->mapped_local_addr),
862 SINA(&epc->remote_addr),
863 SINP(&epc->remote_addr),
864 SINP(&epc->mapped_remote_addr));
865 break;
866 case AF_INET6:
867 PDBG("%s %s %pI6:%u/%u <-> %pI6:%u/%u\n",
868 func, msg, SIN6A(&epc->local_addr),
869 SIN6P(&epc->local_addr),
870 SIN6P(&epc->mapped_local_addr),
871 SIN6A(&epc->remote_addr),
872 SIN6P(&epc->remote_addr),
873 SIN6P(&epc->mapped_remote_addr));
874 break;
875 default:
876 break;
877 }
878 }
879#undef SINA
880#undef SINP
881#undef SIN6A
882#undef SIN6P
883}
884
885static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) 845static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
886{ 846{
887 return cm_id->provider_data; 847 return cm_id->provider_data;
@@ -961,7 +921,8 @@ int c4iw_map_mr_sg(struct ib_mr *ibmr,
961 struct scatterlist *sg, 921 struct scatterlist *sg,
962 int sg_nents); 922 int sg_nents);
963int c4iw_dealloc_mw(struct ib_mw *mw); 923int c4iw_dealloc_mw(struct ib_mw *mw);
964struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); 924struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
925 struct ib_udata *udata);
965struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, 926struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
966 u64 length, u64 virt, int acc, 927 u64 length, u64 virt, int acc,
967 struct ib_udata *udata); 928 struct ib_udata *udata);
@@ -1016,6 +977,8 @@ extern int c4iw_wr_log;
1016extern int db_fc_threshold; 977extern int db_fc_threshold;
1017extern int db_coalescing_threshold; 978extern int db_coalescing_threshold;
1018extern int use_dsgl; 979extern int use_dsgl;
980void c4iw_drain_rq(struct ib_qp *qp);
981void c4iw_drain_sq(struct ib_qp *qp);
1019 982
1020 983
1021#endif 984#endif
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 7849890c4781..008be07d5604 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -34,6 +34,7 @@
34#include <linux/moduleparam.h> 34#include <linux/moduleparam.h>
35#include <rdma/ib_umem.h> 35#include <rdma/ib_umem.h>
36#include <linux/atomic.h> 36#include <linux/atomic.h>
37#include <rdma/ib_user_verbs.h>
37 38
38#include "iw_cxgb4.h" 39#include "iw_cxgb4.h"
39 40
@@ -552,7 +553,8 @@ err:
552 return ERR_PTR(err); 553 return ERR_PTR(err);
553} 554}
554 555
555struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) 556struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
557 struct ib_udata *udata)
556{ 558{
557 struct c4iw_dev *rhp; 559 struct c4iw_dev *rhp;
558 struct c4iw_pd *php; 560 struct c4iw_pd *php;
@@ -617,12 +619,14 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
617 int ret = 0; 619 int ret = 0;
618 int length = roundup(max_num_sg * sizeof(u64), 32); 620 int length = roundup(max_num_sg * sizeof(u64), 32);
619 621
622 php = to_c4iw_pd(pd);
623 rhp = php->rhp;
624
620 if (mr_type != IB_MR_TYPE_MEM_REG || 625 if (mr_type != IB_MR_TYPE_MEM_REG ||
621 max_num_sg > t4_max_fr_depth(use_dsgl)) 626 max_num_sg > t4_max_fr_depth(&rhp->rdev.lldi.ulptx_memwrite_dsgl &&
627 use_dsgl))
622 return ERR_PTR(-EINVAL); 628 return ERR_PTR(-EINVAL);
623 629
624 php = to_c4iw_pd(pd);
625 rhp = php->rhp;
626 mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); 630 mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
627 if (!mhp) { 631 if (!mhp) {
628 ret = -ENOMEM; 632 ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index ec04272fbdc2..124682dc5709 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -339,7 +339,8 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
339 props->max_mr = c4iw_num_stags(&dev->rdev); 339 props->max_mr = c4iw_num_stags(&dev->rdev);
340 props->max_pd = T4_MAX_NUM_PD; 340 props->max_pd = T4_MAX_NUM_PD;
341 props->local_ca_ack_delay = 0; 341 props->local_ca_ack_delay = 0;
342 props->max_fast_reg_page_list_len = t4_max_fr_depth(use_dsgl); 342 props->max_fast_reg_page_list_len =
343 t4_max_fr_depth(dev->rdev.lldi.ulptx_memwrite_dsgl && use_dsgl);
343 344
344 return 0; 345 return 0;
345} 346}
@@ -564,6 +565,8 @@ int c4iw_register_device(struct c4iw_dev *dev)
564 dev->ibdev.get_protocol_stats = c4iw_get_mib; 565 dev->ibdev.get_protocol_stats = c4iw_get_mib;
565 dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; 566 dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
566 dev->ibdev.get_port_immutable = c4iw_port_immutable; 567 dev->ibdev.get_port_immutable = c4iw_port_immutable;
568 dev->ibdev.drain_sq = c4iw_drain_sq;
569 dev->ibdev.drain_rq = c4iw_drain_rq;
567 570
568 dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); 571 dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
569 if (!dev->ibdev.iwcm) 572 if (!dev->ibdev.iwcm)
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index e99345eb875a..e17fb5d5e033 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -606,7 +606,7 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
606} 606}
607 607
608static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, 608static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
609 struct ib_reg_wr *wr, u8 *len16, u8 t5dev) 609 struct ib_reg_wr *wr, u8 *len16, bool dsgl_supported)
610{ 610{
611 struct c4iw_mr *mhp = to_c4iw_mr(wr->mr); 611 struct c4iw_mr *mhp = to_c4iw_mr(wr->mr);
612 struct fw_ri_immd *imdp; 612 struct fw_ri_immd *imdp;
@@ -615,7 +615,7 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
615 int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32); 615 int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32);
616 int rem; 616 int rem;
617 617
618 if (mhp->mpl_len > t4_max_fr_depth(use_dsgl)) 618 if (mhp->mpl_len > t4_max_fr_depth(dsgl_supported && use_dsgl))
619 return -EINVAL; 619 return -EINVAL;
620 620
621 wqe->fr.qpbinde_to_dcacpu = 0; 621 wqe->fr.qpbinde_to_dcacpu = 0;
@@ -629,7 +629,7 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
629 wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & 629 wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova &
630 0xffffffff); 630 0xffffffff);
631 631
632 if (t5dev && use_dsgl && (pbllen > max_fr_immd)) { 632 if (dsgl_supported && use_dsgl && (pbllen > max_fr_immd)) {
633 struct fw_ri_dsgl *sglp; 633 struct fw_ri_dsgl *sglp;
634 634
635 for (i = 0; i < mhp->mpl_len; i++) 635 for (i = 0; i < mhp->mpl_len; i++)
@@ -808,9 +808,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
808 fw_opcode = FW_RI_FR_NSMR_WR; 808 fw_opcode = FW_RI_FR_NSMR_WR;
809 swsqe->opcode = FW_RI_FAST_REGISTER; 809 swsqe->opcode = FW_RI_FAST_REGISTER;
810 err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16, 810 err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16,
811 is_t5( 811 qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl);
812 qhp->rhp->rdev.lldi.adapter_type) ?
813 1 : 0);
814 break; 812 break;
815 case IB_WR_LOCAL_INV: 813 case IB_WR_LOCAL_INV:
816 if (wr->send_flags & IB_SEND_FENCE) 814 if (wr->send_flags & IB_SEND_FENCE)
@@ -1621,7 +1619,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1621 unsigned int sqsize, rqsize; 1619 unsigned int sqsize, rqsize;
1622 struct c4iw_ucontext *ucontext; 1620 struct c4iw_ucontext *ucontext;
1623 int ret; 1621 int ret;
1624 struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL; 1622 struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
1623 struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
1625 1624
1626 PDBG("%s ib_pd %p\n", __func__, pd); 1625 PDBG("%s ib_pd %p\n", __func__, pd);
1627 1626
@@ -1697,6 +1696,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1697 qhp->attr.max_ird = 0; 1696 qhp->attr.max_ird = 0;
1698 qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; 1697 qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
1699 spin_lock_init(&qhp->lock); 1698 spin_lock_init(&qhp->lock);
1699 init_completion(&qhp->sq_drained);
1700 init_completion(&qhp->rq_drained);
1700 mutex_init(&qhp->mutex); 1701 mutex_init(&qhp->mutex);
1701 init_waitqueue_head(&qhp->wait); 1702 init_waitqueue_head(&qhp->wait);
1702 atomic_set(&qhp->refcnt, 1); 1703 atomic_set(&qhp->refcnt, 1);
@@ -1706,29 +1707,30 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1706 goto err2; 1707 goto err2;
1707 1708
1708 if (udata) { 1709 if (udata) {
1709 mm1 = kmalloc(sizeof *mm1, GFP_KERNEL); 1710 sq_key_mm = kmalloc(sizeof(*sq_key_mm), GFP_KERNEL);
1710 if (!mm1) { 1711 if (!sq_key_mm) {
1711 ret = -ENOMEM; 1712 ret = -ENOMEM;
1712 goto err3; 1713 goto err3;
1713 } 1714 }
1714 mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); 1715 rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
1715 if (!mm2) { 1716 if (!rq_key_mm) {
1716 ret = -ENOMEM; 1717 ret = -ENOMEM;
1717 goto err4; 1718 goto err4;
1718 } 1719 }
1719 mm3 = kmalloc(sizeof *mm3, GFP_KERNEL); 1720 sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL);
1720 if (!mm3) { 1721 if (!sq_db_key_mm) {
1721 ret = -ENOMEM; 1722 ret = -ENOMEM;
1722 goto err5; 1723 goto err5;
1723 } 1724 }
1724 mm4 = kmalloc(sizeof *mm4, GFP_KERNEL); 1725 rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
1725 if (!mm4) { 1726 if (!rq_db_key_mm) {
1726 ret = -ENOMEM; 1727 ret = -ENOMEM;
1727 goto err6; 1728 goto err6;
1728 } 1729 }
1729 if (t4_sq_onchip(&qhp->wq.sq)) { 1730 if (t4_sq_onchip(&qhp->wq.sq)) {
1730 mm5 = kmalloc(sizeof *mm5, GFP_KERNEL); 1731 ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm),
1731 if (!mm5) { 1732 GFP_KERNEL);
1733 if (!ma_sync_key_mm) {
1732 ret = -ENOMEM; 1734 ret = -ENOMEM;
1733 goto err7; 1735 goto err7;
1734 } 1736 }
@@ -1743,7 +1745,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1743 uresp.rq_size = qhp->wq.rq.size; 1745 uresp.rq_size = qhp->wq.rq.size;
1744 uresp.rq_memsize = qhp->wq.rq.memsize; 1746 uresp.rq_memsize = qhp->wq.rq.memsize;
1745 spin_lock(&ucontext->mmap_lock); 1747 spin_lock(&ucontext->mmap_lock);
1746 if (mm5) { 1748 if (ma_sync_key_mm) {
1747 uresp.ma_sync_key = ucontext->key; 1749 uresp.ma_sync_key = ucontext->key;
1748 ucontext->key += PAGE_SIZE; 1750 ucontext->key += PAGE_SIZE;
1749 } else { 1751 } else {
@@ -1761,28 +1763,29 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1761 ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); 1763 ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
1762 if (ret) 1764 if (ret)
1763 goto err8; 1765 goto err8;
1764 mm1->key = uresp.sq_key; 1766 sq_key_mm->key = uresp.sq_key;
1765 mm1->addr = qhp->wq.sq.phys_addr; 1767 sq_key_mm->addr = qhp->wq.sq.phys_addr;
1766 mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize); 1768 sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize);
1767 insert_mmap(ucontext, mm1); 1769 insert_mmap(ucontext, sq_key_mm);
1768 mm2->key = uresp.rq_key; 1770 rq_key_mm->key = uresp.rq_key;
1769 mm2->addr = virt_to_phys(qhp->wq.rq.queue); 1771 rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
1770 mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); 1772 rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
1771 insert_mmap(ucontext, mm2); 1773 insert_mmap(ucontext, rq_key_mm);
1772 mm3->key = uresp.sq_db_gts_key; 1774 sq_db_key_mm->key = uresp.sq_db_gts_key;
1773 mm3->addr = (__force unsigned long)qhp->wq.sq.bar2_pa; 1775 sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa;
1774 mm3->len = PAGE_SIZE; 1776 sq_db_key_mm->len = PAGE_SIZE;
1775 insert_mmap(ucontext, mm3); 1777 insert_mmap(ucontext, sq_db_key_mm);
1776 mm4->key = uresp.rq_db_gts_key; 1778 rq_db_key_mm->key = uresp.rq_db_gts_key;
1777 mm4->addr = (__force unsigned long)qhp->wq.rq.bar2_pa; 1779 rq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.rq.bar2_pa;
1778 mm4->len = PAGE_SIZE; 1780 rq_db_key_mm->len = PAGE_SIZE;
1779 insert_mmap(ucontext, mm4); 1781 insert_mmap(ucontext, rq_db_key_mm);
1780 if (mm5) { 1782 if (ma_sync_key_mm) {
1781 mm5->key = uresp.ma_sync_key; 1783 ma_sync_key_mm->key = uresp.ma_sync_key;
1782 mm5->addr = (pci_resource_start(rhp->rdev.lldi.pdev, 0) 1784 ma_sync_key_mm->addr =
1783 + PCIE_MA_SYNC_A) & PAGE_MASK; 1785 (pci_resource_start(rhp->rdev.lldi.pdev, 0) +
1784 mm5->len = PAGE_SIZE; 1786 PCIE_MA_SYNC_A) & PAGE_MASK;
1785 insert_mmap(ucontext, mm5); 1787 ma_sync_key_mm->len = PAGE_SIZE;
1788 insert_mmap(ucontext, ma_sync_key_mm);
1786 } 1789 }
1787 } 1790 }
1788 qhp->ibqp.qp_num = qhp->wq.sq.qid; 1791 qhp->ibqp.qp_num = qhp->wq.sq.qid;
@@ -1795,15 +1798,15 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1795 qhp->wq.rq.memsize, attrs->cap.max_recv_wr); 1798 qhp->wq.rq.memsize, attrs->cap.max_recv_wr);
1796 return &qhp->ibqp; 1799 return &qhp->ibqp;
1797err8: 1800err8:
1798 kfree(mm5); 1801 kfree(ma_sync_key_mm);
1799err7: 1802err7:
1800 kfree(mm4); 1803 kfree(rq_db_key_mm);
1801err6: 1804err6:
1802 kfree(mm3); 1805 kfree(sq_db_key_mm);
1803err5: 1806err5:
1804 kfree(mm2); 1807 kfree(rq_key_mm);
1805err4: 1808err4:
1806 kfree(mm1); 1809 kfree(sq_key_mm);
1807err3: 1810err3:
1808 remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); 1811 remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
1809err2: 1812err2:
@@ -1888,3 +1891,17 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1888 init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0; 1891 init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
1889 return 0; 1892 return 0;
1890} 1893}
1894
1895void c4iw_drain_sq(struct ib_qp *ibqp)
1896{
1897 struct c4iw_qp *qp = to_c4iw_qp(ibqp);
1898
1899 wait_for_completion(&qp->sq_drained);
1900}
1901
1902void c4iw_drain_rq(struct ib_qp *ibqp)
1903{
1904 struct c4iw_qp *qp = to_c4iw_qp(ibqp);
1905
1906 wait_for_completion(&qp->rq_drained);
1907}
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 21cb41a60fe8..c74ef2620b85 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -310,7 +310,7 @@ static void aliasguid_query_handler(int status,
310 if (status) { 310 if (status) {
311 pr_debug("(port: %d) failed: status = %d\n", 311 pr_debug("(port: %d) failed: status = %d\n",
312 cb_ctx->port, status); 312 cb_ctx->port, status);
313 rec->time_to_run = ktime_get_real_ns() + 1 * NSEC_PER_SEC; 313 rec->time_to_run = ktime_get_boot_ns() + 1 * NSEC_PER_SEC;
314 goto out; 314 goto out;
315 } 315 }
316 316
@@ -416,7 +416,7 @@ next_entry:
416 be64_to_cpu((__force __be64)rec->guid_indexes), 416 be64_to_cpu((__force __be64)rec->guid_indexes),
417 be64_to_cpu((__force __be64)applied_guid_indexes), 417 be64_to_cpu((__force __be64)applied_guid_indexes),
418 be64_to_cpu((__force __be64)declined_guid_indexes)); 418 be64_to_cpu((__force __be64)declined_guid_indexes));
419 rec->time_to_run = ktime_get_real_ns() + 419 rec->time_to_run = ktime_get_boot_ns() +
420 resched_delay_sec * NSEC_PER_SEC; 420 resched_delay_sec * NSEC_PER_SEC;
421 } else { 421 } else {
422 rec->status = MLX4_GUID_INFO_STATUS_SET; 422 rec->status = MLX4_GUID_INFO_STATUS_SET;
@@ -708,7 +708,7 @@ static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
708 } 708 }
709 } 709 }
710 if (resched_delay_sec) { 710 if (resched_delay_sec) {
711 u64 curr_time = ktime_get_real_ns(); 711 u64 curr_time = ktime_get_boot_ns();
712 712
713 *resched_delay_sec = (low_record_time < curr_time) ? 0 : 713 *resched_delay_sec = (low_record_time < curr_time) ? 0 :
714 div_u64((low_record_time - curr_time), NSEC_PER_SEC); 714 div_u64((low_record_time - curr_time), NSEC_PER_SEC);
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 1c7ab6cabbb8..914bc98e753f 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1643,6 +1643,56 @@ static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_
1643 return err; 1643 return err;
1644} 1644}
1645 1645
1646static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
1647 struct ib_flow_attr *flow_attr,
1648 enum mlx4_net_trans_promisc_mode *type)
1649{
1650 int err = 0;
1651
1652 if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) ||
1653 (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) ||
1654 (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) {
1655 return -EOPNOTSUPP;
1656 }
1657
1658 if (flow_attr->num_of_specs == 0) {
1659 type[0] = MLX4_FS_MC_SNIFFER;
1660 type[1] = MLX4_FS_UC_SNIFFER;
1661 } else {
1662 union ib_flow_spec *ib_spec;
1663
1664 ib_spec = (union ib_flow_spec *)(flow_attr + 1);
1665 if (ib_spec->type != IB_FLOW_SPEC_ETH)
1666 return -EINVAL;
1667
1668 /* if all is zero than MC and UC */
1669 if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) {
1670 type[0] = MLX4_FS_MC_SNIFFER;
1671 type[1] = MLX4_FS_UC_SNIFFER;
1672 } else {
1673 u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01,
1674 ib_spec->eth.mask.dst_mac[1],
1675 ib_spec->eth.mask.dst_mac[2],
1676 ib_spec->eth.mask.dst_mac[3],
1677 ib_spec->eth.mask.dst_mac[4],
1678 ib_spec->eth.mask.dst_mac[5]};
1679
1680 /* Above xor was only on MC bit, non empty mask is valid
1681 * only if this bit is set and rest are zero.
1682 */
1683 if (!is_zero_ether_addr(&mac[0]))
1684 return -EINVAL;
1685
1686 if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac))
1687 type[0] = MLX4_FS_MC_SNIFFER;
1688 else
1689 type[0] = MLX4_FS_UC_SNIFFER;
1690 }
1691 }
1692
1693 return err;
1694}
1695
1646static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, 1696static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
1647 struct ib_flow_attr *flow_attr, 1697 struct ib_flow_attr *flow_attr,
1648 int domain) 1698 int domain)
@@ -1653,6 +1703,10 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
1653 struct mlx4_dev *dev = (to_mdev(qp->device))->dev; 1703 struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
1654 int is_bonded = mlx4_is_bonded(dev); 1704 int is_bonded = mlx4_is_bonded(dev);
1655 1705
1706 if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
1707 (flow_attr->type != IB_FLOW_ATTR_NORMAL))
1708 return ERR_PTR(-EOPNOTSUPP);
1709
1656 memset(type, 0, sizeof(type)); 1710 memset(type, 0, sizeof(type));
1657 1711
1658 mflow = kzalloc(sizeof(*mflow), GFP_KERNEL); 1712 mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
@@ -1663,7 +1717,19 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
1663 1717
1664 switch (flow_attr->type) { 1718 switch (flow_attr->type) {
1665 case IB_FLOW_ATTR_NORMAL: 1719 case IB_FLOW_ATTR_NORMAL:
1666 type[0] = MLX4_FS_REGULAR; 1720 /* If dont trap flag (continue match) is set, under specific
1721 * condition traffic be replicated to given qp,
1722 * without stealing it
1723 */
1724 if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) {
1725 err = mlx4_ib_add_dont_trap_rule(dev,
1726 flow_attr,
1727 type);
1728 if (err)
1729 goto err_free;
1730 } else {
1731 type[0] = MLX4_FS_REGULAR;
1732 }
1667 break; 1733 break;
1668 1734
1669 case IB_FLOW_ATTR_ALL_DEFAULT: 1735 case IB_FLOW_ATTR_ALL_DEFAULT:
@@ -1675,8 +1741,8 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
1675 break; 1741 break;
1676 1742
1677 case IB_FLOW_ATTR_SNIFFER: 1743 case IB_FLOW_ATTR_SNIFFER:
1678 type[0] = MLX4_FS_UC_SNIFFER; 1744 type[0] = MLX4_FS_MIRROR_RX_PORT;
1679 type[1] = MLX4_FS_MC_SNIFFER; 1745 type[1] = MLX4_FS_MIRROR_SX_PORT;
1680 break; 1746 break;
1681 1747
1682 default: 1748 default:
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 52ce7b000044..1eca01cebe51 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -711,7 +711,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
711 u64 virt_addr, int access_flags, 711 u64 virt_addr, int access_flags,
712 struct ib_udata *udata); 712 struct ib_udata *udata);
713int mlx4_ib_dereg_mr(struct ib_mr *mr); 713int mlx4_ib_dereg_mr(struct ib_mr *mr);
714struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); 714struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
715 struct ib_udata *udata);
715int mlx4_ib_dealloc_mw(struct ib_mw *mw); 716int mlx4_ib_dealloc_mw(struct ib_mw *mw);
716struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, 717struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
717 enum ib_mr_type mr_type, 718 enum ib_mr_type mr_type,
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 242b94ec105b..ce0b5aa8eb9b 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -32,6 +32,7 @@
32 */ 32 */
33 33
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <rdma/ib_user_verbs.h>
35 36
36#include "mlx4_ib.h" 37#include "mlx4_ib.h"
37 38
@@ -334,7 +335,8 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
334 return 0; 335 return 0;
335} 336}
336 337
337struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) 338struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
339 struct ib_udata *udata)
338{ 340{
339 struct mlx4_ib_dev *dev = to_mdev(pd->device); 341 struct mlx4_ib_dev *dev = to_mdev(pd->device);
340 struct mlx4_ib_mw *mw; 342 struct mlx4_ib_mw *mw;
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 27a70159e2ea..4e851889355a 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,4 +1,4 @@
1obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o 1obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
2 2
3mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o 3mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o
4mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o 4mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index fd1de31e0611..a00ba4418de9 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -207,7 +207,10 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
207 break; 207 break;
208 case MLX5_CQE_RESP_SEND: 208 case MLX5_CQE_RESP_SEND:
209 wc->opcode = IB_WC_RECV; 209 wc->opcode = IB_WC_RECV;
210 wc->wc_flags = 0; 210 wc->wc_flags = IB_WC_IP_CSUM_OK;
211 if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) &&
212 (cqe->hds_ip_ext & CQE_L4_OK))))
213 wc->wc_flags = 0;
211 break; 214 break;
212 case MLX5_CQE_RESP_SEND_IMM: 215 case MLX5_CQE_RESP_SEND_IMM:
213 wc->opcode = IB_WC_RECV; 216 wc->opcode = IB_WC_RECV;
@@ -431,7 +434,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
431 struct mlx5_core_qp *mqp; 434 struct mlx5_core_qp *mqp;
432 struct mlx5_ib_wq *wq; 435 struct mlx5_ib_wq *wq;
433 struct mlx5_sig_err_cqe *sig_err_cqe; 436 struct mlx5_sig_err_cqe *sig_err_cqe;
434 struct mlx5_core_mr *mmr; 437 struct mlx5_core_mkey *mmkey;
435 struct mlx5_ib_mr *mr; 438 struct mlx5_ib_mr *mr;
436 uint8_t opcode; 439 uint8_t opcode;
437 uint32_t qpn; 440 uint32_t qpn;
@@ -536,17 +539,17 @@ repoll:
536 case MLX5_CQE_SIG_ERR: 539 case MLX5_CQE_SIG_ERR:
537 sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; 540 sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
538 541
539 read_lock(&dev->mdev->priv.mr_table.lock); 542 read_lock(&dev->mdev->priv.mkey_table.lock);
540 mmr = __mlx5_mr_lookup(dev->mdev, 543 mmkey = __mlx5_mr_lookup(dev->mdev,
541 mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); 544 mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
542 if (unlikely(!mmr)) { 545 if (unlikely(!mmkey)) {
543 read_unlock(&dev->mdev->priv.mr_table.lock); 546 read_unlock(&dev->mdev->priv.mkey_table.lock);
544 mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", 547 mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
545 cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); 548 cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
546 return -EINVAL; 549 return -EINVAL;
547 } 550 }
548 551
549 mr = to_mibmr(mmr); 552 mr = to_mibmr(mmkey);
550 get_sig_err_item(sig_err_cqe, &mr->sig->err_item); 553 get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
551 mr->sig->sig_err_exists = true; 554 mr->sig->sig_err_exists = true;
552 mr->sig->sigerr_count++; 555 mr->sig->sigerr_count++;
@@ -558,25 +561,51 @@ repoll:
558 mr->sig->err_item.expected, 561 mr->sig->err_item.expected,
559 mr->sig->err_item.actual); 562 mr->sig->err_item.actual);
560 563
561 read_unlock(&dev->mdev->priv.mr_table.lock); 564 read_unlock(&dev->mdev->priv.mkey_table.lock);
562 goto repoll; 565 goto repoll;
563 } 566 }
564 567
565 return 0; 568 return 0;
566} 569}
567 570
571static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries,
572 struct ib_wc *wc)
573{
574 struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
575 struct mlx5_ib_wc *soft_wc, *next;
576 int npolled = 0;
577
578 list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) {
579 if (npolled >= num_entries)
580 break;
581
582 mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n",
583 cq->mcq.cqn);
584
585 wc[npolled++] = soft_wc->wc;
586 list_del(&soft_wc->list);
587 kfree(soft_wc);
588 }
589
590 return npolled;
591}
592
568int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) 593int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
569{ 594{
570 struct mlx5_ib_cq *cq = to_mcq(ibcq); 595 struct mlx5_ib_cq *cq = to_mcq(ibcq);
571 struct mlx5_ib_qp *cur_qp = NULL; 596 struct mlx5_ib_qp *cur_qp = NULL;
572 unsigned long flags; 597 unsigned long flags;
598 int soft_polled = 0;
573 int npolled; 599 int npolled;
574 int err = 0; 600 int err = 0;
575 601
576 spin_lock_irqsave(&cq->lock, flags); 602 spin_lock_irqsave(&cq->lock, flags);
577 603
578 for (npolled = 0; npolled < num_entries; npolled++) { 604 if (unlikely(!list_empty(&cq->wc_list)))
579 err = mlx5_poll_one(cq, &cur_qp, wc + npolled); 605 soft_polled = poll_soft_wc(cq, num_entries, wc);
606
607 for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
608 err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled);
580 if (err) 609 if (err)
581 break; 610 break;
582 } 611 }
@@ -587,7 +616,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
587 spin_unlock_irqrestore(&cq->lock, flags); 616 spin_unlock_irqrestore(&cq->lock, flags);
588 617
589 if (err == 0 || err == -EAGAIN) 618 if (err == 0 || err == -EAGAIN)
590 return npolled; 619 return soft_polled + npolled;
591 else 620 else
592 return err; 621 return err;
593} 622}
@@ -595,16 +624,27 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
595int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) 624int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
596{ 625{
597 struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev; 626 struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
627 struct mlx5_ib_cq *cq = to_mcq(ibcq);
598 void __iomem *uar_page = mdev->priv.uuari.uars[0].map; 628 void __iomem *uar_page = mdev->priv.uuari.uars[0].map;
629 unsigned long irq_flags;
630 int ret = 0;
631
632 spin_lock_irqsave(&cq->lock, irq_flags);
633 if (cq->notify_flags != IB_CQ_NEXT_COMP)
634 cq->notify_flags = flags & IB_CQ_SOLICITED_MASK;
599 635
600 mlx5_cq_arm(&to_mcq(ibcq)->mcq, 636 if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list))
637 ret = 1;
638 spin_unlock_irqrestore(&cq->lock, irq_flags);
639
640 mlx5_cq_arm(&cq->mcq,
601 (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? 641 (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
602 MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT, 642 MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
603 uar_page, 643 uar_page,
604 MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock), 644 MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock),
605 to_mcq(ibcq)->mcq.cons_index); 645 to_mcq(ibcq)->mcq.cons_index);
606 646
607 return 0; 647 return ret;
608} 648}
609 649
610static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, 650static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
@@ -757,6 +797,14 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
757 mlx5_db_free(dev->mdev, &cq->db); 797 mlx5_db_free(dev->mdev, &cq->db);
758} 798}
759 799
800static void notify_soft_wc_handler(struct work_struct *work)
801{
802 struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq,
803 notify_work);
804
805 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
806}
807
760struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, 808struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
761 const struct ib_cq_init_attr *attr, 809 const struct ib_cq_init_attr *attr,
762 struct ib_ucontext *context, 810 struct ib_ucontext *context,
@@ -807,6 +855,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
807 &index, &inlen); 855 &index, &inlen);
808 if (err) 856 if (err)
809 goto err_create; 857 goto err_create;
858
859 INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
810 } 860 }
811 861
812 cq->cqe_size = cqe_size; 862 cq->cqe_size = cqe_size;
@@ -832,6 +882,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
832 cq->mcq.comp = mlx5_ib_cq_comp; 882 cq->mcq.comp = mlx5_ib_cq_comp;
833 cq->mcq.event = mlx5_ib_cq_event; 883 cq->mcq.event = mlx5_ib_cq_event;
834 884
885 INIT_LIST_HEAD(&cq->wc_list);
886
835 if (context) 887 if (context)
836 if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { 888 if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
837 err = -EFAULT; 889 err = -EFAULT;
@@ -1219,3 +1271,27 @@ int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
1219 cq = to_mcq(ibcq); 1271 cq = to_mcq(ibcq);
1220 return cq->cqe_size; 1272 return cq->cqe_size;
1221} 1273}
1274
1275/* Called from atomic context */
1276int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc)
1277{
1278 struct mlx5_ib_wc *soft_wc;
1279 struct mlx5_ib_cq *cq = to_mcq(ibcq);
1280 unsigned long flags;
1281
1282 soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC);
1283 if (!soft_wc)
1284 return -ENOMEM;
1285
1286 soft_wc->wc = *wc;
1287 spin_lock_irqsave(&cq->lock, flags);
1288 list_add_tail(&soft_wc->list, &cq->wc_list);
1289 if (cq->notify_flags == IB_CQ_NEXT_COMP ||
1290 wc->status != IB_WC_SUCCESS) {
1291 cq->notify_flags = 0;
1292 schedule_work(&cq->notify_work);
1293 }
1294 spin_unlock_irqrestore(&cq->lock, flags);
1295
1296 return 0;
1297}
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
new file mode 100644
index 000000000000..53e03c8ede79
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -0,0 +1,548 @@
1/*
2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "mlx5_ib.h"
34
35struct mlx5_ib_gsi_wr {
36 struct ib_cqe cqe;
37 struct ib_wc wc;
38 int send_flags;
39 bool completed:1;
40};
41
42struct mlx5_ib_gsi_qp {
43 struct ib_qp ibqp;
44 struct ib_qp *rx_qp;
45 u8 port_num;
46 struct ib_qp_cap cap;
47 enum ib_sig_type sq_sig_type;
48 /* Serialize qp state modifications */
49 struct mutex mutex;
50 struct ib_cq *cq;
51 struct mlx5_ib_gsi_wr *outstanding_wrs;
52 u32 outstanding_pi, outstanding_ci;
53 int num_qps;
54 /* Protects access to the tx_qps. Post send operations synchronize
55 * with tx_qp creation in setup_qp(). Also protects the
56 * outstanding_wrs array and indices.
57 */
58 spinlock_t lock;
59 struct ib_qp **tx_qps;
60};
61
62static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp)
63{
64 return container_of(qp, struct mlx5_ib_gsi_qp, ibqp);
65}
66
67static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
68{
69 return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
70}
71
72static u32 next_outstanding(struct mlx5_ib_gsi_qp *gsi, u32 index)
73{
74 return ++index % gsi->cap.max_send_wr;
75}
76
77#define for_each_outstanding_wr(gsi, index) \
78 for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; \
79 index = next_outstanding(gsi, index))
80
81/* Call with gsi->lock locked */
82static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
83{
84 struct ib_cq *gsi_cq = gsi->ibqp.send_cq;
85 struct mlx5_ib_gsi_wr *wr;
86 u32 index;
87
88 for_each_outstanding_wr(gsi, index) {
89 wr = &gsi->outstanding_wrs[index];
90
91 if (!wr->completed)
92 break;
93
94 if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR ||
95 wr->send_flags & IB_SEND_SIGNALED)
96 WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
97
98 wr->completed = false;
99 }
100
101 gsi->outstanding_ci = index;
102}
103
104static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
105{
106 struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
107 struct mlx5_ib_gsi_wr *wr =
108 container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
109 u64 wr_id;
110 unsigned long flags;
111
112 spin_lock_irqsave(&gsi->lock, flags);
113 wr->completed = true;
114 wr_id = wr->wc.wr_id;
115 wr->wc = *wc;
116 wr->wc.wr_id = wr_id;
117 wr->wc.qp = &gsi->ibqp;
118
119 generate_completions(gsi);
120 spin_unlock_irqrestore(&gsi->lock, flags);
121}
122
123struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
124 struct ib_qp_init_attr *init_attr)
125{
126 struct mlx5_ib_dev *dev = to_mdev(pd->device);
127 struct mlx5_ib_gsi_qp *gsi;
128 struct ib_qp_init_attr hw_init_attr = *init_attr;
129 const u8 port_num = init_attr->port_num;
130 const int num_pkeys = pd->device->attrs.max_pkeys;
131 const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0;
132 int ret;
133
134 mlx5_ib_dbg(dev, "creating GSI QP\n");
135
136 if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) {
137 mlx5_ib_warn(dev,
138 "invalid port number %d during GSI QP creation\n",
139 port_num);
140 return ERR_PTR(-EINVAL);
141 }
142
143 gsi = kzalloc(sizeof(*gsi), GFP_KERNEL);
144 if (!gsi)
145 return ERR_PTR(-ENOMEM);
146
147 gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL);
148 if (!gsi->tx_qps) {
149 ret = -ENOMEM;
150 goto err_free;
151 }
152
153 gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr,
154 sizeof(*gsi->outstanding_wrs),
155 GFP_KERNEL);
156 if (!gsi->outstanding_wrs) {
157 ret = -ENOMEM;
158 goto err_free_tx;
159 }
160
161 mutex_init(&gsi->mutex);
162
163 mutex_lock(&dev->devr.mutex);
164
165 if (dev->devr.ports[port_num - 1].gsi) {
166 mlx5_ib_warn(dev, "GSI QP already exists on port %d\n",
167 port_num);
168 ret = -EBUSY;
169 goto err_free_wrs;
170 }
171 gsi->num_qps = num_qps;
172 spin_lock_init(&gsi->lock);
173
174 gsi->cap = init_attr->cap;
175 gsi->sq_sig_type = init_attr->sq_sig_type;
176 gsi->ibqp.qp_num = 1;
177 gsi->port_num = port_num;
178
179 gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0,
180 IB_POLL_SOFTIRQ);
181 if (IS_ERR(gsi->cq)) {
182 mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
183 PTR_ERR(gsi->cq));
184 ret = PTR_ERR(gsi->cq);
185 goto err_free_wrs;
186 }
187
188 hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI;
189 hw_init_attr.send_cq = gsi->cq;
190 if (num_qps) {
191 hw_init_attr.cap.max_send_wr = 0;
192 hw_init_attr.cap.max_send_sge = 0;
193 hw_init_attr.cap.max_inline_data = 0;
194 }
195 gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
196 if (IS_ERR(gsi->rx_qp)) {
197 mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
198 PTR_ERR(gsi->rx_qp));
199 ret = PTR_ERR(gsi->rx_qp);
200 goto err_destroy_cq;
201 }
202
203 dev->devr.ports[init_attr->port_num - 1].gsi = gsi;
204
205 mutex_unlock(&dev->devr.mutex);
206
207 return &gsi->ibqp;
208
209err_destroy_cq:
210 ib_free_cq(gsi->cq);
211err_free_wrs:
212 mutex_unlock(&dev->devr.mutex);
213 kfree(gsi->outstanding_wrs);
214err_free_tx:
215 kfree(gsi->tx_qps);
216err_free:
217 kfree(gsi);
218 return ERR_PTR(ret);
219}
220
221int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
222{
223 struct mlx5_ib_dev *dev = to_mdev(qp->device);
224 struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
225 const int port_num = gsi->port_num;
226 int qp_index;
227 int ret;
228
229 mlx5_ib_dbg(dev, "destroying GSI QP\n");
230
231 mutex_lock(&dev->devr.mutex);
232 ret = ib_destroy_qp(gsi->rx_qp);
233 if (ret) {
234 mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
235 ret);
236 mutex_unlock(&dev->devr.mutex);
237 return ret;
238 }
239 dev->devr.ports[port_num - 1].gsi = NULL;
240 mutex_unlock(&dev->devr.mutex);
241 gsi->rx_qp = NULL;
242
243 for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) {
244 if (!gsi->tx_qps[qp_index])
245 continue;
246 WARN_ON_ONCE(ib_destroy_qp(gsi->tx_qps[qp_index]));
247 gsi->tx_qps[qp_index] = NULL;
248 }
249
250 ib_free_cq(gsi->cq);
251
252 kfree(gsi->outstanding_wrs);
253 kfree(gsi->tx_qps);
254 kfree(gsi);
255
256 return 0;
257}
258
259static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
260{
261 struct ib_pd *pd = gsi->rx_qp->pd;
262 struct ib_qp_init_attr init_attr = {
263 .event_handler = gsi->rx_qp->event_handler,
264 .qp_context = gsi->rx_qp->qp_context,
265 .send_cq = gsi->cq,
266 .recv_cq = gsi->rx_qp->recv_cq,
267 .cap = {
268 .max_send_wr = gsi->cap.max_send_wr,
269 .max_send_sge = gsi->cap.max_send_sge,
270 .max_inline_data = gsi->cap.max_inline_data,
271 },
272 .sq_sig_type = gsi->sq_sig_type,
273 .qp_type = IB_QPT_UD,
274 .create_flags = mlx5_ib_create_qp_sqpn_qp1(),
275 };
276
277 return ib_create_qp(pd, &init_attr);
278}
279
280static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
281 u16 qp_index)
282{
283 struct mlx5_ib_dev *dev = to_mdev(qp->device);
284 struct ib_qp_attr attr;
285 int mask;
286 int ret;
287
288 mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT;
289 attr.qp_state = IB_QPS_INIT;
290 attr.pkey_index = qp_index;
291 attr.qkey = IB_QP1_QKEY;
292 attr.port_num = gsi->port_num;
293 ret = ib_modify_qp(qp, &attr, mask);
294 if (ret) {
295 mlx5_ib_err(dev, "could not change QP%d state to INIT: %d\n",
296 qp->qp_num, ret);
297 return ret;
298 }
299
300 attr.qp_state = IB_QPS_RTR;
301 ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
302 if (ret) {
303 mlx5_ib_err(dev, "could not change QP%d state to RTR: %d\n",
304 qp->qp_num, ret);
305 return ret;
306 }
307
308 attr.qp_state = IB_QPS_RTS;
309 attr.sq_psn = 0;
310 ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
311 if (ret) {
312 mlx5_ib_err(dev, "could not change QP%d state to RTS: %d\n",
313 qp->qp_num, ret);
314 return ret;
315 }
316
317 return 0;
318}
319
320static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
321{
322 struct ib_device *device = gsi->rx_qp->device;
323 struct mlx5_ib_dev *dev = to_mdev(device);
324 struct ib_qp *qp;
325 unsigned long flags;
326 u16 pkey;
327 int ret;
328
329 ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey);
330 if (ret) {
331 mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n",
332 gsi->port_num, qp_index);
333 return;
334 }
335
336 if (!pkey) {
337 mlx5_ib_dbg(dev, "invalid P_Key at port %d, index %d. Skipping.\n",
338 gsi->port_num, qp_index);
339 return;
340 }
341
342 spin_lock_irqsave(&gsi->lock, flags);
343 qp = gsi->tx_qps[qp_index];
344 spin_unlock_irqrestore(&gsi->lock, flags);
345 if (qp) {
346 mlx5_ib_dbg(dev, "already existing GSI TX QP at port %d, index %d. Skipping\n",
347 gsi->port_num, qp_index);
348 return;
349 }
350
351 qp = create_gsi_ud_qp(gsi);
352 if (IS_ERR(qp)) {
353 mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n",
354 PTR_ERR(qp));
355 return;
356 }
357
358 ret = modify_to_rts(gsi, qp, qp_index);
359 if (ret)
360 goto err_destroy_qp;
361
362 spin_lock_irqsave(&gsi->lock, flags);
363 WARN_ON_ONCE(gsi->tx_qps[qp_index]);
364 gsi->tx_qps[qp_index] = qp;
365 spin_unlock_irqrestore(&gsi->lock, flags);
366
367 return;
368
369err_destroy_qp:
370 WARN_ON_ONCE(qp);
371}
372
373static void setup_qps(struct mlx5_ib_gsi_qp *gsi)
374{
375 u16 qp_index;
376
377 for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
378 setup_qp(gsi, qp_index);
379}
380
381int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
382 int attr_mask)
383{
384 struct mlx5_ib_dev *dev = to_mdev(qp->device);
385 struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
386 int ret;
387
388 mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);
389
390 mutex_lock(&gsi->mutex);
391 ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask);
392 if (ret) {
393 mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret);
394 goto unlock;
395 }
396
397 if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS)
398 setup_qps(gsi);
399
400unlock:
401 mutex_unlock(&gsi->mutex);
402
403 return ret;
404}
405
406int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
407 int qp_attr_mask,
408 struct ib_qp_init_attr *qp_init_attr)
409{
410 struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
411 int ret;
412
413 mutex_lock(&gsi->mutex);
414 ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr);
415 qp_init_attr->cap = gsi->cap;
416 mutex_unlock(&gsi->mutex);
417
418 return ret;
419}
420
421/* Call with gsi->lock locked */
422static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
423 struct ib_ud_wr *wr, struct ib_wc *wc)
424{
425 struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
426 struct mlx5_ib_gsi_wr *gsi_wr;
427
428 if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) {
429 mlx5_ib_warn(dev, "no available GSI work request.\n");
430 return -ENOMEM;
431 }
432
433 gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi];
434 gsi->outstanding_pi = next_outstanding(gsi, gsi->outstanding_pi);
435
436 if (!wc) {
437 memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));
438 gsi_wr->wc.pkey_index = wr->pkey_index;
439 gsi_wr->wc.wr_id = wr->wr.wr_id;
440 } else {
441 gsi_wr->wc = *wc;
442 gsi_wr->completed = true;
443 }
444
445 gsi_wr->cqe.done = &handle_single_completion;
446 wr->wr.wr_cqe = &gsi_wr->cqe;
447
448 return 0;
449}
450
451/* Call with gsi->lock locked */
452static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
453 struct ib_ud_wr *wr)
454{
455 struct ib_wc wc = {
456 { .wr_id = wr->wr.wr_id },
457 .status = IB_WC_SUCCESS,
458 .opcode = IB_WC_SEND,
459 .qp = &gsi->ibqp,
460 };
461 int ret;
462
463 ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc);
464 if (ret)
465 return ret;
466
467 generate_completions(gsi);
468
469 return 0;
470}
471
472/* Call with gsi->lock locked */
473static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
474{
475 struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
476 int qp_index = wr->pkey_index;
477
478 if (!mlx5_ib_deth_sqpn_cap(dev))
479 return gsi->rx_qp;
480
481 if (qp_index >= gsi->num_qps)
482 return NULL;
483
484 return gsi->tx_qps[qp_index];
485}
486
487int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
488 struct ib_send_wr **bad_wr)
489{
490 struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
491 struct ib_qp *tx_qp;
492 unsigned long flags;
493 int ret;
494
495 for (; wr; wr = wr->next) {
496 struct ib_ud_wr cur_wr = *ud_wr(wr);
497
498 cur_wr.wr.next = NULL;
499
500 spin_lock_irqsave(&gsi->lock, flags);
501 tx_qp = get_tx_qp(gsi, &cur_wr);
502 if (!tx_qp) {
503 ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr);
504 if (ret)
505 goto err;
506 spin_unlock_irqrestore(&gsi->lock, flags);
507 continue;
508 }
509
510 ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL);
511 if (ret)
512 goto err;
513
514 ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
515 if (ret) {
516 /* Undo the effect of adding the outstanding wr */
517 gsi->outstanding_pi = (gsi->outstanding_pi - 1) %
518 gsi->cap.max_send_wr;
519 goto err;
520 }
521 spin_unlock_irqrestore(&gsi->lock, flags);
522 }
523
524 return 0;
525
526err:
527 spin_unlock_irqrestore(&gsi->lock, flags);
528 *bad_wr = wr;
529 return ret;
530}
531
532int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr,
533 struct ib_recv_wr **bad_wr)
534{
535 struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
536
537 return ib_post_recv(gsi->rx_qp, wr, bad_wr);
538}
539
540void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi)
541{
542 if (!gsi)
543 return;
544
545 mutex_lock(&gsi->mutex);
546 setup_qps(gsi);
547 mutex_unlock(&gsi->mutex);
548}
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index b84d13a487cc..41d8a0036465 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -31,8 +31,10 @@
31 */ 31 */
32 32
33#include <linux/mlx5/cmd.h> 33#include <linux/mlx5/cmd.h>
34#include <linux/mlx5/vport.h>
34#include <rdma/ib_mad.h> 35#include <rdma/ib_mad.h>
35#include <rdma/ib_smi.h> 36#include <rdma/ib_smi.h>
37#include <rdma/ib_pma.h>
36#include "mlx5_ib.h" 38#include "mlx5_ib.h"
37 39
38enum { 40enum {
@@ -57,20 +59,12 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
57 return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port); 59 return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port);
58} 60}
59 61
60int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, 62static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
61 const struct ib_wc *in_wc, const struct ib_grh *in_grh, 63 const struct ib_wc *in_wc, const struct ib_grh *in_grh,
62 const struct ib_mad_hdr *in, size_t in_mad_size, 64 const struct ib_mad *in_mad, struct ib_mad *out_mad)
63 struct ib_mad_hdr *out, size_t *out_mad_size,
64 u16 *out_mad_pkey_index)
65{ 65{
66 u16 slid; 66 u16 slid;
67 int err; 67 int err;
68 const struct ib_mad *in_mad = (const struct ib_mad *)in;
69 struct ib_mad *out_mad = (struct ib_mad *)out;
70
71 if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
72 *out_mad_size != sizeof(*out_mad)))
73 return IB_MAD_RESULT_FAILURE;
74 68
75 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); 69 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
76 70
@@ -117,6 +111,156 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
117 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; 111 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
118} 112}
119 113
114static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext,
115 void *out)
116{
117#define MLX5_SUM_CNT(p, cntr1, cntr2) \
118 (MLX5_GET64(query_vport_counter_out, p, cntr1) + \
119 MLX5_GET64(query_vport_counter_out, p, cntr2))
120
121 pma_cnt_ext->port_xmit_data =
122 cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.octets,
123 transmitted_ib_multicast.octets) >> 2);
124 pma_cnt_ext->port_xmit_data =
125 cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.octets,
126 received_ib_multicast.octets) >> 2);
127 pma_cnt_ext->port_xmit_packets =
128 cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.packets,
129 transmitted_ib_multicast.packets));
130 pma_cnt_ext->port_rcv_packets =
131 cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.packets,
132 received_ib_multicast.packets));
133 pma_cnt_ext->port_unicast_xmit_packets =
134 MLX5_GET64_BE(query_vport_counter_out,
135 out, transmitted_ib_unicast.packets);
136 pma_cnt_ext->port_unicast_rcv_packets =
137 MLX5_GET64_BE(query_vport_counter_out,
138 out, received_ib_unicast.packets);
139 pma_cnt_ext->port_multicast_xmit_packets =
140 MLX5_GET64_BE(query_vport_counter_out,
141 out, transmitted_ib_multicast.packets);
142 pma_cnt_ext->port_multicast_rcv_packets =
143 MLX5_GET64_BE(query_vport_counter_out,
144 out, received_ib_multicast.packets);
145}
146
147static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
148 void *out)
149{
150 /* Traffic counters will be reported in
151 * their 64bit form via ib_pma_portcounters_ext by default.
152 */
153 void *out_pma = MLX5_ADDR_OF(ppcnt_reg, out,
154 counter_set);
155
156#define MLX5_ASSIGN_PMA_CNTR(counter_var, counter_name) { \
157 counter_var = MLX5_GET_BE(typeof(counter_var), \
158 ib_port_cntrs_grp_data_layout, \
159 out_pma, counter_name); \
160 }
161
162 MLX5_ASSIGN_PMA_CNTR(pma_cnt->symbol_error_counter,
163 symbol_error_counter);
164 MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_error_recovery_counter,
165 link_error_recovery_counter);
166 MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_downed_counter,
167 link_downed_counter);
168 MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_errors,
169 port_rcv_errors);
170 MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_remphys_errors,
171 port_rcv_remote_physical_errors);
172 MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_switch_relay_errors,
173 port_rcv_switch_relay_errors);
174 MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_discards,
175 port_xmit_discards);
176 MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_constraint_errors,
177 port_xmit_constraint_errors);
178 MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_constraint_errors,
179 port_rcv_constraint_errors);
180 MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_overrun_errors,
181 link_overrun_errors);
182 MLX5_ASSIGN_PMA_CNTR(pma_cnt->vl15_dropped,
183 vl_15_dropped);
184}
185
186static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
187 const struct ib_mad *in_mad, struct ib_mad *out_mad)
188{
189 struct mlx5_ib_dev *dev = to_mdev(ibdev);
190 int err;
191 void *out_cnt;
192
193 /* Decalring support of extended counters */
194 if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) {
195 struct ib_class_port_info cpi = {};
196
197 cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
198 memcpy((out_mad->data + 40), &cpi, sizeof(cpi));
199 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
200 }
201
202 if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) {
203 struct ib_pma_portcounters_ext *pma_cnt_ext =
204 (struct ib_pma_portcounters_ext *)(out_mad->data + 40);
205 int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
206
207 out_cnt = mlx5_vzalloc(sz);
208 if (!out_cnt)
209 return IB_MAD_RESULT_FAILURE;
210
211 err = mlx5_core_query_vport_counter(dev->mdev, 0,
212 port_num, out_cnt, sz);
213 if (!err)
214 pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
215 } else {
216 struct ib_pma_portcounters *pma_cnt =
217 (struct ib_pma_portcounters *)(out_mad->data + 40);
218 int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
219
220 out_cnt = mlx5_vzalloc(sz);
221 if (!out_cnt)
222 return IB_MAD_RESULT_FAILURE;
223
224 err = mlx5_core_query_ib_ppcnt(dev->mdev, port_num,
225 out_cnt, sz);
226 if (!err)
227 pma_cnt_assign(pma_cnt, out_cnt);
228 }
229
230 kvfree(out_cnt);
231 if (err)
232 return IB_MAD_RESULT_FAILURE;
233
234 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
235}
236
237int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
238 const struct ib_wc *in_wc, const struct ib_grh *in_grh,
239 const struct ib_mad_hdr *in, size_t in_mad_size,
240 struct ib_mad_hdr *out, size_t *out_mad_size,
241 u16 *out_mad_pkey_index)
242{
243 struct mlx5_ib_dev *dev = to_mdev(ibdev);
244 struct mlx5_core_dev *mdev = dev->mdev;
245 const struct ib_mad *in_mad = (const struct ib_mad *)in;
246 struct ib_mad *out_mad = (struct ib_mad *)out;
247
248 if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
249 *out_mad_size != sizeof(*out_mad)))
250 return IB_MAD_RESULT_FAILURE;
251
252 memset(out_mad->data, 0, sizeof(out_mad->data));
253
254 if (MLX5_CAP_GEN(mdev, vport_counters) &&
255 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
256 in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) {
257 return process_pma_cmd(ibdev, port_num, in_mad, out_mad);
258 } else {
259 return process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
260 in_mad, out_mad);
261 }
262}
263
120int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) 264int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
121{ 265{
122 struct ib_smp *in_mad = NULL; 266 struct ib_smp *in_mad = NULL;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 03c418ccbc98..5afbb697e691 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -487,6 +487,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
487 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; 487 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
488 if (MLX5_CAP_GEN(mdev, xrc)) 488 if (MLX5_CAP_GEN(mdev, xrc))
489 props->device_cap_flags |= IB_DEVICE_XRC; 489 props->device_cap_flags |= IB_DEVICE_XRC;
490 if (MLX5_CAP_GEN(mdev, imaicl)) {
491 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW |
492 IB_DEVICE_MEM_WINDOW_TYPE_2B;
493 props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
494 /* We support 'Gappy' memory registration too */
495 props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
496 }
490 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; 497 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
491 if (MLX5_CAP_GEN(mdev, sho)) { 498 if (MLX5_CAP_GEN(mdev, sho)) {
492 props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; 499 props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
@@ -504,6 +511,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
504 (MLX5_CAP_ETH(dev->mdev, csum_cap))) 511 (MLX5_CAP_ETH(dev->mdev, csum_cap)))
505 props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; 512 props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
506 513
514 if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
515 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
516 props->device_cap_flags |= IB_DEVICE_UD_TSO;
517 }
518
507 props->vendor_part_id = mdev->pdev->device; 519 props->vendor_part_id = mdev->pdev->device;
508 props->hw_ver = mdev->pdev->revision; 520 props->hw_ver = mdev->pdev->revision;
509 521
@@ -529,7 +541,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
529 props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay); 541 props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
530 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; 542 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
531 props->max_srq_sge = max_rq_sg - 1; 543 props->max_srq_sge = max_rq_sg - 1;
532 props->max_fast_reg_page_list_len = (unsigned int)-1; 544 props->max_fast_reg_page_list_len =
545 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
533 get_atomic_caps(dev, props); 546 get_atomic_caps(dev, props);
534 props->masked_atomic_cap = IB_ATOMIC_NONE; 547 props->masked_atomic_cap = IB_ATOMIC_NONE;
535 props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); 548 props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
@@ -1369,11 +1382,20 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
1369 return 0; 1382 return 0;
1370} 1383}
1371 1384
/*
 * Map an IB flow priority to an mlx5 core flow priority.
 *
 * Each IB priority occupies two consecutive core priorities: 2*priority
 * for don't-trap rules, 2*priority + 1 for regular rules.
 */
static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
{
	return (int)(priority * 2 + (dont_trap ? 0 : 1));
}
1392
1372#define MLX5_FS_MAX_TYPES 10 1393#define MLX5_FS_MAX_TYPES 10
1373#define MLX5_FS_MAX_ENTRIES 32000UL 1394#define MLX5_FS_MAX_ENTRIES 32000UL
1374static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, 1395static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
1375 struct ib_flow_attr *flow_attr) 1396 struct ib_flow_attr *flow_attr)
1376{ 1397{
1398 bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
1377 struct mlx5_flow_namespace *ns = NULL; 1399 struct mlx5_flow_namespace *ns = NULL;
1378 struct mlx5_ib_flow_prio *prio; 1400 struct mlx5_ib_flow_prio *prio;
1379 struct mlx5_flow_table *ft; 1401 struct mlx5_flow_table *ft;
@@ -1383,10 +1405,12 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
1383 int err = 0; 1405 int err = 0;
1384 1406
1385 if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { 1407 if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
1386 if (flow_is_multicast_only(flow_attr)) 1408 if (flow_is_multicast_only(flow_attr) &&
1409 !dont_trap)
1387 priority = MLX5_IB_FLOW_MCAST_PRIO; 1410 priority = MLX5_IB_FLOW_MCAST_PRIO;
1388 else 1411 else
1389 priority = flow_attr->priority; 1412 priority = ib_prio_to_core_prio(flow_attr->priority,
1413 dont_trap);
1390 ns = mlx5_get_flow_namespace(dev->mdev, 1414 ns = mlx5_get_flow_namespace(dev->mdev,
1391 MLX5_FLOW_NAMESPACE_BYPASS); 1415 MLX5_FLOW_NAMESPACE_BYPASS);
1392 num_entries = MLX5_FS_MAX_ENTRIES; 1416 num_entries = MLX5_FS_MAX_ENTRIES;
@@ -1434,6 +1458,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
1434 unsigned int spec_index; 1458 unsigned int spec_index;
1435 u32 *match_c; 1459 u32 *match_c;
1436 u32 *match_v; 1460 u32 *match_v;
1461 u32 action;
1437 int err = 0; 1462 int err = 0;
1438 1463
1439 if (!is_valid_attr(flow_attr)) 1464 if (!is_valid_attr(flow_attr))
@@ -1459,9 +1484,11 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
1459 1484
1460 /* Outer header support only */ 1485 /* Outer header support only */
1461 match_criteria_enable = (!outer_header_zero(match_c)) << 0; 1486 match_criteria_enable = (!outer_header_zero(match_c)) << 0;
1487 action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
1488 MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
1462 handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable, 1489 handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable,
1463 match_c, match_v, 1490 match_c, match_v,
1464 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 1491 action,
1465 MLX5_FS_DEFAULT_FLOW_TAG, 1492 MLX5_FS_DEFAULT_FLOW_TAG,
1466 dst); 1493 dst);
1467 1494
@@ -1481,6 +1508,29 @@ free:
1481 return err ? ERR_PTR(err) : handler; 1508 return err ? ERR_PTR(err) : handler;
1482} 1509}
1483 1510
1511static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
1512 struct mlx5_ib_flow_prio *ft_prio,
1513 struct ib_flow_attr *flow_attr,
1514 struct mlx5_flow_destination *dst)
1515{
1516 struct mlx5_ib_flow_handler *handler_dst = NULL;
1517 struct mlx5_ib_flow_handler *handler = NULL;
1518
1519 handler = create_flow_rule(dev, ft_prio, flow_attr, NULL);
1520 if (!IS_ERR(handler)) {
1521 handler_dst = create_flow_rule(dev, ft_prio,
1522 flow_attr, dst);
1523 if (IS_ERR(handler_dst)) {
1524 mlx5_del_flow_rule(handler->rule);
1525 kfree(handler);
1526 handler = handler_dst;
1527 } else {
1528 list_add(&handler_dst->list, &handler->list);
1529 }
1530 }
1531
1532 return handler;
1533}
1484enum { 1534enum {
1485 LEFTOVERS_MC, 1535 LEFTOVERS_MC,
1486 LEFTOVERS_UC, 1536 LEFTOVERS_UC,
@@ -1558,7 +1608,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
1558 1608
1559 if (domain != IB_FLOW_DOMAIN_USER || 1609 if (domain != IB_FLOW_DOMAIN_USER ||
1560 flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) || 1610 flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
1561 flow_attr->flags) 1611 (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP))
1562 return ERR_PTR(-EINVAL); 1612 return ERR_PTR(-EINVAL);
1563 1613
1564 dst = kzalloc(sizeof(*dst), GFP_KERNEL); 1614 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
@@ -1577,8 +1627,13 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
1577 dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; 1627 dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
1578 1628
1579 if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { 1629 if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
1580 handler = create_flow_rule(dev, ft_prio, flow_attr, 1630 if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
1581 dst); 1631 handler = create_dont_trap_rule(dev, ft_prio,
1632 flow_attr, dst);
1633 } else {
1634 handler = create_flow_rule(dev, ft_prio, flow_attr,
1635 dst);
1636 }
1582 } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || 1637 } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
1583 flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { 1638 flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
1584 handler = create_leftovers_rule(dev, ft_prio, flow_attr, 1639 handler = create_leftovers_rule(dev, ft_prio, flow_attr,
@@ -1716,6 +1771,17 @@ static struct device_attribute *mlx5_class_attributes[] = {
1716 &dev_attr_reg_pages, 1771 &dev_attr_reg_pages,
1717}; 1772};
1718 1773
1774static void pkey_change_handler(struct work_struct *work)
1775{
1776 struct mlx5_ib_port_resources *ports =
1777 container_of(work, struct mlx5_ib_port_resources,
1778 pkey_change_work);
1779
1780 mutex_lock(&ports->devr->mutex);
1781 mlx5_ib_gsi_pkey_change(ports->gsi);
1782 mutex_unlock(&ports->devr->mutex);
1783}
1784
1719static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, 1785static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
1720 enum mlx5_dev_event event, unsigned long param) 1786 enum mlx5_dev_event event, unsigned long param)
1721{ 1787{
@@ -1752,6 +1818,8 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
1752 case MLX5_DEV_EVENT_PKEY_CHANGE: 1818 case MLX5_DEV_EVENT_PKEY_CHANGE:
1753 ibev.event = IB_EVENT_PKEY_CHANGE; 1819 ibev.event = IB_EVENT_PKEY_CHANGE;
1754 port = (u8)param; 1820 port = (u8)param;
1821
1822 schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
1755 break; 1823 break;
1756 1824
1757 case MLX5_DEV_EVENT_GUID_CHANGE: 1825 case MLX5_DEV_EVENT_GUID_CHANGE:
@@ -1838,7 +1906,7 @@ static void destroy_umrc_res(struct mlx5_ib_dev *dev)
1838 mlx5_ib_warn(dev, "mr cache cleanup failed\n"); 1906 mlx5_ib_warn(dev, "mr cache cleanup failed\n");
1839 1907
1840 mlx5_ib_destroy_qp(dev->umrc.qp); 1908 mlx5_ib_destroy_qp(dev->umrc.qp);
1841 ib_destroy_cq(dev->umrc.cq); 1909 ib_free_cq(dev->umrc.cq);
1842 ib_dealloc_pd(dev->umrc.pd); 1910 ib_dealloc_pd(dev->umrc.pd);
1843} 1911}
1844 1912
@@ -1853,7 +1921,6 @@ static int create_umr_res(struct mlx5_ib_dev *dev)
1853 struct ib_pd *pd; 1921 struct ib_pd *pd;
1854 struct ib_cq *cq; 1922 struct ib_cq *cq;
1855 struct ib_qp *qp; 1923 struct ib_qp *qp;
1856 struct ib_cq_init_attr cq_attr = {};
1857 int ret; 1924 int ret;
1858 1925
1859 attr = kzalloc(sizeof(*attr), GFP_KERNEL); 1926 attr = kzalloc(sizeof(*attr), GFP_KERNEL);
@@ -1870,15 +1937,12 @@ static int create_umr_res(struct mlx5_ib_dev *dev)
1870 goto error_0; 1937 goto error_0;
1871 } 1938 }
1872 1939
1873 cq_attr.cqe = 128; 1940 cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
1874 cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL,
1875 &cq_attr);
1876 if (IS_ERR(cq)) { 1941 if (IS_ERR(cq)) {
1877 mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); 1942 mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
1878 ret = PTR_ERR(cq); 1943 ret = PTR_ERR(cq);
1879 goto error_2; 1944 goto error_2;
1880 } 1945 }
1881 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1882 1946
1883 init_attr->send_cq = cq; 1947 init_attr->send_cq = cq;
1884 init_attr->recv_cq = cq; 1948 init_attr->recv_cq = cq;
@@ -1945,7 +2009,7 @@ error_4:
1945 mlx5_ib_destroy_qp(qp); 2009 mlx5_ib_destroy_qp(qp);
1946 2010
1947error_3: 2011error_3:
1948 ib_destroy_cq(cq); 2012 ib_free_cq(cq);
1949 2013
1950error_2: 2014error_2:
1951 ib_dealloc_pd(pd); 2015 ib_dealloc_pd(pd);
@@ -1961,10 +2025,13 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
1961 struct ib_srq_init_attr attr; 2025 struct ib_srq_init_attr attr;
1962 struct mlx5_ib_dev *dev; 2026 struct mlx5_ib_dev *dev;
1963 struct ib_cq_init_attr cq_attr = {.cqe = 1}; 2027 struct ib_cq_init_attr cq_attr = {.cqe = 1};
2028 int port;
1964 int ret = 0; 2029 int ret = 0;
1965 2030
1966 dev = container_of(devr, struct mlx5_ib_dev, devr); 2031 dev = container_of(devr, struct mlx5_ib_dev, devr);
1967 2032
2033 mutex_init(&devr->mutex);
2034
1968 devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); 2035 devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
1969 if (IS_ERR(devr->p0)) { 2036 if (IS_ERR(devr->p0)) {
1970 ret = PTR_ERR(devr->p0); 2037 ret = PTR_ERR(devr->p0);
@@ -2052,6 +2119,12 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
2052 atomic_inc(&devr->p0->usecnt); 2119 atomic_inc(&devr->p0->usecnt);
2053 atomic_set(&devr->s0->usecnt, 0); 2120 atomic_set(&devr->s0->usecnt, 0);
2054 2121
2122 for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
2123 INIT_WORK(&devr->ports[port].pkey_change_work,
2124 pkey_change_handler);
2125 devr->ports[port].devr = devr;
2126 }
2127
2055 return 0; 2128 return 0;
2056 2129
2057error5: 2130error5:
@@ -2070,12 +2143,20 @@ error0:
2070 2143
2071static void destroy_dev_resources(struct mlx5_ib_resources *devr) 2144static void destroy_dev_resources(struct mlx5_ib_resources *devr)
2072{ 2145{
2146 struct mlx5_ib_dev *dev =
2147 container_of(devr, struct mlx5_ib_dev, devr);
2148 int port;
2149
2073 mlx5_ib_destroy_srq(devr->s1); 2150 mlx5_ib_destroy_srq(devr->s1);
2074 mlx5_ib_destroy_srq(devr->s0); 2151 mlx5_ib_destroy_srq(devr->s0);
2075 mlx5_ib_dealloc_xrcd(devr->x0); 2152 mlx5_ib_dealloc_xrcd(devr->x0);
2076 mlx5_ib_dealloc_xrcd(devr->x1); 2153 mlx5_ib_dealloc_xrcd(devr->x1);
2077 mlx5_ib_destroy_cq(devr->c0); 2154 mlx5_ib_destroy_cq(devr->c0);
2078 mlx5_ib_dealloc_pd(devr->p0); 2155 mlx5_ib_dealloc_pd(devr->p0);
2156
2157 /* Make sure no change P_Key work items are still executing */
2158 for (port = 0; port < dev->num_ports; ++port)
2159 cancel_work_sync(&devr->ports[port].pkey_change_work);
2079} 2160}
2080 2161
2081static u32 get_core_cap_flags(struct ib_device *ibdev) 2162static u32 get_core_cap_flags(struct ib_device *ibdev)
@@ -2198,6 +2279,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2198 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 2279 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
2199 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 2280 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
2200 (1ull << IB_USER_VERBS_CMD_REG_MR) | 2281 (1ull << IB_USER_VERBS_CMD_REG_MR) |
2282 (1ull << IB_USER_VERBS_CMD_REREG_MR) |
2201 (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 2283 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
2202 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 2284 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2203 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 2285 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
@@ -2258,6 +2340,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2258 dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq; 2340 dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
2259 dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; 2341 dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
2260 dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; 2342 dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
2343 dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr;
2261 dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; 2344 dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
2262 dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; 2345 dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
2263 dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; 2346 dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
@@ -2269,6 +2352,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2269 2352
2270 mlx5_ib_internal_fill_odp_caps(dev); 2353 mlx5_ib_internal_fill_odp_caps(dev);
2271 2354
2355 if (MLX5_CAP_GEN(mdev, imaicl)) {
2356 dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw;
2357 dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw;
2358 dev->ib_dev.uverbs_cmd_mask |=
2359 (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
2360 (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
2361 }
2362
2272 if (MLX5_CAP_GEN(mdev, xrc)) { 2363 if (MLX5_CAP_GEN(mdev, xrc)) {
2273 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; 2364 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
2274 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; 2365 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d2b9737baa36..76b2b42e0535 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -43,6 +43,7 @@
43#include <linux/mlx5/srq.h> 43#include <linux/mlx5/srq.h>
44#include <linux/types.h> 44#include <linux/types.h>
45#include <linux/mlx5/transobj.h> 45#include <linux/mlx5/transobj.h>
46#include <rdma/ib_user_verbs.h>
46 47
47#define mlx5_ib_dbg(dev, format, arg...) \ 48#define mlx5_ib_dbg(dev, format, arg...) \
48pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ 49pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
@@ -126,7 +127,7 @@ struct mlx5_ib_pd {
126}; 127};
127 128
128#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) 129#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1)
129#define MLX5_IB_FLOW_LAST_PRIO (MLX5_IB_FLOW_MCAST_PRIO - 1) 130#define MLX5_IB_FLOW_LAST_PRIO (MLX5_BY_PASS_NUM_REGULAR_PRIOS - 1)
130#if (MLX5_IB_FLOW_LAST_PRIO <= 0) 131#if (MLX5_IB_FLOW_LAST_PRIO <= 0)
131#error "Invalid number of bypass priorities" 132#error "Invalid number of bypass priorities"
132#endif 133#endif
@@ -162,9 +163,31 @@ struct mlx5_ib_flow_db {
162#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START 163#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START
163#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1) 164#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1)
164#define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2) 165#define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2)
166
167#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 3)
168#define MLX5_IB_SEND_UMR_UPDATE_PD (IB_SEND_RESERVED_START << 4)
169#define MLX5_IB_SEND_UMR_UPDATE_ACCESS IB_SEND_RESERVED_END
170
165#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 171#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1
172/*
173 * IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI
174 * creates the actual hardware QP.
175 */
176#define MLX5_IB_QPT_HW_GSI IB_QPT_RESERVED2
166#define MLX5_IB_WR_UMR IB_WR_RESERVED1 177#define MLX5_IB_WR_UMR IB_WR_RESERVED1
167 178
179/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
180 *
181 * These flags are intended for internal use by the mlx5_ib driver, and they
182 * rely on the range reserved for that use in the ib_qp_create_flags enum.
183 */
184
185/* Create a UD QP whose source QP number is 1 */
186static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void)
187{
188 return IB_QP_CREATE_RESERVED_START;
189}
190
168struct wr_list { 191struct wr_list {
169 u16 opcode; 192 u16 opcode;
170 u16 next; 193 u16 next;
@@ -325,11 +348,14 @@ struct mlx5_ib_cq_buf {
325}; 348};
326 349
327enum mlx5_ib_qp_flags { 350enum mlx5_ib_qp_flags {
328 MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0, 351 MLX5_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
329 MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1, 352 MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
330 MLX5_IB_QP_CROSS_CHANNEL = 1 << 2, 353 MLX5_IB_QP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL,
331 MLX5_IB_QP_MANAGED_SEND = 1 << 3, 354 MLX5_IB_QP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND,
332 MLX5_IB_QP_MANAGED_RECV = 1 << 4, 355 MLX5_IB_QP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV,
356 MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5,
357 /* QP uses 1 as its source QP number */
358 MLX5_IB_QP_SQPN_QP1 = 1 << 6,
333}; 359};
334 360
335struct mlx5_umr_wr { 361struct mlx5_umr_wr {
@@ -373,6 +399,14 @@ struct mlx5_ib_cq {
373 struct ib_umem *resize_umem; 399 struct ib_umem *resize_umem;
374 int cqe_size; 400 int cqe_size;
375 u32 create_flags; 401 u32 create_flags;
402 struct list_head wc_list;
403 enum ib_cq_notify_flags notify_flags;
404 struct work_struct notify_work;
405};
406
407struct mlx5_ib_wc {
408 struct ib_wc wc;
409 struct list_head list;
376}; 410};
377 411
378struct mlx5_ib_srq { 412struct mlx5_ib_srq {
@@ -413,7 +447,8 @@ struct mlx5_ib_mr {
413 int ndescs; 447 int ndescs;
414 int max_descs; 448 int max_descs;
415 int desc_size; 449 int desc_size;
416 struct mlx5_core_mr mmr; 450 int access_mode;
451 struct mlx5_core_mkey mmkey;
417 struct ib_umem *umem; 452 struct ib_umem *umem;
418 struct mlx5_shared_mr_info *smr_info; 453 struct mlx5_shared_mr_info *smr_info;
419 struct list_head list; 454 struct list_head list;
@@ -425,19 +460,20 @@ struct mlx5_ib_mr {
425 struct mlx5_core_sig_ctx *sig; 460 struct mlx5_core_sig_ctx *sig;
426 int live; 461 int live;
427 void *descs_alloc; 462 void *descs_alloc;
463 int access_flags; /* Needed for rereg MR */
464};
465
466struct mlx5_ib_mw {
467 struct ib_mw ibmw;
468 struct mlx5_core_mkey mmkey;
428}; 469};
429 470
430struct mlx5_ib_umr_context { 471struct mlx5_ib_umr_context {
472 struct ib_cqe cqe;
431 enum ib_wc_status status; 473 enum ib_wc_status status;
432 struct completion done; 474 struct completion done;
433}; 475};
434 476
435static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
436{
437 context->status = -1;
438 init_completion(&context->done);
439}
440
441struct umr_common { 477struct umr_common {
442 struct ib_pd *pd; 478 struct ib_pd *pd;
443 struct ib_cq *cq; 479 struct ib_cq *cq;
@@ -487,6 +523,14 @@ struct mlx5_mr_cache {
487 unsigned long last_add; 523 unsigned long last_add;
488}; 524};
489 525
526struct mlx5_ib_gsi_qp;
527
528struct mlx5_ib_port_resources {
529 struct mlx5_ib_resources *devr;
530 struct mlx5_ib_gsi_qp *gsi;
531 struct work_struct pkey_change_work;
532};
533
490struct mlx5_ib_resources { 534struct mlx5_ib_resources {
491 struct ib_cq *c0; 535 struct ib_cq *c0;
492 struct ib_xrcd *x0; 536 struct ib_xrcd *x0;
@@ -494,6 +538,9 @@ struct mlx5_ib_resources {
494 struct ib_pd *p0; 538 struct ib_pd *p0;
495 struct ib_srq *s0; 539 struct ib_srq *s0;
496 struct ib_srq *s1; 540 struct ib_srq *s1;
541 struct mlx5_ib_port_resources ports[2];
542 /* Protects changes to the port resources */
543 struct mutex mutex;
497}; 544};
498 545
499struct mlx5_roce { 546struct mlx5_roce {
@@ -558,9 +605,9 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
558 return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp; 605 return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp;
559} 606}
560 607
561static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr) 608static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey)
562{ 609{
563 return container_of(mmr, struct mlx5_ib_mr, mmr); 610 return container_of(mmkey, struct mlx5_ib_mr, mmkey);
564} 611}
565 612
566static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) 613static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
@@ -588,6 +635,11 @@ static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
588 return container_of(ibmr, struct mlx5_ib_mr, ibmr); 635 return container_of(ibmr, struct mlx5_ib_mr, ibmr);
589} 636}
590 637
638static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw)
639{
640 return container_of(ibmw, struct mlx5_ib_mw, ibmw);
641}
642
591struct mlx5_ib_ah { 643struct mlx5_ib_ah {
592 struct ib_ah ibah; 644 struct ib_ah ibah;
593 struct mlx5_av av; 645 struct mlx5_av av;
@@ -648,8 +700,14 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
648struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 700struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
649 u64 virt_addr, int access_flags, 701 u64 virt_addr, int access_flags,
650 struct ib_udata *udata); 702 struct ib_udata *udata);
703struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
704 struct ib_udata *udata);
705int mlx5_ib_dealloc_mw(struct ib_mw *mw);
651int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, 706int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
652 int npages, int zap); 707 int npages, int zap);
708int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
709 u64 length, u64 virt_addr, int access_flags,
710 struct ib_pd *pd, struct ib_udata *udata);
653int mlx5_ib_dereg_mr(struct ib_mr *ibmr); 711int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
654struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, 712struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
655 enum ib_mr_type mr_type, 713 enum ib_mr_type mr_type,
@@ -700,7 +758,6 @@ int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
700int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); 758int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
701int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); 759int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
702int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); 760int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
703void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
704int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, 761int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
705 struct ib_mr_status *mr_status); 762 struct ib_mr_status *mr_status);
706 763
@@ -739,6 +796,23 @@ static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {}
739__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, 796__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
740 int index); 797 int index);
741 798
799/* GSI QP helper functions */
800struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
801 struct ib_qp_init_attr *init_attr);
802int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp);
803int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
804 int attr_mask);
805int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
806 int qp_attr_mask,
807 struct ib_qp_init_attr *qp_init_attr);
808int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
809 struct ib_send_wr **bad_wr);
810int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr,
811 struct ib_recv_wr **bad_wr);
812void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi);
813
814int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc);
815
742static inline void init_query_mad(struct ib_smp *mad) 816static inline void init_query_mad(struct ib_smp *mad)
743{ 817{
744 mad->base_version = 1; 818 mad->base_version = 1;
@@ -758,7 +832,7 @@ static inline u8 convert_access(int acc)
758 832
759static inline int is_qp1(enum ib_qp_type qp_type) 833static inline int is_qp1(enum ib_qp_type qp_type)
760{ 834{
761 return qp_type == IB_QPT_GSI; 835 return qp_type == MLX5_IB_QPT_HW_GSI;
762} 836}
763 837
764#define MLX5_MAX_UMR_SHIFT 16 838#define MLX5_MAX_UMR_SHIFT 16
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 6000f7aeede9..4d5bff151cdf 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -40,6 +40,7 @@
40#include <rdma/ib_umem_odp.h> 40#include <rdma/ib_umem_odp.h>
41#include <rdma/ib_verbs.h> 41#include <rdma/ib_verbs.h>
42#include "mlx5_ib.h" 42#include "mlx5_ib.h"
43#include "user.h"
43 44
44enum { 45enum {
45 MAX_PENDING_REG_MR = 8, 46 MAX_PENDING_REG_MR = 8,
@@ -57,7 +58,7 @@ static int clean_mr(struct mlx5_ib_mr *mr);
57 58
58static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 59static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
59{ 60{
60 int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr); 61 int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
61 62
62#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 63#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
63 /* Wait until all page fault handlers using the mr complete. */ 64 /* Wait until all page fault handlers using the mr complete. */
@@ -77,6 +78,40 @@ static int order2idx(struct mlx5_ib_dev *dev, int order)
77 return order - cache->ent[0].order; 78 return order - cache->ent[0].order;
78} 79}
79 80
81static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
82{
83 return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
84 length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
85}
86
87#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
88static void update_odp_mr(struct mlx5_ib_mr *mr)
89{
90 if (mr->umem->odp_data) {
91 /*
92 * This barrier prevents the compiler from moving the
93 * setting of umem->odp_data->private to point to our
94 * MR, before reg_umr finished, to ensure that the MR
95 * initialization have finished before starting to
96 * handle invalidations.
97 */
98 smp_wmb();
99 mr->umem->odp_data->private = mr;
100 /*
101 * Make sure we will see the new
102 * umem->odp_data->private value in the invalidation
103 * routines, before we can get page faults on the
104 * MR. Page faults can happen once we put the MR in
105 * the tree, below this line. Without the barrier,
106 * there can be a fault handling and an invalidation
107 * before umem->odp_data->private == mr is visible to
108 * the invalidation handler.
109 */
110 smp_wmb();
111 }
112}
113#endif
114
80static void reg_mr_callback(int status, void *context) 115static void reg_mr_callback(int status, void *context)
81{ 116{
82 struct mlx5_ib_mr *mr = context; 117 struct mlx5_ib_mr *mr = context;
@@ -86,7 +121,7 @@ static void reg_mr_callback(int status, void *context)
86 struct mlx5_cache_ent *ent = &cache->ent[c]; 121 struct mlx5_cache_ent *ent = &cache->ent[c];
87 u8 key; 122 u8 key;
88 unsigned long flags; 123 unsigned long flags;
89 struct mlx5_mr_table *table = &dev->mdev->priv.mr_table; 124 struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
90 int err; 125 int err;
91 126
92 spin_lock_irqsave(&ent->lock, flags); 127 spin_lock_irqsave(&ent->lock, flags);
@@ -113,7 +148,7 @@ static void reg_mr_callback(int status, void *context)
113 spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); 148 spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
114 key = dev->mdev->priv.mkey_key++; 149 key = dev->mdev->priv.mkey_key++;
115 spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); 150 spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
116 mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; 151 mr->mmkey.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
117 152
118 cache->last_add = jiffies; 153 cache->last_add = jiffies;
119 154
@@ -124,10 +159,10 @@ static void reg_mr_callback(int status, void *context)
124 spin_unlock_irqrestore(&ent->lock, flags); 159 spin_unlock_irqrestore(&ent->lock, flags);
125 160
126 write_lock_irqsave(&table->lock, flags); 161 write_lock_irqsave(&table->lock, flags);
127 err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key), 162 err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
128 &mr->mmr); 163 &mr->mmkey);
129 if (err) 164 if (err)
130 pr_err("Error inserting to mr tree. 0x%x\n", -err); 165 pr_err("Error inserting to mkey tree. 0x%x\n", -err);
131 write_unlock_irqrestore(&table->lock, flags); 166 write_unlock_irqrestore(&table->lock, flags);
132} 167}
133 168
@@ -168,7 +203,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
168 spin_lock_irq(&ent->lock); 203 spin_lock_irq(&ent->lock);
169 ent->pending++; 204 ent->pending++;
170 spin_unlock_irq(&ent->lock); 205 spin_unlock_irq(&ent->lock);
171 err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, 206 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in,
172 sizeof(*in), reg_mr_callback, 207 sizeof(*in), reg_mr_callback,
173 mr, &mr->out); 208 mr, &mr->out);
174 if (err) { 209 if (err) {
@@ -657,14 +692,14 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
657 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 692 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
658 seg->start_addr = 0; 693 seg->start_addr = 0;
659 694
660 err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL, 695 err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL,
661 NULL); 696 NULL);
662 if (err) 697 if (err)
663 goto err_in; 698 goto err_in;
664 699
665 kfree(in); 700 kfree(in);
666 mr->ibmr.lkey = mr->mmr.key; 701 mr->ibmr.lkey = mr->mmkey.key;
667 mr->ibmr.rkey = mr->mmr.key; 702 mr->ibmr.rkey = mr->mmkey.key;
668 mr->umem = NULL; 703 mr->umem = NULL;
669 704
670 return &mr->ibmr; 705 return &mr->ibmr;
@@ -693,10 +728,40 @@ static int use_umr(int order)
693 return order <= MLX5_MAX_UMR_SHIFT; 728 return order <= MLX5_MAX_UMR_SHIFT;
694} 729}
695 730
696static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, 731static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
697 struct ib_sge *sg, u64 dma, int n, u32 key, 732 int npages, int page_shift, int *size,
698 int page_shift, u64 virt_addr, u64 len, 733 __be64 **mr_pas, dma_addr_t *dma)
699 int access_flags) 734{
735 __be64 *pas;
736 struct device *ddev = dev->ib_dev.dma_device;
737
738 /*
739 * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
740 * To avoid copying garbage after the pas array, we allocate
741 * a little more.
742 */
743 *size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
744 *mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
745 if (!(*mr_pas))
746 return -ENOMEM;
747
748 pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN);
749 mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
750 /* Clear padding after the actual pages. */
751 memset(pas + npages, 0, *size - npages * sizeof(u64));
752
753 *dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE);
754 if (dma_mapping_error(ddev, *dma)) {
755 kfree(*mr_pas);
756 return -ENOMEM;
757 }
758
759 return 0;
760}
761
762static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr,
763 struct ib_sge *sg, u64 dma, int n, u32 key,
764 int page_shift)
700{ 765{
701 struct mlx5_ib_dev *dev = to_mdev(pd->device); 766 struct mlx5_ib_dev *dev = to_mdev(pd->device);
702 struct mlx5_umr_wr *umrwr = umr_wr(wr); 767 struct mlx5_umr_wr *umrwr = umr_wr(wr);
@@ -706,7 +771,6 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
706 sg->lkey = dev->umrc.pd->local_dma_lkey; 771 sg->lkey = dev->umrc.pd->local_dma_lkey;
707 772
708 wr->next = NULL; 773 wr->next = NULL;
709 wr->send_flags = 0;
710 wr->sg_list = sg; 774 wr->sg_list = sg;
711 if (n) 775 if (n)
712 wr->num_sge = 1; 776 wr->num_sge = 1;
@@ -718,6 +782,19 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
718 umrwr->npages = n; 782 umrwr->npages = n;
719 umrwr->page_shift = page_shift; 783 umrwr->page_shift = page_shift;
720 umrwr->mkey = key; 784 umrwr->mkey = key;
785}
786
787static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
788 struct ib_sge *sg, u64 dma, int n, u32 key,
789 int page_shift, u64 virt_addr, u64 len,
790 int access_flags)
791{
792 struct mlx5_umr_wr *umrwr = umr_wr(wr);
793
794 prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift);
795
796 wr->send_flags = 0;
797
721 umrwr->target.virt_addr = virt_addr; 798 umrwr->target.virt_addr = virt_addr;
722 umrwr->length = len; 799 umrwr->length = len;
723 umrwr->access_flags = access_flags; 800 umrwr->access_flags = access_flags;
@@ -734,26 +811,45 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
734 umrwr->mkey = key; 811 umrwr->mkey = key;
735} 812}
736 813
737void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) 814static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
815 int access_flags, int *npages,
816 int *page_shift, int *ncont, int *order)
738{ 817{
739 struct mlx5_ib_umr_context *context; 818 struct mlx5_ib_dev *dev = to_mdev(pd->device);
740 struct ib_wc wc; 819 struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length,
741 int err; 820 access_flags, 0);
742 821 if (IS_ERR(umem)) {
743 while (1) { 822 mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
744 err = ib_poll_cq(cq, 1, &wc); 823 return (void *)umem;
745 if (err < 0) { 824 }
746 pr_warn("poll cq error %d\n", err);
747 return;
748 }
749 if (err == 0)
750 break;
751 825
752 context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id; 826 mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order);
753 context->status = wc.status; 827 if (!*npages) {
754 complete(&context->done); 828 mlx5_ib_warn(dev, "avoid zero region\n");
829 ib_umem_release(umem);
830 return ERR_PTR(-EINVAL);
755 } 831 }
756 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 832
833 mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
834 *npages, *ncont, *order, *page_shift);
835
836 return umem;
837}
838
839static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
840{
841 struct mlx5_ib_umr_context *context =
842 container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
843
844 context->status = wc->status;
845 complete(&context->done);
846}
847
848static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
849{
850 context->cqe.done = mlx5_ib_umr_done;
851 context->status = -1;
852 init_completion(&context->done);
757} 853}
758 854
759static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, 855static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
@@ -764,13 +860,12 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
764 struct device *ddev = dev->ib_dev.dma_device; 860 struct device *ddev = dev->ib_dev.dma_device;
765 struct umr_common *umrc = &dev->umrc; 861 struct umr_common *umrc = &dev->umrc;
766 struct mlx5_ib_umr_context umr_context; 862 struct mlx5_ib_umr_context umr_context;
767 struct mlx5_umr_wr umrwr; 863 struct mlx5_umr_wr umrwr = {};
768 struct ib_send_wr *bad; 864 struct ib_send_wr *bad;
769 struct mlx5_ib_mr *mr; 865 struct mlx5_ib_mr *mr;
770 struct ib_sge sg; 866 struct ib_sge sg;
771 int size; 867 int size;
772 __be64 *mr_pas; 868 __be64 *mr_pas;
773 __be64 *pas;
774 dma_addr_t dma; 869 dma_addr_t dma;
775 int err = 0; 870 int err = 0;
776 int i; 871 int i;
@@ -790,33 +885,17 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
790 if (!mr) 885 if (!mr)
791 return ERR_PTR(-EAGAIN); 886 return ERR_PTR(-EAGAIN);
792 887
793 /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. 888 err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas,
794 * To avoid copying garbage after the pas array, we allocate 889 &dma);
795 * a little more. */ 890 if (err)
796 size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
797 mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
798 if (!mr_pas) {
799 err = -ENOMEM;
800 goto free_mr; 891 goto free_mr;
801 }
802 892
803 pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN); 893 mlx5_ib_init_umr_context(&umr_context);
804 mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
805 /* Clear padding after the actual pages. */
806 memset(pas + npages, 0, size - npages * sizeof(u64));
807
808 dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
809 if (dma_mapping_error(ddev, dma)) {
810 err = -ENOMEM;
811 goto free_pas;
812 }
813 894
814 memset(&umrwr, 0, sizeof(umrwr)); 895 umrwr.wr.wr_cqe = &umr_context.cqe;
815 umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; 896 prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
816 prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key,
817 page_shift, virt_addr, len, access_flags); 897 page_shift, virt_addr, len, access_flags);
818 898
819 mlx5_ib_init_umr_context(&umr_context);
820 down(&umrc->sem); 899 down(&umrc->sem);
821 err = ib_post_send(umrc->qp, &umrwr.wr, &bad); 900 err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
822 if (err) { 901 if (err) {
@@ -830,9 +909,9 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
830 } 909 }
831 } 910 }
832 911
833 mr->mmr.iova = virt_addr; 912 mr->mmkey.iova = virt_addr;
834 mr->mmr.size = len; 913 mr->mmkey.size = len;
835 mr->mmr.pd = to_mpd(pd)->pdn; 914 mr->mmkey.pd = to_mpd(pd)->pdn;
836 915
837 mr->live = 1; 916 mr->live = 1;
838 917
@@ -840,7 +919,6 @@ unmap_dma:
840 up(&umrc->sem); 919 up(&umrc->sem);
841 dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); 920 dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
842 921
843free_pas:
844 kfree(mr_pas); 922 kfree(mr_pas);
845 923
846free_mr: 924free_mr:
@@ -929,8 +1007,10 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
929 1007
930 dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); 1008 dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
931 1009
1010 mlx5_ib_init_umr_context(&umr_context);
1011
932 memset(&wr, 0, sizeof(wr)); 1012 memset(&wr, 0, sizeof(wr));
933 wr.wr.wr_id = (u64)(unsigned long)&umr_context; 1013 wr.wr.wr_cqe = &umr_context.cqe;
934 1014
935 sg.addr = dma; 1015 sg.addr = dma;
936 sg.length = ALIGN(npages * sizeof(u64), 1016 sg.length = ALIGN(npages * sizeof(u64),
@@ -944,10 +1024,9 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
944 wr.wr.opcode = MLX5_IB_WR_UMR; 1024 wr.wr.opcode = MLX5_IB_WR_UMR;
945 wr.npages = sg.length / sizeof(u64); 1025 wr.npages = sg.length / sizeof(u64);
946 wr.page_shift = PAGE_SHIFT; 1026 wr.page_shift = PAGE_SHIFT;
947 wr.mkey = mr->mmr.key; 1027 wr.mkey = mr->mmkey.key;
948 wr.target.offset = start_page_index; 1028 wr.target.offset = start_page_index;
949 1029
950 mlx5_ib_init_umr_context(&umr_context);
951 down(&umrc->sem); 1030 down(&umrc->sem);
952 err = ib_post_send(umrc->qp, &wr.wr, &bad); 1031 err = ib_post_send(umrc->qp, &wr.wr, &bad);
953 if (err) { 1032 if (err) {
@@ -974,10 +1053,14 @@ free_pas:
974} 1053}
975#endif 1054#endif
976 1055
977static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, 1056/*
978 u64 length, struct ib_umem *umem, 1057 * If ibmr is NULL it will be allocated by reg_create.
979 int npages, int page_shift, 1058 * Else, the given ibmr will be used.
980 int access_flags) 1059 */
1060static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
1061 u64 virt_addr, u64 length,
1062 struct ib_umem *umem, int npages,
1063 int page_shift, int access_flags)
981{ 1064{
982 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1065 struct mlx5_ib_dev *dev = to_mdev(pd->device);
983 struct mlx5_create_mkey_mbox_in *in; 1066 struct mlx5_create_mkey_mbox_in *in;
@@ -986,7 +1069,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
986 int err; 1069 int err;
987 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); 1070 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
988 1071
989 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1072 mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
990 if (!mr) 1073 if (!mr)
991 return ERR_PTR(-ENOMEM); 1074 return ERR_PTR(-ENOMEM);
992 1075
@@ -1013,7 +1096,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
1013 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 1096 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1014 in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1097 in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
1015 1 << page_shift)); 1098 1 << page_shift));
1016 err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL, 1099 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen, NULL,
1017 NULL, NULL); 1100 NULL, NULL);
1018 if (err) { 1101 if (err) {
1019 mlx5_ib_warn(dev, "create mkey failed\n"); 1102 mlx5_ib_warn(dev, "create mkey failed\n");
@@ -1024,7 +1107,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
1024 mr->live = 1; 1107 mr->live = 1;
1025 kvfree(in); 1108 kvfree(in);
1026 1109
1027 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key); 1110 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
1028 1111
1029 return mr; 1112 return mr;
1030 1113
@@ -1032,11 +1115,23 @@ err_2:
1032 kvfree(in); 1115 kvfree(in);
1033 1116
1034err_1: 1117err_1:
1035 kfree(mr); 1118 if (!ibmr)
1119 kfree(mr);
1036 1120
1037 return ERR_PTR(err); 1121 return ERR_PTR(err);
1038} 1122}
1039 1123
1124static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
1125 int npages, u64 length, int access_flags)
1126{
1127 mr->npages = npages;
1128 atomic_add(npages, &dev->mdev->priv.reg_pages);
1129 mr->ibmr.lkey = mr->mmkey.key;
1130 mr->ibmr.rkey = mr->mmkey.key;
1131 mr->ibmr.length = length;
1132 mr->access_flags = access_flags;
1133}
1134
1040struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 1135struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1041 u64 virt_addr, int access_flags, 1136 u64 virt_addr, int access_flags,
1042 struct ib_udata *udata) 1137 struct ib_udata *udata)
@@ -1052,22 +1147,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1052 1147
1053 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", 1148 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1054 start, virt_addr, length, access_flags); 1149 start, virt_addr, length, access_flags);
1055 umem = ib_umem_get(pd->uobject->context, start, length, access_flags, 1150 umem = mr_umem_get(pd, start, length, access_flags, &npages,
1056 0); 1151 &page_shift, &ncont, &order);
1057 if (IS_ERR(umem)) {
1058 mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
1059 return (void *)umem;
1060 }
1061 1152
1062 mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order); 1153 if (IS_ERR(umem))
1063 if (!npages) { 1154 return (void *)umem;
1064 mlx5_ib_warn(dev, "avoid zero region\n");
1065 err = -EINVAL;
1066 goto error;
1067 }
1068
1069 mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
1070 npages, ncont, order, page_shift);
1071 1155
1072 if (use_umr(order)) { 1156 if (use_umr(order)) {
1073 mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, 1157 mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
@@ -1083,45 +1167,21 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1083 } 1167 }
1084 1168
1085 if (!mr) 1169 if (!mr)
1086 mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift, 1170 mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
1087 access_flags); 1171 page_shift, access_flags);
1088 1172
1089 if (IS_ERR(mr)) { 1173 if (IS_ERR(mr)) {
1090 err = PTR_ERR(mr); 1174 err = PTR_ERR(mr);
1091 goto error; 1175 goto error;
1092 } 1176 }
1093 1177
1094 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key); 1178 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
1095 1179
1096 mr->umem = umem; 1180 mr->umem = umem;
1097 mr->npages = npages; 1181 set_mr_fileds(dev, mr, npages, length, access_flags);
1098 atomic_add(npages, &dev->mdev->priv.reg_pages);
1099 mr->ibmr.lkey = mr->mmr.key;
1100 mr->ibmr.rkey = mr->mmr.key;
1101 1182
1102#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 1183#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1103 if (umem->odp_data) { 1184 update_odp_mr(mr);
1104 /*
1105 * This barrier prevents the compiler from moving the
1106 * setting of umem->odp_data->private to point to our
1107 * MR, before reg_umr finished, to ensure that the MR
1108 * initialization have finished before starting to
1109 * handle invalidations.
1110 */
1111 smp_wmb();
1112 mr->umem->odp_data->private = mr;
1113 /*
1114 * Make sure we will see the new
1115 * umem->odp_data->private value in the invalidation
1116 * routines, before we can get page faults on the
1117 * MR. Page faults can happen once we put the MR in
1118 * the tree, below this line. Without the barrier,
1119 * there can be a fault handling and an invalidation
1120 * before umem->odp_data->private == mr is visible to
1121 * the invalidation handler.
1122 */
1123 smp_wmb();
1124 }
1125#endif 1185#endif
1126 1186
1127 return &mr->ibmr; 1187 return &mr->ibmr;
@@ -1135,15 +1195,15 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1135{ 1195{
1136 struct umr_common *umrc = &dev->umrc; 1196 struct umr_common *umrc = &dev->umrc;
1137 struct mlx5_ib_umr_context umr_context; 1197 struct mlx5_ib_umr_context umr_context;
1138 struct mlx5_umr_wr umrwr; 1198 struct mlx5_umr_wr umrwr = {};
1139 struct ib_send_wr *bad; 1199 struct ib_send_wr *bad;
1140 int err; 1200 int err;
1141 1201
1142 memset(&umrwr.wr, 0, sizeof(umrwr));
1143 umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
1144 prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key);
1145
1146 mlx5_ib_init_umr_context(&umr_context); 1202 mlx5_ib_init_umr_context(&umr_context);
1203
1204 umrwr.wr.wr_cqe = &umr_context.cqe;
1205 prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key);
1206
1147 down(&umrc->sem); 1207 down(&umrc->sem);
1148 err = ib_post_send(umrc->qp, &umrwr.wr, &bad); 1208 err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
1149 if (err) { 1209 if (err) {
@@ -1165,6 +1225,167 @@ error:
1165 return err; 1225 return err;
1166} 1226}
1167 1227
1228static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
1229 u64 length, int npages, int page_shift, int order,
1230 int access_flags, int flags)
1231{
1232 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1233 struct device *ddev = dev->ib_dev.dma_device;
1234 struct mlx5_ib_umr_context umr_context;
1235 struct ib_send_wr *bad;
1236 struct mlx5_umr_wr umrwr = {};
1237 struct ib_sge sg;
1238 struct umr_common *umrc = &dev->umrc;
1239 dma_addr_t dma = 0;
1240 __be64 *mr_pas = NULL;
1241 int size;
1242 int err;
1243
1244 mlx5_ib_init_umr_context(&umr_context);
1245
1246 umrwr.wr.wr_cqe = &umr_context.cqe;
1247 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
1248
1249 if (flags & IB_MR_REREG_TRANS) {
1250 err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size,
1251 &mr_pas, &dma);
1252 if (err)
1253 return err;
1254
1255 umrwr.target.virt_addr = virt_addr;
1256 umrwr.length = length;
1257 umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1258 }
1259
1260 prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
1261 page_shift);
1262
1263 if (flags & IB_MR_REREG_PD) {
1264 umrwr.pd = pd;
1265 umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD;
1266 }
1267
1268 if (flags & IB_MR_REREG_ACCESS) {
1269 umrwr.access_flags = access_flags;
1270 umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS;
1271 }
1272
1273 /* post send request to UMR QP */
1274 down(&umrc->sem);
1275 err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
1276
1277 if (err) {
1278 mlx5_ib_warn(dev, "post send failed, err %d\n", err);
1279 } else {
1280 wait_for_completion(&umr_context.done);
1281 if (umr_context.status != IB_WC_SUCCESS) {
1282 mlx5_ib_warn(dev, "reg umr failed (%u)\n",
1283 umr_context.status);
1284 err = -EFAULT;
1285 }
1286 }
1287
1288 up(&umrc->sem);
1289 if (flags & IB_MR_REREG_TRANS) {
1290 dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
1291 kfree(mr_pas);
1292 }
1293 return err;
1294}
1295
1296int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1297 u64 length, u64 virt_addr, int new_access_flags,
1298 struct ib_pd *new_pd, struct ib_udata *udata)
1299{
1300 struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
1301 struct mlx5_ib_mr *mr = to_mmr(ib_mr);
1302 struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
1303 int access_flags = flags & IB_MR_REREG_ACCESS ?
1304 new_access_flags :
1305 mr->access_flags;
1306 u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
1307 u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
1308 int page_shift = 0;
1309 int npages = 0;
1310 int ncont = 0;
1311 int order = 0;
1312 int err;
1313
1314 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1315 start, virt_addr, length, access_flags);
1316
1317 if (flags != IB_MR_REREG_PD) {
1318 /*
1319 * Replace umem. This needs to be done whether or not UMR is
1320 * used.
1321 */
1322 flags |= IB_MR_REREG_TRANS;
1323 ib_umem_release(mr->umem);
1324 mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages,
1325 &page_shift, &ncont, &order);
1326 if (IS_ERR(mr->umem)) {
1327 err = PTR_ERR(mr->umem);
1328 mr->umem = NULL;
1329 return err;
1330 }
1331 }
1332
1333 if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
1334 /*
1335 * UMR can't be used - MKey needs to be replaced.
1336 */
1337 if (mr->umred) {
1338 err = unreg_umr(dev, mr);
1339 if (err)
1340 mlx5_ib_warn(dev, "Failed to unregister MR\n");
1341 } else {
1342 err = destroy_mkey(dev, mr);
1343 if (err)
1344 mlx5_ib_warn(dev, "Failed to destroy MKey\n");
1345 }
1346 if (err)
1347 return err;
1348
1349 mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
1350 page_shift, access_flags);
1351
1352 if (IS_ERR(mr))
1353 return PTR_ERR(mr);
1354
1355 mr->umred = 0;
1356 } else {
1357 /*
1358 * Send a UMR WQE
1359 */
1360 err = rereg_umr(pd, mr, addr, len, npages, page_shift,
1361 order, access_flags, flags);
1362 if (err) {
1363 mlx5_ib_warn(dev, "Failed to rereg UMR\n");
1364 return err;
1365 }
1366 }
1367
1368 if (flags & IB_MR_REREG_PD) {
1369 ib_mr->pd = pd;
1370 mr->mmkey.pd = to_mpd(pd)->pdn;
1371 }
1372
1373 if (flags & IB_MR_REREG_ACCESS)
1374 mr->access_flags = access_flags;
1375
1376 if (flags & IB_MR_REREG_TRANS) {
1377 atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
1378 set_mr_fileds(dev, mr, npages, len, access_flags);
1379 mr->mmkey.iova = addr;
1380 mr->mmkey.size = len;
1381 }
1382#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1383 update_odp_mr(mr);
1384#endif
1385
1386 return 0;
1387}
1388
1168static int 1389static int
1169mlx5_alloc_priv_descs(struct ib_device *device, 1390mlx5_alloc_priv_descs(struct ib_device *device,
1170 struct mlx5_ib_mr *mr, 1391 struct mlx5_ib_mr *mr,
@@ -1236,7 +1457,7 @@ static int clean_mr(struct mlx5_ib_mr *mr)
1236 err = destroy_mkey(dev, mr); 1457 err = destroy_mkey(dev, mr);
1237 if (err) { 1458 if (err) {
1238 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", 1459 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
1239 mr->mmr.key, err); 1460 mr->mmkey.key, err);
1240 return err; 1461 return err;
1241 } 1462 }
1242 } else { 1463 } else {
@@ -1300,8 +1521,8 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
1300 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1521 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1301 struct mlx5_create_mkey_mbox_in *in; 1522 struct mlx5_create_mkey_mbox_in *in;
1302 struct mlx5_ib_mr *mr; 1523 struct mlx5_ib_mr *mr;
1303 int access_mode, err; 1524 int ndescs = ALIGN(max_num_sg, 4);
1304 int ndescs = roundup(max_num_sg, 4); 1525 int err;
1305 1526
1306 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1527 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1307 if (!mr) 1528 if (!mr)
@@ -1319,7 +1540,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
1319 in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); 1540 in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
1320 1541
1321 if (mr_type == IB_MR_TYPE_MEM_REG) { 1542 if (mr_type == IB_MR_TYPE_MEM_REG) {
1322 access_mode = MLX5_ACCESS_MODE_MTT; 1543 mr->access_mode = MLX5_ACCESS_MODE_MTT;
1323 in->seg.log2_page_size = PAGE_SHIFT; 1544 in->seg.log2_page_size = PAGE_SHIFT;
1324 1545
1325 err = mlx5_alloc_priv_descs(pd->device, mr, 1546 err = mlx5_alloc_priv_descs(pd->device, mr,
@@ -1329,6 +1550,15 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
1329 1550
1330 mr->desc_size = sizeof(u64); 1551 mr->desc_size = sizeof(u64);
1331 mr->max_descs = ndescs; 1552 mr->max_descs = ndescs;
1553 } else if (mr_type == IB_MR_TYPE_SG_GAPS) {
1554 mr->access_mode = MLX5_ACCESS_MODE_KLM;
1555
1556 err = mlx5_alloc_priv_descs(pd->device, mr,
1557 ndescs, sizeof(struct mlx5_klm));
1558 if (err)
1559 goto err_free_in;
1560 mr->desc_size = sizeof(struct mlx5_klm);
1561 mr->max_descs = ndescs;
1332 } else if (mr_type == IB_MR_TYPE_SIGNATURE) { 1562 } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
1333 u32 psv_index[2]; 1563 u32 psv_index[2];
1334 1564
@@ -1347,7 +1577,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
1347 if (err) 1577 if (err)
1348 goto err_free_sig; 1578 goto err_free_sig;
1349 1579
1350 access_mode = MLX5_ACCESS_MODE_KLM; 1580 mr->access_mode = MLX5_ACCESS_MODE_KLM;
1351 mr->sig->psv_memory.psv_idx = psv_index[0]; 1581 mr->sig->psv_memory.psv_idx = psv_index[0];
1352 mr->sig->psv_wire.psv_idx = psv_index[1]; 1582 mr->sig->psv_wire.psv_idx = psv_index[1];
1353 1583
@@ -1361,14 +1591,14 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
1361 goto err_free_in; 1591 goto err_free_in;
1362 } 1592 }
1363 1593
1364 in->seg.flags = MLX5_PERM_UMR_EN | access_mode; 1594 in->seg.flags = MLX5_PERM_UMR_EN | mr->access_mode;
1365 err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), 1595 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in),
1366 NULL, NULL, NULL); 1596 NULL, NULL, NULL);
1367 if (err) 1597 if (err)
1368 goto err_destroy_psv; 1598 goto err_destroy_psv;
1369 1599
1370 mr->ibmr.lkey = mr->mmr.key; 1600 mr->ibmr.lkey = mr->mmkey.key;
1371 mr->ibmr.rkey = mr->mmr.key; 1601 mr->ibmr.rkey = mr->mmkey.key;
1372 mr->umem = NULL; 1602 mr->umem = NULL;
1373 kfree(in); 1603 kfree(in);
1374 1604
@@ -1395,6 +1625,88 @@ err_free:
1395 return ERR_PTR(err); 1625 return ERR_PTR(err);
1396} 1626}
1397 1627
1628struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
1629 struct ib_udata *udata)
1630{
1631 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1632 struct mlx5_create_mkey_mbox_in *in = NULL;
1633 struct mlx5_ib_mw *mw = NULL;
1634 int ndescs;
1635 int err;
1636 struct mlx5_ib_alloc_mw req = {};
1637 struct {
1638 __u32 comp_mask;
1639 __u32 response_length;
1640 } resp = {};
1641
1642 err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
1643 if (err)
1644 return ERR_PTR(err);
1645
1646 if (req.comp_mask || req.reserved1 || req.reserved2)
1647 return ERR_PTR(-EOPNOTSUPP);
1648
1649 if (udata->inlen > sizeof(req) &&
1650 !ib_is_udata_cleared(udata, sizeof(req),
1651 udata->inlen - sizeof(req)))
1652 return ERR_PTR(-EOPNOTSUPP);
1653
1654 ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
1655
1656 mw = kzalloc(sizeof(*mw), GFP_KERNEL);
1657 in = kzalloc(sizeof(*in), GFP_KERNEL);
1658 if (!mw || !in) {
1659 err = -ENOMEM;
1660 goto free;
1661 }
1662
1663 in->seg.status = MLX5_MKEY_STATUS_FREE;
1664 in->seg.xlt_oct_size = cpu_to_be32(ndescs);
1665 in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
1666 in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_KLM |
1667 MLX5_PERM_LOCAL_READ;
1668 if (type == IB_MW_TYPE_2)
1669 in->seg.flags_pd |= cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
1670 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1671
1672 err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, sizeof(*in),
1673 NULL, NULL, NULL);
1674 if (err)
1675 goto free;
1676
1677 mw->ibmw.rkey = mw->mmkey.key;
1678
1679 resp.response_length = min(offsetof(typeof(resp), response_length) +
1680 sizeof(resp.response_length), udata->outlen);
1681 if (resp.response_length) {
1682 err = ib_copy_to_udata(udata, &resp, resp.response_length);
1683 if (err) {
1684 mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
1685 goto free;
1686 }
1687 }
1688
1689 kfree(in);
1690 return &mw->ibmw;
1691
1692free:
1693 kfree(mw);
1694 kfree(in);
1695 return ERR_PTR(err);
1696}
1697
1698int mlx5_ib_dealloc_mw(struct ib_mw *mw)
1699{
1700 struct mlx5_ib_mw *mmw = to_mmw(mw);
1701 int err;
1702
1703 err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
1704 &mmw->mmkey);
1705 if (!err)
1706 kfree(mmw);
1707 return err;
1708}
1709
1398int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, 1710int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1399 struct ib_mr_status *mr_status) 1711 struct ib_mr_status *mr_status)
1400{ 1712{
@@ -1436,6 +1748,32 @@ done:
1436 return ret; 1748 return ret;
1437} 1749}
1438 1750
1751static int
1752mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
1753 struct scatterlist *sgl,
1754 unsigned short sg_nents)
1755{
1756 struct scatterlist *sg = sgl;
1757 struct mlx5_klm *klms = mr->descs;
1758 u32 lkey = mr->ibmr.pd->local_dma_lkey;
1759 int i;
1760
1761 mr->ibmr.iova = sg_dma_address(sg);
1762 mr->ibmr.length = 0;
1763 mr->ndescs = sg_nents;
1764
1765 for_each_sg(sgl, sg, sg_nents, i) {
1766 if (unlikely(i > mr->max_descs))
1767 break;
1768 klms[i].va = cpu_to_be64(sg_dma_address(sg));
1769 klms[i].bcount = cpu_to_be32(sg_dma_len(sg));
1770 klms[i].key = cpu_to_be32(lkey);
1771 mr->ibmr.length += sg_dma_len(sg);
1772 }
1773
1774 return i;
1775}
1776
1439static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) 1777static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
1440{ 1778{
1441 struct mlx5_ib_mr *mr = to_mmr(ibmr); 1779 struct mlx5_ib_mr *mr = to_mmr(ibmr);
@@ -1463,7 +1801,10 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
1463 mr->desc_size * mr->max_descs, 1801 mr->desc_size * mr->max_descs,
1464 DMA_TO_DEVICE); 1802 DMA_TO_DEVICE);
1465 1803
1466 n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page); 1804 if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
1805 n = mlx5_ib_sg_to_klms(mr, sg, sg_nents);
1806 else
1807 n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page);
1467 1808
1468 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, 1809 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
1469 mr->desc_size * mr->max_descs, 1810 mr->desc_size * mr->max_descs,
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index b8d76361a48d..34e79e709c67 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -142,13 +142,13 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
142 u32 key) 142 u32 key)
143{ 143{
144 u32 base_key = mlx5_base_mkey(key); 144 u32 base_key = mlx5_base_mkey(key);
145 struct mlx5_core_mr *mmr = __mlx5_mr_lookup(dev->mdev, base_key); 145 struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key);
146 struct mlx5_ib_mr *mr = container_of(mmr, struct mlx5_ib_mr, mmr); 146 struct mlx5_ib_mr *mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
147 147
148 if (!mmr || mmr->key != key || !mr->live) 148 if (!mmkey || mmkey->key != key || !mr->live)
149 return NULL; 149 return NULL;
150 150
151 return container_of(mmr, struct mlx5_ib_mr, mmr); 151 return container_of(mmkey, struct mlx5_ib_mr, mmkey);
152} 152}
153 153
154static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp, 154static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
@@ -232,7 +232,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
232 io_virt += pfault->mpfault.bytes_committed; 232 io_virt += pfault->mpfault.bytes_committed;
233 bcnt -= pfault->mpfault.bytes_committed; 233 bcnt -= pfault->mpfault.bytes_committed;
234 234
235 start_idx = (io_virt - (mr->mmr.iova & PAGE_MASK)) >> PAGE_SHIFT; 235 start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT;
236 236
237 if (mr->umem->writable) 237 if (mr->umem->writable)
238 access_mask |= ODP_WRITE_ALLOWED_BIT; 238 access_mask |= ODP_WRITE_ALLOWED_BIT;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 34cb8e87c7b8..8dee8bc1e0fe 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -58,6 +58,7 @@ enum {
58 58
59static const u32 mlx5_ib_opcode[] = { 59static const u32 mlx5_ib_opcode[] = {
60 [IB_WR_SEND] = MLX5_OPCODE_SEND, 60 [IB_WR_SEND] = MLX5_OPCODE_SEND,
61 [IB_WR_LSO] = MLX5_OPCODE_LSO,
61 [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM, 62 [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM,
62 [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE, 63 [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE,
63 [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM, 64 [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM,
@@ -72,6 +73,9 @@ static const u32 mlx5_ib_opcode[] = {
72 [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, 73 [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
73}; 74};
74 75
76struct mlx5_wqe_eth_pad {
77 u8 rsvd0[16];
78};
75 79
76static int is_qp0(enum ib_qp_type qp_type) 80static int is_qp0(enum ib_qp_type qp_type)
77{ 81{
@@ -260,11 +264,11 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
260 return 0; 264 return 0;
261} 265}
262 266
263static int sq_overhead(enum ib_qp_type qp_type) 267static int sq_overhead(struct ib_qp_init_attr *attr)
264{ 268{
265 int size = 0; 269 int size = 0;
266 270
267 switch (qp_type) { 271 switch (attr->qp_type) {
268 case IB_QPT_XRC_INI: 272 case IB_QPT_XRC_INI:
269 size += sizeof(struct mlx5_wqe_xrc_seg); 273 size += sizeof(struct mlx5_wqe_xrc_seg);
270 /* fall through */ 274 /* fall through */
@@ -287,8 +291,12 @@ static int sq_overhead(enum ib_qp_type qp_type)
287 break; 291 break;
288 292
289 case IB_QPT_UD: 293 case IB_QPT_UD:
294 if (attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
295 size += sizeof(struct mlx5_wqe_eth_pad) +
296 sizeof(struct mlx5_wqe_eth_seg);
297 /* fall through */
290 case IB_QPT_SMI: 298 case IB_QPT_SMI:
291 case IB_QPT_GSI: 299 case MLX5_IB_QPT_HW_GSI:
292 size += sizeof(struct mlx5_wqe_ctrl_seg) + 300 size += sizeof(struct mlx5_wqe_ctrl_seg) +
293 sizeof(struct mlx5_wqe_datagram_seg); 301 sizeof(struct mlx5_wqe_datagram_seg);
294 break; 302 break;
@@ -311,7 +319,7 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)
311 int inl_size = 0; 319 int inl_size = 0;
312 int size; 320 int size;
313 321
314 size = sq_overhead(attr->qp_type); 322 size = sq_overhead(attr);
315 if (size < 0) 323 if (size < 0)
316 return size; 324 return size;
317 325
@@ -348,8 +356,8 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
348 return -EINVAL; 356 return -EINVAL;
349 } 357 }
350 358
351 qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) - 359 qp->max_inline_data = wqe_size - sq_overhead(attr) -
352 sizeof(struct mlx5_wqe_inline_seg); 360 sizeof(struct mlx5_wqe_inline_seg);
353 attr->cap.max_inline_data = qp->max_inline_data; 361 attr->cap.max_inline_data = qp->max_inline_data;
354 362
355 if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) 363 if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
@@ -590,7 +598,7 @@ static int to_mlx5_st(enum ib_qp_type type)
590 case IB_QPT_XRC_INI: 598 case IB_QPT_XRC_INI:
591 case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC; 599 case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC;
592 case IB_QPT_SMI: return MLX5_QP_ST_QP0; 600 case IB_QPT_SMI: return MLX5_QP_ST_QP0;
593 case IB_QPT_GSI: return MLX5_QP_ST_QP1; 601 case MLX5_IB_QPT_HW_GSI: return MLX5_QP_ST_QP1;
594 case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6; 602 case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
595 case IB_QPT_RAW_PACKET: 603 case IB_QPT_RAW_PACKET:
596 case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE; 604 case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
@@ -783,7 +791,10 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
783 int err; 791 int err;
784 792
785 uuari = &dev->mdev->priv.uuari; 793 uuari = &dev->mdev->priv.uuari;
786 if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) 794 if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN |
795 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
796 IB_QP_CREATE_IPOIB_UD_LSO |
797 mlx5_ib_create_qp_sqpn_qp1()))
787 return -EINVAL; 798 return -EINVAL;
788 799
789 if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) 800 if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
@@ -828,6 +839,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
828 (*in)->ctx.params1 |= cpu_to_be32(1 << 11); 839 (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
829 (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4); 840 (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
830 841
842 if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) {
843 (*in)->ctx.deth_sqpn = cpu_to_be32(1);
844 qp->flags |= MLX5_IB_QP_SQPN_QP1;
845 }
846
831 mlx5_fill_page_array(&qp->buf, (*in)->pas); 847 mlx5_fill_page_array(&qp->buf, (*in)->pas);
832 848
833 err = mlx5_db_alloc(dev->mdev, &qp->db); 849 err = mlx5_db_alloc(dev->mdev, &qp->db);
@@ -1228,6 +1244,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1228 if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV) 1244 if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
1229 qp->flags |= MLX5_IB_QP_MANAGED_RECV; 1245 qp->flags |= MLX5_IB_QP_MANAGED_RECV;
1230 } 1246 }
1247
1248 if (init_attr->qp_type == IB_QPT_UD &&
1249 (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO))
1250 if (!MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
1251 mlx5_ib_dbg(dev, "ipoib UD lso qp isn't supported\n");
1252 return -EOPNOTSUPP;
1253 }
1254
1231 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) 1255 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
1232 qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; 1256 qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
1233 1257
@@ -1271,6 +1295,11 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1271 ucmd.sq_wqe_count, max_wqes); 1295 ucmd.sq_wqe_count, max_wqes);
1272 return -EINVAL; 1296 return -EINVAL;
1273 } 1297 }
1298 if (init_attr->create_flags &
1299 mlx5_ib_create_qp_sqpn_qp1()) {
1300 mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n");
1301 return -EINVAL;
1302 }
1274 err = create_user_qp(dev, pd, qp, udata, init_attr, &in, 1303 err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
1275 &resp, &inlen, base); 1304 &resp, &inlen, base);
1276 if (err) 1305 if (err)
@@ -1385,6 +1414,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1385 /* 0xffffff means we ask to work with cqe version 0 */ 1414 /* 0xffffff means we ask to work with cqe version 0 */
1386 MLX5_SET(qpc, qpc, user_index, uidx); 1415 MLX5_SET(qpc, qpc, user_index, uidx);
1387 } 1416 }
1417 /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */
1418 if (init_attr->qp_type == IB_QPT_UD &&
1419 (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) {
1420 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
1421 MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
1422 qp->flags |= MLX5_IB_QP_LSO;
1423 }
1388 1424
1389 if (init_attr->qp_type == IB_QPT_RAW_PACKET) { 1425 if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
1390 qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; 1426 qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
@@ -1494,7 +1530,7 @@ static void get_cqs(struct mlx5_ib_qp *qp,
1494 break; 1530 break;
1495 1531
1496 case IB_QPT_SMI: 1532 case IB_QPT_SMI:
1497 case IB_QPT_GSI: 1533 case MLX5_IB_QPT_HW_GSI:
1498 case IB_QPT_RC: 1534 case IB_QPT_RC:
1499 case IB_QPT_UC: 1535 case IB_QPT_UC:
1500 case IB_QPT_UD: 1536 case IB_QPT_UD:
@@ -1657,7 +1693,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
1657 case IB_QPT_UC: 1693 case IB_QPT_UC:
1658 case IB_QPT_UD: 1694 case IB_QPT_UD:
1659 case IB_QPT_SMI: 1695 case IB_QPT_SMI:
1660 case IB_QPT_GSI: 1696 case MLX5_IB_QPT_HW_GSI:
1661 case MLX5_IB_QPT_REG_UMR: 1697 case MLX5_IB_QPT_REG_UMR:
1662 qp = kzalloc(sizeof(*qp), GFP_KERNEL); 1698 qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1663 if (!qp) 1699 if (!qp)
@@ -1686,6 +1722,9 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
1686 1722
1687 break; 1723 break;
1688 1724
1725 case IB_QPT_GSI:
1726 return mlx5_ib_gsi_create_qp(pd, init_attr);
1727
1689 case IB_QPT_RAW_IPV6: 1728 case IB_QPT_RAW_IPV6:
1690 case IB_QPT_RAW_ETHERTYPE: 1729 case IB_QPT_RAW_ETHERTYPE:
1691 case IB_QPT_MAX: 1730 case IB_QPT_MAX:
@@ -1704,6 +1743,9 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp)
1704 struct mlx5_ib_dev *dev = to_mdev(qp->device); 1743 struct mlx5_ib_dev *dev = to_mdev(qp->device);
1705 struct mlx5_ib_qp *mqp = to_mqp(qp); 1744 struct mlx5_ib_qp *mqp = to_mqp(qp);
1706 1745
1746 if (unlikely(qp->qp_type == IB_QPT_GSI))
1747 return mlx5_ib_gsi_destroy_qp(qp);
1748
1707 destroy_qp_common(dev, mqp); 1749 destroy_qp_common(dev, mqp);
1708 1750
1709 kfree(mqp); 1751 kfree(mqp);
@@ -2161,8 +2203,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
2161 2203
2162 context = &in->ctx; 2204 context = &in->ctx;
2163 err = to_mlx5_st(ibqp->qp_type); 2205 err = to_mlx5_st(ibqp->qp_type);
2164 if (err < 0) 2206 if (err < 0) {
2207 mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type);
2165 goto out; 2208 goto out;
2209 }
2166 2210
2167 context->flags = cpu_to_be32(err << 16); 2211 context->flags = cpu_to_be32(err << 16);
2168 2212
@@ -2182,7 +2226,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
2182 } 2226 }
2183 } 2227 }
2184 2228
2185 if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { 2229 if (is_sqp(ibqp->qp_type)) {
2186 context->mtu_msgmax = (IB_MTU_256 << 5) | 8; 2230 context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
2187 } else if (ibqp->qp_type == IB_QPT_UD || 2231 } else if (ibqp->qp_type == IB_QPT_UD ||
2188 ibqp->qp_type == MLX5_IB_QPT_REG_UMR) { 2232 ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
@@ -2284,6 +2328,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
2284 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) 2328 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
2285 context->sq_crq_size |= cpu_to_be16(1 << 4); 2329 context->sq_crq_size |= cpu_to_be16(1 << 4);
2286 2330
2331 if (qp->flags & MLX5_IB_QP_SQPN_QP1)
2332 context->deth_sqpn = cpu_to_be32(1);
2287 2333
2288 mlx5_cur = to_mlx5_state(cur_state); 2334 mlx5_cur = to_mlx5_state(cur_state);
2289 mlx5_new = to_mlx5_state(new_state); 2335 mlx5_new = to_mlx5_state(new_state);
@@ -2363,11 +2409,18 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2363{ 2409{
2364 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 2410 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
2365 struct mlx5_ib_qp *qp = to_mqp(ibqp); 2411 struct mlx5_ib_qp *qp = to_mqp(ibqp);
2412 enum ib_qp_type qp_type;
2366 enum ib_qp_state cur_state, new_state; 2413 enum ib_qp_state cur_state, new_state;
2367 int err = -EINVAL; 2414 int err = -EINVAL;
2368 int port; 2415 int port;
2369 enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; 2416 enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
2370 2417
2418 if (unlikely(ibqp->qp_type == IB_QPT_GSI))
2419 return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
2420
2421 qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ?
2422 IB_QPT_GSI : ibqp->qp_type;
2423
2371 mutex_lock(&qp->mutex); 2424 mutex_lock(&qp->mutex);
2372 2425
2373 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; 2426 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
@@ -2378,32 +2431,46 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2378 ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port); 2431 ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
2379 } 2432 }
2380 2433
2381 if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR && 2434 if (qp_type != MLX5_IB_QPT_REG_UMR &&
2382 !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, 2435 !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) {
2383 ll)) 2436 mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
2437 cur_state, new_state, ibqp->qp_type, attr_mask);
2384 goto out; 2438 goto out;
2439 }
2385 2440
2386 if ((attr_mask & IB_QP_PORT) && 2441 if ((attr_mask & IB_QP_PORT) &&
2387 (attr->port_num == 0 || 2442 (attr->port_num == 0 ||
2388 attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) 2443 attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) {
2444 mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
2445 attr->port_num, dev->num_ports);
2389 goto out; 2446 goto out;
2447 }
2390 2448
2391 if (attr_mask & IB_QP_PKEY_INDEX) { 2449 if (attr_mask & IB_QP_PKEY_INDEX) {
2392 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; 2450 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
2393 if (attr->pkey_index >= 2451 if (attr->pkey_index >=
2394 dev->mdev->port_caps[port - 1].pkey_table_len) 2452 dev->mdev->port_caps[port - 1].pkey_table_len) {
2453 mlx5_ib_dbg(dev, "invalid pkey index %d\n",
2454 attr->pkey_index);
2395 goto out; 2455 goto out;
2456 }
2396 } 2457 }
2397 2458
2398 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && 2459 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
2399 attr->max_rd_atomic > 2460 attr->max_rd_atomic >
2400 (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) 2461 (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) {
2462 mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
2463 attr->max_rd_atomic);
2401 goto out; 2464 goto out;
2465 }
2402 2466
2403 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && 2467 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
2404 attr->max_dest_rd_atomic > 2468 attr->max_dest_rd_atomic >
2405 (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) 2469 (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) {
2470 mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
2471 attr->max_dest_rd_atomic);
2406 goto out; 2472 goto out;
2473 }
2407 2474
2408 if (cur_state == new_state && cur_state == IB_QPS_RESET) { 2475 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
2409 err = 0; 2476 err = 0;
@@ -2442,6 +2509,59 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
2442 rseg->reserved = 0; 2509 rseg->reserved = 0;
2443} 2510}
2444 2511
2512static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
2513 struct ib_send_wr *wr, void *qend,
2514 struct mlx5_ib_qp *qp, int *size)
2515{
2516 void *seg = eseg;
2517
2518 memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
2519
2520 if (wr->send_flags & IB_SEND_IP_CSUM)
2521 eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
2522 MLX5_ETH_WQE_L4_CSUM;
2523
2524 seg += sizeof(struct mlx5_wqe_eth_seg);
2525 *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
2526
2527 if (wr->opcode == IB_WR_LSO) {
2528 struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
2529 int size_of_inl_hdr_start = sizeof(eseg->inline_hdr_start);
2530 u64 left, leftlen, copysz;
2531 void *pdata = ud_wr->header;
2532
2533 left = ud_wr->hlen;
2534 eseg->mss = cpu_to_be16(ud_wr->mss);
2535 eseg->inline_hdr_sz = cpu_to_be16(left);
2536
2537 /*
2538 * check if there is space till the end of queue, if yes,
2539 * copy all in one shot, otherwise copy till the end of queue,
2540 * rollback and than the copy the left
2541 */
2542 leftlen = qend - (void *)eseg->inline_hdr_start;
2543 copysz = min_t(u64, leftlen, left);
2544
2545 memcpy(seg - size_of_inl_hdr_start, pdata, copysz);
2546
2547 if (likely(copysz > size_of_inl_hdr_start)) {
2548 seg += ALIGN(copysz - size_of_inl_hdr_start, 16);
2549 *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16;
2550 }
2551
2552 if (unlikely(copysz < left)) { /* the last wqe in the queue */
2553 seg = mlx5_get_send_wqe(qp, 0);
2554 left -= copysz;
2555 pdata += copysz;
2556 memcpy(seg, pdata, left);
2557 seg += ALIGN(left, 16);
2558 *size += ALIGN(left, 16) / 16;
2559 }
2560 }
2561
2562 return seg;
2563}
2564
2445static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, 2565static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
2446 struct ib_send_wr *wr) 2566 struct ib_send_wr *wr)
2447{ 2567{
@@ -2509,6 +2629,11 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
2509 int ndescs = mr->ndescs; 2629 int ndescs = mr->ndescs;
2510 2630
2511 memset(umr, 0, sizeof(*umr)); 2631 memset(umr, 0, sizeof(*umr));
2632
2633 if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
2634 /* KLMs take twice the size of MTTs */
2635 ndescs *= 2;
2636
2512 umr->flags = MLX5_UMR_CHECK_NOT_FREE; 2637 umr->flags = MLX5_UMR_CHECK_NOT_FREE;
2513 umr->klm_octowords = get_klm_octo(ndescs); 2638 umr->klm_octowords = get_klm_octo(ndescs);
2514 umr->mkey_mask = frwr_mkey_mask(); 2639 umr->mkey_mask = frwr_mkey_mask();
@@ -2558,6 +2683,44 @@ static __be64 get_umr_update_mtt_mask(void)
2558 return cpu_to_be64(result); 2683 return cpu_to_be64(result);
2559} 2684}
2560 2685
2686static __be64 get_umr_update_translation_mask(void)
2687{
2688 u64 result;
2689
2690 result = MLX5_MKEY_MASK_LEN |
2691 MLX5_MKEY_MASK_PAGE_SIZE |
2692 MLX5_MKEY_MASK_START_ADDR |
2693 MLX5_MKEY_MASK_KEY |
2694 MLX5_MKEY_MASK_FREE;
2695
2696 return cpu_to_be64(result);
2697}
2698
2699static __be64 get_umr_update_access_mask(void)
2700{
2701 u64 result;
2702
2703 result = MLX5_MKEY_MASK_LW |
2704 MLX5_MKEY_MASK_RR |
2705 MLX5_MKEY_MASK_RW |
2706 MLX5_MKEY_MASK_A |
2707 MLX5_MKEY_MASK_KEY |
2708 MLX5_MKEY_MASK_FREE;
2709
2710 return cpu_to_be64(result);
2711}
2712
2713static __be64 get_umr_update_pd_mask(void)
2714{
2715 u64 result;
2716
2717 result = MLX5_MKEY_MASK_PD |
2718 MLX5_MKEY_MASK_KEY |
2719 MLX5_MKEY_MASK_FREE;
2720
2721 return cpu_to_be64(result);
2722}
2723
2561static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, 2724static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
2562 struct ib_send_wr *wr) 2725 struct ib_send_wr *wr)
2563{ 2726{
@@ -2576,9 +2739,15 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
2576 umr->mkey_mask = get_umr_update_mtt_mask(); 2739 umr->mkey_mask = get_umr_update_mtt_mask();
2577 umr->bsf_octowords = get_klm_octo(umrwr->target.offset); 2740 umr->bsf_octowords = get_klm_octo(umrwr->target.offset);
2578 umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; 2741 umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
2579 } else {
2580 umr->mkey_mask = get_umr_reg_mr_mask();
2581 } 2742 }
2743 if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
2744 umr->mkey_mask |= get_umr_update_translation_mask();
2745 if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_ACCESS)
2746 umr->mkey_mask |= get_umr_update_access_mask();
2747 if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD)
2748 umr->mkey_mask |= get_umr_update_pd_mask();
2749 if (!umr->mkey_mask)
2750 umr->mkey_mask = get_umr_reg_mr_mask();
2582 } else { 2751 } else {
2583 umr->mkey_mask = get_umr_unreg_mr_mask(); 2752 umr->mkey_mask = get_umr_unreg_mr_mask();
2584 } 2753 }
@@ -2603,13 +2772,19 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
2603 int ndescs = ALIGN(mr->ndescs, 8) >> 1; 2772 int ndescs = ALIGN(mr->ndescs, 8) >> 1;
2604 2773
2605 memset(seg, 0, sizeof(*seg)); 2774 memset(seg, 0, sizeof(*seg));
2606 seg->flags = get_umr_flags(access) | MLX5_ACCESS_MODE_MTT; 2775
2776 if (mr->access_mode == MLX5_ACCESS_MODE_MTT)
2777 seg->log2_page_size = ilog2(mr->ibmr.page_size);
2778 else if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
2779 /* KLMs take twice the size of MTTs */
2780 ndescs *= 2;
2781
2782 seg->flags = get_umr_flags(access) | mr->access_mode;
2607 seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00); 2783 seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
2608 seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); 2784 seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
2609 seg->start_addr = cpu_to_be64(mr->ibmr.iova); 2785 seg->start_addr = cpu_to_be64(mr->ibmr.iova);
2610 seg->len = cpu_to_be64(mr->ibmr.length); 2786 seg->len = cpu_to_be64(mr->ibmr.length);
2611 seg->xlt_oct_size = cpu_to_be32(ndescs); 2787 seg->xlt_oct_size = cpu_to_be32(ndescs);
2612 seg->log2_page_size = ilog2(mr->ibmr.page_size);
2613} 2788}
2614 2789
2615static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) 2790static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
@@ -2630,7 +2805,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
2630 2805
2631 seg->flags = convert_access(umrwr->access_flags); 2806 seg->flags = convert_access(umrwr->access_flags);
2632 if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) { 2807 if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) {
2633 seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); 2808 if (umrwr->pd)
2809 seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
2634 seg->start_addr = cpu_to_be64(umrwr->target.virt_addr); 2810 seg->start_addr = cpu_to_be64(umrwr->target.virt_addr);
2635 } 2811 }
2636 seg->len = cpu_to_be64(umrwr->length); 2812 seg->len = cpu_to_be64(umrwr->length);
@@ -3196,13 +3372,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3196{ 3372{
3197 struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ 3373 struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
3198 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 3374 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
3199 struct mlx5_ib_qp *qp = to_mqp(ibqp); 3375 struct mlx5_ib_qp *qp;
3200 struct mlx5_ib_mr *mr; 3376 struct mlx5_ib_mr *mr;
3201 struct mlx5_wqe_data_seg *dpseg; 3377 struct mlx5_wqe_data_seg *dpseg;
3202 struct mlx5_wqe_xrc_seg *xrc; 3378 struct mlx5_wqe_xrc_seg *xrc;
3203 struct mlx5_bf *bf = qp->bf; 3379 struct mlx5_bf *bf;
3204 int uninitialized_var(size); 3380 int uninitialized_var(size);
3205 void *qend = qp->sq.qend; 3381 void *qend;
3206 unsigned long flags; 3382 unsigned long flags;
3207 unsigned idx; 3383 unsigned idx;
3208 int err = 0; 3384 int err = 0;
@@ -3214,6 +3390,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3214 u8 next_fence = 0; 3390 u8 next_fence = 0;
3215 u8 fence; 3391 u8 fence;
3216 3392
3393 if (unlikely(ibqp->qp_type == IB_QPT_GSI))
3394 return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
3395
3396 qp = to_mqp(ibqp);
3397 bf = qp->bf;
3398 qend = qp->sq.qend;
3399
3217 spin_lock_irqsave(&qp->sq.lock, flags); 3400 spin_lock_irqsave(&qp->sq.lock, flags);
3218 3401
3219 for (nreq = 0; wr; nreq++, wr = wr->next) { 3402 for (nreq = 0; wr; nreq++, wr = wr->next) {
@@ -3373,16 +3556,37 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3373 } 3556 }
3374 break; 3557 break;
3375 3558
3376 case IB_QPT_UD:
3377 case IB_QPT_SMI: 3559 case IB_QPT_SMI:
3378 case IB_QPT_GSI: 3560 case MLX5_IB_QPT_HW_GSI:
3379 set_datagram_seg(seg, wr); 3561 set_datagram_seg(seg, wr);
3380 seg += sizeof(struct mlx5_wqe_datagram_seg); 3562 seg += sizeof(struct mlx5_wqe_datagram_seg);
3381 size += sizeof(struct mlx5_wqe_datagram_seg) / 16; 3563 size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
3382 if (unlikely((seg == qend))) 3564 if (unlikely((seg == qend)))
3383 seg = mlx5_get_send_wqe(qp, 0); 3565 seg = mlx5_get_send_wqe(qp, 0);
3384 break; 3566 break;
3567 case IB_QPT_UD:
3568 set_datagram_seg(seg, wr);
3569 seg += sizeof(struct mlx5_wqe_datagram_seg);
3570 size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
3571
3572 if (unlikely((seg == qend)))
3573 seg = mlx5_get_send_wqe(qp, 0);
3574
3575 /* handle qp that supports ud offload */
3576 if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
3577 struct mlx5_wqe_eth_pad *pad;
3578
3579 pad = seg;
3580 memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
3581 seg += sizeof(struct mlx5_wqe_eth_pad);
3582 size += sizeof(struct mlx5_wqe_eth_pad) / 16;
3385 3583
3584 seg = set_eth_seg(seg, wr, qend, qp, &size);
3585
3586 if (unlikely((seg == qend)))
3587 seg = mlx5_get_send_wqe(qp, 0);
3588 }
3589 break;
3386 case MLX5_IB_QPT_REG_UMR: 3590 case MLX5_IB_QPT_REG_UMR:
3387 if (wr->opcode != MLX5_IB_WR_UMR) { 3591 if (wr->opcode != MLX5_IB_WR_UMR) {
3388 err = -EINVAL; 3592 err = -EINVAL;
@@ -3502,6 +3706,9 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3502 int ind; 3706 int ind;
3503 int i; 3707 int i;
3504 3708
3709 if (unlikely(ibqp->qp_type == IB_QPT_GSI))
3710 return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
3711
3505 spin_lock_irqsave(&qp->rq.lock, flags); 3712 spin_lock_irqsave(&qp->rq.lock, flags);
3506 3713
3507 ind = qp->rq.head & (qp->rq.wqe_cnt - 1); 3714 ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
@@ -3822,6 +4029,10 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
3822 int err = 0; 4029 int err = 0;
3823 u8 raw_packet_qp_state; 4030 u8 raw_packet_qp_state;
3824 4031
4032 if (unlikely(ibqp->qp_type == IB_QPT_GSI))
4033 return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
4034 qp_init_attr);
4035
3825#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 4036#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
3826 /* 4037 /*
3827 * Wait for any outstanding page faults, in case the user frees memory 4038 * Wait for any outstanding page faults, in case the user frees memory
@@ -3874,6 +4085,8 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
3874 qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND; 4085 qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
3875 if (qp->flags & MLX5_IB_QP_MANAGED_RECV) 4086 if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
3876 qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; 4087 qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
4088 if (qp->flags & MLX5_IB_QP_SQPN_QP1)
4089 qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1();
3877 4090
3878 qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? 4091 qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
3879 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; 4092 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
index b94a55404a59..61bc308bb802 100644
--- a/drivers/infiniband/hw/mlx5/user.h
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -152,6 +152,13 @@ struct mlx5_ib_create_qp_resp {
152 __u32 uuar_index; 152 __u32 uuar_index;
153}; 153};
154 154
155struct mlx5_ib_alloc_mw {
156 __u32 comp_mask;
157 __u8 num_klms;
158 __u8 reserved1;
159 __u16 reserved2;
160};
161
155static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, 162static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
156 struct mlx5_ib_create_qp *ucmd, 163 struct mlx5_ib_create_qp *ucmd,
157 int inlen, 164 int inlen,
diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig
index 846dc97cf260..7964eba8e7ed 100644
--- a/drivers/infiniband/hw/nes/Kconfig
+++ b/drivers/infiniband/hw/nes/Kconfig
@@ -2,7 +2,6 @@ config INFINIBAND_NES
2 tristate "NetEffect RNIC Driver" 2 tristate "NetEffect RNIC Driver"
3 depends on PCI && INET && INFINIBAND 3 depends on PCI && INET && INFINIBAND
4 select LIBCRC32C 4 select LIBCRC32C
5 select INET_LRO
6 ---help--- 5 ---help---
7 This is the RDMA Network Interface Card (RNIC) driver for 6 This is the RDMA Network Interface Card (RNIC) driver for
8 NetEffect Ethernet Cluster Server Adapters. 7 NetEffect Ethernet Cluster Server Adapters.
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 9f9d5c563a61..35cbb17bec12 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -111,17 +111,6 @@ static struct pci_device_id nes_pci_table[] = {
111 111
112MODULE_DEVICE_TABLE(pci, nes_pci_table); 112MODULE_DEVICE_TABLE(pci, nes_pci_table);
113 113
114/* registered nes netlink callbacks */
115static struct ibnl_client_cbs nes_nl_cb_table[] = {
116 [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
117 [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
118 [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
119 [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
120 [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
121 [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
122 [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
123};
124
125static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *); 114static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *);
126static int nes_net_event(struct notifier_block *, unsigned long, void *); 115static int nes_net_event(struct notifier_block *, unsigned long, void *);
127static int nes_notifiers_registered; 116static int nes_notifiers_registered;
@@ -682,17 +671,6 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
682 } 671 }
683 nes_notifiers_registered++; 672 nes_notifiers_registered++;
684 673
685 if (ibnl_add_client(RDMA_NL_NES, RDMA_NL_IWPM_NUM_OPS, nes_nl_cb_table))
686 printk(KERN_ERR PFX "%s[%u]: Failed to add netlink callback\n",
687 __func__, __LINE__);
688
689 ret = iwpm_init(RDMA_NL_NES);
690 if (ret) {
691 printk(KERN_ERR PFX "%s: port mapper initialization failed\n",
692 pci_name(pcidev));
693 goto bail7;
694 }
695
696 INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status); 674 INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status);
697 675
698 /* Initialize network devices */ 676 /* Initialize network devices */
@@ -731,7 +709,6 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
731 709
732 nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n", 710 nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n",
733 nesdev->netdev_count, nesdev->nesadapter->netdev_count); 711 nesdev->netdev_count, nesdev->nesadapter->netdev_count);
734 ibnl_remove_client(RDMA_NL_NES);
735 712
736 nes_notifiers_registered--; 713 nes_notifiers_registered--;
737 if (nes_notifiers_registered == 0) { 714 if (nes_notifiers_registered == 0) {
@@ -795,8 +772,6 @@ static void nes_remove(struct pci_dev *pcidev)
795 nesdev->nesadapter->netdev_count--; 772 nesdev->nesadapter->netdev_count--;
796 } 773 }
797 } 774 }
798 ibnl_remove_client(RDMA_NL_NES);
799 iwpm_exit(RDMA_NL_NES);
800 775
801 nes_notifiers_registered--; 776 nes_notifiers_registered--;
802 if (nes_notifiers_registered == 0) { 777 if (nes_notifiers_registered == 0) {
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index cb9f0f27308d..7f0aa23aef9d 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -482,11 +482,11 @@ static void form_cm_frame(struct sk_buff *skb,
482 iph->ttl = 0x40; 482 iph->ttl = 0x40;
483 iph->protocol = 0x06; /* IPPROTO_TCP */ 483 iph->protocol = 0x06; /* IPPROTO_TCP */
484 484
485 iph->saddr = htonl(cm_node->mapped_loc_addr); 485 iph->saddr = htonl(cm_node->loc_addr);
486 iph->daddr = htonl(cm_node->mapped_rem_addr); 486 iph->daddr = htonl(cm_node->rem_addr);
487 487
488 tcph->source = htons(cm_node->mapped_loc_port); 488 tcph->source = htons(cm_node->loc_port);
489 tcph->dest = htons(cm_node->mapped_rem_port); 489 tcph->dest = htons(cm_node->rem_port);
490 tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num); 490 tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
491 491
492 if (flags & SET_ACK) { 492 if (flags & SET_ACK) {
@@ -525,125 +525,6 @@ static void form_cm_frame(struct sk_buff *skb,
525 cm_packets_created++; 525 cm_packets_created++;
526} 526}
527 527
528/*
529 * nes_create_sockaddr - Record ip addr and tcp port in a sockaddr struct
530 */
531static void nes_create_sockaddr(__be32 ip_addr, __be16 port,
532 struct sockaddr_storage *addr)
533{
534 struct sockaddr_in *nes_sockaddr = (struct sockaddr_in *)addr;
535 nes_sockaddr->sin_family = AF_INET;
536 memcpy(&nes_sockaddr->sin_addr.s_addr, &ip_addr, sizeof(__be32));
537 nes_sockaddr->sin_port = port;
538}
539
540/*
541 * nes_create_mapinfo - Create a mapinfo object in the port mapper data base
542 */
543static int nes_create_mapinfo(struct nes_cm_info *cm_info)
544{
545 struct sockaddr_storage local_sockaddr;
546 struct sockaddr_storage mapped_sockaddr;
547
548 nes_create_sockaddr(htonl(cm_info->loc_addr), htons(cm_info->loc_port),
549 &local_sockaddr);
550 nes_create_sockaddr(htonl(cm_info->mapped_loc_addr),
551 htons(cm_info->mapped_loc_port), &mapped_sockaddr);
552
553 return iwpm_create_mapinfo(&local_sockaddr,
554 &mapped_sockaddr, RDMA_NL_NES);
555}
556
557/*
558 * nes_remove_mapinfo - Remove a mapinfo object from the port mapper data base
559 * and send a remove mapping op message to
560 * the userspace port mapper
561 */
562static int nes_remove_mapinfo(u32 loc_addr, u16 loc_port,
563 u32 mapped_loc_addr, u16 mapped_loc_port)
564{
565 struct sockaddr_storage local_sockaddr;
566 struct sockaddr_storage mapped_sockaddr;
567
568 nes_create_sockaddr(htonl(loc_addr), htons(loc_port), &local_sockaddr);
569 nes_create_sockaddr(htonl(mapped_loc_addr), htons(mapped_loc_port),
570 &mapped_sockaddr);
571
572 iwpm_remove_mapinfo(&local_sockaddr, &mapped_sockaddr);
573 return iwpm_remove_mapping(&local_sockaddr, RDMA_NL_NES);
574}
575
576/*
577 * nes_form_pm_msg - Form a port mapper message with mapping info
578 */
579static void nes_form_pm_msg(struct nes_cm_info *cm_info,
580 struct iwpm_sa_data *pm_msg)
581{
582 nes_create_sockaddr(htonl(cm_info->loc_addr), htons(cm_info->loc_port),
583 &pm_msg->loc_addr);
584 nes_create_sockaddr(htonl(cm_info->rem_addr), htons(cm_info->rem_port),
585 &pm_msg->rem_addr);
586}
587
588/*
589 * nes_form_reg_msg - Form a port mapper message with dev info
590 */
591static void nes_form_reg_msg(struct nes_vnic *nesvnic,
592 struct iwpm_dev_data *pm_msg)
593{
594 memcpy(pm_msg->dev_name, nesvnic->nesibdev->ibdev.name,
595 IWPM_DEVNAME_SIZE);
596 memcpy(pm_msg->if_name, nesvnic->netdev->name, IWPM_IFNAME_SIZE);
597}
598
599static void record_sockaddr_info(struct sockaddr_storage *addr_info,
600 nes_addr_t *ip_addr, u16 *port_num)
601{
602 struct sockaddr_in *in_addr = (struct sockaddr_in *)addr_info;
603
604 if (in_addr->sin_family == AF_INET) {
605 *ip_addr = ntohl(in_addr->sin_addr.s_addr);
606 *port_num = ntohs(in_addr->sin_port);
607 }
608}
609
610/*
611 * nes_record_pm_msg - Save the received mapping info
612 */
613static void nes_record_pm_msg(struct nes_cm_info *cm_info,
614 struct iwpm_sa_data *pm_msg)
615{
616 record_sockaddr_info(&pm_msg->mapped_loc_addr,
617 &cm_info->mapped_loc_addr, &cm_info->mapped_loc_port);
618
619 record_sockaddr_info(&pm_msg->mapped_rem_addr,
620 &cm_info->mapped_rem_addr, &cm_info->mapped_rem_port);
621}
622
623/*
624 * nes_get_reminfo - Get the address info of the remote connecting peer
625 */
626static int nes_get_remote_addr(struct nes_cm_node *cm_node)
627{
628 struct sockaddr_storage mapped_loc_addr, mapped_rem_addr;
629 struct sockaddr_storage remote_addr;
630 int ret;
631
632 nes_create_sockaddr(htonl(cm_node->mapped_loc_addr),
633 htons(cm_node->mapped_loc_port), &mapped_loc_addr);
634 nes_create_sockaddr(htonl(cm_node->mapped_rem_addr),
635 htons(cm_node->mapped_rem_port), &mapped_rem_addr);
636
637 ret = iwpm_get_remote_info(&mapped_loc_addr, &mapped_rem_addr,
638 &remote_addr, RDMA_NL_NES);
639 if (ret)
640 nes_debug(NES_DBG_CM, "Unable to find remote peer address info\n");
641 else
642 record_sockaddr_info(&remote_addr, &cm_node->rem_addr,
643 &cm_node->rem_port);
644 return ret;
645}
646
647/** 528/**
648 * print_core - dump a cm core 529 * print_core - dump a cm core
649 */ 530 */
@@ -1266,11 +1147,10 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
1266 loc_addr, loc_port, 1147 loc_addr, loc_port,
1267 cm_node->rem_addr, cm_node->rem_port, 1148 cm_node->rem_addr, cm_node->rem_port,
1268 rem_addr, rem_port); 1149 rem_addr, rem_port);
1269 if ((cm_node->mapped_loc_addr == loc_addr) && 1150 if ((cm_node->loc_addr == loc_addr) &&
1270 (cm_node->mapped_loc_port == loc_port) && 1151 (cm_node->loc_port == loc_port) &&
1271 (cm_node->mapped_rem_addr == rem_addr) && 1152 (cm_node->rem_addr == rem_addr) &&
1272 (cm_node->mapped_rem_port == rem_port)) { 1153 (cm_node->rem_port == rem_port)) {
1273
1274 add_ref_cm_node(cm_node); 1154 add_ref_cm_node(cm_node);
1275 spin_unlock_irqrestore(&cm_core->ht_lock, flags); 1155 spin_unlock_irqrestore(&cm_core->ht_lock, flags);
1276 return cm_node; 1156 return cm_node;
@@ -1287,8 +1167,8 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
1287 * find_listener - find a cm node listening on this addr-port pair 1167 * find_listener - find a cm node listening on this addr-port pair
1288 */ 1168 */
1289static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, 1169static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
1290 nes_addr_t dst_addr, u16 dst_port, 1170 nes_addr_t dst_addr, u16 dst_port,
1291 enum nes_cm_listener_state listener_state, int local) 1171 enum nes_cm_listener_state listener_state)
1292{ 1172{
1293 unsigned long flags; 1173 unsigned long flags;
1294 struct nes_cm_listener *listen_node; 1174 struct nes_cm_listener *listen_node;
@@ -1298,13 +1178,9 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
1298 /* walk list and find cm_node associated with this session ID */ 1178 /* walk list and find cm_node associated with this session ID */
1299 spin_lock_irqsave(&cm_core->listen_list_lock, flags); 1179 spin_lock_irqsave(&cm_core->listen_list_lock, flags);
1300 list_for_each_entry(listen_node, &cm_core->listen_list.list, list) { 1180 list_for_each_entry(listen_node, &cm_core->listen_list.list, list) {
1301 if (local) { 1181 listen_addr = listen_node->loc_addr;
1302 listen_addr = listen_node->loc_addr; 1182 listen_port = listen_node->loc_port;
1303 listen_port = listen_node->loc_port; 1183
1304 } else {
1305 listen_addr = listen_node->mapped_loc_addr;
1306 listen_port = listen_node->mapped_loc_port;
1307 }
1308 /* compare node pair, return node handle if a match */ 1184 /* compare node pair, return node handle if a match */
1309 if (((listen_addr == dst_addr) || 1185 if (((listen_addr == dst_addr) ||
1310 listen_addr == 0x00000000) && 1186 listen_addr == 0x00000000) &&
@@ -1443,17 +1319,13 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
1443 1319
1444 if (listener->nesvnic) { 1320 if (listener->nesvnic) {
1445 nes_manage_apbvt(listener->nesvnic, 1321 nes_manage_apbvt(listener->nesvnic,
1446 listener->mapped_loc_port, 1322 listener->loc_port,
1447 PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), 1323 PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn),
1448 NES_MANAGE_APBVT_DEL); 1324 NES_MANAGE_APBVT_DEL);
1449 1325
1450 nes_remove_mapinfo(listener->loc_addr,
1451 listener->loc_port,
1452 listener->mapped_loc_addr,
1453 listener->mapped_loc_port);
1454 nes_debug(NES_DBG_NLMSG, 1326 nes_debug(NES_DBG_NLMSG,
1455 "Delete APBVT mapped_loc_port = %04X\n", 1327 "Delete APBVT loc_port = %04X\n",
1456 listener->mapped_loc_port); 1328 listener->loc_port);
1457 } 1329 }
1458 1330
1459 nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener); 1331 nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener);
@@ -1602,11 +1474,6 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
1602 cm_node->rem_addr = cm_info->rem_addr; 1474 cm_node->rem_addr = cm_info->rem_addr;
1603 cm_node->rem_port = cm_info->rem_port; 1475 cm_node->rem_port = cm_info->rem_port;
1604 1476
1605 cm_node->mapped_loc_addr = cm_info->mapped_loc_addr;
1606 cm_node->mapped_rem_addr = cm_info->mapped_rem_addr;
1607 cm_node->mapped_loc_port = cm_info->mapped_loc_port;
1608 cm_node->mapped_rem_port = cm_info->mapped_rem_port;
1609
1610 cm_node->mpa_frame_rev = mpa_version; 1477 cm_node->mpa_frame_rev = mpa_version;
1611 cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; 1478 cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
1612 cm_node->mpav2_ird_ord = 0; 1479 cm_node->mpav2_ird_ord = 0;
@@ -1655,10 +1522,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
1655 cm_node->loopbackpartner = NULL; 1522 cm_node->loopbackpartner = NULL;
1656 1523
1657 /* get the mac addr for the remote node */ 1524 /* get the mac addr for the remote node */
1658 oldarpindex = nes_arp_table(nesdev, cm_node->mapped_rem_addr, 1525 oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr,
1659 NULL, NES_ARP_RESOLVE); 1526 NULL, NES_ARP_RESOLVE);
1660 arpindex = nes_addr_resolve_neigh(nesvnic, 1527 arpindex = nes_addr_resolve_neigh(nesvnic, cm_node->rem_addr,
1661 cm_node->mapped_rem_addr, oldarpindex); 1528 oldarpindex);
1662 if (arpindex < 0) { 1529 if (arpindex < 0) {
1663 kfree(cm_node); 1530 kfree(cm_node);
1664 return NULL; 1531 return NULL;
@@ -1720,14 +1587,12 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core,
1720 mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0); 1587 mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0);
1721 } else { 1588 } else {
1722 if (cm_node->apbvt_set && cm_node->nesvnic) { 1589 if (cm_node->apbvt_set && cm_node->nesvnic) {
1723 nes_manage_apbvt(cm_node->nesvnic, cm_node->mapped_loc_port, 1590 nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port,
1724 PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn), 1591 PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn),
1725 NES_MANAGE_APBVT_DEL); 1592 NES_MANAGE_APBVT_DEL);
1726 } 1593 }
1727 nes_debug(NES_DBG_NLMSG, "Delete APBVT mapped_loc_port = %04X\n", 1594 nes_debug(NES_DBG_NLMSG, "Delete APBVT loc_port = %04X\n",
1728 cm_node->mapped_loc_port); 1595 cm_node->loc_port);
1729 nes_remove_mapinfo(cm_node->loc_addr, cm_node->loc_port,
1730 cm_node->mapped_loc_addr, cm_node->mapped_loc_port);
1731 } 1596 }
1732 1597
1733 atomic_dec(&cm_core->node_cnt); 1598 atomic_dec(&cm_core->node_cnt);
@@ -2184,7 +2049,6 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
2184 cm_node->state = NES_CM_STATE_ESTABLISHED; 2049 cm_node->state = NES_CM_STATE_ESTABLISHED;
2185 if (datasize) { 2050 if (datasize) {
2186 cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; 2051 cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
2187 nes_get_remote_addr(cm_node);
2188 handle_rcv_mpa(cm_node, skb); 2052 handle_rcv_mpa(cm_node, skb);
2189 } else { /* rcvd ACK only */ 2053 } else { /* rcvd ACK only */
2190 dev_kfree_skb_any(skb); 2054 dev_kfree_skb_any(skb);
@@ -2399,17 +2263,14 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
2399 struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) 2263 struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
2400{ 2264{
2401 struct nes_cm_listener *listener; 2265 struct nes_cm_listener *listener;
2402 struct iwpm_dev_data pm_reg_msg;
2403 struct iwpm_sa_data pm_msg;
2404 unsigned long flags; 2266 unsigned long flags;
2405 int iwpm_err = 0;
2406 2267
2407 nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n", 2268 nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n",
2408 cm_info->loc_addr, cm_info->loc_port); 2269 cm_info->loc_addr, cm_info->loc_port);
2409 2270
2410 /* cannot have multiple matching listeners */ 2271 /* cannot have multiple matching listeners */
2411 listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port, 2272 listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port,
2412 NES_CM_LISTENER_EITHER_STATE, 1); 2273 NES_CM_LISTENER_EITHER_STATE);
2413 2274
2414 if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) { 2275 if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) {
2415 /* find automatically incs ref count ??? */ 2276 /* find automatically incs ref count ??? */
@@ -2419,22 +2280,6 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
2419 } 2280 }
2420 2281
2421 if (!listener) { 2282 if (!listener) {
2422 nes_form_reg_msg(nesvnic, &pm_reg_msg);
2423 iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES);
2424 if (iwpm_err) {
2425 nes_debug(NES_DBG_NLMSG,
2426 "Port Mapper reg pid fail (err = %d).\n", iwpm_err);
2427 }
2428 if (iwpm_valid_pid() && !iwpm_err) {
2429 nes_form_pm_msg(cm_info, &pm_msg);
2430 iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_NES);
2431 if (iwpm_err)
2432 nes_debug(NES_DBG_NLMSG,
2433 "Port Mapper query fail (err = %d).\n", iwpm_err);
2434 else
2435 nes_record_pm_msg(cm_info, &pm_msg);
2436 }
2437
2438 /* create a CM listen node (1/2 node to compare incoming traffic to) */ 2283 /* create a CM listen node (1/2 node to compare incoming traffic to) */
2439 listener = kzalloc(sizeof(*listener), GFP_ATOMIC); 2284 listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
2440 if (!listener) { 2285 if (!listener) {
@@ -2444,8 +2289,6 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
2444 2289
2445 listener->loc_addr = cm_info->loc_addr; 2290 listener->loc_addr = cm_info->loc_addr;
2446 listener->loc_port = cm_info->loc_port; 2291 listener->loc_port = cm_info->loc_port;
2447 listener->mapped_loc_addr = cm_info->mapped_loc_addr;
2448 listener->mapped_loc_port = cm_info->mapped_loc_port;
2449 listener->reused_node = 0; 2292 listener->reused_node = 0;
2450 2293
2451 atomic_set(&listener->ref_count, 1); 2294 atomic_set(&listener->ref_count, 1);
@@ -2507,18 +2350,18 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
2507 2350
2508 if (cm_info->loc_addr == cm_info->rem_addr) { 2351 if (cm_info->loc_addr == cm_info->rem_addr) {
2509 loopbackremotelistener = find_listener(cm_core, 2352 loopbackremotelistener = find_listener(cm_core,
2510 cm_node->mapped_loc_addr, cm_node->mapped_rem_port, 2353 cm_node->loc_addr, cm_node->rem_port,
2511 NES_CM_LISTENER_ACTIVE_STATE, 0); 2354 NES_CM_LISTENER_ACTIVE_STATE);
2512 if (loopbackremotelistener == NULL) { 2355 if (loopbackremotelistener == NULL) {
2513 create_event(cm_node, NES_CM_EVENT_ABORTED); 2356 create_event(cm_node, NES_CM_EVENT_ABORTED);
2514 } else { 2357 } else {
2515 loopback_cm_info = *cm_info; 2358 loopback_cm_info = *cm_info;
2516 loopback_cm_info.loc_port = cm_info->rem_port; 2359 loopback_cm_info.loc_port = cm_info->rem_port;
2517 loopback_cm_info.rem_port = cm_info->loc_port; 2360 loopback_cm_info.rem_port = cm_info->loc_port;
2518 loopback_cm_info.mapped_loc_port = 2361 loopback_cm_info.loc_port =
2519 cm_info->mapped_rem_port; 2362 cm_info->rem_port;
2520 loopback_cm_info.mapped_rem_port = 2363 loopback_cm_info.rem_port =
2521 cm_info->mapped_loc_port; 2364 cm_info->loc_port;
2522 loopback_cm_info.cm_id = loopbackremotelistener->cm_id; 2365 loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
2523 loopbackremotenode = make_cm_node(cm_core, nesvnic, 2366 loopbackremotenode = make_cm_node(cm_core, nesvnic,
2524 &loopback_cm_info, loopbackremotelistener); 2367 &loopback_cm_info, loopbackremotelistener);
@@ -2747,12 +2590,6 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
2747 nfo.rem_addr = ntohl(iph->saddr); 2590 nfo.rem_addr = ntohl(iph->saddr);
2748 nfo.rem_port = ntohs(tcph->source); 2591 nfo.rem_port = ntohs(tcph->source);
2749 2592
2750 /* If port mapper is available these should be mapped address info */
2751 nfo.mapped_loc_addr = ntohl(iph->daddr);
2752 nfo.mapped_loc_port = ntohs(tcph->dest);
2753 nfo.mapped_rem_addr = ntohl(iph->saddr);
2754 nfo.mapped_rem_port = ntohs(tcph->source);
2755
2756 tmp_daddr = cpu_to_be32(iph->daddr); 2593 tmp_daddr = cpu_to_be32(iph->daddr);
2757 tmp_saddr = cpu_to_be32(iph->saddr); 2594 tmp_saddr = cpu_to_be32(iph->saddr);
2758 2595
@@ -2761,8 +2598,8 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
2761 2598
2762 do { 2599 do {
2763 cm_node = find_node(cm_core, 2600 cm_node = find_node(cm_core,
2764 nfo.mapped_rem_port, nfo.mapped_rem_addr, 2601 nfo.rem_port, nfo.rem_addr,
2765 nfo.mapped_loc_port, nfo.mapped_loc_addr); 2602 nfo.loc_port, nfo.loc_addr);
2766 2603
2767 if (!cm_node) { 2604 if (!cm_node) {
2768 /* Only type of packet accepted are for */ 2605 /* Only type of packet accepted are for */
@@ -2771,9 +2608,9 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
2771 skb_handled = 0; 2608 skb_handled = 0;
2772 break; 2609 break;
2773 } 2610 }
2774 listener = find_listener(cm_core, nfo.mapped_loc_addr, 2611 listener = find_listener(cm_core, nfo.loc_addr,
2775 nfo.mapped_loc_port, 2612 nfo.loc_port,
2776 NES_CM_LISTENER_ACTIVE_STATE, 0); 2613 NES_CM_LISTENER_ACTIVE_STATE);
2777 if (!listener) { 2614 if (!listener) {
2778 nfo.cm_id = NULL; 2615 nfo.cm_id = NULL;
2779 nfo.conn_type = 0; 2616 nfo.conn_type = 0;
@@ -2856,12 +2693,22 @@ static struct nes_cm_core *nes_cm_alloc_core(void)
2856 2693
2857 nes_debug(NES_DBG_CM, "Enable QUEUE EVENTS\n"); 2694 nes_debug(NES_DBG_CM, "Enable QUEUE EVENTS\n");
2858 cm_core->event_wq = create_singlethread_workqueue("nesewq"); 2695 cm_core->event_wq = create_singlethread_workqueue("nesewq");
2696 if (!cm_core->event_wq)
2697 goto out_free_cmcore;
2859 cm_core->post_event = nes_cm_post_event; 2698 cm_core->post_event = nes_cm_post_event;
2860 nes_debug(NES_DBG_CM, "Enable QUEUE DISCONNECTS\n"); 2699 nes_debug(NES_DBG_CM, "Enable QUEUE DISCONNECTS\n");
2861 cm_core->disconn_wq = create_singlethread_workqueue("nesdwq"); 2700 cm_core->disconn_wq = create_singlethread_workqueue("nesdwq");
2701 if (!cm_core->disconn_wq)
2702 goto out_free_wq;
2862 2703
2863 print_core(cm_core); 2704 print_core(cm_core);
2864 return cm_core; 2705 return cm_core;
2706
2707out_free_wq:
2708 destroy_workqueue(cm_core->event_wq);
2709out_free_cmcore:
2710 kfree(cm_core);
2711 return NULL;
2865} 2712}
2866 2713
2867 2714
@@ -3121,8 +2968,8 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
3121 atomic_inc(&cm_disconnects); 2968 atomic_inc(&cm_disconnects);
3122 cm_event.event = IW_CM_EVENT_DISCONNECT; 2969 cm_event.event = IW_CM_EVENT_DISCONNECT;
3123 cm_event.status = disconn_status; 2970 cm_event.status = disconn_status;
3124 cm_event.local_addr = cm_id->local_addr; 2971 cm_event.local_addr = cm_id->m_local_addr;
3125 cm_event.remote_addr = cm_id->remote_addr; 2972 cm_event.remote_addr = cm_id->m_remote_addr;
3126 cm_event.private_data = NULL; 2973 cm_event.private_data = NULL;
3127 cm_event.private_data_len = 0; 2974 cm_event.private_data_len = 0;
3128 2975
@@ -3148,8 +2995,8 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
3148 cm_event.event = IW_CM_EVENT_CLOSE; 2995 cm_event.event = IW_CM_EVENT_CLOSE;
3149 cm_event.status = 0; 2996 cm_event.status = 0;
3150 cm_event.provider_data = cm_id->provider_data; 2997 cm_event.provider_data = cm_id->provider_data;
3151 cm_event.local_addr = cm_id->local_addr; 2998 cm_event.local_addr = cm_id->m_local_addr;
3152 cm_event.remote_addr = cm_id->remote_addr; 2999 cm_event.remote_addr = cm_id->m_remote_addr;
3153 cm_event.private_data = NULL; 3000 cm_event.private_data = NULL;
3154 cm_event.private_data_len = 0; 3001 cm_event.private_data_len = 0;
3155 3002
@@ -3240,8 +3087,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3240 u8 *start_ptr = &start_addr; 3087 u8 *start_ptr = &start_addr;
3241 u8 **start_buff = &start_ptr; 3088 u8 **start_buff = &start_ptr;
3242 u16 buff_len = 0; 3089 u16 buff_len = 0;
3243 struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; 3090 struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
3244 struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; 3091 struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
3245 3092
3246 ibqp = nes_get_qp(cm_id->device, conn_param->qpn); 3093 ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
3247 if (!ibqp) 3094 if (!ibqp)
@@ -3378,11 +3225,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3378 nes_cm_init_tsa_conn(nesqp, cm_node); 3225 nes_cm_init_tsa_conn(nesqp, cm_node);
3379 3226
3380 nesqp->nesqp_context->tcpPorts[0] = 3227 nesqp->nesqp_context->tcpPorts[0] =
3381 cpu_to_le16(cm_node->mapped_loc_port); 3228 cpu_to_le16(cm_node->loc_port);
3382 nesqp->nesqp_context->tcpPorts[1] = 3229 nesqp->nesqp_context->tcpPorts[1] =
3383 cpu_to_le16(cm_node->mapped_rem_port); 3230 cpu_to_le16(cm_node->rem_port);
3384 3231
3385 nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr); 3232 nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->rem_addr);
3386 3233
3387 nesqp->nesqp_context->misc2 |= cpu_to_le32( 3234 nesqp->nesqp_context->misc2 |= cpu_to_le32(
3388 (u32)PCI_FUNC(nesdev->pcidev->devfn) << 3235 (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
@@ -3406,9 +3253,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3406 memset(&nes_quad, 0, sizeof(nes_quad)); 3253 memset(&nes_quad, 0, sizeof(nes_quad));
3407 nes_quad.DstIpAdrIndex = 3254 nes_quad.DstIpAdrIndex =
3408 cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); 3255 cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
3409 nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr); 3256 nes_quad.SrcIpadr = htonl(cm_node->rem_addr);
3410 nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port); 3257 nes_quad.TcpPorts[0] = htons(cm_node->rem_port);
3411 nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port); 3258 nes_quad.TcpPorts[1] = htons(cm_node->loc_port);
3412 3259
3413 /* Produce hash key */ 3260 /* Produce hash key */
3414 crc_value = get_crc_value(&nes_quad); 3261 crc_value = get_crc_value(&nes_quad);
@@ -3437,8 +3284,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3437 cm_event.event = IW_CM_EVENT_ESTABLISHED; 3284 cm_event.event = IW_CM_EVENT_ESTABLISHED;
3438 cm_event.status = 0; 3285 cm_event.status = 0;
3439 cm_event.provider_data = (void *)nesqp; 3286 cm_event.provider_data = (void *)nesqp;
3440 cm_event.local_addr = cm_id->local_addr; 3287 cm_event.local_addr = cm_id->m_local_addr;
3441 cm_event.remote_addr = cm_id->remote_addr; 3288 cm_event.remote_addr = cm_id->m_remote_addr;
3442 cm_event.private_data = NULL; 3289 cm_event.private_data = NULL;
3443 cm_event.private_data_len = 0; 3290 cm_event.private_data_len = 0;
3444 cm_event.ird = cm_node->ird_size; 3291 cm_event.ird = cm_node->ird_size;
@@ -3508,11 +3355,8 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3508 struct nes_cm_node *cm_node; 3355 struct nes_cm_node *cm_node;
3509 struct nes_cm_info cm_info; 3356 struct nes_cm_info cm_info;
3510 int apbvt_set = 0; 3357 int apbvt_set = 0;
3511 struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; 3358 struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
3512 struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; 3359 struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
3513 struct iwpm_dev_data pm_reg_msg;
3514 struct iwpm_sa_data pm_msg;
3515 int iwpm_err = 0;
3516 3360
3517 if (cm_id->remote_addr.ss_family != AF_INET) 3361 if (cm_id->remote_addr.ss_family != AF_INET)
3518 return -ENOSYS; 3362 return -ENOSYS;
@@ -3558,37 +3402,13 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3558 cm_info.cm_id = cm_id; 3402 cm_info.cm_id = cm_id;
3559 cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; 3403 cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
3560 3404
3561 /* No port mapper available, go with the specified peer information */
3562 cm_info.mapped_loc_addr = cm_info.loc_addr;
3563 cm_info.mapped_loc_port = cm_info.loc_port;
3564 cm_info.mapped_rem_addr = cm_info.rem_addr;
3565 cm_info.mapped_rem_port = cm_info.rem_port;
3566
3567 nes_form_reg_msg(nesvnic, &pm_reg_msg);
3568 iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES);
3569 if (iwpm_err) {
3570 nes_debug(NES_DBG_NLMSG,
3571 "Port Mapper reg pid fail (err = %d).\n", iwpm_err);
3572 }
3573 if (iwpm_valid_pid() && !iwpm_err) {
3574 nes_form_pm_msg(&cm_info, &pm_msg);
3575 iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_NES);
3576 if (iwpm_err)
3577 nes_debug(NES_DBG_NLMSG,
3578 "Port Mapper query fail (err = %d).\n", iwpm_err);
3579 else
3580 nes_record_pm_msg(&cm_info, &pm_msg);
3581 }
3582
3583 if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) { 3405 if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) {
3584 nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port, 3406 nes_manage_apbvt(nesvnic, cm_info.loc_port,
3585 PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); 3407 PCI_FUNC(nesdev->pcidev->devfn),
3408 NES_MANAGE_APBVT_ADD);
3586 apbvt_set = 1; 3409 apbvt_set = 1;
3587 } 3410 }
3588 3411
3589 if (nes_create_mapinfo(&cm_info))
3590 return -ENOMEM;
3591
3592 cm_id->add_ref(cm_id); 3412 cm_id->add_ref(cm_id);
3593 3413
3594 /* create a connect CM node connection */ 3414 /* create a connect CM node connection */
@@ -3597,14 +3417,12 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3597 &cm_info); 3417 &cm_info);
3598 if (!cm_node) { 3418 if (!cm_node) {
3599 if (apbvt_set) 3419 if (apbvt_set)
3600 nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port, 3420 nes_manage_apbvt(nesvnic, cm_info.loc_port,
3601 PCI_FUNC(nesdev->pcidev->devfn), 3421 PCI_FUNC(nesdev->pcidev->devfn),
3602 NES_MANAGE_APBVT_DEL); 3422 NES_MANAGE_APBVT_DEL);
3603 3423
3604 nes_debug(NES_DBG_NLMSG, "Delete mapped_loc_port = %04X\n", 3424 nes_debug(NES_DBG_NLMSG, "Delete loc_port = %04X\n",
3605 cm_info.mapped_loc_port); 3425 cm_info.loc_port);
3606 nes_remove_mapinfo(cm_info.loc_addr, cm_info.loc_port,
3607 cm_info.mapped_loc_addr, cm_info.mapped_loc_port);
3608 cm_id->rem_ref(cm_id); 3426 cm_id->rem_ref(cm_id);
3609 return -ENOMEM; 3427 return -ENOMEM;
3610 } 3428 }
@@ -3633,12 +3451,12 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
3633 struct nes_cm_listener *cm_node; 3451 struct nes_cm_listener *cm_node;
3634 struct nes_cm_info cm_info; 3452 struct nes_cm_info cm_info;
3635 int err; 3453 int err;
3636 struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; 3454 struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
3637 3455
3638 nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n", 3456 nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n",
3639 cm_id, ntohs(laddr->sin_port)); 3457 cm_id, ntohs(laddr->sin_port));
3640 3458
3641 if (cm_id->local_addr.ss_family != AF_INET) 3459 if (cm_id->m_local_addr.ss_family != AF_INET)
3642 return -ENOSYS; 3460 return -ENOSYS;
3643 nesvnic = to_nesvnic(cm_id->device); 3461 nesvnic = to_nesvnic(cm_id->device);
3644 if (!nesvnic) 3462 if (!nesvnic)
@@ -3658,10 +3476,6 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
3658 3476
3659 cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; 3477 cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
3660 3478
3661 /* No port mapper available, go with the specified info */
3662 cm_info.mapped_loc_addr = cm_info.loc_addr;
3663 cm_info.mapped_loc_port = cm_info.loc_port;
3664
3665 cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info); 3479 cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info);
3666 if (!cm_node) { 3480 if (!cm_node) {
3667 printk(KERN_ERR "%s[%u] Error returned from listen API call\n", 3481 printk(KERN_ERR "%s[%u] Error returned from listen API call\n",
@@ -3673,10 +3487,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
3673 cm_node->tos = cm_id->tos; 3487 cm_node->tos = cm_id->tos;
3674 3488
3675 if (!cm_node->reused_node) { 3489 if (!cm_node->reused_node) {
3676 if (nes_create_mapinfo(&cm_info)) 3490 err = nes_manage_apbvt(nesvnic, cm_node->loc_port,
3677 return -ENOMEM;
3678
3679 err = nes_manage_apbvt(nesvnic, cm_node->mapped_loc_port,
3680 PCI_FUNC(nesvnic->nesdev->pcidev->devfn), 3491 PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
3681 NES_MANAGE_APBVT_ADD); 3492 NES_MANAGE_APBVT_ADD);
3682 if (err) { 3493 if (err) {
@@ -3786,8 +3597,8 @@ static void cm_event_connected(struct nes_cm_event *event)
3786 nesvnic = to_nesvnic(nesqp->ibqp.device); 3597 nesvnic = to_nesvnic(nesqp->ibqp.device);
3787 nesdev = nesvnic->nesdev; 3598 nesdev = nesvnic->nesdev;
3788 nesadapter = nesdev->nesadapter; 3599 nesadapter = nesdev->nesadapter;
3789 laddr = (struct sockaddr_in *)&cm_id->local_addr; 3600 laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
3790 raddr = (struct sockaddr_in *)&cm_id->remote_addr; 3601 raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
3791 cm_event_laddr = (struct sockaddr_in *)&cm_event.local_addr; 3602 cm_event_laddr = (struct sockaddr_in *)&cm_event.local_addr;
3792 3603
3793 if (nesqp->destroyed) 3604 if (nesqp->destroyed)
@@ -3802,10 +3613,10 @@ static void cm_event_connected(struct nes_cm_event *event)
3802 3613
3803 /* set the QP tsa context */ 3614 /* set the QP tsa context */
3804 nesqp->nesqp_context->tcpPorts[0] = 3615 nesqp->nesqp_context->tcpPorts[0] =
3805 cpu_to_le16(cm_node->mapped_loc_port); 3616 cpu_to_le16(cm_node->loc_port);
3806 nesqp->nesqp_context->tcpPorts[1] = 3617 nesqp->nesqp_context->tcpPorts[1] =
3807 cpu_to_le16(cm_node->mapped_rem_port); 3618 cpu_to_le16(cm_node->rem_port);
3808 nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr); 3619 nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->rem_addr);
3809 3620
3810 nesqp->nesqp_context->misc2 |= cpu_to_le32( 3621 nesqp->nesqp_context->misc2 |= cpu_to_le32(
3811 (u32)PCI_FUNC(nesdev->pcidev->devfn) << 3622 (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
@@ -3835,9 +3646,9 @@ static void cm_event_connected(struct nes_cm_event *event)
3835 3646
3836 nes_quad.DstIpAdrIndex = 3647 nes_quad.DstIpAdrIndex =
3837 cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); 3648 cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
3838 nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr); 3649 nes_quad.SrcIpadr = htonl(cm_node->rem_addr);
3839 nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port); 3650 nes_quad.TcpPorts[0] = htons(cm_node->rem_port);
3840 nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port); 3651 nes_quad.TcpPorts[1] = htons(cm_node->loc_port);
3841 3652
3842 /* Produce hash key */ 3653 /* Produce hash key */
3843 crc_value = get_crc_value(&nes_quad); 3654 crc_value = get_crc_value(&nes_quad);
@@ -3858,14 +3669,14 @@ static void cm_event_connected(struct nes_cm_event *event)
3858 cm_event.provider_data = cm_id->provider_data; 3669 cm_event.provider_data = cm_id->provider_data;
3859 cm_event_laddr->sin_family = AF_INET; 3670 cm_event_laddr->sin_family = AF_INET;
3860 cm_event_laddr->sin_port = laddr->sin_port; 3671 cm_event_laddr->sin_port = laddr->sin_port;
3861 cm_event.remote_addr = cm_id->remote_addr; 3672 cm_event.remote_addr = cm_id->m_remote_addr;
3862 3673
3863 cm_event.private_data = (void *)event->cm_node->mpa_frame_buf; 3674 cm_event.private_data = (void *)event->cm_node->mpa_frame_buf;
3864 cm_event.private_data_len = (u8)event->cm_node->mpa_frame_size; 3675 cm_event.private_data_len = (u8)event->cm_node->mpa_frame_size;
3865 cm_event.ird = cm_node->ird_size; 3676 cm_event.ird = cm_node->ird_size;
3866 cm_event.ord = cm_node->ord_size; 3677 cm_event.ord = cm_node->ord_size;
3867 3678
3868 cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr); 3679 cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr);
3869 ret = cm_id->event_handler(cm_id, &cm_event); 3680 ret = cm_id->event_handler(cm_id, &cm_event);
3870 nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); 3681 nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
3871 3682
@@ -3913,8 +3724,8 @@ static void cm_event_connect_error(struct nes_cm_event *event)
3913 cm_event.event = IW_CM_EVENT_CONNECT_REPLY; 3724 cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
3914 cm_event.status = -ECONNRESET; 3725 cm_event.status = -ECONNRESET;
3915 cm_event.provider_data = cm_id->provider_data; 3726 cm_event.provider_data = cm_id->provider_data;
3916 cm_event.local_addr = cm_id->local_addr; 3727 cm_event.local_addr = cm_id->m_local_addr;
3917 cm_event.remote_addr = cm_id->remote_addr; 3728 cm_event.remote_addr = cm_id->m_remote_addr;
3918 cm_event.private_data = NULL; 3729 cm_event.private_data = NULL;
3919 cm_event.private_data_len = 0; 3730 cm_event.private_data_len = 0;
3920 3731
@@ -3970,8 +3781,8 @@ static void cm_event_reset(struct nes_cm_event *event)
3970 cm_event.event = IW_CM_EVENT_DISCONNECT; 3781 cm_event.event = IW_CM_EVENT_DISCONNECT;
3971 cm_event.status = -ECONNRESET; 3782 cm_event.status = -ECONNRESET;
3972 cm_event.provider_data = cm_id->provider_data; 3783 cm_event.provider_data = cm_id->provider_data;
3973 cm_event.local_addr = cm_id->local_addr; 3784 cm_event.local_addr = cm_id->m_local_addr;
3974 cm_event.remote_addr = cm_id->remote_addr; 3785 cm_event.remote_addr = cm_id->m_remote_addr;
3975 cm_event.private_data = NULL; 3786 cm_event.private_data = NULL;
3976 cm_event.private_data_len = 0; 3787 cm_event.private_data_len = 0;
3977 3788
@@ -3981,8 +3792,8 @@ static void cm_event_reset(struct nes_cm_event *event)
3981 cm_event.event = IW_CM_EVENT_CLOSE; 3792 cm_event.event = IW_CM_EVENT_CLOSE;
3982 cm_event.status = 0; 3793 cm_event.status = 0;
3983 cm_event.provider_data = cm_id->provider_data; 3794 cm_event.provider_data = cm_id->provider_data;
3984 cm_event.local_addr = cm_id->local_addr; 3795 cm_event.local_addr = cm_id->m_local_addr;
3985 cm_event.remote_addr = cm_id->remote_addr; 3796 cm_event.remote_addr = cm_id->m_remote_addr;
3986 cm_event.private_data = NULL; 3797 cm_event.private_data = NULL;
3987 cm_event.private_data_len = 0; 3798 cm_event.private_data_len = 0;
3988 nes_debug(NES_DBG_CM, "NODE %p Generating CLOSE\n", event->cm_node); 3799 nes_debug(NES_DBG_CM, "NODE %p Generating CLOSE\n", event->cm_node);
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 147c2c884227..d827d03e3941 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -293,8 +293,8 @@ struct nes_cm_listener {
293 struct list_head list; 293 struct list_head list;
294 struct nes_cm_core *cm_core; 294 struct nes_cm_core *cm_core;
295 u8 loc_mac[ETH_ALEN]; 295 u8 loc_mac[ETH_ALEN];
296 nes_addr_t loc_addr, mapped_loc_addr; 296 nes_addr_t loc_addr;
297 u16 loc_port, mapped_loc_port; 297 u16 loc_port;
298 struct iw_cm_id *cm_id; 298 struct iw_cm_id *cm_id;
299 enum nes_cm_conn_type conn_type; 299 enum nes_cm_conn_type conn_type;
300 atomic_t ref_count; 300 atomic_t ref_count;
@@ -309,9 +309,7 @@ struct nes_cm_listener {
309/* per connection node and node state information */ 309/* per connection node and node state information */
310struct nes_cm_node { 310struct nes_cm_node {
311 nes_addr_t loc_addr, rem_addr; 311 nes_addr_t loc_addr, rem_addr;
312 nes_addr_t mapped_loc_addr, mapped_rem_addr;
313 u16 loc_port, rem_port; 312 u16 loc_port, rem_port;
314 u16 mapped_loc_port, mapped_rem_port;
315 313
316 u8 loc_mac[ETH_ALEN]; 314 u8 loc_mac[ETH_ALEN];
317 u8 rem_mac[ETH_ALEN]; 315 u8 rem_mac[ETH_ALEN];
@@ -368,11 +366,6 @@ struct nes_cm_info {
368 u16 rem_port; 366 u16 rem_port;
369 nes_addr_t loc_addr; 367 nes_addr_t loc_addr;
370 nes_addr_t rem_addr; 368 nes_addr_t rem_addr;
371 u16 mapped_loc_port;
372 u16 mapped_rem_port;
373 nes_addr_t mapped_loc_addr;
374 nes_addr_t mapped_rem_addr;
375
376 enum nes_cm_conn_type conn_type; 369 enum nes_cm_conn_type conn_type;
377 int backlog; 370 int backlog;
378}; 371};
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 4713dd7ed764..a1c6481d8038 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -35,18 +35,11 @@
35#include <linux/moduleparam.h> 35#include <linux/moduleparam.h>
36#include <linux/netdevice.h> 36#include <linux/netdevice.h>
37#include <linux/etherdevice.h> 37#include <linux/etherdevice.h>
38#include <linux/ip.h>
39#include <linux/tcp.h>
40#include <linux/if_vlan.h> 38#include <linux/if_vlan.h>
41#include <linux/inet_lro.h>
42#include <linux/slab.h> 39#include <linux/slab.h>
43 40
44#include "nes.h" 41#include "nes.h"
45 42
46static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR;
47module_param(nes_lro_max_aggr, uint, 0444);
48MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation");
49
50static int wide_ppm_offset; 43static int wide_ppm_offset;
51module_param(wide_ppm_offset, int, 0644); 44module_param(wide_ppm_offset, int, 0644);
52MODULE_PARM_DESC(wide_ppm_offset, "Increase CX4 interface clock ppm offset, 0=100ppm (default), 1=300ppm"); 45MODULE_PARM_DESC(wide_ppm_offset, "Increase CX4 interface clock ppm offset, 0=100ppm (default), 1=300ppm");
@@ -1642,25 +1635,6 @@ static void nes_rq_wqes_timeout(unsigned long parm)
1642} 1635}
1643 1636
1644 1637
1645static int nes_lro_get_skb_hdr(struct sk_buff *skb, void **iphdr,
1646 void **tcph, u64 *hdr_flags, void *priv)
1647{
1648 unsigned int ip_len;
1649 struct iphdr *iph;
1650 skb_reset_network_header(skb);
1651 iph = ip_hdr(skb);
1652 if (iph->protocol != IPPROTO_TCP)
1653 return -1;
1654 ip_len = ip_hdrlen(skb);
1655 skb_set_transport_header(skb, ip_len);
1656 *tcph = tcp_hdr(skb);
1657
1658 *hdr_flags = LRO_IPV4 | LRO_TCP;
1659 *iphdr = iph;
1660 return 0;
1661}
1662
1663
1664/** 1638/**
1665 * nes_init_nic_qp 1639 * nes_init_nic_qp
1666 */ 1640 */
@@ -1895,14 +1869,6 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
1895 return -ENOMEM; 1869 return -ENOMEM;
1896 } 1870 }
1897 1871
1898 nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr;
1899 nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS;
1900 nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc;
1901 nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr;
1902 nesvnic->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID;
1903 nesvnic->lro_mgr.dev = netdev;
1904 nesvnic->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY;
1905 nesvnic->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
1906 return 0; 1872 return 0;
1907} 1873}
1908 1874
@@ -2809,13 +2775,10 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
2809 u16 pkt_type; 2775 u16 pkt_type;
2810 u16 rqes_processed = 0; 2776 u16 rqes_processed = 0;
2811 u8 sq_cqes = 0; 2777 u8 sq_cqes = 0;
2812 u8 nes_use_lro = 0;
2813 2778
2814 head = cq->cq_head; 2779 head = cq->cq_head;
2815 cq_size = cq->cq_size; 2780 cq_size = cq->cq_size;
2816 cq->cqes_pending = 1; 2781 cq->cqes_pending = 1;
2817 if (nesvnic->netdev->features & NETIF_F_LRO)
2818 nes_use_lro = 1;
2819 do { 2782 do {
2820 if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) & 2783 if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) &
2821 NES_NIC_CQE_VALID) { 2784 NES_NIC_CQE_VALID) {
@@ -2950,10 +2913,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
2950 2913
2951 __vlan_hwaccel_put_tag(rx_skb, htons(ETH_P_8021Q), vlan_tag); 2914 __vlan_hwaccel_put_tag(rx_skb, htons(ETH_P_8021Q), vlan_tag);
2952 } 2915 }
2953 if (nes_use_lro) 2916 napi_gro_receive(&nesvnic->napi, rx_skb);
2954 lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL);
2955 else
2956 netif_receive_skb(rx_skb);
2957 2917
2958skip_rx_indicate0: 2918skip_rx_indicate0:
2959 ; 2919 ;
@@ -2984,8 +2944,6 @@ skip_rx_indicate0:
2984 2944
2985 } while (1); 2945 } while (1);
2986 2946
2987 if (nes_use_lro)
2988 lro_flush_all(&nesvnic->lro_mgr);
2989 if (sq_cqes) { 2947 if (sq_cqes) {
2990 barrier(); 2948 barrier();
2991 /* restart the queue if it had been stopped */ 2949 /* restart the queue if it had been stopped */
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index c9080208aad2..1b66ef1e9937 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -33,8 +33,6 @@
33#ifndef __NES_HW_H 33#ifndef __NES_HW_H
34#define __NES_HW_H 34#define __NES_HW_H
35 35
36#include <linux/inet_lro.h>
37
38#define NES_PHY_TYPE_CX4 1 36#define NES_PHY_TYPE_CX4 1
39#define NES_PHY_TYPE_1G 2 37#define NES_PHY_TYPE_1G 2
40#define NES_PHY_TYPE_ARGUS 4 38#define NES_PHY_TYPE_ARGUS 4
@@ -1049,8 +1047,6 @@ struct nes_hw_tune_timer {
1049#define NES_TIMER_ENABLE_LIMIT 4 1047#define NES_TIMER_ENABLE_LIMIT 4
1050#define NES_MAX_LINK_INTERRUPTS 128 1048#define NES_MAX_LINK_INTERRUPTS 128
1051#define NES_MAX_LINK_CHECK 200 1049#define NES_MAX_LINK_CHECK 200
1052#define NES_MAX_LRO_DESCRIPTORS 32
1053#define NES_LRO_MAX_AGGR 64
1054 1050
1055struct nes_adapter { 1051struct nes_adapter {
1056 u64 fw_ver; 1052 u64 fw_ver;
@@ -1263,9 +1259,6 @@ struct nes_vnic {
1263 u8 next_qp_nic_index; 1259 u8 next_qp_nic_index;
1264 u8 of_device_registered; 1260 u8 of_device_registered;
1265 u8 rdma_enabled; 1261 u8 rdma_enabled;
1266 u32 lro_max_aggr;
1267 struct net_lro_mgr lro_mgr;
1268 struct net_lro_desc lro_desc[NES_MAX_LRO_DESCRIPTORS];
1269 struct timer_list event_timer; 1262 struct timer_list event_timer;
1270 enum ib_event_type delayed_event; 1263 enum ib_event_type delayed_event;
1271 enum ib_event_type last_dispatched_event; 1264 enum ib_event_type last_dispatched_event;
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 6a0bdfa0ce2e..3ea9e055fdd3 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -1085,9 +1085,6 @@ static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = {
1085 "Free 4Kpbls", 1085 "Free 4Kpbls",
1086 "Free 256pbls", 1086 "Free 256pbls",
1087 "Timer Inits", 1087 "Timer Inits",
1088 "LRO aggregated",
1089 "LRO flushed",
1090 "LRO no_desc",
1091 "PAU CreateQPs", 1088 "PAU CreateQPs",
1092 "PAU DestroyQPs", 1089 "PAU DestroyQPs",
1093}; 1090};
@@ -1302,9 +1299,6 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev,
1302 target_stat_values[++index] = nesadapter->free_4kpbl; 1299 target_stat_values[++index] = nesadapter->free_4kpbl;
1303 target_stat_values[++index] = nesadapter->free_256pbl; 1300 target_stat_values[++index] = nesadapter->free_256pbl;
1304 target_stat_values[++index] = int_mod_timer_init; 1301 target_stat_values[++index] = int_mod_timer_init;
1305 target_stat_values[++index] = nesvnic->lro_mgr.stats.aggregated;
1306 target_stat_values[++index] = nesvnic->lro_mgr.stats.flushed;
1307 target_stat_values[++index] = nesvnic->lro_mgr.stats.no_desc;
1308 target_stat_values[++index] = atomic_read(&pau_qps_created); 1302 target_stat_values[++index] = atomic_read(&pau_qps_created);
1309 target_stat_values[++index] = atomic_read(&pau_qps_destroyed); 1303 target_stat_values[++index] = atomic_read(&pau_qps_destroyed);
1310} 1304}
@@ -1709,7 +1703,6 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
1709 netdev->hw_features |= NETIF_F_TSO; 1703 netdev->hw_features |= NETIF_F_TSO;
1710 1704
1711 netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX; 1705 netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX;
1712 netdev->hw_features |= NETIF_F_LRO;
1713 1706
1714 nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d," 1707 nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d,"
1715 " nic_index = %d, logical_port = %d, mac_index = %d.\n", 1708 " nic_index = %d, logical_port = %d, mac_index = %d.\n",
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 8c4daf7f22ec..fba69a39a7eb 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -56,7 +56,8 @@ static int nes_dereg_mr(struct ib_mr *ib_mr);
56/** 56/**
57 * nes_alloc_mw 57 * nes_alloc_mw
58 */ 58 */
59static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type) 59static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
60 struct ib_udata *udata)
60{ 61{
61 struct nes_pd *nespd = to_nespd(ibpd); 62 struct nes_pd *nespd = to_nespd(ibpd);
62 struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); 63 struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
@@ -3768,6 +3769,8 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
3768 nesibdev->ibdev.iwcm->create_listen = nes_create_listen; 3769 nesibdev->ibdev.iwcm->create_listen = nes_create_listen;
3769 nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; 3770 nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen;
3770 nesibdev->ibdev.get_port_immutable = nes_port_immutable; 3771 nesibdev->ibdev.get_port_immutable = nes_port_immutable;
3772 memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name,
3773 sizeof(nesibdev->ibdev.iwcm->ifname));
3771 3774
3772 return nesibdev; 3775 return nesibdev;
3773} 3776}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 12503f15fbd6..45bdfa0e3b2b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -114,6 +114,7 @@ struct ocrdma_dev_attr {
114 u8 local_ca_ack_delay; 114 u8 local_ca_ack_delay;
115 u8 ird; 115 u8 ird;
116 u8 num_ird_pages; 116 u8 num_ird_pages;
117 u8 udp_encap;
117}; 118};
118 119
119struct ocrdma_dma_mem { 120struct ocrdma_dma_mem {
@@ -356,6 +357,7 @@ struct ocrdma_ah {
356 struct ocrdma_av *av; 357 struct ocrdma_av *av;
357 u16 sgid_index; 358 u16 sgid_index;
358 u32 id; 359 u32 id;
360 u8 hdr_type;
359}; 361};
360 362
361struct ocrdma_qp_hwq_info { 363struct ocrdma_qp_hwq_info {
@@ -598,4 +600,10 @@ static inline u8 ocrdma_get_ae_link_state(u32 ae_state)
598 return ((ae_state & OCRDMA_AE_LSC_LS_MASK) >> OCRDMA_AE_LSC_LS_SHIFT); 600 return ((ae_state & OCRDMA_AE_LSC_LS_MASK) >> OCRDMA_AE_LSC_LS_SHIFT);
599} 601}
600 602
603static inline bool ocrdma_is_udp_encap_supported(struct ocrdma_dev *dev)
604{
605 return (dev->attr.udp_encap & OCRDMA_L3_TYPE_IPV4) ||
606 (dev->attr.udp_encap & OCRDMA_L3_TYPE_IPV6);
607}
608
601#endif 609#endif
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 3790771f2baa..797362a297b2 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -55,18 +55,46 @@
55 55
56#define OCRDMA_VID_PCP_SHIFT 0xD 56#define OCRDMA_VID_PCP_SHIFT 0xD
57 57
58static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type)
59{
60 switch (hdr_type) {
61 case OCRDMA_L3_TYPE_IB_GRH:
62 return (u16)0x8915;
63 case OCRDMA_L3_TYPE_IPV4:
64 return (u16)0x0800;
65 case OCRDMA_L3_TYPE_IPV6:
66 return (u16)0x86dd;
67 default:
68 pr_err("ocrdma%d: Invalid network header\n", devid);
69 return 0;
70 }
71}
72
58static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, 73static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
59 struct ib_ah_attr *attr, union ib_gid *sgid, 74 struct ib_ah_attr *attr, union ib_gid *sgid,
60 int pdid, bool *isvlan, u16 vlan_tag) 75 int pdid, bool *isvlan, u16 vlan_tag)
61{ 76{
62 int status = 0; 77 int status;
63 struct ocrdma_eth_vlan eth; 78 struct ocrdma_eth_vlan eth;
64 struct ocrdma_grh grh; 79 struct ocrdma_grh grh;
65 int eth_sz; 80 int eth_sz;
81 u16 proto_num = 0;
82 u8 nxthdr = 0x11;
83 struct iphdr ipv4;
84 union {
85 struct sockaddr _sockaddr;
86 struct sockaddr_in _sockaddr_in;
87 struct sockaddr_in6 _sockaddr_in6;
88 } sgid_addr, dgid_addr;
66 89
67 memset(&eth, 0, sizeof(eth)); 90 memset(&eth, 0, sizeof(eth));
68 memset(&grh, 0, sizeof(grh)); 91 memset(&grh, 0, sizeof(grh));
69 92
93 /* Protocol Number */
94 proto_num = ocrdma_hdr_type_to_proto_num(dev->id, ah->hdr_type);
95 if (!proto_num)
96 return -EINVAL;
97 nxthdr = (proto_num == 0x8915) ? 0x1b : 0x11;
70 /* VLAN */ 98 /* VLAN */
71 if (!vlan_tag || (vlan_tag > 0xFFF)) 99 if (!vlan_tag || (vlan_tag > 0xFFF))
72 vlan_tag = dev->pvid; 100 vlan_tag = dev->pvid;
@@ -78,13 +106,13 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
78 dev->id); 106 dev->id);
79 } 107 }
80 eth.eth_type = cpu_to_be16(0x8100); 108 eth.eth_type = cpu_to_be16(0x8100);
81 eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); 109 eth.roce_eth_type = cpu_to_be16(proto_num);
82 vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT; 110 vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT;
83 eth.vlan_tag = cpu_to_be16(vlan_tag); 111 eth.vlan_tag = cpu_to_be16(vlan_tag);
84 eth_sz = sizeof(struct ocrdma_eth_vlan); 112 eth_sz = sizeof(struct ocrdma_eth_vlan);
85 *isvlan = true; 113 *isvlan = true;
86 } else { 114 } else {
87 eth.eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); 115 eth.eth_type = cpu_to_be16(proto_num);
88 eth_sz = sizeof(struct ocrdma_eth_basic); 116 eth_sz = sizeof(struct ocrdma_eth_basic);
89 } 117 }
90 /* MAC */ 118 /* MAC */
@@ -93,18 +121,33 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
93 if (status) 121 if (status)
94 return status; 122 return status;
95 ah->sgid_index = attr->grh.sgid_index; 123 ah->sgid_index = attr->grh.sgid_index;
96 memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid));
97 memcpy(&grh.dgid[0], attr->grh.dgid.raw, sizeof(attr->grh.dgid.raw));
98
99 grh.tclass_flow = cpu_to_be32((6 << 28) |
100 (attr->grh.traffic_class << 24) |
101 attr->grh.flow_label);
102 /* 0x1b is next header value in GRH */
103 grh.pdid_hoplimit = cpu_to_be32((pdid << 16) |
104 (0x1b << 8) | attr->grh.hop_limit);
105 /* Eth HDR */ 124 /* Eth HDR */
106 memcpy(&ah->av->eth_hdr, &eth, eth_sz); 125 memcpy(&ah->av->eth_hdr, &eth, eth_sz);
107 memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh)); 126 if (ah->hdr_type == RDMA_NETWORK_IPV4) {
127 *((__be16 *)&ipv4) = htons((4 << 12) | (5 << 8) |
128 attr->grh.traffic_class);
129 ipv4.id = cpu_to_be16(pdid);
130 ipv4.frag_off = htons(IP_DF);
131 ipv4.tot_len = htons(0);
132 ipv4.ttl = attr->grh.hop_limit;
133 ipv4.protocol = nxthdr;
134 rdma_gid2ip(&sgid_addr._sockaddr, sgid);
135 ipv4.saddr = sgid_addr._sockaddr_in.sin_addr.s_addr;
136 rdma_gid2ip(&dgid_addr._sockaddr, &attr->grh.dgid);
137 ipv4.daddr = dgid_addr._sockaddr_in.sin_addr.s_addr;
138 memcpy((u8 *)ah->av + eth_sz, &ipv4, sizeof(struct iphdr));
139 } else {
140 memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid));
141 grh.tclass_flow = cpu_to_be32((6 << 28) |
142 (attr->grh.traffic_class << 24) |
143 attr->grh.flow_label);
144 memcpy(&grh.dgid[0], attr->grh.dgid.raw,
145 sizeof(attr->grh.dgid.raw));
146 grh.pdid_hoplimit = cpu_to_be32((pdid << 16) |
147 (nxthdr << 8) |
148 attr->grh.hop_limit);
149 memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
150 }
108 if (*isvlan) 151 if (*isvlan)
109 ah->av->valid |= OCRDMA_AV_VLAN_VALID; 152 ah->av->valid |= OCRDMA_AV_VLAN_VALID;
110 ah->av->valid = cpu_to_le32(ah->av->valid); 153 ah->av->valid = cpu_to_le32(ah->av->valid);
@@ -128,6 +171,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
128 171
129 if (atomic_cmpxchg(&dev->update_sl, 1, 0)) 172 if (atomic_cmpxchg(&dev->update_sl, 1, 0))
130 ocrdma_init_service_level(dev); 173 ocrdma_init_service_level(dev);
174
131 ah = kzalloc(sizeof(*ah), GFP_ATOMIC); 175 ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
132 if (!ah) 176 if (!ah)
133 return ERR_PTR(-ENOMEM); 177 return ERR_PTR(-ENOMEM);
@@ -148,6 +192,8 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
148 vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev); 192 vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
149 dev_put(sgid_attr.ndev); 193 dev_put(sgid_attr.ndev);
150 } 194 }
195 /* Get network header type for this GID */
196 ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
151 197
152 if ((pd->uctx) && 198 if ((pd->uctx) &&
153 (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) && 199 (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
@@ -172,6 +218,11 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
172 ahid_addr = pd->uctx->ah_tbl.va + attr->dlid; 218 ahid_addr = pd->uctx->ah_tbl.va + attr->dlid;
173 *ahid_addr = 0; 219 *ahid_addr = 0;
174 *ahid_addr |= ah->id & OCRDMA_AH_ID_MASK; 220 *ahid_addr |= ah->id & OCRDMA_AH_ID_MASK;
221 if (ocrdma_is_udp_encap_supported(dev)) {
222 *ahid_addr |= ((u32)ah->hdr_type &
223 OCRDMA_AH_L3_TYPE_MASK) <<
224 OCRDMA_AH_L3_TYPE_SHIFT;
225 }
175 if (isvlan) 226 if (isvlan)
176 *ahid_addr |= (OCRDMA_AH_VLAN_VALID_MASK << 227 *ahid_addr |= (OCRDMA_AH_VLAN_VALID_MASK <<
177 OCRDMA_AH_VLAN_VALID_SHIFT); 228 OCRDMA_AH_VLAN_VALID_SHIFT);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
index 04a30ae67473..3856dd4c7e3d 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -46,9 +46,10 @@
46enum { 46enum {
47 OCRDMA_AH_ID_MASK = 0x3FF, 47 OCRDMA_AH_ID_MASK = 0x3FF,
48 OCRDMA_AH_VLAN_VALID_MASK = 0x01, 48 OCRDMA_AH_VLAN_VALID_MASK = 0x01,
49 OCRDMA_AH_VLAN_VALID_SHIFT = 0x1F 49 OCRDMA_AH_VLAN_VALID_SHIFT = 0x1F,
50 OCRDMA_AH_L3_TYPE_MASK = 0x03,
51 OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */
50}; 52};
51
52struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *); 53struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *);
53int ocrdma_destroy_ah(struct ib_ah *); 54int ocrdma_destroy_ah(struct ib_ah *);
54int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *); 55int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 283ca842ff74..16740dcb876b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1113,7 +1113,7 @@ mbx_err:
1113static int ocrdma_nonemb_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe, 1113static int ocrdma_nonemb_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe,
1114 void *payload_va) 1114 void *payload_va)
1115{ 1115{
1116 int status = 0; 1116 int status;
1117 struct ocrdma_mbx_rsp *rsp = payload_va; 1117 struct ocrdma_mbx_rsp *rsp = payload_va;
1118 1118
1119 if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >> 1119 if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >>
@@ -1144,6 +1144,9 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
1144 attr->max_pd = 1144 attr->max_pd =
1145 (rsp->max_pd_ca_ack_delay & OCRDMA_MBX_QUERY_CFG_MAX_PD_MASK) >> 1145 (rsp->max_pd_ca_ack_delay & OCRDMA_MBX_QUERY_CFG_MAX_PD_MASK) >>
1146 OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT; 1146 OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT;
1147 attr->udp_encap = (rsp->max_pd_ca_ack_delay &
1148 OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK) >>
1149 OCRDMA_MBX_QUERY_CFG_L3_TYPE_SHIFT;
1147 attr->max_dpp_pds = 1150 attr->max_dpp_pds =
1148 (rsp->max_dpp_pds_credits & OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_MASK) >> 1151 (rsp->max_dpp_pds_credits & OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_MASK) >>
1149 OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_OFFSET; 1152 OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_OFFSET;
@@ -2138,7 +2141,6 @@ int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state,
2138 enum ib_qp_state *old_ib_state) 2141 enum ib_qp_state *old_ib_state)
2139{ 2142{
2140 unsigned long flags; 2143 unsigned long flags;
2141 int status = 0;
2142 enum ocrdma_qp_state new_state; 2144 enum ocrdma_qp_state new_state;
2143 new_state = get_ocrdma_qp_state(new_ib_state); 2145 new_state = get_ocrdma_qp_state(new_ib_state);
2144 2146
@@ -2163,7 +2165,7 @@ int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state,
2163 qp->state = new_state; 2165 qp->state = new_state;
2164 2166
2165 spin_unlock_irqrestore(&qp->q_lock, flags); 2167 spin_unlock_irqrestore(&qp->q_lock, flags);
2166 return status; 2168 return 0;
2167} 2169}
2168 2170
2169static u32 ocrdma_set_create_qp_mbx_access_flags(struct ocrdma_qp *qp) 2171static u32 ocrdma_set_create_qp_mbx_access_flags(struct ocrdma_qp *qp)
@@ -2501,7 +2503,12 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
2501 union ib_gid sgid, zgid; 2503 union ib_gid sgid, zgid;
2502 struct ib_gid_attr sgid_attr; 2504 struct ib_gid_attr sgid_attr;
2503 u32 vlan_id = 0xFFFF; 2505 u32 vlan_id = 0xFFFF;
2504 u8 mac_addr[6]; 2506 u8 mac_addr[6], hdr_type;
2507 union {
2508 struct sockaddr _sockaddr;
2509 struct sockaddr_in _sockaddr_in;
2510 struct sockaddr_in6 _sockaddr_in6;
2511 } sgid_addr, dgid_addr;
2505 struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); 2512 struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
2506 2513
2507 if ((ah_attr->ah_flags & IB_AH_GRH) == 0) 2514 if ((ah_attr->ah_flags & IB_AH_GRH) == 0)
@@ -2516,6 +2523,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
2516 cmd->params.hop_lmt_rq_psn |= 2523 cmd->params.hop_lmt_rq_psn |=
2517 (ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT); 2524 (ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT);
2518 cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID; 2525 cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID;
2526
2527 /* GIDs */
2519 memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0], 2528 memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
2520 sizeof(cmd->params.dgid)); 2529 sizeof(cmd->params.dgid));
2521 2530
@@ -2538,6 +2547,16 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
2538 return status; 2547 return status;
2539 cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) | 2548 cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) |
2540 (mac_addr[2] << 16) | (mac_addr[3] << 24); 2549 (mac_addr[2] << 16) | (mac_addr[3] << 24);
2550
2551 hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
2552 if (hdr_type == RDMA_NETWORK_IPV4) {
2553 rdma_gid2ip(&sgid_addr._sockaddr, &sgid);
2554 rdma_gid2ip(&dgid_addr._sockaddr, &ah_attr->grh.dgid);
2555 memcpy(&cmd->params.dgid[0],
2556 &dgid_addr._sockaddr_in.sin_addr.s_addr, 4);
2557 memcpy(&cmd->params.sgid[0],
2558 &sgid_addr._sockaddr_in.sin_addr.s_addr, 4);
2559 }
2541 /* convert them to LE format. */ 2560 /* convert them to LE format. */
2542 ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid)); 2561 ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
2543 ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid)); 2562 ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
@@ -2558,7 +2577,9 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
2558 cmd->params.rnt_rc_sl_fl |= 2577 cmd->params.rnt_rc_sl_fl |=
2559 (dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT; 2578 (dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT;
2560 } 2579 }
2561 2580 cmd->params.max_sge_recv_flags |= ((hdr_type <<
2581 OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_SHIFT) &
2582 OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_MASK);
2562 return 0; 2583 return 0;
2563} 2584}
2564 2585
@@ -2871,7 +2892,7 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq)
2871static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype, 2892static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype,
2872 struct ocrdma_dcbx_cfg *dcbxcfg) 2893 struct ocrdma_dcbx_cfg *dcbxcfg)
2873{ 2894{
2874 int status = 0; 2895 int status;
2875 dma_addr_t pa; 2896 dma_addr_t pa;
2876 struct ocrdma_mqe cmd; 2897 struct ocrdma_mqe cmd;
2877 2898
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index f38743018cb4..3d75f65ce87e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -89,8 +89,10 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
89 struct ib_port_immutable *immutable) 89 struct ib_port_immutable *immutable)
90{ 90{
91 struct ib_port_attr attr; 91 struct ib_port_attr attr;
92 struct ocrdma_dev *dev;
92 int err; 93 int err;
93 94
95 dev = get_ocrdma_dev(ibdev);
94 err = ocrdma_query_port(ibdev, port_num, &attr); 96 err = ocrdma_query_port(ibdev, port_num, &attr);
95 if (err) 97 if (err)
96 return err; 98 return err;
@@ -98,6 +100,8 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
98 immutable->pkey_tbl_len = attr.pkey_tbl_len; 100 immutable->pkey_tbl_len = attr.pkey_tbl_len;
99 immutable->gid_tbl_len = attr.gid_tbl_len; 101 immutable->gid_tbl_len = attr.gid_tbl_len;
100 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; 102 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
103 if (ocrdma_is_udp_encap_supported(dev))
104 immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
101 immutable->max_mad_size = IB_MGMT_MAD_SIZE; 105 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
102 106
103 return 0; 107 return 0;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 99dd6fdf06d7..0efc9662c6d8 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -140,7 +140,11 @@ enum {
140 OCRDMA_DB_RQ_SHIFT = 24 140 OCRDMA_DB_RQ_SHIFT = 24
141}; 141};
142 142
143#define OCRDMA_ROUDP_FLAGS_SHIFT 0x03 143enum {
144 OCRDMA_L3_TYPE_IB_GRH = 0x00,
145 OCRDMA_L3_TYPE_IPV4 = 0x01,
146 OCRDMA_L3_TYPE_IPV6 = 0x02
147};
144 148
145#define OCRDMA_DB_CQ_RING_ID_MASK 0x3FF /* bits 0 - 9 */ 149#define OCRDMA_DB_CQ_RING_ID_MASK 0x3FF /* bits 0 - 9 */
146#define OCRDMA_DB_CQ_RING_ID_EXT_MASK 0x0C00 /* bits 10-11 of qid at 12-11 */ 150#define OCRDMA_DB_CQ_RING_ID_EXT_MASK 0x0C00 /* bits 10-11 of qid at 12-11 */
@@ -546,7 +550,8 @@ enum {
546 OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT = 8, 550 OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT = 8,
547 OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK = 0xFF << 551 OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK = 0xFF <<
548 OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT, 552 OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT,
549 553 OCRDMA_MBX_QUERY_CFG_L3_TYPE_SHIFT = 3,
554 OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK = 0x18,
550 OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT = 0, 555 OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT = 0,
551 OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK = 0xFFFF, 556 OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK = 0xFFFF,
552 OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT = 16, 557 OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT = 16,
@@ -1107,6 +1112,8 @@ enum {
1107 OCRDMA_QP_PARAMS_STATE_MASK = BIT(5) | BIT(6) | BIT(7), 1112 OCRDMA_QP_PARAMS_STATE_MASK = BIT(5) | BIT(6) | BIT(7),
1108 OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = BIT(8), 1113 OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = BIT(8),
1109 OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = BIT(9), 1114 OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = BIT(9),
1115 OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_SHIFT = 11,
1116 OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_MASK = BIT(11) | BIT(12) | BIT(13),
1110 OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT = 16, 1117 OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT = 16,
1111 OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK = 0xFFFF << 1118 OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK = 0xFFFF <<
1112 OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT, 1119 OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT,
@@ -1735,8 +1742,11 @@ enum {
1735 1742
1736 /* w1 */ 1743 /* w1 */
1737 OCRDMA_CQE_UD_XFER_LEN_SHIFT = 16, 1744 OCRDMA_CQE_UD_XFER_LEN_SHIFT = 16,
1745 OCRDMA_CQE_UD_XFER_LEN_MASK = 0x1FFF,
1738 OCRDMA_CQE_PKEY_SHIFT = 0, 1746 OCRDMA_CQE_PKEY_SHIFT = 0,
1739 OCRDMA_CQE_PKEY_MASK = 0xFFFF, 1747 OCRDMA_CQE_PKEY_MASK = 0xFFFF,
1748 OCRDMA_CQE_UD_L3TYPE_SHIFT = 29,
1749 OCRDMA_CQE_UD_L3TYPE_MASK = 0x07,
1740 1750
1741 /* w2 */ 1751 /* w2 */
1742 OCRDMA_CQE_QPN_SHIFT = 0, 1752 OCRDMA_CQE_QPN_SHIFT = 0,
@@ -1861,7 +1871,7 @@ struct ocrdma_ewqe_ud_hdr {
1861 u32 rsvd_dest_qpn; 1871 u32 rsvd_dest_qpn;
1862 u32 qkey; 1872 u32 qkey;
1863 u32 rsvd_ahid; 1873 u32 rsvd_ahid;
1864 u32 rsvd; 1874 u32 hdr_type;
1865}; 1875};
1866 1876
1867/* extended wqe followed by hdr_wqe for Fast Memory register */ 1877/* extended wqe followed by hdr_wqe for Fast Memory register */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 255f774080a4..8bef09a8c49f 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -610,7 +610,7 @@ static char *ocrdma_driver_dbg_stats(struct ocrdma_dev *dev)
610static void ocrdma_update_stats(struct ocrdma_dev *dev) 610static void ocrdma_update_stats(struct ocrdma_dev *dev)
611{ 611{
612 ulong now = jiffies, secs; 612 ulong now = jiffies, secs;
613 int status = 0; 613 int status;
614 struct ocrdma_rdma_stats_resp *rdma_stats = 614 struct ocrdma_rdma_stats_resp *rdma_stats =
615 (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; 615 (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
616 struct ocrdma_rsrc_stats *rsrc_stats = &rdma_stats->act_rsrc_stats; 616 struct ocrdma_rsrc_stats *rsrc_stats = &rdma_stats->act_rsrc_stats;
@@ -641,7 +641,7 @@ static ssize_t ocrdma_dbgfs_ops_write(struct file *filp,
641{ 641{
642 char tmp_str[32]; 642 char tmp_str[32];
643 long reset; 643 long reset;
644 int status = 0; 644 int status;
645 struct ocrdma_stats *pstats = filp->private_data; 645 struct ocrdma_stats *pstats = filp->private_data;
646 struct ocrdma_dev *dev = pstats->dev; 646 struct ocrdma_dev *dev = pstats->dev;
647 647
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 12420e4ecf3d..a8496a18e20d 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -419,7 +419,7 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
419 struct ib_udata *udata) 419 struct ib_udata *udata)
420{ 420{
421 struct ocrdma_pd *pd = NULL; 421 struct ocrdma_pd *pd = NULL;
422 int status = 0; 422 int status;
423 423
424 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 424 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
425 if (!pd) 425 if (!pd)
@@ -468,7 +468,7 @@ static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
468static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev, 468static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
469 struct ocrdma_pd *pd) 469 struct ocrdma_pd *pd)
470{ 470{
471 int status = 0; 471 int status;
472 472
473 if (dev->pd_mgr->pd_prealloc_valid) 473 if (dev->pd_mgr->pd_prealloc_valid)
474 status = ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled); 474 status = ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled);
@@ -596,7 +596,7 @@ map_err:
596 596
597int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) 597int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
598{ 598{
599 int status = 0; 599 int status;
600 struct ocrdma_mm *mm, *tmp; 600 struct ocrdma_mm *mm, *tmp;
601 struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx); 601 struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
602 struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device); 602 struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device);
@@ -623,7 +623,7 @@ int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
623 unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT; 623 unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
624 u64 unmapped_db = (u64) dev->nic_info.unmapped_db; 624 u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
625 unsigned long len = (vma->vm_end - vma->vm_start); 625 unsigned long len = (vma->vm_end - vma->vm_start);
626 int status = 0; 626 int status;
627 bool found; 627 bool found;
628 628
629 if (vma->vm_start & (PAGE_SIZE - 1)) 629 if (vma->vm_start & (PAGE_SIZE - 1))
@@ -1285,7 +1285,7 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
1285 struct ib_udata *udata, int dpp_offset, 1285 struct ib_udata *udata, int dpp_offset,
1286 int dpp_credit_lmt, int srq) 1286 int dpp_credit_lmt, int srq)
1287{ 1287{
1288 int status = 0; 1288 int status;
1289 u64 usr_db; 1289 u64 usr_db;
1290 struct ocrdma_create_qp_uresp uresp; 1290 struct ocrdma_create_qp_uresp uresp;
1291 struct ocrdma_pd *pd = qp->pd; 1291 struct ocrdma_pd *pd = qp->pd;
@@ -1494,9 +1494,7 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1494 */ 1494 */
1495 if (status < 0) 1495 if (status < 0)
1496 return status; 1496 return status;
1497 status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask); 1497 return ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask);
1498
1499 return status;
1500} 1498}
1501 1499
1502int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 1500int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -1949,7 +1947,7 @@ int ocrdma_modify_srq(struct ib_srq *ibsrq,
1949 enum ib_srq_attr_mask srq_attr_mask, 1947 enum ib_srq_attr_mask srq_attr_mask,
1950 struct ib_udata *udata) 1948 struct ib_udata *udata)
1951{ 1949{
1952 int status = 0; 1950 int status;
1953 struct ocrdma_srq *srq; 1951 struct ocrdma_srq *srq;
1954 1952
1955 srq = get_ocrdma_srq(ibsrq); 1953 srq = get_ocrdma_srq(ibsrq);
@@ -2005,6 +2003,7 @@ static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
2005 else 2003 else
2006 ud_hdr->qkey = ud_wr(wr)->remote_qkey; 2004 ud_hdr->qkey = ud_wr(wr)->remote_qkey;
2007 ud_hdr->rsvd_ahid = ah->id; 2005 ud_hdr->rsvd_ahid = ah->id;
2006 ud_hdr->hdr_type = ah->hdr_type;
2008 if (ah->av->valid & OCRDMA_AV_VLAN_VALID) 2007 if (ah->av->valid & OCRDMA_AV_VLAN_VALID)
2009 hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT); 2008 hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT);
2010} 2009}
@@ -2717,9 +2716,11 @@ static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2717 return expand; 2716 return expand;
2718} 2717}
2719 2718
2720static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe) 2719static int ocrdma_update_ud_rcqe(struct ocrdma_dev *dev, struct ib_wc *ibwc,
2720 struct ocrdma_cqe *cqe)
2721{ 2721{
2722 int status; 2722 int status;
2723 u16 hdr_type = 0;
2723 2724
2724 status = (le32_to_cpu(cqe->flags_status_srcqpn) & 2725 status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2725 OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT; 2726 OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
@@ -2728,7 +2729,17 @@ static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe)
2728 ibwc->pkey_index = 0; 2729 ibwc->pkey_index = 0;
2729 ibwc->wc_flags = IB_WC_GRH; 2730 ibwc->wc_flags = IB_WC_GRH;
2730 ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >> 2731 ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
2731 OCRDMA_CQE_UD_XFER_LEN_SHIFT); 2732 OCRDMA_CQE_UD_XFER_LEN_SHIFT) &
2733 OCRDMA_CQE_UD_XFER_LEN_MASK;
2734
2735 if (ocrdma_is_udp_encap_supported(dev)) {
2736 hdr_type = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
2737 OCRDMA_CQE_UD_L3TYPE_SHIFT) &
2738 OCRDMA_CQE_UD_L3TYPE_MASK;
2739 ibwc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
2740 ibwc->network_hdr_type = hdr_type;
2741 }
2742
2732 return status; 2743 return status;
2733} 2744}
2734 2745
@@ -2791,12 +2802,15 @@ static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2791static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp, 2802static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp,
2792 struct ocrdma_cqe *cqe, struct ib_wc *ibwc) 2803 struct ocrdma_cqe *cqe, struct ib_wc *ibwc)
2793{ 2804{
2805 struct ocrdma_dev *dev;
2806
2807 dev = get_ocrdma_dev(qp->ibqp.device);
2794 ibwc->opcode = IB_WC_RECV; 2808 ibwc->opcode = IB_WC_RECV;
2795 ibwc->qp = &qp->ibqp; 2809 ibwc->qp = &qp->ibqp;
2796 ibwc->status = IB_WC_SUCCESS; 2810 ibwc->status = IB_WC_SUCCESS;
2797 2811
2798 if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) 2812 if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
2799 ocrdma_update_ud_rcqe(ibwc, cqe); 2813 ocrdma_update_ud_rcqe(dev, ibwc, cqe);
2800 else 2814 else
2801 ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen); 2815 ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen);
2802 2816
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index a6f3eab0f350..85be0de3ab26 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -244,6 +244,7 @@ struct ipoib_cm_tx {
244 unsigned tx_tail; 244 unsigned tx_tail;
245 unsigned long flags; 245 unsigned long flags;
246 u32 mtu; 246 u32 mtu;
247 unsigned max_send_sge;
247}; 248};
248 249
249struct ipoib_cm_rx_buf { 250struct ipoib_cm_rx_buf {
@@ -390,6 +391,7 @@ struct ipoib_dev_priv {
390 int hca_caps; 391 int hca_caps;
391 struct ipoib_ethtool_st ethtool; 392 struct ipoib_ethtool_st ethtool;
392 struct timer_list poll_timer; 393 struct timer_list poll_timer;
394 unsigned max_send_sge;
393}; 395};
394 396
395struct ipoib_ah { 397struct ipoib_ah {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 917e46ea3bf6..c8ed53562c9b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -710,6 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
710 struct ipoib_dev_priv *priv = netdev_priv(dev); 710 struct ipoib_dev_priv *priv = netdev_priv(dev);
711 struct ipoib_tx_buf *tx_req; 711 struct ipoib_tx_buf *tx_req;
712 int rc; 712 int rc;
713 unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);
713 714
714 if (unlikely(skb->len > tx->mtu)) { 715 if (unlikely(skb->len > tx->mtu)) {
715 ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", 716 ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -719,7 +720,23 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
719 ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN); 720 ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN);
720 return; 721 return;
721 } 722 }
722 723 if (skb_shinfo(skb)->nr_frags > usable_sge) {
724 if (skb_linearize(skb) < 0) {
725 ipoib_warn(priv, "skb could not be linearized\n");
726 ++dev->stats.tx_dropped;
727 ++dev->stats.tx_errors;
728 dev_kfree_skb_any(skb);
729 return;
730 }
731 /* Does skb_linearize return ok without reducing nr_frags? */
732 if (skb_shinfo(skb)->nr_frags > usable_sge) {
733 ipoib_warn(priv, "too many frags after skb linearize\n");
734 ++dev->stats.tx_dropped;
735 ++dev->stats.tx_errors;
736 dev_kfree_skb_any(skb);
737 return;
738 }
739 }
723 ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n", 740 ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
724 tx->tx_head, skb->len, tx->qp->qp_num); 741 tx->tx_head, skb->len, tx->qp->qp_num);
725 742
@@ -1031,7 +1048,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
1031 struct ib_qp *tx_qp; 1048 struct ib_qp *tx_qp;
1032 1049
1033 if (dev->features & NETIF_F_SG) 1050 if (dev->features & NETIF_F_SG)
1034 attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; 1051 attr.cap.max_send_sge =
1052 min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
1035 1053
1036 tx_qp = ib_create_qp(priv->pd, &attr); 1054 tx_qp = ib_create_qp(priv->pd, &attr);
1037 if (PTR_ERR(tx_qp) == -EINVAL) { 1055 if (PTR_ERR(tx_qp) == -EINVAL) {
@@ -1040,6 +1058,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
1040 attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; 1058 attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
1041 tx_qp = ib_create_qp(priv->pd, &attr); 1059 tx_qp = ib_create_qp(priv->pd, &attr);
1042 } 1060 }
1061 tx->max_send_sge = attr.cap.max_send_sge;
1043 return tx_qp; 1062 return tx_qp;
1044} 1063}
1045 1064
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index fa9c42ff1fb0..899e6b7fb8a5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -538,6 +538,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
538 struct ipoib_tx_buf *tx_req; 538 struct ipoib_tx_buf *tx_req;
539 int hlen, rc; 539 int hlen, rc;
540 void *phead; 540 void *phead;
541 unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb);
541 542
542 if (skb_is_gso(skb)) { 543 if (skb_is_gso(skb)) {
543 hlen = skb_transport_offset(skb) + tcp_hdrlen(skb); 544 hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
@@ -561,6 +562,23 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
561 phead = NULL; 562 phead = NULL;
562 hlen = 0; 563 hlen = 0;
563 } 564 }
565 if (skb_shinfo(skb)->nr_frags > usable_sge) {
566 if (skb_linearize(skb) < 0) {
567 ipoib_warn(priv, "skb could not be linearized\n");
568 ++dev->stats.tx_dropped;
569 ++dev->stats.tx_errors;
570 dev_kfree_skb_any(skb);
571 return;
572 }
573 /* Does skb_linearize return ok without reducing nr_frags? */
574 if (skb_shinfo(skb)->nr_frags > usable_sge) {
575 ipoib_warn(priv, "too many frags after skb linearize\n");
576 ++dev->stats.tx_dropped;
577 ++dev->stats.tx_errors;
578 dev_kfree_skb_any(skb);
579 return;
580 }
581 }
564 582
565 ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n", 583 ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
566 skb->len, address, qpn); 584 skb->len, address, qpn);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index d48c5bae7877..b809c373e40e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -206,7 +206,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
206 init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; 206 init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
207 207
208 if (dev->features & NETIF_F_SG) 208 if (dev->features & NETIF_F_SG)
209 init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; 209 init_attr.cap.max_send_sge =
210 min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
210 211
211 priv->qp = ib_create_qp(priv->pd, &init_attr); 212 priv->qp = ib_create_qp(priv->pd, &init_attr);
212 if (IS_ERR(priv->qp)) { 213 if (IS_ERR(priv->qp)) {
@@ -233,6 +234,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
233 priv->rx_wr.next = NULL; 234 priv->rx_wr.next = NULL;
234 priv->rx_wr.sg_list = priv->rx_sge; 235 priv->rx_wr.sg_list = priv->rx_sge;
235 236
237 priv->max_send_sge = init_attr.cap.max_send_sge;
238
236 return 0; 239 return 0;
237 240
238out_free_send_cq: 241out_free_send_cq:
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index c827c93f46c5..80b6bedc172f 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -969,7 +969,16 @@ static umode_t iser_attr_is_visible(int param_type, int param)
969 969
970static int iscsi_iser_slave_alloc(struct scsi_device *sdev) 970static int iscsi_iser_slave_alloc(struct scsi_device *sdev)
971{ 971{
972 blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K); 972 struct iscsi_session *session;
973 struct iser_conn *iser_conn;
974 struct ib_device *ib_dev;
975
976 session = starget_to_session(scsi_target(sdev))->dd_data;
977 iser_conn = session->leadconn->dd_data;
978 ib_dev = iser_conn->ib_conn.device->ib_device;
979
980 if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
981 blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K);
973 982
974 return 0; 983 return 0;
975} 984}
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 95f0a64e076b..0351059783b1 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -458,9 +458,6 @@ struct iser_fr_pool {
458 * @comp: iser completion context 458 * @comp: iser completion context
459 * @fr_pool: connection fast registration poool 459 * @fr_pool: connection fast registration poool
460 * @pi_support: Indicate device T10-PI support 460 * @pi_support: Indicate device T10-PI support
461 * @last: last send wr to signal all flush errors were drained
462 * @last_cqe: cqe handler for last wr
463 * @last_comp: completes when all connection completions consumed
464 */ 461 */
465struct ib_conn { 462struct ib_conn {
466 struct rdma_cm_id *cma_id; 463 struct rdma_cm_id *cma_id;
@@ -472,10 +469,7 @@ struct ib_conn {
472 struct iser_comp *comp; 469 struct iser_comp *comp;
473 struct iser_fr_pool fr_pool; 470 struct iser_fr_pool fr_pool;
474 bool pi_support; 471 bool pi_support;
475 struct ib_send_wr last;
476 struct ib_cqe last_cqe;
477 struct ib_cqe reg_cqe; 472 struct ib_cqe reg_cqe;
478 struct completion last_comp;
479}; 473};
480 474
481/** 475/**
@@ -617,7 +611,6 @@ void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc);
617void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc); 611void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc);
618void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc); 612void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc);
619void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc); 613void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc);
620void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc);
621 614
622void iser_task_rdma_init(struct iscsi_iser_task *task); 615void iser_task_rdma_init(struct iscsi_iser_task *task);
623 616
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index ed54b388e7ad..81ae2e30dd12 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -729,13 +729,6 @@ void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc)
729 kmem_cache_free(ig.desc_cache, desc); 729 kmem_cache_free(ig.desc_cache, desc);
730} 730}
731 731
732void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc)
733{
734 struct ib_conn *ib_conn = wc->qp->qp_context;
735
736 complete(&ib_conn->last_comp);
737}
738
739void iser_task_rdma_init(struct iscsi_iser_task *iser_task) 732void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
740 733
741{ 734{
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 40c0f4978e2f..1b4945367e4f 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -252,14 +252,21 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn)
252} 252}
253 253
254static int 254static int
255iser_alloc_reg_res(struct ib_device *ib_device, 255iser_alloc_reg_res(struct iser_device *device,
256 struct ib_pd *pd, 256 struct ib_pd *pd,
257 struct iser_reg_resources *res, 257 struct iser_reg_resources *res,
258 unsigned int size) 258 unsigned int size)
259{ 259{
260 struct ib_device *ib_dev = device->ib_device;
261 enum ib_mr_type mr_type;
260 int ret; 262 int ret;
261 263
262 res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, size); 264 if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
265 mr_type = IB_MR_TYPE_SG_GAPS;
266 else
267 mr_type = IB_MR_TYPE_MEM_REG;
268
269 res->mr = ib_alloc_mr(pd, mr_type, size);
263 if (IS_ERR(res->mr)) { 270 if (IS_ERR(res->mr)) {
264 ret = PTR_ERR(res->mr); 271 ret = PTR_ERR(res->mr);
265 iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); 272 iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
@@ -277,7 +284,7 @@ iser_free_reg_res(struct iser_reg_resources *rsc)
277} 284}
278 285
279static int 286static int
280iser_alloc_pi_ctx(struct ib_device *ib_device, 287iser_alloc_pi_ctx(struct iser_device *device,
281 struct ib_pd *pd, 288 struct ib_pd *pd,
282 struct iser_fr_desc *desc, 289 struct iser_fr_desc *desc,
283 unsigned int size) 290 unsigned int size)
@@ -291,7 +298,7 @@ iser_alloc_pi_ctx(struct ib_device *ib_device,
291 298
292 pi_ctx = desc->pi_ctx; 299 pi_ctx = desc->pi_ctx;
293 300
294 ret = iser_alloc_reg_res(ib_device, pd, &pi_ctx->rsc, size); 301 ret = iser_alloc_reg_res(device, pd, &pi_ctx->rsc, size);
295 if (ret) { 302 if (ret) {
296 iser_err("failed to allocate reg_resources\n"); 303 iser_err("failed to allocate reg_resources\n");
297 goto alloc_reg_res_err; 304 goto alloc_reg_res_err;
@@ -324,7 +331,7 @@ iser_free_pi_ctx(struct iser_pi_context *pi_ctx)
324} 331}
325 332
326static struct iser_fr_desc * 333static struct iser_fr_desc *
327iser_create_fastreg_desc(struct ib_device *ib_device, 334iser_create_fastreg_desc(struct iser_device *device,
328 struct ib_pd *pd, 335 struct ib_pd *pd,
329 bool pi_enable, 336 bool pi_enable,
330 unsigned int size) 337 unsigned int size)
@@ -336,12 +343,12 @@ iser_create_fastreg_desc(struct ib_device *ib_device,
336 if (!desc) 343 if (!desc)
337 return ERR_PTR(-ENOMEM); 344 return ERR_PTR(-ENOMEM);
338 345
339 ret = iser_alloc_reg_res(ib_device, pd, &desc->rsc, size); 346 ret = iser_alloc_reg_res(device, pd, &desc->rsc, size);
340 if (ret) 347 if (ret)
341 goto reg_res_alloc_failure; 348 goto reg_res_alloc_failure;
342 349
343 if (pi_enable) { 350 if (pi_enable) {
344 ret = iser_alloc_pi_ctx(ib_device, pd, desc, size); 351 ret = iser_alloc_pi_ctx(device, pd, desc, size);
345 if (ret) 352 if (ret)
346 goto pi_ctx_alloc_failure; 353 goto pi_ctx_alloc_failure;
347 } 354 }
@@ -374,7 +381,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
374 spin_lock_init(&fr_pool->lock); 381 spin_lock_init(&fr_pool->lock);
375 fr_pool->size = 0; 382 fr_pool->size = 0;
376 for (i = 0; i < cmds_max; i++) { 383 for (i = 0; i < cmds_max; i++) {
377 desc = iser_create_fastreg_desc(device->ib_device, device->pd, 384 desc = iser_create_fastreg_desc(device, device->pd,
378 ib_conn->pi_support, size); 385 ib_conn->pi_support, size);
379 if (IS_ERR(desc)) { 386 if (IS_ERR(desc)) {
380 ret = PTR_ERR(desc); 387 ret = PTR_ERR(desc);
@@ -663,7 +670,6 @@ void iser_conn_release(struct iser_conn *iser_conn)
663int iser_conn_terminate(struct iser_conn *iser_conn) 670int iser_conn_terminate(struct iser_conn *iser_conn)
664{ 671{
665 struct ib_conn *ib_conn = &iser_conn->ib_conn; 672 struct ib_conn *ib_conn = &iser_conn->ib_conn;
666 struct ib_send_wr *bad_wr;
667 int err = 0; 673 int err = 0;
668 674
669 /* terminate the iser conn only if the conn state is UP */ 675 /* terminate the iser conn only if the conn state is UP */
@@ -688,14 +694,8 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
688 iser_err("Failed to disconnect, conn: 0x%p err %d\n", 694 iser_err("Failed to disconnect, conn: 0x%p err %d\n",
689 iser_conn, err); 695 iser_conn, err);
690 696
691 /* post an indication that all flush errors were consumed */ 697 /* block until all flush errors are consumed */
692 err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr); 698 ib_drain_sq(ib_conn->qp);
693 if (err) {
694 iser_err("conn %p failed to post last wr", ib_conn);
695 return 1;
696 }
697
698 wait_for_completion(&ib_conn->last_comp);
699 } 699 }
700 700
701 return 1; 701 return 1;
@@ -954,10 +954,6 @@ void iser_conn_init(struct iser_conn *iser_conn)
954 954
955 ib_conn->post_recv_buf_count = 0; 955 ib_conn->post_recv_buf_count = 0;
956 ib_conn->reg_cqe.done = iser_reg_comp; 956 ib_conn->reg_cqe.done = iser_reg_comp;
957 ib_conn->last_cqe.done = iser_last_comp;
958 ib_conn->last.wr_cqe = &ib_conn->last_cqe;
959 ib_conn->last.opcode = IB_WR_SEND;
960 init_completion(&ib_conn->last_comp);
961} 957}
962 958
963 /** 959 /**
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 03022f6420d7..b6bf20496021 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -446,49 +446,17 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
446 dev->max_pages_per_mr); 446 dev->max_pages_per_mr);
447} 447}
448 448
449static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc)
450{
451 struct srp_rdma_ch *ch = cq->cq_context;
452
453 complete(&ch->done);
454}
455
456static struct ib_cqe srp_drain_cqe = {
457 .done = srp_drain_done,
458};
459
460/** 449/**
461 * srp_destroy_qp() - destroy an RDMA queue pair 450 * srp_destroy_qp() - destroy an RDMA queue pair
462 * @ch: SRP RDMA channel. 451 * @ch: SRP RDMA channel.
463 * 452 *
464 * Change a queue pair into the error state and wait until all receive 453 * Drain the qp before destroying it. This avoids that the receive
465 * completions have been processed before destroying it. This avoids that 454 * completion handler can access the queue pair while it is
466 * the receive completion handler can access the queue pair while it is
467 * being destroyed. 455 * being destroyed.
468 */ 456 */
469static void srp_destroy_qp(struct srp_rdma_ch *ch) 457static void srp_destroy_qp(struct srp_rdma_ch *ch)
470{ 458{
471 static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; 459 ib_drain_rq(ch->qp);
472 static struct ib_recv_wr wr = { 0 };
473 struct ib_recv_wr *bad_wr;
474 int ret;
475
476 wr.wr_cqe = &srp_drain_cqe;
477 /* Destroying a QP and reusing ch->done is only safe if not connected */
478 WARN_ON_ONCE(ch->connected);
479
480 ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE);
481 WARN_ONCE(ret, "ib_cm_init_qp_attr() returned %d\n", ret);
482 if (ret)
483 goto out;
484
485 init_completion(&ch->done);
486 ret = ib_post_recv(ch->qp, &wr, &bad_wr);
487 WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret);
488 if (ret == 0)
489 wait_for_completion(&ch->done);
490
491out:
492 ib_destroy_qp(ch->qp); 460 ib_destroy_qp(ch->qp);
493} 461}
494 462
@@ -508,7 +476,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
508 if (!init_attr) 476 if (!init_attr)
509 return -ENOMEM; 477 return -ENOMEM;
510 478
511 /* queue_size + 1 for ib_drain_qp */ 479 /* queue_size + 1 for ib_drain_rq() */
512 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1, 480 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
513 ch->comp_vector, IB_POLL_SOFTIRQ); 481 ch->comp_vector, IB_POLL_SOFTIRQ);
514 if (IS_ERR(recv_cq)) { 482 if (IS_ERR(recv_cq)) {
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 0c37fee363b1..25bdaeef2520 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -91,76 +91,32 @@ MODULE_PARM_DESC(srpt_service_guid,
91 " instead of using the node_guid of the first HCA."); 91 " instead of using the node_guid of the first HCA.");
92 92
93static struct ib_client srpt_client; 93static struct ib_client srpt_client;
94static void srpt_release_channel(struct srpt_rdma_ch *ch); 94static void srpt_release_cmd(struct se_cmd *se_cmd);
95static void srpt_free_ch(struct kref *kref);
95static int srpt_queue_status(struct se_cmd *cmd); 96static int srpt_queue_status(struct se_cmd *cmd);
96static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc); 97static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc);
97static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc); 98static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc);
99static void srpt_process_wait_list(struct srpt_rdma_ch *ch);
98 100
99/** 101/*
100 * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE. 102 * The only allowed channel state changes are those that change the channel
101 */ 103 * state into a state with a higher numerical value. Hence the new > prev test.
102static inline
103enum dma_data_direction opposite_dma_dir(enum dma_data_direction dir)
104{
105 switch (dir) {
106 case DMA_TO_DEVICE: return DMA_FROM_DEVICE;
107 case DMA_FROM_DEVICE: return DMA_TO_DEVICE;
108 default: return dir;
109 }
110}
111
112/**
113 * srpt_sdev_name() - Return the name associated with the HCA.
114 *
115 * Examples are ib0, ib1, ...
116 */
117static inline const char *srpt_sdev_name(struct srpt_device *sdev)
118{
119 return sdev->device->name;
120}
121
122static enum rdma_ch_state srpt_get_ch_state(struct srpt_rdma_ch *ch)
123{
124 unsigned long flags;
125 enum rdma_ch_state state;
126
127 spin_lock_irqsave(&ch->spinlock, flags);
128 state = ch->state;
129 spin_unlock_irqrestore(&ch->spinlock, flags);
130 return state;
131}
132
133static enum rdma_ch_state
134srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new_state)
135{
136 unsigned long flags;
137 enum rdma_ch_state prev;
138
139 spin_lock_irqsave(&ch->spinlock, flags);
140 prev = ch->state;
141 ch->state = new_state;
142 spin_unlock_irqrestore(&ch->spinlock, flags);
143 return prev;
144}
145
146/**
147 * srpt_test_and_set_ch_state() - Test and set the channel state.
148 *
149 * Returns true if and only if the channel state has been set to the new state.
150 */ 104 */
151static bool 105static bool srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new)
152srpt_test_and_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state old,
153 enum rdma_ch_state new)
154{ 106{
155 unsigned long flags; 107 unsigned long flags;
156 enum rdma_ch_state prev; 108 enum rdma_ch_state prev;
109 bool changed = false;
157 110
158 spin_lock_irqsave(&ch->spinlock, flags); 111 spin_lock_irqsave(&ch->spinlock, flags);
159 prev = ch->state; 112 prev = ch->state;
160 if (prev == old) 113 if (new > prev) {
161 ch->state = new; 114 ch->state = new;
115 changed = true;
116 }
162 spin_unlock_irqrestore(&ch->spinlock, flags); 117 spin_unlock_irqrestore(&ch->spinlock, flags);
163 return prev == old; 118
119 return changed;
164} 120}
165 121
166/** 122/**
@@ -182,7 +138,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
182 return; 138 return;
183 139
184 pr_debug("ASYNC event= %d on device= %s\n", event->event, 140 pr_debug("ASYNC event= %d on device= %s\n", event->event,
185 srpt_sdev_name(sdev)); 141 sdev->device->name);
186 142
187 switch (event->event) { 143 switch (event->event) {
188 case IB_EVENT_PORT_ERR: 144 case IB_EVENT_PORT_ERR:
@@ -220,25 +176,39 @@ static void srpt_srq_event(struct ib_event *event, void *ctx)
220 pr_info("SRQ event %d\n", event->event); 176 pr_info("SRQ event %d\n", event->event);
221} 177}
222 178
179static const char *get_ch_state_name(enum rdma_ch_state s)
180{
181 switch (s) {
182 case CH_CONNECTING:
183 return "connecting";
184 case CH_LIVE:
185 return "live";
186 case CH_DISCONNECTING:
187 return "disconnecting";
188 case CH_DRAINING:
189 return "draining";
190 case CH_DISCONNECTED:
191 return "disconnected";
192 }
193 return "???";
194}
195
223/** 196/**
224 * srpt_qp_event() - QP event callback function. 197 * srpt_qp_event() - QP event callback function.
225 */ 198 */
226static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) 199static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
227{ 200{
228 pr_debug("QP event %d on cm_id=%p sess_name=%s state=%d\n", 201 pr_debug("QP event %d on cm_id=%p sess_name=%s state=%d\n",
229 event->event, ch->cm_id, ch->sess_name, srpt_get_ch_state(ch)); 202 event->event, ch->cm_id, ch->sess_name, ch->state);
230 203
231 switch (event->event) { 204 switch (event->event) {
232 case IB_EVENT_COMM_EST: 205 case IB_EVENT_COMM_EST:
233 ib_cm_notify(ch->cm_id, event->event); 206 ib_cm_notify(ch->cm_id, event->event);
234 break; 207 break;
235 case IB_EVENT_QP_LAST_WQE_REACHED: 208 case IB_EVENT_QP_LAST_WQE_REACHED:
236 if (srpt_test_and_set_ch_state(ch, CH_DRAINING, 209 pr_debug("%s-%d, state %s: received Last WQE event.\n",
237 CH_RELEASING)) 210 ch->sess_name, ch->qp->qp_num,
238 srpt_release_channel(ch); 211 get_ch_state_name(ch->state));
239 else
240 pr_debug("%s: state %d - ignored LAST_WQE.\n",
241 ch->sess_name, srpt_get_ch_state(ch));
242 break; 212 break;
243 default: 213 default:
244 pr_err("received unrecognized IB QP event %d\n", event->event); 214 pr_err("received unrecognized IB QP event %d\n", event->event);
@@ -281,7 +251,7 @@ static void srpt_get_class_port_info(struct ib_dm_mad *mad)
281 struct ib_class_port_info *cif; 251 struct ib_class_port_info *cif;
282 252
283 cif = (struct ib_class_port_info *)mad->data; 253 cif = (struct ib_class_port_info *)mad->data;
284 memset(cif, 0, sizeof *cif); 254 memset(cif, 0, sizeof(*cif));
285 cif->base_version = 1; 255 cif->base_version = 1;
286 cif->class_version = 1; 256 cif->class_version = 1;
287 cif->resp_time_value = 20; 257 cif->resp_time_value = 20;
@@ -340,7 +310,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
340 return; 310 return;
341 } 311 }
342 312
343 memset(iocp, 0, sizeof *iocp); 313 memset(iocp, 0, sizeof(*iocp));
344 strcpy(iocp->id_string, SRPT_ID_STRING); 314 strcpy(iocp->id_string, SRPT_ID_STRING);
345 iocp->guid = cpu_to_be64(srpt_service_guid); 315 iocp->guid = cpu_to_be64(srpt_service_guid);
346 iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id); 316 iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
@@ -390,7 +360,7 @@ static void srpt_get_svc_entries(u64 ioc_guid,
390 } 360 }
391 361
392 svc_entries = (struct ib_dm_svc_entries *)mad->data; 362 svc_entries = (struct ib_dm_svc_entries *)mad->data;
393 memset(svc_entries, 0, sizeof *svc_entries); 363 memset(svc_entries, 0, sizeof(*svc_entries));
394 svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid); 364 svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid);
395 snprintf(svc_entries->service_entries[0].name, 365 snprintf(svc_entries->service_entries[0].name,
396 sizeof(svc_entries->service_entries[0].name), 366 sizeof(svc_entries->service_entries[0].name),
@@ -484,7 +454,7 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
484 rsp->ah = ah; 454 rsp->ah = ah;
485 455
486 dm_mad = rsp->mad; 456 dm_mad = rsp->mad;
487 memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad); 457 memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof(*dm_mad));
488 dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; 458 dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
489 dm_mad->mad_hdr.status = 0; 459 dm_mad->mad_hdr.status = 0;
490 460
@@ -532,7 +502,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
532 struct ib_port_attr port_attr; 502 struct ib_port_attr port_attr;
533 int ret; 503 int ret;
534 504
535 memset(&port_modify, 0, sizeof port_modify); 505 memset(&port_modify, 0, sizeof(port_modify));
536 port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; 506 port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
537 port_modify.clr_port_cap_mask = 0; 507 port_modify.clr_port_cap_mask = 0;
538 508
@@ -553,7 +523,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
553 goto err_query_port; 523 goto err_query_port;
554 524
555 if (!sport->mad_agent) { 525 if (!sport->mad_agent) {
556 memset(&reg_req, 0, sizeof reg_req); 526 memset(&reg_req, 0, sizeof(reg_req));
557 reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; 527 reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
558 reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION; 528 reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
559 set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask); 529 set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
@@ -841,6 +811,39 @@ out:
841} 811}
842 812
843/** 813/**
814 * srpt_zerolength_write() - Perform a zero-length RDMA write.
815 *
816 * A quote from the InfiniBand specification: C9-88: For an HCA responder
817 * using Reliable Connection service, for each zero-length RDMA READ or WRITE
818 * request, the R_Key shall not be validated, even if the request includes
819 * Immediate data.
820 */
821static int srpt_zerolength_write(struct srpt_rdma_ch *ch)
822{
823 struct ib_send_wr wr, *bad_wr;
824
825 memset(&wr, 0, sizeof(wr));
826 wr.opcode = IB_WR_RDMA_WRITE;
827 wr.wr_cqe = &ch->zw_cqe;
828 wr.send_flags = IB_SEND_SIGNALED;
829 return ib_post_send(ch->qp, &wr, &bad_wr);
830}
831
832static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
833{
834 struct srpt_rdma_ch *ch = cq->cq_context;
835
836 if (wc->status == IB_WC_SUCCESS) {
837 srpt_process_wait_list(ch);
838 } else {
839 if (srpt_set_ch_state(ch, CH_DISCONNECTED))
840 schedule_work(&ch->release_work);
841 else
842 WARN_ONCE("%s-%d\n", ch->sess_name, ch->qp->qp_num);
843 }
844}
845
846/**
844 * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request. 847 * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request.
845 * @ioctx: Pointer to the I/O context associated with the request. 848 * @ioctx: Pointer to the I/O context associated with the request.
846 * @srp_cmd: Pointer to the SRP_CMD request data. 849 * @srp_cmd: Pointer to the SRP_CMD request data.
@@ -903,14 +906,14 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
903 906
904 db = (struct srp_direct_buf *)(srp_cmd->add_data 907 db = (struct srp_direct_buf *)(srp_cmd->add_data
905 + add_cdb_offset); 908 + add_cdb_offset);
906 memcpy(ioctx->rbufs, db, sizeof *db); 909 memcpy(ioctx->rbufs, db, sizeof(*db));
907 *data_len = be32_to_cpu(db->len); 910 *data_len = be32_to_cpu(db->len);
908 } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) || 911 } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) ||
909 ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) { 912 ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) {
910 idb = (struct srp_indirect_buf *)(srp_cmd->add_data 913 idb = (struct srp_indirect_buf *)(srp_cmd->add_data
911 + add_cdb_offset); 914 + add_cdb_offset);
912 915
913 ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db; 916 ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof(*db);
914 917
915 if (ioctx->n_rbuf > 918 if (ioctx->n_rbuf >
916 (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) { 919 (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
@@ -929,7 +932,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
929 ioctx->rbufs = &ioctx->single_rbuf; 932 ioctx->rbufs = &ioctx->single_rbuf;
930 else { 933 else {
931 ioctx->rbufs = 934 ioctx->rbufs =
932 kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC); 935 kmalloc(ioctx->n_rbuf * sizeof(*db), GFP_ATOMIC);
933 if (!ioctx->rbufs) { 936 if (!ioctx->rbufs) {
934 ioctx->n_rbuf = 0; 937 ioctx->n_rbuf = 0;
935 ret = -ENOMEM; 938 ret = -ENOMEM;
@@ -938,7 +941,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
938 } 941 }
939 942
940 db = idb->desc_list; 943 db = idb->desc_list;
941 memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db); 944 memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof(*db));
942 *data_len = be32_to_cpu(idb->len); 945 *data_len = be32_to_cpu(idb->len);
943 } 946 }
944out: 947out:
@@ -956,7 +959,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
956 struct ib_qp_attr *attr; 959 struct ib_qp_attr *attr;
957 int ret; 960 int ret;
958 961
959 attr = kzalloc(sizeof *attr, GFP_KERNEL); 962 attr = kzalloc(sizeof(*attr), GFP_KERNEL);
960 if (!attr) 963 if (!attr)
961 return -ENOMEM; 964 return -ENOMEM;
962 965
@@ -1070,7 +1073,7 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1070 dir = ioctx->cmd.data_direction; 1073 dir = ioctx->cmd.data_direction;
1071 BUG_ON(dir == DMA_NONE); 1074 BUG_ON(dir == DMA_NONE);
1072 ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt, 1075 ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt,
1073 opposite_dma_dir(dir)); 1076 target_reverse_dma_direction(&ioctx->cmd));
1074 ioctx->mapped_sg_count = 0; 1077 ioctx->mapped_sg_count = 0;
1075 } 1078 }
1076} 1079}
@@ -1107,7 +1110,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1107 ioctx->sg_cnt = sg_cnt = cmd->t_data_nents; 1110 ioctx->sg_cnt = sg_cnt = cmd->t_data_nents;
1108 1111
1109 count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt, 1112 count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt,
1110 opposite_dma_dir(dir)); 1113 target_reverse_dma_direction(cmd));
1111 if (unlikely(!count)) 1114 if (unlikely(!count))
1112 return -EAGAIN; 1115 return -EAGAIN;
1113 1116
@@ -1313,10 +1316,7 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
1313 1316
1314 /* 1317 /*
1315 * If the command is in a state where the target core is waiting for 1318 * If the command is in a state where the target core is waiting for
1316 * the ib_srpt driver, change the state to the next state. Changing 1319 * the ib_srpt driver, change the state to the next state.
1317 * the state of the command from SRPT_STATE_NEED_DATA to
1318 * SRPT_STATE_DATA_IN ensures that srpt_xmit_response() will call this
1319 * function a second time.
1320 */ 1320 */
1321 1321
1322 spin_lock_irqsave(&ioctx->spinlock, flags); 1322 spin_lock_irqsave(&ioctx->spinlock, flags);
@@ -1325,25 +1325,17 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
1325 case SRPT_STATE_NEED_DATA: 1325 case SRPT_STATE_NEED_DATA:
1326 ioctx->state = SRPT_STATE_DATA_IN; 1326 ioctx->state = SRPT_STATE_DATA_IN;
1327 break; 1327 break;
1328 case SRPT_STATE_DATA_IN:
1329 case SRPT_STATE_CMD_RSP_SENT: 1328 case SRPT_STATE_CMD_RSP_SENT:
1330 case SRPT_STATE_MGMT_RSP_SENT: 1329 case SRPT_STATE_MGMT_RSP_SENT:
1331 ioctx->state = SRPT_STATE_DONE; 1330 ioctx->state = SRPT_STATE_DONE;
1332 break; 1331 break;
1333 default: 1332 default:
1333 WARN_ONCE(true, "%s: unexpected I/O context state %d\n",
1334 __func__, state);
1334 break; 1335 break;
1335 } 1336 }
1336 spin_unlock_irqrestore(&ioctx->spinlock, flags); 1337 spin_unlock_irqrestore(&ioctx->spinlock, flags);
1337 1338
1338 if (state == SRPT_STATE_DONE) {
1339 struct srpt_rdma_ch *ch = ioctx->ch;
1340
1341 BUG_ON(ch->sess == NULL);
1342
1343 target_put_sess_cmd(&ioctx->cmd);
1344 goto out;
1345 }
1346
1347 pr_debug("Aborting cmd with state %d and tag %lld\n", state, 1339 pr_debug("Aborting cmd with state %d and tag %lld\n", state,
1348 ioctx->cmd.tag); 1340 ioctx->cmd.tag);
1349 1341
@@ -1351,19 +1343,16 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
1351 case SRPT_STATE_NEW: 1343 case SRPT_STATE_NEW:
1352 case SRPT_STATE_DATA_IN: 1344 case SRPT_STATE_DATA_IN:
1353 case SRPT_STATE_MGMT: 1345 case SRPT_STATE_MGMT:
1346 case SRPT_STATE_DONE:
1354 /* 1347 /*
1355 * Do nothing - defer abort processing until 1348 * Do nothing - defer abort processing until
1356 * srpt_queue_response() is invoked. 1349 * srpt_queue_response() is invoked.
1357 */ 1350 */
1358 WARN_ON(!transport_check_aborted_status(&ioctx->cmd, false));
1359 break; 1351 break;
1360 case SRPT_STATE_NEED_DATA: 1352 case SRPT_STATE_NEED_DATA:
1361 /* DMA_TO_DEVICE (write) - RDMA read error. */ 1353 pr_debug("tag %#llx: RDMA read error\n", ioctx->cmd.tag);
1362 1354 transport_generic_request_failure(&ioctx->cmd,
1363 /* XXX(hch): this is a horrible layering violation.. */ 1355 TCM_CHECK_CONDITION_ABORT_CMD);
1364 spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
1365 ioctx->cmd.transport_state &= ~CMD_T_ACTIVE;
1366 spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
1367 break; 1356 break;
1368 case SRPT_STATE_CMD_RSP_SENT: 1357 case SRPT_STATE_CMD_RSP_SENT:
1369 /* 1358 /*
@@ -1371,18 +1360,16 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
1371 * not been received in time. 1360 * not been received in time.
1372 */ 1361 */
1373 srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); 1362 srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
1374 target_put_sess_cmd(&ioctx->cmd); 1363 transport_generic_free_cmd(&ioctx->cmd, 0);
1375 break; 1364 break;
1376 case SRPT_STATE_MGMT_RSP_SENT: 1365 case SRPT_STATE_MGMT_RSP_SENT:
1377 srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); 1366 transport_generic_free_cmd(&ioctx->cmd, 0);
1378 target_put_sess_cmd(&ioctx->cmd);
1379 break; 1367 break;
1380 default: 1368 default:
1381 WARN(1, "Unexpected command state (%d)", state); 1369 WARN(1, "Unexpected command state (%d)", state);
1382 break; 1370 break;
1383 } 1371 }
1384 1372
1385out:
1386 return state; 1373 return state;
1387} 1374}
1388 1375
@@ -1422,9 +1409,14 @@ static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
1422 container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe); 1409 container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
1423 1410
1424 if (unlikely(wc->status != IB_WC_SUCCESS)) { 1411 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1412 /*
1413 * Note: if an RDMA write error completion is received that
1414 * means that a SEND also has been posted. Defer further
1415 * processing of the associated command until the send error
1416 * completion has been received.
1417 */
1425 pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n", 1418 pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
1426 ioctx, wc->status); 1419 ioctx, wc->status);
1427 srpt_abort_cmd(ioctx);
1428 } 1420 }
1429} 1421}
1430 1422
@@ -1464,7 +1456,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
1464 sense_data_len = ioctx->cmd.scsi_sense_length; 1456 sense_data_len = ioctx->cmd.scsi_sense_length;
1465 WARN_ON(sense_data_len > sizeof(ioctx->sense_data)); 1457 WARN_ON(sense_data_len > sizeof(ioctx->sense_data));
1466 1458
1467 memset(srp_rsp, 0, sizeof *srp_rsp); 1459 memset(srp_rsp, 0, sizeof(*srp_rsp));
1468 srp_rsp->opcode = SRP_RSP; 1460 srp_rsp->opcode = SRP_RSP;
1469 srp_rsp->req_lim_delta = 1461 srp_rsp->req_lim_delta =
1470 cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); 1462 cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0));
@@ -1514,7 +1506,7 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
1514 1506
1515 srp_rsp = ioctx->ioctx.buf; 1507 srp_rsp = ioctx->ioctx.buf;
1516 BUG_ON(!srp_rsp); 1508 BUG_ON(!srp_rsp);
1517 memset(srp_rsp, 0, sizeof *srp_rsp); 1509 memset(srp_rsp, 0, sizeof(*srp_rsp));
1518 1510
1519 srp_rsp->opcode = SRP_RSP; 1511 srp_rsp->opcode = SRP_RSP;
1520 srp_rsp->req_lim_delta = 1512 srp_rsp->req_lim_delta =
@@ -1528,80 +1520,6 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
1528 return resp_len; 1520 return resp_len;
1529} 1521}
1530 1522
1531#define NO_SUCH_LUN ((uint64_t)-1LL)
1532
1533/*
1534 * SCSI LUN addressing method. See also SAM-2 and the section about
1535 * eight byte LUNs.
1536 */
1537enum scsi_lun_addr_method {
1538 SCSI_LUN_ADDR_METHOD_PERIPHERAL = 0,
1539 SCSI_LUN_ADDR_METHOD_FLAT = 1,
1540 SCSI_LUN_ADDR_METHOD_LUN = 2,
1541 SCSI_LUN_ADDR_METHOD_EXTENDED_LUN = 3,
1542};
1543
1544/*
1545 * srpt_unpack_lun() - Convert from network LUN to linear LUN.
1546 *
1547 * Convert an 2-byte, 4-byte, 6-byte or 8-byte LUN structure in network byte
1548 * order (big endian) to a linear LUN. Supports three LUN addressing methods:
1549 * peripheral, flat and logical unit. See also SAM-2, section 4.9.4 (page 40).
1550 */
1551static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
1552{
1553 uint64_t res = NO_SUCH_LUN;
1554 int addressing_method;
1555
1556 if (unlikely(len < 2)) {
1557 pr_err("Illegal LUN length %d, expected 2 bytes or more\n",
1558 len);
1559 goto out;
1560 }
1561
1562 switch (len) {
1563 case 8:
1564 if ((*((__be64 *)lun) &
1565 cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0)
1566 goto out_err;
1567 break;
1568 case 4:
1569 if (*((__be16 *)&lun[2]) != 0)
1570 goto out_err;
1571 break;
1572 case 6:
1573 if (*((__be32 *)&lun[2]) != 0)
1574 goto out_err;
1575 break;
1576 case 2:
1577 break;
1578 default:
1579 goto out_err;
1580 }
1581
1582 addressing_method = (*lun) >> 6; /* highest two bits of byte 0 */
1583 switch (addressing_method) {
1584 case SCSI_LUN_ADDR_METHOD_PERIPHERAL:
1585 case SCSI_LUN_ADDR_METHOD_FLAT:
1586 case SCSI_LUN_ADDR_METHOD_LUN:
1587 res = *(lun + 1) | (((*lun) & 0x3f) << 8);
1588 break;
1589
1590 case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN:
1591 default:
1592 pr_err("Unimplemented LUN addressing method %u\n",
1593 addressing_method);
1594 break;
1595 }
1596
1597out:
1598 return res;
1599
1600out_err:
1601 pr_err("Support for multi-level LUNs has not yet been implemented\n");
1602 goto out;
1603}
1604
1605static int srpt_check_stop_free(struct se_cmd *cmd) 1523static int srpt_check_stop_free(struct se_cmd *cmd)
1606{ 1524{
1607 struct srpt_send_ioctx *ioctx = container_of(cmd, 1525 struct srpt_send_ioctx *ioctx = container_of(cmd,
@@ -1613,16 +1531,14 @@ static int srpt_check_stop_free(struct se_cmd *cmd)
1613/** 1531/**
1614 * srpt_handle_cmd() - Process SRP_CMD. 1532 * srpt_handle_cmd() - Process SRP_CMD.
1615 */ 1533 */
1616static int srpt_handle_cmd(struct srpt_rdma_ch *ch, 1534static void srpt_handle_cmd(struct srpt_rdma_ch *ch,
1617 struct srpt_recv_ioctx *recv_ioctx, 1535 struct srpt_recv_ioctx *recv_ioctx,
1618 struct srpt_send_ioctx *send_ioctx) 1536 struct srpt_send_ioctx *send_ioctx)
1619{ 1537{
1620 struct se_cmd *cmd; 1538 struct se_cmd *cmd;
1621 struct srp_cmd *srp_cmd; 1539 struct srp_cmd *srp_cmd;
1622 uint64_t unpacked_lun;
1623 u64 data_len; 1540 u64 data_len;
1624 enum dma_data_direction dir; 1541 enum dma_data_direction dir;
1625 sense_reason_t ret;
1626 int rc; 1542 int rc;
1627 1543
1628 BUG_ON(!send_ioctx); 1544 BUG_ON(!send_ioctx);
@@ -1650,65 +1566,23 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
1650 if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) { 1566 if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) {
1651 pr_err("0x%llx: parsing SRP descriptor table failed.\n", 1567 pr_err("0x%llx: parsing SRP descriptor table failed.\n",
1652 srp_cmd->tag); 1568 srp_cmd->tag);
1653 ret = TCM_INVALID_CDB_FIELD; 1569 goto release_ioctx;
1654 goto send_sense;
1655 } 1570 }
1656 1571
1657 unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_cmd->lun,
1658 sizeof(srp_cmd->lun));
1659 rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb, 1572 rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb,
1660 &send_ioctx->sense_data[0], unpacked_lun, data_len, 1573 &send_ioctx->sense_data[0],
1661 TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF); 1574 scsilun_to_int(&srp_cmd->lun), data_len,
1575 TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF);
1662 if (rc != 0) { 1576 if (rc != 0) {
1663 ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; 1577 pr_debug("target_submit_cmd() returned %d for tag %#llx\n", rc,
1664 goto send_sense; 1578 srp_cmd->tag);
1579 goto release_ioctx;
1665 } 1580 }
1666 return 0; 1581 return;
1667
1668send_sense:
1669 transport_send_check_condition_and_sense(cmd, ret, 0);
1670 return -1;
1671}
1672
1673/**
1674 * srpt_rx_mgmt_fn_tag() - Process a task management function by tag.
1675 * @ch: RDMA channel of the task management request.
1676 * @fn: Task management function to perform.
1677 * @req_tag: Tag of the SRP task management request.
1678 * @mgmt_ioctx: I/O context of the task management request.
1679 *
1680 * Returns zero if the target core will process the task management
1681 * request asynchronously.
1682 *
1683 * Note: It is assumed that the initiator serializes tag-based task management
1684 * requests.
1685 */
1686static int srpt_rx_mgmt_fn_tag(struct srpt_send_ioctx *ioctx, u64 tag)
1687{
1688 struct srpt_device *sdev;
1689 struct srpt_rdma_ch *ch;
1690 struct srpt_send_ioctx *target;
1691 int ret, i;
1692 1582
1693 ret = -EINVAL; 1583release_ioctx:
1694 ch = ioctx->ch; 1584 send_ioctx->state = SRPT_STATE_DONE;
1695 BUG_ON(!ch); 1585 srpt_release_cmd(cmd);
1696 BUG_ON(!ch->sport);
1697 sdev = ch->sport->sdev;
1698 BUG_ON(!sdev);
1699 spin_lock_irq(&sdev->spinlock);
1700 for (i = 0; i < ch->rq_size; ++i) {
1701 target = ch->ioctx_ring[i];
1702 if (target->cmd.se_lun == ioctx->cmd.se_lun &&
1703 target->cmd.tag == tag &&
1704 srpt_get_cmd_state(target) != SRPT_STATE_DONE) {
1705 ret = 0;
1706 /* now let the target core abort &target->cmd; */
1707 break;
1708 }
1709 }
1710 spin_unlock_irq(&sdev->spinlock);
1711 return ret;
1712} 1586}
1713 1587
1714static int srp_tmr_to_tcm(int fn) 1588static int srp_tmr_to_tcm(int fn)
@@ -1744,8 +1618,6 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
1744 struct srp_tsk_mgmt *srp_tsk; 1618 struct srp_tsk_mgmt *srp_tsk;
1745 struct se_cmd *cmd; 1619 struct se_cmd *cmd;
1746 struct se_session *sess = ch->sess; 1620 struct se_session *sess = ch->sess;
1747 uint64_t unpacked_lun;
1748 uint32_t tag = 0;
1749 int tcm_tmr; 1621 int tcm_tmr;
1750 int rc; 1622 int rc;
1751 1623
@@ -1761,26 +1633,10 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
1761 srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT); 1633 srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT);
1762 send_ioctx->cmd.tag = srp_tsk->tag; 1634 send_ioctx->cmd.tag = srp_tsk->tag;
1763 tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func); 1635 tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func);
1764 if (tcm_tmr < 0) { 1636 rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL,
1765 send_ioctx->cmd.se_tmr_req->response = 1637 scsilun_to_int(&srp_tsk->lun), srp_tsk, tcm_tmr,
1766 TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED; 1638 GFP_KERNEL, srp_tsk->task_tag,
1767 goto fail; 1639 TARGET_SCF_ACK_KREF);
1768 }
1769 unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_tsk->lun,
1770 sizeof(srp_tsk->lun));
1771
1772 if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK) {
1773 rc = srpt_rx_mgmt_fn_tag(send_ioctx, srp_tsk->task_tag);
1774 if (rc < 0) {
1775 send_ioctx->cmd.se_tmr_req->response =
1776 TMR_TASK_DOES_NOT_EXIST;
1777 goto fail;
1778 }
1779 tag = srp_tsk->task_tag;
1780 }
1781 rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL, unpacked_lun,
1782 srp_tsk, tcm_tmr, GFP_KERNEL, tag,
1783 TARGET_SCF_ACK_KREF);
1784 if (rc != 0) { 1640 if (rc != 0) {
1785 send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED; 1641 send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED;
1786 goto fail; 1642 goto fail;
@@ -1800,7 +1656,6 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
1800 struct srpt_send_ioctx *send_ioctx) 1656 struct srpt_send_ioctx *send_ioctx)
1801{ 1657{
1802 struct srp_cmd *srp_cmd; 1658 struct srp_cmd *srp_cmd;
1803 enum rdma_ch_state ch_state;
1804 1659
1805 BUG_ON(!ch); 1660 BUG_ON(!ch);
1806 BUG_ON(!recv_ioctx); 1661 BUG_ON(!recv_ioctx);
@@ -1809,13 +1664,12 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
1809 recv_ioctx->ioctx.dma, srp_max_req_size, 1664 recv_ioctx->ioctx.dma, srp_max_req_size,
1810 DMA_FROM_DEVICE); 1665 DMA_FROM_DEVICE);
1811 1666
1812 ch_state = srpt_get_ch_state(ch); 1667 if (unlikely(ch->state == CH_CONNECTING)) {
1813 if (unlikely(ch_state == CH_CONNECTING)) {
1814 list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list); 1668 list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list);
1815 goto out; 1669 goto out;
1816 } 1670 }
1817 1671
1818 if (unlikely(ch_state != CH_LIVE)) 1672 if (unlikely(ch->state != CH_LIVE))
1819 goto out; 1673 goto out;
1820 1674
1821 srp_cmd = recv_ioctx->ioctx.buf; 1675 srp_cmd = recv_ioctx->ioctx.buf;
@@ -1878,6 +1732,28 @@ static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
1878 } 1732 }
1879} 1733}
1880 1734
1735/*
1736 * This function must be called from the context in which RDMA completions are
1737 * processed because it accesses the wait list without protection against
1738 * access from other threads.
1739 */
1740static void srpt_process_wait_list(struct srpt_rdma_ch *ch)
1741{
1742 struct srpt_send_ioctx *ioctx;
1743
1744 while (!list_empty(&ch->cmd_wait_list) &&
1745 ch->state >= CH_LIVE &&
1746 (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
1747 struct srpt_recv_ioctx *recv_ioctx;
1748
1749 recv_ioctx = list_first_entry(&ch->cmd_wait_list,
1750 struct srpt_recv_ioctx,
1751 wait_list);
1752 list_del(&recv_ioctx->wait_list);
1753 srpt_handle_new_iu(ch, recv_ioctx, ioctx);
1754 }
1755}
1756
1881/** 1757/**
1882 * Note: Although this has not yet been observed during tests, at least in 1758 * Note: Although this has not yet been observed during tests, at least in
1883 * theory it is possible that the srpt_get_send_ioctx() call invoked by 1759 * theory it is possible that the srpt_get_send_ioctx() call invoked by
@@ -1905,15 +1781,10 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
1905 1781
1906 atomic_inc(&ch->sq_wr_avail); 1782 atomic_inc(&ch->sq_wr_avail);
1907 1783
1908 if (wc->status != IB_WC_SUCCESS) { 1784 if (wc->status != IB_WC_SUCCESS)
1909 pr_info("sending response for ioctx 0x%p failed" 1785 pr_info("sending response for ioctx 0x%p failed"
1910 " with status %d\n", ioctx, wc->status); 1786 " with status %d\n", ioctx, wc->status);
1911 1787
1912 atomic_dec(&ch->req_lim);
1913 srpt_abort_cmd(ioctx);
1914 goto out;
1915 }
1916
1917 if (state != SRPT_STATE_DONE) { 1788 if (state != SRPT_STATE_DONE) {
1918 srpt_unmap_sg_to_ib_sge(ch, ioctx); 1789 srpt_unmap_sg_to_ib_sge(ch, ioctx);
1919 transport_generic_free_cmd(&ioctx->cmd, 0); 1790 transport_generic_free_cmd(&ioctx->cmd, 0);
@@ -1922,18 +1793,7 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
1922 " wr_id = %u.\n", ioctx->ioctx.index); 1793 " wr_id = %u.\n", ioctx->ioctx.index);
1923 } 1794 }
1924 1795
1925out: 1796 srpt_process_wait_list(ch);
1926 while (!list_empty(&ch->cmd_wait_list) &&
1927 srpt_get_ch_state(ch) == CH_LIVE &&
1928 (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
1929 struct srpt_recv_ioctx *recv_ioctx;
1930
1931 recv_ioctx = list_first_entry(&ch->cmd_wait_list,
1932 struct srpt_recv_ioctx,
1933 wait_list);
1934 list_del(&recv_ioctx->wait_list);
1935 srpt_handle_new_iu(ch, recv_ioctx, ioctx);
1936 }
1937} 1797}
1938 1798
1939/** 1799/**
@@ -1950,7 +1810,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1950 WARN_ON(ch->rq_size < 1); 1810 WARN_ON(ch->rq_size < 1);
1951 1811
1952 ret = -ENOMEM; 1812 ret = -ENOMEM;
1953 qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL); 1813 qp_init = kzalloc(sizeof(*qp_init), GFP_KERNEL);
1954 if (!qp_init) 1814 if (!qp_init)
1955 goto out; 1815 goto out;
1956 1816
@@ -2017,168 +1877,102 @@ static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
2017} 1877}
2018 1878
2019/** 1879/**
2020 * __srpt_close_ch() - Close an RDMA channel by setting the QP error state. 1880 * srpt_close_ch() - Close an RDMA channel.
2021 * 1881 *
2022 * Reset the QP and make sure all resources associated with the channel will 1882 * Make sure all resources associated with the channel will be deallocated at
2023 * be deallocated at an appropriate time. 1883 * an appropriate time.
2024 * 1884 *
2025 * Note: The caller must hold ch->sport->sdev->spinlock. 1885 * Returns true if and only if the channel state has been modified into
1886 * CH_DRAINING.
2026 */ 1887 */
2027static void __srpt_close_ch(struct srpt_rdma_ch *ch) 1888static bool srpt_close_ch(struct srpt_rdma_ch *ch)
2028{ 1889{
2029 enum rdma_ch_state prev_state; 1890 int ret;
2030 unsigned long flags;
2031 1891
2032 spin_lock_irqsave(&ch->spinlock, flags); 1892 if (!srpt_set_ch_state(ch, CH_DRAINING)) {
2033 prev_state = ch->state; 1893 pr_debug("%s-%d: already closed\n", ch->sess_name,
2034 switch (prev_state) { 1894 ch->qp->qp_num);
2035 case CH_CONNECTING: 1895 return false;
2036 case CH_LIVE:
2037 ch->state = CH_DISCONNECTING;
2038 break;
2039 default:
2040 break;
2041 } 1896 }
2042 spin_unlock_irqrestore(&ch->spinlock, flags);
2043
2044 switch (prev_state) {
2045 case CH_CONNECTING:
2046 ib_send_cm_rej(ch->cm_id, IB_CM_REJ_NO_RESOURCES, NULL, 0,
2047 NULL, 0);
2048 /* fall through */
2049 case CH_LIVE:
2050 if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0)
2051 pr_err("sending CM DREQ failed.\n");
2052 break;
2053 case CH_DISCONNECTING:
2054 break;
2055 case CH_DRAINING:
2056 case CH_RELEASING:
2057 break;
2058 }
2059}
2060
2061/**
2062 * srpt_close_ch() - Close an RDMA channel.
2063 */
2064static void srpt_close_ch(struct srpt_rdma_ch *ch)
2065{
2066 struct srpt_device *sdev;
2067 1897
2068 sdev = ch->sport->sdev; 1898 kref_get(&ch->kref);
2069 spin_lock_irq(&sdev->spinlock);
2070 __srpt_close_ch(ch);
2071 spin_unlock_irq(&sdev->spinlock);
2072}
2073 1899
2074/** 1900 ret = srpt_ch_qp_err(ch);
2075 * srpt_shutdown_session() - Whether or not a session may be shut down. 1901 if (ret < 0)
2076 */ 1902 pr_err("%s-%d: changing queue pair into error state failed: %d\n",
2077static int srpt_shutdown_session(struct se_session *se_sess) 1903 ch->sess_name, ch->qp->qp_num, ret);
2078{
2079 struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr;
2080 unsigned long flags;
2081 1904
2082 spin_lock_irqsave(&ch->spinlock, flags); 1905 pr_debug("%s-%d: queued zerolength write\n", ch->sess_name,
2083 if (ch->in_shutdown) { 1906 ch->qp->qp_num);
2084 spin_unlock_irqrestore(&ch->spinlock, flags); 1907 ret = srpt_zerolength_write(ch);
2085 return true; 1908 if (ret < 0) {
1909 pr_err("%s-%d: queuing zero-length write failed: %d\n",
1910 ch->sess_name, ch->qp->qp_num, ret);
1911 if (srpt_set_ch_state(ch, CH_DISCONNECTED))
1912 schedule_work(&ch->release_work);
1913 else
1914 WARN_ON_ONCE(true);
2086 } 1915 }
2087 1916
2088 ch->in_shutdown = true; 1917 kref_put(&ch->kref, srpt_free_ch);
2089 target_sess_cmd_list_set_waiting(se_sess);
2090 spin_unlock_irqrestore(&ch->spinlock, flags);
2091 1918
2092 return true; 1919 return true;
2093} 1920}
2094 1921
2095/** 1922/*
2096 * srpt_drain_channel() - Drain a channel by resetting the IB queue pair. 1923 * Change the channel state into CH_DISCONNECTING. If a channel has not yet
2097 * @cm_id: Pointer to the CM ID of the channel to be drained. 1924 * reached the connected state, close it. If a channel is in the connected
2098 * 1925 * state, send a DREQ. If a DREQ has been received, send a DREP. Note: it is
2099 * Note: Must be called from inside srpt_cm_handler to avoid a race between 1926 * the responsibility of the caller to ensure that this function is not
2100 * accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one() 1927 * invoked concurrently with the code that accepts a connection. This means
2101 * (the caller of srpt_cm_handler holds the cm_id spinlock; srpt_remove_one() 1928 * that this function must either be invoked from inside a CM callback
2102 * waits until all target sessions for the associated IB device have been 1929 * function or that it must be invoked with the srpt_port.mutex held.
2103 * unregistered and target session registration involves a call to
2104 * ib_destroy_cm_id(), which locks the cm_id spinlock and hence waits until
2105 * this function has finished).
2106 */ 1930 */
2107static void srpt_drain_channel(struct ib_cm_id *cm_id) 1931static int srpt_disconnect_ch(struct srpt_rdma_ch *ch)
2108{ 1932{
2109 struct srpt_device *sdev;
2110 struct srpt_rdma_ch *ch;
2111 int ret; 1933 int ret;
2112 bool do_reset = false;
2113 1934
2114 WARN_ON_ONCE(irqs_disabled()); 1935 if (!srpt_set_ch_state(ch, CH_DISCONNECTING))
1936 return -ENOTCONN;
2115 1937
2116 sdev = cm_id->context; 1938 ret = ib_send_cm_dreq(ch->cm_id, NULL, 0);
2117 BUG_ON(!sdev); 1939 if (ret < 0)
2118 spin_lock_irq(&sdev->spinlock); 1940 ret = ib_send_cm_drep(ch->cm_id, NULL, 0);
2119 list_for_each_entry(ch, &sdev->rch_list, list) {
2120 if (ch->cm_id == cm_id) {
2121 do_reset = srpt_test_and_set_ch_state(ch,
2122 CH_CONNECTING, CH_DRAINING) ||
2123 srpt_test_and_set_ch_state(ch,
2124 CH_LIVE, CH_DRAINING) ||
2125 srpt_test_and_set_ch_state(ch,
2126 CH_DISCONNECTING, CH_DRAINING);
2127 break;
2128 }
2129 }
2130 spin_unlock_irq(&sdev->spinlock);
2131 1941
2132 if (do_reset) { 1942 if (ret < 0 && srpt_close_ch(ch))
2133 if (ch->sess) 1943 ret = 0;
2134 srpt_shutdown_session(ch->sess);
2135 1944
2136 ret = srpt_ch_qp_err(ch); 1945 return ret;
2137 if (ret < 0)
2138 pr_err("Setting queue pair in error state"
2139 " failed: %d\n", ret);
2140 }
2141} 1946}
2142 1947
2143/** 1948static void __srpt_close_all_ch(struct srpt_device *sdev)
2144 * srpt_find_channel() - Look up an RDMA channel.
2145 * @cm_id: Pointer to the CM ID of the channel to be looked up.
2146 *
2147 * Return NULL if no matching RDMA channel has been found.
2148 */
2149static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev,
2150 struct ib_cm_id *cm_id)
2151{ 1949{
2152 struct srpt_rdma_ch *ch; 1950 struct srpt_rdma_ch *ch;
2153 bool found;
2154 1951
2155 WARN_ON_ONCE(irqs_disabled()); 1952 lockdep_assert_held(&sdev->mutex);
2156 BUG_ON(!sdev);
2157 1953
2158 found = false;
2159 spin_lock_irq(&sdev->spinlock);
2160 list_for_each_entry(ch, &sdev->rch_list, list) { 1954 list_for_each_entry(ch, &sdev->rch_list, list) {
2161 if (ch->cm_id == cm_id) { 1955 if (srpt_disconnect_ch(ch) >= 0)
2162 found = true; 1956 pr_info("Closing channel %s-%d because target %s has been disabled\n",
2163 break; 1957 ch->sess_name, ch->qp->qp_num,
2164 } 1958 sdev->device->name);
1959 srpt_close_ch(ch);
2165 } 1960 }
2166 spin_unlock_irq(&sdev->spinlock);
2167
2168 return found ? ch : NULL;
2169} 1961}
2170 1962
2171/** 1963/**
2172 * srpt_release_channel() - Release channel resources. 1964 * srpt_shutdown_session() - Whether or not a session may be shut down.
2173 *
2174 * Schedules the actual release because:
2175 * - Calling the ib_destroy_cm_id() call from inside an IB CM callback would
2176 * trigger a deadlock.
2177 * - It is not safe to call TCM transport_* functions from interrupt context.
2178 */ 1965 */
2179static void srpt_release_channel(struct srpt_rdma_ch *ch) 1966static int srpt_shutdown_session(struct se_session *se_sess)
1967{
1968 return 1;
1969}
1970
1971static void srpt_free_ch(struct kref *kref)
2180{ 1972{
2181 schedule_work(&ch->release_work); 1973 struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref);
1974
1975 kfree(ch);
2182} 1976}
2183 1977
2184static void srpt_release_channel_work(struct work_struct *w) 1978static void srpt_release_channel_work(struct work_struct *w)
@@ -2188,8 +1982,8 @@ static void srpt_release_channel_work(struct work_struct *w)
2188 struct se_session *se_sess; 1982 struct se_session *se_sess;
2189 1983
2190 ch = container_of(w, struct srpt_rdma_ch, release_work); 1984 ch = container_of(w, struct srpt_rdma_ch, release_work);
2191 pr_debug("ch = %p; ch->sess = %p; release_done = %p\n", ch, ch->sess, 1985 pr_debug("%s: %s-%d; release_done = %p\n", __func__, ch->sess_name,
2192 ch->release_done); 1986 ch->qp->qp_num, ch->release_done);
2193 1987
2194 sdev = ch->sport->sdev; 1988 sdev = ch->sport->sdev;
2195 BUG_ON(!sdev); 1989 BUG_ON(!sdev);
@@ -2197,6 +1991,7 @@ static void srpt_release_channel_work(struct work_struct *w)
2197 se_sess = ch->sess; 1991 se_sess = ch->sess;
2198 BUG_ON(!se_sess); 1992 BUG_ON(!se_sess);
2199 1993
1994 target_sess_cmd_list_set_waiting(se_sess);
2200 target_wait_for_sess_cmds(se_sess); 1995 target_wait_for_sess_cmds(se_sess);
2201 1996
2202 transport_deregister_session_configfs(se_sess); 1997 transport_deregister_session_configfs(se_sess);
@@ -2211,16 +2006,15 @@ static void srpt_release_channel_work(struct work_struct *w)
2211 ch->sport->sdev, ch->rq_size, 2006 ch->sport->sdev, ch->rq_size,
2212 ch->rsp_size, DMA_TO_DEVICE); 2007 ch->rsp_size, DMA_TO_DEVICE);
2213 2008
2214 spin_lock_irq(&sdev->spinlock); 2009 mutex_lock(&sdev->mutex);
2215 list_del(&ch->list); 2010 list_del_init(&ch->list);
2216 spin_unlock_irq(&sdev->spinlock);
2217
2218 if (ch->release_done) 2011 if (ch->release_done)
2219 complete(ch->release_done); 2012 complete(ch->release_done);
2013 mutex_unlock(&sdev->mutex);
2220 2014
2221 wake_up(&sdev->ch_releaseQ); 2015 wake_up(&sdev->ch_releaseQ);
2222 2016
2223 kfree(ch); 2017 kref_put(&ch->kref, srpt_free_ch);
2224} 2018}
2225 2019
2226/** 2020/**
@@ -2266,9 +2060,9 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2266 be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]), 2060 be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
2267 be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8])); 2061 be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
2268 2062
2269 rsp = kzalloc(sizeof *rsp, GFP_KERNEL); 2063 rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
2270 rej = kzalloc(sizeof *rej, GFP_KERNEL); 2064 rej = kzalloc(sizeof(*rej), GFP_KERNEL);
2271 rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL); 2065 rep_param = kzalloc(sizeof(*rep_param), GFP_KERNEL);
2272 2066
2273 if (!rsp || !rej || !rep_param) { 2067 if (!rsp || !rej || !rep_param) {
2274 ret = -ENOMEM; 2068 ret = -ENOMEM;
@@ -2297,7 +2091,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2297 if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) { 2091 if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) {
2298 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN; 2092 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
2299 2093
2300 spin_lock_irq(&sdev->spinlock); 2094 mutex_lock(&sdev->mutex);
2301 2095
2302 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) { 2096 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
2303 if (!memcmp(ch->i_port_id, req->initiator_port_id, 16) 2097 if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
@@ -2305,26 +2099,16 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2305 && param->port == ch->sport->port 2099 && param->port == ch->sport->port
2306 && param->listen_id == ch->sport->sdev->cm_id 2100 && param->listen_id == ch->sport->sdev->cm_id
2307 && ch->cm_id) { 2101 && ch->cm_id) {
2308 enum rdma_ch_state ch_state; 2102 if (srpt_disconnect_ch(ch) < 0)
2309
2310 ch_state = srpt_get_ch_state(ch);
2311 if (ch_state != CH_CONNECTING
2312 && ch_state != CH_LIVE)
2313 continue; 2103 continue;
2314 2104 pr_info("Relogin - closed existing channel %s\n",
2315 /* found an existing channel */ 2105 ch->sess_name);
2316 pr_debug("Found existing channel %s"
2317 " cm_id= %p state= %d\n",
2318 ch->sess_name, ch->cm_id, ch_state);
2319
2320 __srpt_close_ch(ch);
2321
2322 rsp->rsp_flags = 2106 rsp->rsp_flags =
2323 SRP_LOGIN_RSP_MULTICHAN_TERMINATED; 2107 SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
2324 } 2108 }
2325 } 2109 }
2326 2110
2327 spin_unlock_irq(&sdev->spinlock); 2111 mutex_unlock(&sdev->mutex);
2328 2112
2329 } else 2113 } else
2330 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED; 2114 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
@@ -2340,7 +2124,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2340 goto reject; 2124 goto reject;
2341 } 2125 }
2342 2126
2343 ch = kzalloc(sizeof *ch, GFP_KERNEL); 2127 ch = kzalloc(sizeof(*ch), GFP_KERNEL);
2344 if (!ch) { 2128 if (!ch) {
2345 rej->reason = cpu_to_be32( 2129 rej->reason = cpu_to_be32(
2346 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); 2130 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
@@ -2349,11 +2133,14 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2349 goto reject; 2133 goto reject;
2350 } 2134 }
2351 2135
2136 kref_init(&ch->kref);
2137 ch->zw_cqe.done = srpt_zerolength_write_done;
2352 INIT_WORK(&ch->release_work, srpt_release_channel_work); 2138 INIT_WORK(&ch->release_work, srpt_release_channel_work);
2353 memcpy(ch->i_port_id, req->initiator_port_id, 16); 2139 memcpy(ch->i_port_id, req->initiator_port_id, 16);
2354 memcpy(ch->t_port_id, req->target_port_id, 16); 2140 memcpy(ch->t_port_id, req->target_port_id, 16);
2355 ch->sport = &sdev->port[param->port - 1]; 2141 ch->sport = &sdev->port[param->port - 1];
2356 ch->cm_id = cm_id; 2142 ch->cm_id = cm_id;
2143 cm_id->context = ch;
2357 /* 2144 /*
2358 * Avoid QUEUE_FULL conditions by limiting the number of buffers used 2145 * Avoid QUEUE_FULL conditions by limiting the number of buffers used
2359 * for the SRP protocol to the command queue size. 2146 * for the SRP protocol to the command queue size.
@@ -2453,7 +2240,7 @@ try_again:
2453 /* create cm reply */ 2240 /* create cm reply */
2454 rep_param->qp_num = ch->qp->qp_num; 2241 rep_param->qp_num = ch->qp->qp_num;
2455 rep_param->private_data = (void *)rsp; 2242 rep_param->private_data = (void *)rsp;
2456 rep_param->private_data_len = sizeof *rsp; 2243 rep_param->private_data_len = sizeof(*rsp);
2457 rep_param->rnr_retry_count = 7; 2244 rep_param->rnr_retry_count = 7;
2458 rep_param->flow_control = 1; 2245 rep_param->flow_control = 1;
2459 rep_param->failover_accepted = 0; 2246 rep_param->failover_accepted = 0;
@@ -2468,14 +2255,14 @@ try_again:
2468 goto release_channel; 2255 goto release_channel;
2469 } 2256 }
2470 2257
2471 spin_lock_irq(&sdev->spinlock); 2258 mutex_lock(&sdev->mutex);
2472 list_add_tail(&ch->list, &sdev->rch_list); 2259 list_add_tail(&ch->list, &sdev->rch_list);
2473 spin_unlock_irq(&sdev->spinlock); 2260 mutex_unlock(&sdev->mutex);
2474 2261
2475 goto out; 2262 goto out;
2476 2263
2477release_channel: 2264release_channel:
2478 srpt_set_ch_state(ch, CH_RELEASING); 2265 srpt_disconnect_ch(ch);
2479 transport_deregister_session_configfs(ch->sess); 2266 transport_deregister_session_configfs(ch->sess);
2480 transport_deregister_session(ch->sess); 2267 transport_deregister_session(ch->sess);
2481 ch->sess = NULL; 2268 ch->sess = NULL;
@@ -2497,7 +2284,7 @@ reject:
2497 | SRP_BUF_FORMAT_INDIRECT); 2284 | SRP_BUF_FORMAT_INDIRECT);
2498 2285
2499 ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, 2286 ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
2500 (void *)rej, sizeof *rej); 2287 (void *)rej, sizeof(*rej));
2501 2288
2502out: 2289out:
2503 kfree(rep_param); 2290 kfree(rep_param);
@@ -2507,10 +2294,23 @@ out:
2507 return ret; 2294 return ret;
2508} 2295}
2509 2296
2510static void srpt_cm_rej_recv(struct ib_cm_id *cm_id) 2297static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch,
2298 enum ib_cm_rej_reason reason,
2299 const u8 *private_data,
2300 u8 private_data_len)
2511{ 2301{
2512 pr_info("Received IB REJ for cm_id %p.\n", cm_id); 2302 char *priv = NULL;
2513 srpt_drain_channel(cm_id); 2303 int i;
2304
2305 if (private_data_len && (priv = kmalloc(private_data_len * 3 + 1,
2306 GFP_KERNEL))) {
2307 for (i = 0; i < private_data_len; i++)
2308 sprintf(priv + 3 * i, " %02x", private_data[i]);
2309 }
2310 pr_info("Received CM REJ for ch %s-%d; reason %d%s%s.\n",
2311 ch->sess_name, ch->qp->qp_num, reason, private_data_len ?
2312 "; private data" : "", priv ? priv : " (?)");
2313 kfree(priv);
2514} 2314}
2515 2315
2516/** 2316/**
@@ -2519,87 +2319,23 @@ static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
2519 * An IB_CM_RTU_RECEIVED message indicates that the connection is established 2319 * An IB_CM_RTU_RECEIVED message indicates that the connection is established
2520 * and that the recipient may begin transmitting (RTU = ready to use). 2320 * and that the recipient may begin transmitting (RTU = ready to use).
2521 */ 2321 */
2522static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id) 2322static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch)
2523{ 2323{
2524 struct srpt_rdma_ch *ch;
2525 int ret; 2324 int ret;
2526 2325
2527 ch = srpt_find_channel(cm_id->context, cm_id); 2326 if (srpt_set_ch_state(ch, CH_LIVE)) {
2528 BUG_ON(!ch);
2529
2530 if (srpt_test_and_set_ch_state(ch, CH_CONNECTING, CH_LIVE)) {
2531 struct srpt_recv_ioctx *ioctx, *ioctx_tmp;
2532
2533 ret = srpt_ch_qp_rts(ch, ch->qp); 2327 ret = srpt_ch_qp_rts(ch, ch->qp);
2534 2328
2535 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list, 2329 if (ret == 0) {
2536 wait_list) { 2330 /* Trigger wait list processing. */
2537 list_del(&ioctx->wait_list); 2331 ret = srpt_zerolength_write(ch);
2538 srpt_handle_new_iu(ch, ioctx, NULL); 2332 WARN_ONCE(ret < 0, "%d\n", ret);
2539 } 2333 } else {
2540 if (ret)
2541 srpt_close_ch(ch); 2334 srpt_close_ch(ch);
2335 }
2542 } 2336 }
2543} 2337}
2544 2338
2545static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
2546{
2547 pr_info("Received IB TimeWait exit for cm_id %p.\n", cm_id);
2548 srpt_drain_channel(cm_id);
2549}
2550
2551static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
2552{
2553 pr_info("Received IB REP error for cm_id %p.\n", cm_id);
2554 srpt_drain_channel(cm_id);
2555}
2556
2557/**
2558 * srpt_cm_dreq_recv() - Process reception of a DREQ message.
2559 */
2560static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
2561{
2562 struct srpt_rdma_ch *ch;
2563 unsigned long flags;
2564 bool send_drep = false;
2565
2566 ch = srpt_find_channel(cm_id->context, cm_id);
2567 BUG_ON(!ch);
2568
2569 pr_debug("cm_id= %p ch->state= %d\n", cm_id, srpt_get_ch_state(ch));
2570
2571 spin_lock_irqsave(&ch->spinlock, flags);
2572 switch (ch->state) {
2573 case CH_CONNECTING:
2574 case CH_LIVE:
2575 send_drep = true;
2576 ch->state = CH_DISCONNECTING;
2577 break;
2578 case CH_DISCONNECTING:
2579 case CH_DRAINING:
2580 case CH_RELEASING:
2581 WARN(true, "unexpected channel state %d\n", ch->state);
2582 break;
2583 }
2584 spin_unlock_irqrestore(&ch->spinlock, flags);
2585
2586 if (send_drep) {
2587 if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0)
2588 pr_err("Sending IB DREP failed.\n");
2589 pr_info("Received DREQ and sent DREP for session %s.\n",
2590 ch->sess_name);
2591 }
2592}
2593
2594/**
2595 * srpt_cm_drep_recv() - Process reception of a DREP message.
2596 */
2597static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
2598{
2599 pr_info("Received InfiniBand DREP message for cm_id %p.\n", cm_id);
2600 srpt_drain_channel(cm_id);
2601}
2602
2603/** 2339/**
2604 * srpt_cm_handler() - IB connection manager callback function. 2340 * srpt_cm_handler() - IB connection manager callback function.
2605 * 2341 *
@@ -2612,6 +2348,7 @@ static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
2612 */ 2348 */
2613static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) 2349static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2614{ 2350{
2351 struct srpt_rdma_ch *ch = cm_id->context;
2615 int ret; 2352 int ret;
2616 2353
2617 ret = 0; 2354 ret = 0;
@@ -2621,32 +2358,39 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2621 event->private_data); 2358 event->private_data);
2622 break; 2359 break;
2623 case IB_CM_REJ_RECEIVED: 2360 case IB_CM_REJ_RECEIVED:
2624 srpt_cm_rej_recv(cm_id); 2361 srpt_cm_rej_recv(ch, event->param.rej_rcvd.reason,
2362 event->private_data,
2363 IB_CM_REJ_PRIVATE_DATA_SIZE);
2625 break; 2364 break;
2626 case IB_CM_RTU_RECEIVED: 2365 case IB_CM_RTU_RECEIVED:
2627 case IB_CM_USER_ESTABLISHED: 2366 case IB_CM_USER_ESTABLISHED:
2628 srpt_cm_rtu_recv(cm_id); 2367 srpt_cm_rtu_recv(ch);
2629 break; 2368 break;
2630 case IB_CM_DREQ_RECEIVED: 2369 case IB_CM_DREQ_RECEIVED:
2631 srpt_cm_dreq_recv(cm_id); 2370 srpt_disconnect_ch(ch);
2632 break; 2371 break;
2633 case IB_CM_DREP_RECEIVED: 2372 case IB_CM_DREP_RECEIVED:
2634 srpt_cm_drep_recv(cm_id); 2373 pr_info("Received CM DREP message for ch %s-%d.\n",
2374 ch->sess_name, ch->qp->qp_num);
2375 srpt_close_ch(ch);
2635 break; 2376 break;
2636 case IB_CM_TIMEWAIT_EXIT: 2377 case IB_CM_TIMEWAIT_EXIT:
2637 srpt_cm_timewait_exit(cm_id); 2378 pr_info("Received CM TimeWait exit for ch %s-%d.\n",
2379 ch->sess_name, ch->qp->qp_num);
2380 srpt_close_ch(ch);
2638 break; 2381 break;
2639 case IB_CM_REP_ERROR: 2382 case IB_CM_REP_ERROR:
2640 srpt_cm_rep_error(cm_id); 2383 pr_info("Received CM REP error for ch %s-%d.\n", ch->sess_name,
2384 ch->qp->qp_num);
2641 break; 2385 break;
2642 case IB_CM_DREQ_ERROR: 2386 case IB_CM_DREQ_ERROR:
2643 pr_info("Received IB DREQ ERROR event.\n"); 2387 pr_info("Received CM DREQ ERROR event.\n");
2644 break; 2388 break;
2645 case IB_CM_MRA_RECEIVED: 2389 case IB_CM_MRA_RECEIVED:
2646 pr_info("Received IB MRA event\n"); 2390 pr_info("Received CM MRA event\n");
2647 break; 2391 break;
2648 default: 2392 default:
2649 pr_err("received unrecognized IB CM event %d\n", event->event); 2393 pr_err("received unrecognized CM event %d\n", event->event);
2650 break; 2394 break;
2651 } 2395 }
2652 2396
@@ -2755,41 +2499,14 @@ static int srpt_write_pending_status(struct se_cmd *se_cmd)
2755 */ 2499 */
2756static int srpt_write_pending(struct se_cmd *se_cmd) 2500static int srpt_write_pending(struct se_cmd *se_cmd)
2757{ 2501{
2758 struct srpt_rdma_ch *ch; 2502 struct srpt_send_ioctx *ioctx =
2759 struct srpt_send_ioctx *ioctx; 2503 container_of(se_cmd, struct srpt_send_ioctx, cmd);
2504 struct srpt_rdma_ch *ch = ioctx->ch;
2760 enum srpt_command_state new_state; 2505 enum srpt_command_state new_state;
2761 enum rdma_ch_state ch_state;
2762 int ret;
2763
2764 ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);
2765 2506
2766 new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA); 2507 new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA);
2767 WARN_ON(new_state == SRPT_STATE_DONE); 2508 WARN_ON(new_state == SRPT_STATE_DONE);
2768 2509 return srpt_xfer_data(ch, ioctx);
2769 ch = ioctx->ch;
2770 BUG_ON(!ch);
2771
2772 ch_state = srpt_get_ch_state(ch);
2773 switch (ch_state) {
2774 case CH_CONNECTING:
2775 WARN(true, "unexpected channel state %d\n", ch_state);
2776 ret = -EINVAL;
2777 goto out;
2778 case CH_LIVE:
2779 break;
2780 case CH_DISCONNECTING:
2781 case CH_DRAINING:
2782 case CH_RELEASING:
2783 pr_debug("cmd with tag %lld: channel disconnecting\n",
2784 ioctx->cmd.tag);
2785 srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN);
2786 ret = -EINVAL;
2787 goto out;
2788 }
2789 ret = srpt_xfer_data(ch, ioctx);
2790
2791out:
2792 return ret;
2793} 2510}
2794 2511
2795static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status) 2512static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status)
@@ -2920,36 +2637,25 @@ static void srpt_refresh_port_work(struct work_struct *work)
2920 srpt_refresh_port(sport); 2637 srpt_refresh_port(sport);
2921} 2638}
2922 2639
2923static int srpt_ch_list_empty(struct srpt_device *sdev)
2924{
2925 int res;
2926
2927 spin_lock_irq(&sdev->spinlock);
2928 res = list_empty(&sdev->rch_list);
2929 spin_unlock_irq(&sdev->spinlock);
2930
2931 return res;
2932}
2933
2934/** 2640/**
2935 * srpt_release_sdev() - Free the channel resources associated with a target. 2641 * srpt_release_sdev() - Free the channel resources associated with a target.
2936 */ 2642 */
2937static int srpt_release_sdev(struct srpt_device *sdev) 2643static int srpt_release_sdev(struct srpt_device *sdev)
2938{ 2644{
2939 struct srpt_rdma_ch *ch, *tmp_ch; 2645 int i, res;
2940 int res;
2941 2646
2942 WARN_ON_ONCE(irqs_disabled()); 2647 WARN_ON_ONCE(irqs_disabled());
2943 2648
2944 BUG_ON(!sdev); 2649 BUG_ON(!sdev);
2945 2650
2946 spin_lock_irq(&sdev->spinlock); 2651 mutex_lock(&sdev->mutex);
2947 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) 2652 for (i = 0; i < ARRAY_SIZE(sdev->port); i++)
2948 __srpt_close_ch(ch); 2653 sdev->port[i].enabled = false;
2949 spin_unlock_irq(&sdev->spinlock); 2654 __srpt_close_all_ch(sdev);
2655 mutex_unlock(&sdev->mutex);
2950 2656
2951 res = wait_event_interruptible(sdev->ch_releaseQ, 2657 res = wait_event_interruptible(sdev->ch_releaseQ,
2952 srpt_ch_list_empty(sdev)); 2658 list_empty_careful(&sdev->rch_list));
2953 if (res) 2659 if (res)
2954 pr_err("%s: interrupted.\n", __func__); 2660 pr_err("%s: interrupted.\n", __func__);
2955 2661
@@ -3003,14 +2709,14 @@ static void srpt_add_one(struct ib_device *device)
3003 pr_debug("device = %p, device->dma_ops = %p\n", device, 2709 pr_debug("device = %p, device->dma_ops = %p\n", device,
3004 device->dma_ops); 2710 device->dma_ops);
3005 2711
3006 sdev = kzalloc(sizeof *sdev, GFP_KERNEL); 2712 sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
3007 if (!sdev) 2713 if (!sdev)
3008 goto err; 2714 goto err;
3009 2715
3010 sdev->device = device; 2716 sdev->device = device;
3011 INIT_LIST_HEAD(&sdev->rch_list); 2717 INIT_LIST_HEAD(&sdev->rch_list);
3012 init_waitqueue_head(&sdev->ch_releaseQ); 2718 init_waitqueue_head(&sdev->ch_releaseQ);
3013 spin_lock_init(&sdev->spinlock); 2719 mutex_init(&sdev->mutex);
3014 2720
3015 sdev->pd = ib_alloc_pd(device); 2721 sdev->pd = ib_alloc_pd(device);
3016 if (IS_ERR(sdev->pd)) 2722 if (IS_ERR(sdev->pd))
@@ -3082,7 +2788,7 @@ static void srpt_add_one(struct ib_device *device)
3082 2788
3083 if (srpt_refresh_port(sport)) { 2789 if (srpt_refresh_port(sport)) {
3084 pr_err("MAD registration failed for %s-%d.\n", 2790 pr_err("MAD registration failed for %s-%d.\n",
3085 srpt_sdev_name(sdev), i); 2791 sdev->device->name, i);
3086 goto err_ring; 2792 goto err_ring;
3087 } 2793 }
3088 snprintf(sport->port_guid, sizeof(sport->port_guid), 2794 snprintf(sport->port_guid, sizeof(sport->port_guid),
@@ -3231,24 +2937,26 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
3231static void srpt_close_session(struct se_session *se_sess) 2937static void srpt_close_session(struct se_session *se_sess)
3232{ 2938{
3233 DECLARE_COMPLETION_ONSTACK(release_done); 2939 DECLARE_COMPLETION_ONSTACK(release_done);
3234 struct srpt_rdma_ch *ch; 2940 struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr;
3235 struct srpt_device *sdev; 2941 struct srpt_device *sdev = ch->sport->sdev;
3236 unsigned long res; 2942 bool wait;
3237
3238 ch = se_sess->fabric_sess_ptr;
3239 WARN_ON(ch->sess != se_sess);
3240 2943
3241 pr_debug("ch %p state %d\n", ch, srpt_get_ch_state(ch)); 2944 pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num,
2945 ch->state);
3242 2946
3243 sdev = ch->sport->sdev; 2947 mutex_lock(&sdev->mutex);
3244 spin_lock_irq(&sdev->spinlock);
3245 BUG_ON(ch->release_done); 2948 BUG_ON(ch->release_done);
3246 ch->release_done = &release_done; 2949 ch->release_done = &release_done;
3247 __srpt_close_ch(ch); 2950 wait = !list_empty(&ch->list);
3248 spin_unlock_irq(&sdev->spinlock); 2951 srpt_disconnect_ch(ch);
2952 mutex_unlock(&sdev->mutex);
3249 2953
3250 res = wait_for_completion_timeout(&release_done, 60 * HZ); 2954 if (!wait)
3251 WARN_ON(res == 0); 2955 return;
2956
2957 while (wait_for_completion_timeout(&release_done, 180 * HZ) == 0)
2958 pr_info("%s(%s-%d state %d): still waiting ...\n", __func__,
2959 ch->sess_name, ch->qp->qp_num, ch->state);
3252} 2960}
3253 2961
3254/** 2962/**
@@ -3456,6 +3164,8 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item,
3456{ 3164{
3457 struct se_portal_group *se_tpg = to_tpg(item); 3165 struct se_portal_group *se_tpg = to_tpg(item);
3458 struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); 3166 struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
3167 struct srpt_device *sdev = sport->sdev;
3168 struct srpt_rdma_ch *ch;
3459 unsigned long tmp; 3169 unsigned long tmp;
3460 int ret; 3170 int ret;
3461 3171
@@ -3469,11 +3179,24 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item,
3469 pr_err("Illegal value for srpt_tpg_store_enable: %lu\n", tmp); 3179 pr_err("Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
3470 return -EINVAL; 3180 return -EINVAL;
3471 } 3181 }
3472 if (tmp == 1) 3182 if (sport->enabled == tmp)
3473 sport->enabled = true; 3183 goto out;
3474 else 3184 sport->enabled = tmp;
3475 sport->enabled = false; 3185 if (sport->enabled)
3186 goto out;
3187
3188 mutex_lock(&sdev->mutex);
3189 list_for_each_entry(ch, &sdev->rch_list, list) {
3190 if (ch->sport == sport) {
3191 pr_debug("%s: ch %p %s-%d\n", __func__, ch,
3192 ch->sess_name, ch->qp->qp_num);
3193 srpt_disconnect_ch(ch);
3194 srpt_close_ch(ch);
3195 }
3196 }
3197 mutex_unlock(&sdev->mutex);
3476 3198
3199out:
3477 return count; 3200 return count;
3478} 3201}
3479 3202
@@ -3565,7 +3288,6 @@ static struct configfs_attribute *srpt_wwn_attrs[] = {
3565static const struct target_core_fabric_ops srpt_template = { 3288static const struct target_core_fabric_ops srpt_template = {
3566 .module = THIS_MODULE, 3289 .module = THIS_MODULE,
3567 .name = "srpt", 3290 .name = "srpt",
3568 .node_acl_size = sizeof(struct srpt_node_acl),
3569 .get_fabric_name = srpt_get_fabric_name, 3291 .get_fabric_name = srpt_get_fabric_name,
3570 .tpg_get_wwn = srpt_get_fabric_wwn, 3292 .tpg_get_wwn = srpt_get_fabric_wwn,
3571 .tpg_get_tag = srpt_get_tag, 3293 .tpg_get_tag = srpt_get_tag,
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 09037f2b0b51..af9b8b527340 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -218,20 +218,20 @@ struct srpt_send_ioctx {
218 218
219/** 219/**
220 * enum rdma_ch_state - SRP channel state. 220 * enum rdma_ch_state - SRP channel state.
221 * @CH_CONNECTING: QP is in RTR state; waiting for RTU. 221 * @CH_CONNECTING: QP is in RTR state; waiting for RTU.
222 * @CH_LIVE: QP is in RTS state. 222 * @CH_LIVE: QP is in RTS state.
223 * @CH_DISCONNECTING: DREQ has been received; waiting for DREP 223 * @CH_DISCONNECTING: DREQ has been sent and waiting for DREP or DREQ has
224 * or DREQ has been send and waiting for DREP 224 * been received.
225 * or . 225 * @CH_DRAINING: DREP has been received or waiting for DREP timed out
226 * @CH_DRAINING: QP is in ERR state; waiting for last WQE event. 226 * and last work request has been queued.
227 * @CH_RELEASING: Last WQE event has been received; releasing resources. 227 * @CH_DISCONNECTED: Last completion has been received.
228 */ 228 */
229enum rdma_ch_state { 229enum rdma_ch_state {
230 CH_CONNECTING, 230 CH_CONNECTING,
231 CH_LIVE, 231 CH_LIVE,
232 CH_DISCONNECTING, 232 CH_DISCONNECTING,
233 CH_DRAINING, 233 CH_DRAINING,
234 CH_RELEASING 234 CH_DISCONNECTED,
235}; 235};
236 236
237/** 237/**
@@ -267,6 +267,8 @@ struct srpt_rdma_ch {
267 struct ib_cm_id *cm_id; 267 struct ib_cm_id *cm_id;
268 struct ib_qp *qp; 268 struct ib_qp *qp;
269 struct ib_cq *cq; 269 struct ib_cq *cq;
270 struct ib_cqe zw_cqe;
271 struct kref kref;
270 int rq_size; 272 int rq_size;
271 u32 rsp_size; 273 u32 rsp_size;
272 atomic_t sq_wr_avail; 274 atomic_t sq_wr_avail;
@@ -286,7 +288,6 @@ struct srpt_rdma_ch {
286 u8 sess_name[36]; 288 u8 sess_name[36];
287 struct work_struct release_work; 289 struct work_struct release_work;
288 struct completion *release_done; 290 struct completion *release_done;
289 bool in_shutdown;
290}; 291};
291 292
292/** 293/**
@@ -343,7 +344,7 @@ struct srpt_port {
343 * @ioctx_ring: Per-HCA SRQ. 344 * @ioctx_ring: Per-HCA SRQ.
344 * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list. 345 * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list.
345 * @ch_releaseQ: Enables waiting for removal from rch_list. 346 * @ch_releaseQ: Enables waiting for removal from rch_list.
346 * @spinlock: Protects rch_list and tpg. 347 * @mutex: Protects rch_list.
347 * @port: Information about the ports owned by this HCA. 348 * @port: Information about the ports owned by this HCA.
348 * @event_handler: Per-HCA asynchronous IB event handler. 349 * @event_handler: Per-HCA asynchronous IB event handler.
349 * @list: Node in srpt_dev_list. 350 * @list: Node in srpt_dev_list.
@@ -357,18 +358,10 @@ struct srpt_device {
357 struct srpt_recv_ioctx **ioctx_ring; 358 struct srpt_recv_ioctx **ioctx_ring;
358 struct list_head rch_list; 359 struct list_head rch_list;
359 wait_queue_head_t ch_releaseQ; 360 wait_queue_head_t ch_releaseQ;
360 spinlock_t spinlock; 361 struct mutex mutex;
361 struct srpt_port port[2]; 362 struct srpt_port port[2];
362 struct ib_event_handler event_handler; 363 struct ib_event_handler event_handler;
363 struct list_head list; 364 struct list_head list;
364}; 365};
365 366
366/**
367 * struct srpt_node_acl - Per-initiator ACL data (managed via configfs).
368 * @nacl: Target core node ACL information.
369 */
370struct srpt_node_acl {
371 struct se_node_acl nacl;
372};
373
374#endif /* IB_SRPT_H */ 367#endif /* IB_SRPT_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index a072d341e205..1d2d1da40c80 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -1021,6 +1021,8 @@ struct cpl_l2t_write_req {
1021#define L2T_W_NOREPLY_V(x) ((x) << L2T_W_NOREPLY_S) 1021#define L2T_W_NOREPLY_V(x) ((x) << L2T_W_NOREPLY_S)
1022#define L2T_W_NOREPLY_F L2T_W_NOREPLY_V(1U) 1022#define L2T_W_NOREPLY_F L2T_W_NOREPLY_V(1U)
1023 1023
1024#define CPL_L2T_VLAN_NONE 0xfff
1025
1024struct cpl_l2t_write_rpl { 1026struct cpl_l2t_write_rpl {
1025 union opcode_tid ot; 1027 union opcode_tid ot;
1026 u8 status; 1028 u8 status;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index a32de30ea663..c8661c77b4e3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -561,6 +561,7 @@ enum fw_flowc_mnem {
561 FW_FLOWC_MNEM_SNDBUF, 561 FW_FLOWC_MNEM_SNDBUF,
562 FW_FLOWC_MNEM_MSS, 562 FW_FLOWC_MNEM_MSS,
563 FW_FLOWC_MNEM_TXDATAPLEN_MAX, 563 FW_FLOWC_MNEM_TXDATAPLEN_MAX,
564 FW_FLOWC_MNEM_SCHEDCLASS = 11,
564}; 565};
565 566
566struct fw_flowc_mnemval { 567struct fw_flowc_mnemval {
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index d66c690a8597..e97094598b2d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -157,7 +157,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
157 [29] = "802.1ad offload support", 157 [29] = "802.1ad offload support",
158 [31] = "Modifying loopback source checks using UPDATE_QP support", 158 [31] = "Modifying loopback source checks using UPDATE_QP support",
159 [32] = "Loopback source checks support", 159 [32] = "Loopback source checks support",
160 [33] = "RoCEv2 support" 160 [33] = "RoCEv2 support",
161 [34] = "DMFS Sniffer support (UC & MC)"
161 }; 162 };
162 int i; 163 int i;
163 164
@@ -810,6 +811,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
810 if (field & 0x80) 811 if (field & 0x80)
811 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN; 812 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN;
812 dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f; 813 dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f;
814 if (field & 0x20)
815 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER;
813 MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_BEACON_OFFSET); 816 MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_BEACON_OFFSET);
814 if (field & 0x80) 817 if (field & 0x80)
815 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_BEACON; 818 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_BEACON;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 1d4e2e054647..42d8de892bfe 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -752,8 +752,10 @@ static const u8 __promisc_mode[] = {
752 [MLX4_FS_REGULAR] = 0x0, 752 [MLX4_FS_REGULAR] = 0x0,
753 [MLX4_FS_ALL_DEFAULT] = 0x1, 753 [MLX4_FS_ALL_DEFAULT] = 0x1,
754 [MLX4_FS_MC_DEFAULT] = 0x3, 754 [MLX4_FS_MC_DEFAULT] = 0x3,
755 [MLX4_FS_UC_SNIFFER] = 0x4, 755 [MLX4_FS_MIRROR_RX_PORT] = 0x4,
756 [MLX4_FS_MC_SNIFFER] = 0x5, 756 [MLX4_FS_MIRROR_SX_PORT] = 0x5,
757 [MLX4_FS_UC_SNIFFER] = 0x6,
758 [MLX4_FS_MC_SNIFFER] = 0x7,
757}; 759};
758 760
759int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev, 761int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 5b1753233c5d..81b2013ef968 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -516,7 +516,7 @@ struct mlx5e_priv {
516 struct mlx5_uar cq_uar; 516 struct mlx5_uar cq_uar;
517 u32 pdn; 517 u32 pdn;
518 u32 tdn; 518 u32 tdn;
519 struct mlx5_core_mr mr; 519 struct mlx5_core_mkey mkey;
520 struct mlx5e_rq drop_rq; 520 struct mlx5e_rq drop_rq;
521 521
522 struct mlx5e_channel **channel; 522 struct mlx5e_channel **channel;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 402994bf7e16..0c49951606b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -973,7 +973,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
973 c->cpu = cpu; 973 c->cpu = cpu;
974 c->pdev = &priv->mdev->pdev->dev; 974 c->pdev = &priv->mdev->pdev->dev;
975 c->netdev = priv->netdev; 975 c->netdev = priv->netdev;
976 c->mkey_be = cpu_to_be32(priv->mr.key); 976 c->mkey_be = cpu_to_be32(priv->mkey.key);
977 c->num_tc = priv->params.num_tc; 977 c->num_tc = priv->params.num_tc;
978 978
979 mlx5e_build_channeltc_to_txq_map(priv, ix); 979 mlx5e_build_channeltc_to_txq_map(priv, ix);
@@ -2204,7 +2204,7 @@ static void mlx5e_build_netdev(struct net_device *netdev)
2204} 2204}
2205 2205
2206static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, 2206static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
2207 struct mlx5_core_mr *mr) 2207 struct mlx5_core_mkey *mkey)
2208{ 2208{
2209 struct mlx5_core_dev *mdev = priv->mdev; 2209 struct mlx5_core_dev *mdev = priv->mdev;
2210 struct mlx5_create_mkey_mbox_in *in; 2210 struct mlx5_create_mkey_mbox_in *in;
@@ -2220,7 +2220,7 @@ static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
2220 in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); 2220 in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
2221 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 2221 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
2222 2222
2223 err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL, 2223 err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL,
2224 NULL); 2224 NULL);
2225 2225
2226 kvfree(in); 2226 kvfree(in);
@@ -2269,7 +2269,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
2269 goto err_dealloc_pd; 2269 goto err_dealloc_pd;
2270 } 2270 }
2271 2271
2272 err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr); 2272 err = mlx5e_create_mkey(priv, priv->pdn, &priv->mkey);
2273 if (err) { 2273 if (err) {
2274 mlx5_core_err(mdev, "create mkey failed, %d\n", err); 2274 mlx5_core_err(mdev, "create mkey failed, %d\n", err);
2275 goto err_dealloc_transport_domain; 2275 goto err_dealloc_transport_domain;
@@ -2343,7 +2343,7 @@ err_destroy_tises:
2343 mlx5e_destroy_tises(priv); 2343 mlx5e_destroy_tises(priv);
2344 2344
2345err_destroy_mkey: 2345err_destroy_mkey:
2346 mlx5_core_destroy_mkey(mdev, &priv->mr); 2346 mlx5_core_destroy_mkey(mdev, &priv->mkey);
2347 2347
2348err_dealloc_transport_domain: 2348err_dealloc_transport_domain:
2349 mlx5_core_dealloc_transport_domain(mdev, priv->tdn); 2349 mlx5_core_dealloc_transport_domain(mdev, priv->tdn);
@@ -2377,7 +2377,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv)
2377 mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT); 2377 mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT);
2378 mlx5e_close_drop_rq(priv); 2378 mlx5e_close_drop_rq(priv);
2379 mlx5e_destroy_tises(priv); 2379 mlx5e_destroy_tises(priv);
2380 mlx5_core_destroy_mkey(priv->mdev, &priv->mr); 2380 mlx5_core_destroy_mkey(priv->mdev, &priv->mkey);
2381 mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn); 2381 mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn);
2382 mlx5_core_dealloc_pd(priv->mdev, priv->pdn); 2382 mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
2383 mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); 2383 mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 6f68dba8d7ed..bf3446794bd5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -77,6 +77,9 @@
77#define KERNEL_NUM_PRIOS 1 77#define KERNEL_NUM_PRIOS 1
78#define KENREL_MIN_LEVEL 2 78#define KENREL_MIN_LEVEL 2
79 79
80#define ANCHOR_MAX_FT 1
81#define ANCHOR_NUM_PRIOS 1
82#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
80struct node_caps { 83struct node_caps {
81 size_t arr_sz; 84 size_t arr_sz;
82 long *caps; 85 long *caps;
@@ -92,7 +95,7 @@ static struct init_tree_node {
92 int max_ft; 95 int max_ft;
93} root_fs = { 96} root_fs = {
94 .type = FS_TYPE_NAMESPACE, 97 .type = FS_TYPE_NAMESPACE,
95 .ar_size = 3, 98 .ar_size = 4,
96 .children = (struct init_tree_node[]) { 99 .children = (struct init_tree_node[]) {
97 ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, 100 ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
98 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), 101 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
@@ -108,6 +111,8 @@ static struct init_tree_node {
108 FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), 111 FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode),
109 FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), 112 FS_CAP(flow_table_properties_nic_receive.flow_table_modify)),
110 ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))), 113 ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))),
114 ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {},
115 ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_MAX_FT))),
111 } 116 }
112}; 117};
113 118
@@ -196,8 +201,10 @@ static void tree_put_node(struct fs_node *node)
196 201
197static int tree_remove_node(struct fs_node *node) 202static int tree_remove_node(struct fs_node *node)
198{ 203{
199 if (atomic_read(&node->refcount) > 1) 204 if (atomic_read(&node->refcount) > 1) {
200 return -EPERM; 205 atomic_dec(&node->refcount);
206 return -EEXIST;
207 }
201 tree_put_node(node); 208 tree_put_node(node);
202 return 0; 209 return 0;
203} 210}
@@ -360,6 +367,11 @@ static void del_rule(struct fs_node *node)
360 memcpy(match_value, fte->val, sizeof(fte->val)); 367 memcpy(match_value, fte->val, sizeof(fte->val));
361 fs_get_obj(ft, fg->node.parent); 368 fs_get_obj(ft, fg->node.parent);
362 list_del(&rule->node.list); 369 list_del(&rule->node.list);
370 if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
371 mutex_lock(&rule->dest_attr.ft->lock);
372 list_del(&rule->next_ft);
373 mutex_unlock(&rule->dest_attr.ft->lock);
374 }
363 fte->dests_size--; 375 fte->dests_size--;
364 if (fte->dests_size) { 376 if (fte->dests_size) {
365 err = mlx5_cmd_update_fte(dev, ft, 377 err = mlx5_cmd_update_fte(dev, ft,
@@ -465,6 +477,8 @@ static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte,
465 ft->node.type = FS_TYPE_FLOW_TABLE; 477 ft->node.type = FS_TYPE_FLOW_TABLE;
466 ft->type = table_type; 478 ft->type = table_type;
467 ft->max_fte = max_fte; 479 ft->max_fte = max_fte;
480 INIT_LIST_HEAD(&ft->fwd_rules);
481 mutex_init(&ft->lock);
468 482
469 return ft; 483 return ft;
470} 484}
@@ -601,9 +615,63 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
601 return err; 615 return err;
602} 616}
603 617
618static int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
619 struct mlx5_flow_destination *dest)
620{
621 struct mlx5_flow_table *ft;
622 struct mlx5_flow_group *fg;
623 struct fs_fte *fte;
624 int err = 0;
625
626 fs_get_obj(fte, rule->node.parent);
627 if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
628 return -EINVAL;
629 lock_ref_node(&fte->node);
630 fs_get_obj(fg, fte->node.parent);
631 fs_get_obj(ft, fg->node.parent);
632
633 memcpy(&rule->dest_attr, dest, sizeof(*dest));
634 err = mlx5_cmd_update_fte(get_dev(&ft->node),
635 ft, fg->id, fte);
636 unlock_ref_node(&fte->node);
637
638 return err;
639}
640
641/* Modify/set FWD rules that point on old_next_ft to point on new_next_ft */
642static int connect_fwd_rules(struct mlx5_core_dev *dev,
643 struct mlx5_flow_table *new_next_ft,
644 struct mlx5_flow_table *old_next_ft)
645{
646 struct mlx5_flow_destination dest;
647 struct mlx5_flow_rule *iter;
648 int err = 0;
649
650 /* new_next_ft and old_next_ft could be NULL only
651 * when we create/destroy the anchor flow table.
652 */
653 if (!new_next_ft || !old_next_ft)
654 return 0;
655
656 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
657 dest.ft = new_next_ft;
658
659 mutex_lock(&old_next_ft->lock);
660 list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules);
661 mutex_unlock(&old_next_ft->lock);
662 list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) {
663 err = mlx5_modify_rule_destination(iter, &dest);
664 if (err)
665 pr_err("mlx5_core: failed to modify rule to point on flow table %d\n",
666 new_next_ft->id);
667 }
668 return 0;
669}
670
604static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, 671static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
605 struct fs_prio *prio) 672 struct fs_prio *prio)
606{ 673{
674 struct mlx5_flow_table *next_ft;
607 int err = 0; 675 int err = 0;
608 676
609 /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ 677 /* Connect_prev_fts and update_root_ft_create are mutually exclusive */
@@ -612,6 +680,11 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table
612 err = connect_prev_fts(dev, ft, prio); 680 err = connect_prev_fts(dev, ft, prio);
613 if (err) 681 if (err)
614 return err; 682 return err;
683
684 next_ft = find_next_chained_ft(prio);
685 err = connect_fwd_rules(dev, ft, next_ft);
686 if (err)
687 return err;
615 } 688 }
616 689
617 if (MLX5_CAP_FLOWTABLE(dev, 690 if (MLX5_CAP_FLOWTABLE(dev,
@@ -762,6 +835,7 @@ static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
762 if (!rule) 835 if (!rule)
763 return NULL; 836 return NULL;
764 837
838 INIT_LIST_HEAD(&rule->next_ft);
765 rule->node.type = FS_TYPE_FLOW_DEST; 839 rule->node.type = FS_TYPE_FLOW_DEST;
766 memcpy(&rule->dest_attr, dest, sizeof(*dest)); 840 memcpy(&rule->dest_attr, dest, sizeof(*dest));
767 841
@@ -782,9 +856,14 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte,
782 return ERR_PTR(-ENOMEM); 856 return ERR_PTR(-ENOMEM);
783 857
784 fs_get_obj(ft, fg->node.parent); 858 fs_get_obj(ft, fg->node.parent);
785 /* Add dest to dests list- added as first element after the head */ 859 /* Add dest to dests list- we need flow tables to be in the
860 * end of the list for forward to next prio rules.
861 */
786 tree_init_node(&rule->node, 1, del_rule); 862 tree_init_node(&rule->node, 1, del_rule);
787 list_add_tail(&rule->node.list, &fte->node.children); 863 if (dest && dest->type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
864 list_add(&rule->node.list, &fte->node.children);
865 else
866 list_add_tail(&rule->node.list, &fte->node.children);
788 fte->dests_size++; 867 fte->dests_size++;
789 if (fte->dests_size == 1) 868 if (fte->dests_size == 1)
790 err = mlx5_cmd_create_fte(get_dev(&ft->node), 869 err = mlx5_cmd_create_fte(get_dev(&ft->node),
@@ -903,6 +982,25 @@ out:
903 return fg; 982 return fg;
904} 983}
905 984
985static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
986 struct mlx5_flow_destination *dest)
987{
988 struct mlx5_flow_rule *rule;
989
990 list_for_each_entry(rule, &fte->node.children, node.list) {
991 if (rule->dest_attr.type == dest->type) {
992 if ((dest->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
993 dest->vport_num == rule->dest_attr.vport_num) ||
994 (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
995 dest->ft == rule->dest_attr.ft) ||
996 (dest->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
997 dest->tir_num == rule->dest_attr.tir_num))
998 return rule;
999 }
1000 }
1001 return NULL;
1002}
1003
906static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, 1004static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
907 u32 *match_value, 1005 u32 *match_value,
908 u8 action, 1006 u8 action,
@@ -919,6 +1017,13 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
919 nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); 1017 nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
920 if (compare_match_value(&fg->mask, match_value, &fte->val) && 1018 if (compare_match_value(&fg->mask, match_value, &fte->val) &&
921 action == fte->action && flow_tag == fte->flow_tag) { 1019 action == fte->action && flow_tag == fte->flow_tag) {
1020 rule = find_flow_rule(fte, dest);
1021 if (rule) {
1022 atomic_inc(&rule->node.refcount);
1023 unlock_ref_node(&fte->node);
1024 unlock_ref_node(&fg->node);
1025 return rule;
1026 }
922 rule = add_rule_fte(fte, fg, dest); 1027 rule = add_rule_fte(fte, fg, dest);
923 unlock_ref_node(&fte->node); 1028 unlock_ref_node(&fte->node);
924 if (IS_ERR(rule)) 1029 if (IS_ERR(rule))
@@ -984,14 +1089,14 @@ static struct mlx5_flow_rule *add_rule_to_auto_fg(struct mlx5_flow_table *ft,
984 return rule; 1089 return rule;
985} 1090}
986 1091
987struct mlx5_flow_rule * 1092static struct mlx5_flow_rule *
988mlx5_add_flow_rule(struct mlx5_flow_table *ft, 1093_mlx5_add_flow_rule(struct mlx5_flow_table *ft,
989 u8 match_criteria_enable, 1094 u8 match_criteria_enable,
990 u32 *match_criteria, 1095 u32 *match_criteria,
991 u32 *match_value, 1096 u32 *match_value,
992 u32 action, 1097 u32 action,
993 u32 flow_tag, 1098 u32 flow_tag,
994 struct mlx5_flow_destination *dest) 1099 struct mlx5_flow_destination *dest)
995{ 1100{
996 struct mlx5_flow_group *g; 1101 struct mlx5_flow_group *g;
997 struct mlx5_flow_rule *rule; 1102 struct mlx5_flow_rule *rule;
@@ -1014,6 +1119,63 @@ unlock:
1014 unlock_ref_node(&ft->node); 1119 unlock_ref_node(&ft->node);
1015 return rule; 1120 return rule;
1016} 1121}
1122
1123static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
1124{
1125 return ((ft->type == FS_FT_NIC_RX) &&
1126 (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs)));
1127}
1128
1129struct mlx5_flow_rule *
1130mlx5_add_flow_rule(struct mlx5_flow_table *ft,
1131 u8 match_criteria_enable,
1132 u32 *match_criteria,
1133 u32 *match_value,
1134 u32 action,
1135 u32 flow_tag,
1136 struct mlx5_flow_destination *dest)
1137{
1138 struct mlx5_flow_root_namespace *root = find_root(&ft->node);
1139 struct mlx5_flow_destination gen_dest;
1140 struct mlx5_flow_table *next_ft = NULL;
1141 struct mlx5_flow_rule *rule = NULL;
1142 u32 sw_action = action;
1143 struct fs_prio *prio;
1144
1145 fs_get_obj(prio, ft->node.parent);
1146 if (action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
1147 if (!fwd_next_prio_supported(ft))
1148 return ERR_PTR(-EOPNOTSUPP);
1149 if (dest)
1150 return ERR_PTR(-EINVAL);
1151 mutex_lock(&root->chain_lock);
1152 next_ft = find_next_chained_ft(prio);
1153 if (next_ft) {
1154 gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1155 gen_dest.ft = next_ft;
1156 dest = &gen_dest;
1157 action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1158 } else {
1159 mutex_unlock(&root->chain_lock);
1160 return ERR_PTR(-EOPNOTSUPP);
1161 }
1162 }
1163
1164 rule = _mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria,
1165 match_value, action, flow_tag, dest);
1166
1167 if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
1168 if (!IS_ERR_OR_NULL(rule) &&
1169 (list_empty(&rule->next_ft))) {
1170 mutex_lock(&next_ft->lock);
1171 list_add(&rule->next_ft, &next_ft->fwd_rules);
1172 mutex_unlock(&next_ft->lock);
1173 rule->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
1174 }
1175 mutex_unlock(&root->chain_lock);
1176 }
1177 return rule;
1178}
1017EXPORT_SYMBOL(mlx5_add_flow_rule); 1179EXPORT_SYMBOL(mlx5_add_flow_rule);
1018 1180
1019void mlx5_del_flow_rule(struct mlx5_flow_rule *rule) 1181void mlx5_del_flow_rule(struct mlx5_flow_rule *rule)
@@ -1077,6 +1239,10 @@ static int disconnect_flow_table(struct mlx5_flow_table *ft)
1077 return 0; 1239 return 0;
1078 1240
1079 next_ft = find_next_chained_ft(prio); 1241 next_ft = find_next_chained_ft(prio);
1242 err = connect_fwd_rules(dev, next_ft, ft);
1243 if (err)
1244 return err;
1245
1080 err = connect_prev_fts(dev, next_ft, prio); 1246 err = connect_prev_fts(dev, next_ft, prio);
1081 if (err) 1247 if (err)
1082 mlx5_core_warn(dev, "Failed to disconnect flow table %d\n", 1248 mlx5_core_warn(dev, "Failed to disconnect flow table %d\n",
@@ -1126,6 +1292,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
1126 case MLX5_FLOW_NAMESPACE_BYPASS: 1292 case MLX5_FLOW_NAMESPACE_BYPASS:
1127 case MLX5_FLOW_NAMESPACE_KERNEL: 1293 case MLX5_FLOW_NAMESPACE_KERNEL:
1128 case MLX5_FLOW_NAMESPACE_LEFTOVERS: 1294 case MLX5_FLOW_NAMESPACE_LEFTOVERS:
1295 case MLX5_FLOW_NAMESPACE_ANCHOR:
1129 prio = type; 1296 prio = type;
1130 break; 1297 break;
1131 case MLX5_FLOW_NAMESPACE_FDB: 1298 case MLX5_FLOW_NAMESPACE_FDB:
@@ -1351,6 +1518,25 @@ static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns)
1351 } 1518 }
1352} 1519}
1353 1520
1521#define ANCHOR_PRIO 0
1522#define ANCHOR_SIZE 1
1523static int create_anchor_flow_table(struct mlx5_core_dev
1524 *dev)
1525{
1526 struct mlx5_flow_namespace *ns = NULL;
1527 struct mlx5_flow_table *ft;
1528
1529 ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ANCHOR);
1530 if (!ns)
1531 return -EINVAL;
1532 ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE);
1533 if (IS_ERR(ft)) {
1534 mlx5_core_err(dev, "Failed to create last anchor flow table");
1535 return PTR_ERR(ft);
1536 }
1537 return 0;
1538}
1539
1354static int init_root_ns(struct mlx5_core_dev *dev) 1540static int init_root_ns(struct mlx5_core_dev *dev)
1355{ 1541{
1356 1542
@@ -1363,6 +1549,9 @@ static int init_root_ns(struct mlx5_core_dev *dev)
1363 1549
1364 set_prio_attrs(dev->priv.root_ns); 1550 set_prio_attrs(dev->priv.root_ns);
1365 1551
1552 if (create_anchor_flow_table(dev))
1553 goto cleanup;
1554
1366 return 0; 1555 return 0;
1367 1556
1368cleanup: 1557cleanup:
@@ -1392,6 +1581,15 @@ static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev,
1392 root_ns = NULL; 1581 root_ns = NULL;
1393} 1582}
1394 1583
1584static void destroy_flow_tables(struct fs_prio *prio)
1585{
1586 struct mlx5_flow_table *iter;
1587 struct mlx5_flow_table *tmp;
1588
1589 fs_for_each_ft_safe(iter, tmp, prio)
1590 mlx5_destroy_flow_table(iter);
1591}
1592
1395static void cleanup_root_ns(struct mlx5_core_dev *dev) 1593static void cleanup_root_ns(struct mlx5_core_dev *dev)
1396{ 1594{
1397 struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; 1595 struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns;
@@ -1420,6 +1618,7 @@ static void cleanup_root_ns(struct mlx5_core_dev *dev)
1420 list); 1618 list);
1421 1619
1422 fs_get_obj(obj_iter_prio2, iter_prio2); 1620 fs_get_obj(obj_iter_prio2, iter_prio2);
1621 destroy_flow_tables(obj_iter_prio2);
1423 if (tree_remove_node(iter_prio2)) { 1622 if (tree_remove_node(iter_prio2)) {
1424 mlx5_core_warn(dev, 1623 mlx5_core_warn(dev,
1425 "Priority %d wasn't destroyed, refcount > 1\n", 1624 "Priority %d wasn't destroyed, refcount > 1\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 00245fd7e4bc..f37a6248a27b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -68,6 +68,11 @@ struct fs_node {
68struct mlx5_flow_rule { 68struct mlx5_flow_rule {
69 struct fs_node node; 69 struct fs_node node;
70 struct mlx5_flow_destination dest_attr; 70 struct mlx5_flow_destination dest_attr;
71 /* next_ft should be accessed under chain_lock and only of
72 * destination type is FWD_NEXT_fT.
73 */
74 struct list_head next_ft;
75 u32 sw_action;
71}; 76};
72 77
73/* Type of children is mlx5_flow_group */ 78/* Type of children is mlx5_flow_group */
@@ -82,6 +87,10 @@ struct mlx5_flow_table {
82 unsigned int required_groups; 87 unsigned int required_groups;
83 unsigned int num_groups; 88 unsigned int num_groups;
84 } autogroup; 89 } autogroup;
90 /* Protect fwd_rules */
91 struct mutex lock;
92 /* FWD rules that point on this flow table */
93 struct list_head fwd_rules;
85}; 94};
86 95
87/* Type of children is mlx5_flow_rule */ 96/* Type of children is mlx5_flow_rule */
@@ -142,6 +151,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
142#define fs_list_for_each_entry(pos, root) \ 151#define fs_list_for_each_entry(pos, root) \
143 list_for_each_entry(pos, root, node.list) 152 list_for_each_entry(pos, root, node.list)
144 153
154#define fs_list_for_each_entry_safe(pos, tmp, root) \
155 list_for_each_entry_safe(pos, tmp, root, node.list)
156
145#define fs_for_each_ns_or_ft_reverse(pos, prio) \ 157#define fs_for_each_ns_or_ft_reverse(pos, prio) \
146 list_for_each_entry_reverse(pos, &(prio)->node.children, list) 158 list_for_each_entry_reverse(pos, &(prio)->node.children, list)
147 159
@@ -157,6 +169,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
157#define fs_for_each_ft(pos, prio) \ 169#define fs_for_each_ft(pos, prio) \
158 fs_list_for_each_entry(pos, &(prio)->node.children) 170 fs_list_for_each_entry(pos, &(prio)->node.children)
159 171
172#define fs_for_each_ft_safe(pos, tmp, prio) \
173 fs_list_for_each_entry_safe(pos, tmp, &(prio)->node.children)
174
160#define fs_for_each_fg(pos, ft) \ 175#define fs_for_each_fg(pos, ft) \
161 fs_list_for_each_entry(pos, &(ft)->node.children) 176 fs_list_for_each_entry(pos, &(ft)->node.children)
162 177
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 1545a944c309..0916bbc69269 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1117,7 +1117,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
1117 mlx5_init_cq_table(dev); 1117 mlx5_init_cq_table(dev);
1118 mlx5_init_qp_table(dev); 1118 mlx5_init_qp_table(dev);
1119 mlx5_init_srq_table(dev); 1119 mlx5_init_srq_table(dev);
1120 mlx5_init_mr_table(dev); 1120 mlx5_init_mkey_table(dev);
1121 1121
1122 err = mlx5_init_fs(dev); 1122 err = mlx5_init_fs(dev);
1123 if (err) { 1123 if (err) {
@@ -1164,7 +1164,7 @@ err_sriov:
1164err_reg_dev: 1164err_reg_dev:
1165 mlx5_cleanup_fs(dev); 1165 mlx5_cleanup_fs(dev);
1166err_fs: 1166err_fs:
1167 mlx5_cleanup_mr_table(dev); 1167 mlx5_cleanup_mkey_table(dev);
1168 mlx5_cleanup_srq_table(dev); 1168 mlx5_cleanup_srq_table(dev);
1169 mlx5_cleanup_qp_table(dev); 1169 mlx5_cleanup_qp_table(dev);
1170 mlx5_cleanup_cq_table(dev); 1170 mlx5_cleanup_cq_table(dev);
@@ -1237,7 +1237,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
1237#endif 1237#endif
1238 1238
1239 mlx5_cleanup_fs(dev); 1239 mlx5_cleanup_fs(dev);
1240 mlx5_cleanup_mr_table(dev); 1240 mlx5_cleanup_mkey_table(dev);
1241 mlx5_cleanup_srq_table(dev); 1241 mlx5_cleanup_srq_table(dev);
1242 mlx5_cleanup_qp_table(dev); 1242 mlx5_cleanup_qp_table(dev);
1243 mlx5_cleanup_cq_table(dev); 1243 mlx5_cleanup_cq_table(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 6fa22b51e460..77a7293921d5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -36,25 +36,26 @@
36#include <linux/mlx5/cmd.h> 36#include <linux/mlx5/cmd.h>
37#include "mlx5_core.h" 37#include "mlx5_core.h"
38 38
39void mlx5_init_mr_table(struct mlx5_core_dev *dev) 39void mlx5_init_mkey_table(struct mlx5_core_dev *dev)
40{ 40{
41 struct mlx5_mr_table *table = &dev->priv.mr_table; 41 struct mlx5_mkey_table *table = &dev->priv.mkey_table;
42 42
43 memset(table, 0, sizeof(*table)); 43 memset(table, 0, sizeof(*table));
44 rwlock_init(&table->lock); 44 rwlock_init(&table->lock);
45 INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); 45 INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
46} 46}
47 47
48void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev) 48void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev)
49{ 49{
50} 50}
51 51
52int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, 52int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
53 struct mlx5_core_mkey *mkey,
53 struct mlx5_create_mkey_mbox_in *in, int inlen, 54 struct mlx5_create_mkey_mbox_in *in, int inlen,
54 mlx5_cmd_cbk_t callback, void *context, 55 mlx5_cmd_cbk_t callback, void *context,
55 struct mlx5_create_mkey_mbox_out *out) 56 struct mlx5_create_mkey_mbox_out *out)
56{ 57{
57 struct mlx5_mr_table *table = &dev->priv.mr_table; 58 struct mlx5_mkey_table *table = &dev->priv.mkey_table;
58 struct mlx5_create_mkey_mbox_out lout; 59 struct mlx5_create_mkey_mbox_out lout;
59 int err; 60 int err;
60 u8 key; 61 u8 key;
@@ -83,34 +84,35 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
83 return mlx5_cmd_status_to_err(&lout.hdr); 84 return mlx5_cmd_status_to_err(&lout.hdr);
84 } 85 }
85 86
86 mr->iova = be64_to_cpu(in->seg.start_addr); 87 mkey->iova = be64_to_cpu(in->seg.start_addr);
87 mr->size = be64_to_cpu(in->seg.len); 88 mkey->size = be64_to_cpu(in->seg.len);
88 mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; 89 mkey->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key;
89 mr->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff; 90 mkey->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff;
90 91
91 mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", 92 mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
92 be32_to_cpu(lout.mkey), key, mr->key); 93 be32_to_cpu(lout.mkey), key, mkey->key);
93 94
94 /* connect to MR tree */ 95 /* connect to mkey tree */
95 write_lock_irq(&table->lock); 96 write_lock_irq(&table->lock);
96 err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->key), mr); 97 err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey);
97 write_unlock_irq(&table->lock); 98 write_unlock_irq(&table->lock);
98 if (err) { 99 if (err) {
99 mlx5_core_warn(dev, "failed radix tree insert of mr 0x%x, %d\n", 100 mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n",
100 mlx5_base_mkey(mr->key), err); 101 mlx5_base_mkey(mkey->key), err);
101 mlx5_core_destroy_mkey(dev, mr); 102 mlx5_core_destroy_mkey(dev, mkey);
102 } 103 }
103 104
104 return err; 105 return err;
105} 106}
106EXPORT_SYMBOL(mlx5_core_create_mkey); 107EXPORT_SYMBOL(mlx5_core_create_mkey);
107 108
108int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) 109int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
110 struct mlx5_core_mkey *mkey)
109{ 111{
110 struct mlx5_mr_table *table = &dev->priv.mr_table; 112 struct mlx5_mkey_table *table = &dev->priv.mkey_table;
111 struct mlx5_destroy_mkey_mbox_in in; 113 struct mlx5_destroy_mkey_mbox_in in;
112 struct mlx5_destroy_mkey_mbox_out out; 114 struct mlx5_destroy_mkey_mbox_out out;
113 struct mlx5_core_mr *deleted_mr; 115 struct mlx5_core_mkey *deleted_mkey;
114 unsigned long flags; 116 unsigned long flags;
115 int err; 117 int err;
116 118
@@ -118,16 +120,16 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr)
118 memset(&out, 0, sizeof(out)); 120 memset(&out, 0, sizeof(out));
119 121
120 write_lock_irqsave(&table->lock, flags); 122 write_lock_irqsave(&table->lock, flags);
121 deleted_mr = radix_tree_delete(&table->tree, mlx5_base_mkey(mr->key)); 123 deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key));
122 write_unlock_irqrestore(&table->lock, flags); 124 write_unlock_irqrestore(&table->lock, flags);
123 if (!deleted_mr) { 125 if (!deleted_mkey) {
124 mlx5_core_warn(dev, "failed radix tree delete of mr 0x%x\n", 126 mlx5_core_warn(dev, "failed radix tree delete of mkey 0x%x\n",
125 mlx5_base_mkey(mr->key)); 127 mlx5_base_mkey(mkey->key));
126 return -ENOENT; 128 return -ENOENT;
127 } 129 }
128 130
129 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY); 131 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY);
130 in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); 132 in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key));
131 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); 133 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
132 if (err) 134 if (err)
133 return err; 135 return err;
@@ -139,7 +141,7 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr)
139} 141}
140EXPORT_SYMBOL(mlx5_core_destroy_mkey); 142EXPORT_SYMBOL(mlx5_core_destroy_mkey);
141 143
142int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, 144int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
143 struct mlx5_query_mkey_mbox_out *out, int outlen) 145 struct mlx5_query_mkey_mbox_out *out, int outlen)
144{ 146{
145 struct mlx5_query_mkey_mbox_in in; 147 struct mlx5_query_mkey_mbox_in in;
@@ -149,7 +151,7 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
149 memset(out, 0, outlen); 151 memset(out, 0, outlen);
150 152
151 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_MKEY); 153 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_MKEY);
152 in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); 154 in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key));
153 err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); 155 err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
154 if (err) 156 if (err)
155 return err; 157 return err;
@@ -161,7 +163,7 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
161} 163}
162EXPORT_SYMBOL(mlx5_core_query_mkey); 164EXPORT_SYMBOL(mlx5_core_query_mkey);
163 165
164int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, 166int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey,
165 u32 *mkey) 167 u32 *mkey)
166{ 168{
167 struct mlx5_query_special_ctxs_mbox_in in; 169 struct mlx5_query_special_ctxs_mbox_in in;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index a87e773e93f3..5635ce7ad693 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -324,6 +324,29 @@ int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev,
324} 324}
325EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap); 325EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap);
326 326
327int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
328 u8 port_num, void *out, size_t sz)
329{
330 u32 *in;
331 int err;
332
333 in = mlx5_vzalloc(sz);
334 if (!in) {
335 err = -ENOMEM;
336 return err;
337 }
338
339 MLX5_SET(ppcnt_reg, in, local_port, port_num);
340
341 MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP);
342 err = mlx5_core_access_reg(dev, in, sz, out,
343 sz, MLX5_REG_PPCNT, 0, 0);
344
345 kvfree(in);
346 return err;
347}
348EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt);
349
327int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause) 350int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause)
328{ 351{
329 u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; 352 u32 in[MLX5_ST_SZ_DW(pfcc_reg)];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index c7398b95aecd..90ab09e375b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -850,3 +850,43 @@ int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
850 return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); 850 return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
851} 851}
852EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce); 852EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
853
854int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
855 u8 port_num, void *out, size_t out_sz)
856{
857 int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in);
858 int is_group_manager;
859 void *in;
860 int err;
861
862 is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
863 in = mlx5_vzalloc(in_sz);
864 if (!in) {
865 err = -ENOMEM;
866 return err;
867 }
868
869 MLX5_SET(query_vport_counter_in, in, opcode,
870 MLX5_CMD_OP_QUERY_VPORT_COUNTER);
871 if (other_vport) {
872 if (is_group_manager) {
873 MLX5_SET(query_vport_counter_in, in, other_vport, 1);
874 MLX5_SET(query_vport_counter_in, in, vport_number, 0);
875 } else {
876 err = -EPERM;
877 goto free;
878 }
879 }
880 if (MLX5_CAP_GEN(dev, num_ports) == 2)
881 MLX5_SET(query_vport_counter_in, in, port_num, port_num);
882
883 err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz);
884 if (err)
885 goto free;
886 err = mlx5_cmd_status_to_err_v2(out);
887
888free:
889 kvfree(in);
890 return err;
891}
892EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index a0e8cc8dcc67..8541a913f6a3 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -219,6 +219,7 @@ enum {
219 MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31, 219 MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31,
220 MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, 220 MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32,
221 MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, 221 MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33,
222 MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34,
222}; 223};
223 224
224enum { 225enum {
@@ -1160,6 +1161,8 @@ enum mlx4_net_trans_promisc_mode {
1160 MLX4_FS_REGULAR = 1, 1161 MLX4_FS_REGULAR = 1,
1161 MLX4_FS_ALL_DEFAULT, 1162 MLX4_FS_ALL_DEFAULT,
1162 MLX4_FS_MC_DEFAULT, 1163 MLX4_FS_MC_DEFAULT,
1164 MLX4_FS_MIRROR_RX_PORT,
1165 MLX4_FS_MIRROR_SX_PORT,
1163 MLX4_FS_UC_SNIFFER, 1166 MLX4_FS_UC_SNIFFER,
1164 MLX4_FS_MC_SNIFFER, 1167 MLX4_FS_MC_SNIFFER,
1165 MLX4_FS_MODE_NUM, /* should be last */ 1168 MLX4_FS_MODE_NUM, /* should be last */
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 987764afa65c..9566b3b3b2c5 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -105,6 +105,29 @@ __mlx5_mask(typ, fld))
105 ___t; \ 105 ___t; \
106}) 106})
107 107
108/* Big endian getters */
109#define MLX5_GET64_BE(typ, p, fld) (*((__be64 *)(p) +\
110 __mlx5_64_off(typ, fld)))
111
112#define MLX5_GET_BE(type_t, typ, p, fld) ({ \
113 type_t tmp; \
114 switch (sizeof(tmp)) { \
115 case sizeof(u8): \
116 tmp = (__force type_t)MLX5_GET(typ, p, fld); \
117 break; \
118 case sizeof(u16): \
119 tmp = (__force type_t)cpu_to_be16(MLX5_GET(typ, p, fld)); \
120 break; \
121 case sizeof(u32): \
122 tmp = (__force type_t)cpu_to_be32(MLX5_GET(typ, p, fld)); \
123 break; \
124 case sizeof(u64): \
125 tmp = (__force type_t)MLX5_GET64_BE(typ, p, fld); \
126 break; \
127 } \
128 tmp; \
129 })
130
108enum { 131enum {
109 MLX5_MAX_COMMANDS = 32, 132 MLX5_MAX_COMMANDS = 32,
110 MLX5_CMD_DATA_BLOCK_SIZE = 512, 133 MLX5_CMD_DATA_BLOCK_SIZE = 512,
@@ -1284,7 +1307,8 @@ enum {
1284 MLX5_RFC_3635_COUNTERS_GROUP = 0x3, 1307 MLX5_RFC_3635_COUNTERS_GROUP = 0x3,
1285 MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP = 0x5, 1308 MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP = 0x5,
1286 MLX5_PER_PRIORITY_COUNTERS_GROUP = 0x10, 1309 MLX5_PER_PRIORITY_COUNTERS_GROUP = 0x10,
1287 MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11 1310 MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11,
1311 MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20,
1288}; 1312};
1289 1313
1290static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) 1314static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
@@ -1294,6 +1318,11 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
1294 return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; 1318 return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz;
1295} 1319}
1296 1320
1297#define MLX5_BY_PASS_NUM_PRIOS 9 1321#define MLX5_BY_PASS_NUM_REGULAR_PRIOS 8
1322#define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 8
1323#define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1
1324#define MLX5_BY_PASS_NUM_PRIOS (MLX5_BY_PASS_NUM_REGULAR_PRIOS +\
1325 MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS +\
1326 MLX5_BY_PASS_NUM_MULTICAST_PRIOS)
1298 1327
1299#endif /* MLX5_DEVICE_H */ 1328#endif /* MLX5_DEVICE_H */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 1e3006dcf35d..9108904a6a56 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -338,7 +338,7 @@ struct mlx5_core_sig_ctx {
338 u32 sigerr_count; 338 u32 sigerr_count;
339}; 339};
340 340
341struct mlx5_core_mr { 341struct mlx5_core_mkey {
342 u64 iova; 342 u64 iova;
343 u64 size; 343 u64 size;
344 u32 key; 344 u32 key;
@@ -426,7 +426,7 @@ struct mlx5_srq_table {
426 struct radix_tree_root tree; 426 struct radix_tree_root tree;
427}; 427};
428 428
429struct mlx5_mr_table { 429struct mlx5_mkey_table {
430 /* protect radix tree 430 /* protect radix tree
431 */ 431 */
432 rwlock_t lock; 432 rwlock_t lock;
@@ -484,9 +484,9 @@ struct mlx5_priv {
484 struct mlx5_cq_table cq_table; 484 struct mlx5_cq_table cq_table;
485 /* end: cq staff */ 485 /* end: cq staff */
486 486
487 /* start: mr staff */ 487 /* start: mkey staff */
488 struct mlx5_mr_table mr_table; 488 struct mlx5_mkey_table mkey_table;
489 /* end: mr staff */ 489 /* end: mkey staff */
490 490
491 /* start: alloc staff */ 491 /* start: alloc staff */
492 /* protect buffer alocation according to numa node */ 492 /* protect buffer alocation according to numa node */
@@ -739,16 +739,18 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
739 struct mlx5_query_srq_mbox_out *out); 739 struct mlx5_query_srq_mbox_out *out);
740int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, 740int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
741 u16 lwm, int is_srq); 741 u16 lwm, int is_srq);
742void mlx5_init_mr_table(struct mlx5_core_dev *dev); 742void mlx5_init_mkey_table(struct mlx5_core_dev *dev);
743void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev); 743void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev);
744int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, 744int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
745 struct mlx5_core_mkey *mkey,
745 struct mlx5_create_mkey_mbox_in *in, int inlen, 746 struct mlx5_create_mkey_mbox_in *in, int inlen,
746 mlx5_cmd_cbk_t callback, void *context, 747 mlx5_cmd_cbk_t callback, void *context,
747 struct mlx5_create_mkey_mbox_out *out); 748 struct mlx5_create_mkey_mbox_out *out);
748int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr); 749int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
749int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, 750 struct mlx5_core_mkey *mkey);
751int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
750 struct mlx5_query_mkey_mbox_out *out, int outlen); 752 struct mlx5_query_mkey_mbox_out *out, int outlen);
751int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, 753int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey,
752 u32 *mkey); 754 u32 *mkey);
753int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); 755int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
754int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); 756int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
@@ -847,6 +849,8 @@ int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num);
847void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); 849void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common);
848int mlx5_query_odp_caps(struct mlx5_core_dev *dev, 850int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
849 struct mlx5_odp_caps *odp_caps); 851 struct mlx5_odp_caps *odp_caps);
852int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
853 u8 port_num, void *out, size_t sz);
850 854
851static inline int fw_initializing(struct mlx5_core_dev *dev) 855static inline int fw_initializing(struct mlx5_core_dev *dev)
852{ 856{
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 8230caa3fb6e..8dec5508d93d 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -38,6 +38,10 @@
38 38
39#define MLX5_FS_DEFAULT_FLOW_TAG 0x0 39#define MLX5_FS_DEFAULT_FLOW_TAG 0x0
40 40
41enum {
42 MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16,
43};
44
41#define LEFTOVERS_RULE_NUM 2 45#define LEFTOVERS_RULE_NUM 2
42static inline void build_leftovers_ft_param(int *priority, 46static inline void build_leftovers_ft_param(int *priority,
43 int *n_ent, 47 int *n_ent,
@@ -52,6 +56,7 @@ enum mlx5_flow_namespace_type {
52 MLX5_FLOW_NAMESPACE_BYPASS, 56 MLX5_FLOW_NAMESPACE_BYPASS,
53 MLX5_FLOW_NAMESPACE_KERNEL, 57 MLX5_FLOW_NAMESPACE_KERNEL,
54 MLX5_FLOW_NAMESPACE_LEFTOVERS, 58 MLX5_FLOW_NAMESPACE_LEFTOVERS,
59 MLX5_FLOW_NAMESPACE_ANCHOR,
55 MLX5_FLOW_NAMESPACE_FDB, 60 MLX5_FLOW_NAMESPACE_FDB,
56}; 61};
57 62
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 58eef02edc7e..9b8a02b7880f 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -458,7 +458,8 @@ struct mlx5_ifc_ads_bits {
458}; 458};
459 459
460struct mlx5_ifc_flow_table_nic_cap_bits { 460struct mlx5_ifc_flow_table_nic_cap_bits {
461 u8 reserved_at_0[0x200]; 461 u8 nic_rx_multi_path_tirs[0x1];
462 u8 reserved_at_1[0x1ff];
462 463
463 struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; 464 struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive;
464 465
@@ -736,7 +737,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
736 u8 cqe_version[0x4]; 737 u8 cqe_version[0x4];
737 738
738 u8 compact_address_vector[0x1]; 739 u8 compact_address_vector[0x1];
739 u8 reserved_at_200[0xe]; 740 u8 reserved_at_200[0x3];
741 u8 ipoib_basic_offloads[0x1];
742 u8 reserved_at_204[0xa];
740 u8 drain_sigerr[0x1]; 743 u8 drain_sigerr[0x1];
741 u8 cmdif_checksum[0x2]; 744 u8 cmdif_checksum[0x2];
742 u8 sigerr_cqe[0x1]; 745 u8 sigerr_cqe[0x1];
@@ -767,10 +770,13 @@ struct mlx5_ifc_cmd_hca_cap_bits {
767 u8 cd[0x1]; 770 u8 cd[0x1];
768 u8 reserved_at_22c[0x1]; 771 u8 reserved_at_22c[0x1];
769 u8 apm[0x1]; 772 u8 apm[0x1];
770 u8 reserved_at_22e[0x7]; 773 u8 reserved_at_22e[0x2];
774 u8 imaicl[0x1];
775 u8 reserved_at_231[0x4];
771 u8 qkv[0x1]; 776 u8 qkv[0x1];
772 u8 pkv[0x1]; 777 u8 pkv[0x1];
773 u8 reserved_at_237[0x4]; 778 u8 set_deth_sqpn[0x1];
779 u8 reserved_at_239[0x3];
774 u8 xrc[0x1]; 780 u8 xrc[0x1];
775 u8 ud[0x1]; 781 u8 ud[0x1];
776 u8 uc[0x1]; 782 u8 uc[0x1];
@@ -1208,6 +1214,36 @@ struct mlx5_ifc_phys_layer_cntrs_bits {
1208 u8 reserved_at_640[0x180]; 1214 u8 reserved_at_640[0x180];
1209}; 1215};
1210 1216
1217struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits {
1218 u8 symbol_error_counter[0x10];
1219
1220 u8 link_error_recovery_counter[0x8];
1221
1222 u8 link_downed_counter[0x8];
1223
1224 u8 port_rcv_errors[0x10];
1225
1226 u8 port_rcv_remote_physical_errors[0x10];
1227
1228 u8 port_rcv_switch_relay_errors[0x10];
1229
1230 u8 port_xmit_discards[0x10];
1231
1232 u8 port_xmit_constraint_errors[0x8];
1233
1234 u8 port_rcv_constraint_errors[0x8];
1235
1236 u8 reserved_at_70[0x8];
1237
1238 u8 link_overrun_errors[0x8];
1239
1240 u8 reserved_at_80[0x10];
1241
1242 u8 vl_15_dropped[0x10];
1243
1244 u8 reserved_at_a0[0xa0];
1245};
1246
1211struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits { 1247struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits {
1212 u8 transmit_queue_high[0x20]; 1248 u8 transmit_queue_high[0x20];
1213 1249
@@ -1780,7 +1816,7 @@ struct mlx5_ifc_qpc_bits {
1780 u8 log_sq_size[0x4]; 1816 u8 log_sq_size[0x4];
1781 u8 reserved_at_55[0x6]; 1817 u8 reserved_at_55[0x6];
1782 u8 rlky[0x1]; 1818 u8 rlky[0x1];
1783 u8 reserved_at_5c[0x4]; 1819 u8 ulp_stateless_offload_mode[0x4];
1784 1820
1785 u8 counter_set_id[0x8]; 1821 u8 counter_set_id[0x8];
1786 u8 uar_page[0x18]; 1822 u8 uar_page[0x18];
@@ -2618,6 +2654,7 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits {
2618 struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout; 2654 struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout;
2619 struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout; 2655 struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout;
2620 struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout; 2656 struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout;
2657 struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout;
2621 struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; 2658 struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs;
2622 u8 reserved_at_0[0x7c0]; 2659 u8 reserved_at_0[0x7c0];
2623}; 2660};
@@ -3126,7 +3163,8 @@ struct mlx5_ifc_query_vport_counter_in_bits {
3126 u8 op_mod[0x10]; 3163 u8 op_mod[0x10];
3127 3164
3128 u8 other_vport[0x1]; 3165 u8 other_vport[0x1];
3129 u8 reserved_at_41[0xf]; 3166 u8 reserved_at_41[0xb];
3167 u8 port_num[0x4];
3130 u8 vport_number[0x10]; 3168 u8 vport_number[0x10];
3131 3169
3132 u8 reserved_at_60[0x60]; 3170 u8 reserved_at_60[0x60];
@@ -6956,6 +6994,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
6956 struct mlx5_ifc_peir_reg_bits peir_reg; 6994 struct mlx5_ifc_peir_reg_bits peir_reg;
6957 struct mlx5_ifc_pelc_reg_bits pelc_reg; 6995 struct mlx5_ifc_pelc_reg_bits pelc_reg;
6958 struct mlx5_ifc_pfcc_reg_bits pfcc_reg; 6996 struct mlx5_ifc_pfcc_reg_bits pfcc_reg;
6997 struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout;
6959 struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; 6998 struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs;
6960 struct mlx5_ifc_pifr_reg_bits pifr_reg; 6999 struct mlx5_ifc_pifr_reg_bits pifr_reg;
6961 struct mlx5_ifc_pipg_reg_bits pipg_reg; 7000 struct mlx5_ifc_pipg_reg_bits pipg_reg;
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 5b8c89ffaa58..cf031a3f16c5 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -499,7 +499,8 @@ struct mlx5_qp_context {
499 u8 reserved2[4]; 499 u8 reserved2[4];
500 __be32 next_send_psn; 500 __be32 next_send_psn;
501 __be32 cqn_send; 501 __be32 cqn_send;
502 u8 reserved3[8]; 502 __be32 deth_sqpn;
503 u8 reserved3[4];
503 __be32 last_acked_psn; 504 __be32 last_acked_psn;
504 __be32 ssn; 505 __be32 ssn;
505 __be32 params2; 506 __be32 params2;
@@ -621,9 +622,9 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u
621 return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); 622 return radix_tree_lookup(&dev->priv.qp_table.tree, qpn);
622} 623}
623 624
624static inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key) 625static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key)
625{ 626{
626 return radix_tree_lookup(&dev->priv.mr_table.tree, key); 627 return radix_tree_lookup(&dev->priv.mkey_table.tree, key);
627} 628}
628 629
629struct mlx5_page_fault_resume_mbox_in { 630struct mlx5_page_fault_resume_mbox_in {
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 123771003e68..a9f2bcc98cab 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -92,5 +92,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
92 92
93int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev); 93int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev);
94int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev); 94int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev);
95int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
96 u8 port_num, void *out, size_t out_sz);
95 97
96#endif /* __MLX5_VPORT_H__ */ 98#endif /* __MLX5_VPORT_H__ */
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 0ff049bd9ad4..37dd534cbeab 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -424,11 +424,11 @@ typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent,
424/** 424/**
425 * ib_mad_snoop_handler - Callback handler for snooping sent MADs. 425 * ib_mad_snoop_handler - Callback handler for snooping sent MADs.
426 * @mad_agent: MAD agent that snooped the MAD. 426 * @mad_agent: MAD agent that snooped the MAD.
427 * @send_wr: Work request information on the sent MAD. 427 * @send_buf: send MAD data buffer.
428 * @mad_send_wc: Work completion information on the sent MAD. Valid 428 * @mad_send_wc: Work completion information on the sent MAD. Valid
429 * only for snooping that occurs on a send completion. 429 * only for snooping that occurs on a send completion.
430 * 430 *
431 * Clients snooping MADs should not modify data referenced by the @send_wr 431 * Clients snooping MADs should not modify data referenced by the @send_buf
432 * or @mad_send_wc. 432 * or @mad_send_wc.
433 */ 433 */
434typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, 434typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 284b00c8fea4..3a03c1d18afa 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -212,6 +212,7 @@ enum ib_device_cap_flags {
212 IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), 212 IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29),
213 IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), 213 IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30),
214 IB_DEVICE_ON_DEMAND_PAGING = (1 << 31), 214 IB_DEVICE_ON_DEMAND_PAGING = (1 << 31),
215 IB_DEVICE_SG_GAPS_REG = (1ULL << 32),
215}; 216};
216 217
217enum ib_signature_prot_cap { 218enum ib_signature_prot_cap {
@@ -662,10 +663,15 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
662 * @IB_MR_TYPE_SIGNATURE: memory region that is used for 663 * @IB_MR_TYPE_SIGNATURE: memory region that is used for
663 * signature operations (data-integrity 664 * signature operations (data-integrity
664 * capable regions) 665 * capable regions)
666 * @IB_MR_TYPE_SG_GAPS: memory region that is capable to
667 * register any arbitrary sg lists (without
668 * the normal mr constraints - see
669 * ib_map_mr_sg)
665 */ 670 */
666enum ib_mr_type { 671enum ib_mr_type {
667 IB_MR_TYPE_MEM_REG, 672 IB_MR_TYPE_MEM_REG,
668 IB_MR_TYPE_SIGNATURE, 673 IB_MR_TYPE_SIGNATURE,
674 IB_MR_TYPE_SG_GAPS,
669}; 675};
670 676
671/** 677/**
@@ -1487,6 +1493,11 @@ enum ib_flow_domain {
1487 IB_FLOW_DOMAIN_NUM /* Must be last */ 1493 IB_FLOW_DOMAIN_NUM /* Must be last */
1488}; 1494};
1489 1495
1496enum ib_flow_flags {
1497 IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */
1498 IB_FLOW_ATTR_FLAGS_RESERVED = 1UL << 2 /* Must be last */
1499};
1500
1490struct ib_flow_eth_filter { 1501struct ib_flow_eth_filter {
1491 u8 dst_mac[6]; 1502 u8 dst_mac[6];
1492 u8 src_mac[6]; 1503 u8 src_mac[6];
@@ -1808,7 +1819,8 @@ struct ib_device {
1808 struct scatterlist *sg, 1819 struct scatterlist *sg,
1809 int sg_nents); 1820 int sg_nents);
1810 struct ib_mw * (*alloc_mw)(struct ib_pd *pd, 1821 struct ib_mw * (*alloc_mw)(struct ib_pd *pd,
1811 enum ib_mw_type type); 1822 enum ib_mw_type type,
1823 struct ib_udata *udata);
1812 int (*dealloc_mw)(struct ib_mw *mw); 1824 int (*dealloc_mw)(struct ib_mw *mw);
1813 struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, 1825 struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd,
1814 int mr_access_flags, 1826 int mr_access_flags,
@@ -1846,6 +1858,8 @@ struct ib_device {
1846 int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, 1858 int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
1847 struct ib_mr_status *mr_status); 1859 struct ib_mr_status *mr_status);
1848 void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); 1860 void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
1861 void (*drain_rq)(struct ib_qp *qp);
1862 void (*drain_sq)(struct ib_qp *qp);
1849 1863
1850 struct ib_dma_mapping_ops *dma_ops; 1864 struct ib_dma_mapping_ops *dma_ops;
1851 1865
@@ -3094,4 +3108,7 @@ int ib_sg_to_pages(struct ib_mr *mr,
3094 int sg_nents, 3108 int sg_nents,
3095 int (*set_page)(struct ib_mr *, u64)); 3109 int (*set_page)(struct ib_mr *, u64));
3096 3110
3111void ib_drain_rq(struct ib_qp *qp);
3112void ib_drain_sq(struct ib_qp *qp);
3113void ib_drain_qp(struct ib_qp *qp);
3097#endif /* IB_VERBS_H */ 3114#endif /* IB_VERBS_H */
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index 036bd2772662..6d0065c322b7 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -83,8 +83,10 @@ struct iw_cm_id {
83 iw_cm_handler cm_handler; /* client callback function */ 83 iw_cm_handler cm_handler; /* client callback function */
84 void *context; /* client cb context */ 84 void *context; /* client cb context */
85 struct ib_device *device; 85 struct ib_device *device;
86 struct sockaddr_storage local_addr; 86 struct sockaddr_storage local_addr; /* local addr */
87 struct sockaddr_storage remote_addr; 87 struct sockaddr_storage remote_addr;
88 struct sockaddr_storage m_local_addr; /* nmapped local addr */
89 struct sockaddr_storage m_remote_addr; /* nmapped rem addr */
88 void *provider_data; /* provider private data */ 90 void *provider_data; /* provider private data */
89 iw_event_handler event_handler; /* cb for provider 91 iw_event_handler event_handler; /* cb for provider
90 events */ 92 events */
@@ -92,6 +94,7 @@ struct iw_cm_id {
92 void (*add_ref)(struct iw_cm_id *); 94 void (*add_ref)(struct iw_cm_id *);
93 void (*rem_ref)(struct iw_cm_id *); 95 void (*rem_ref)(struct iw_cm_id *);
94 u8 tos; 96 u8 tos;
97 bool mapped;
95}; 98};
96 99
97struct iw_cm_conn_param { 100struct iw_cm_conn_param {
@@ -123,6 +126,7 @@ struct iw_cm_verbs {
123 int backlog); 126 int backlog);
124 127
125 int (*destroy_listen)(struct iw_cm_id *cm_id); 128 int (*destroy_listen)(struct iw_cm_id *cm_id);
129 char ifname[IFNAMSIZ];
126}; 130};
127 131
128/** 132/**
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index c19a5dc1531a..f7d7b6fec935 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -5,8 +5,8 @@
5 5
6enum { 6enum {
7 RDMA_NL_RDMA_CM = 1, 7 RDMA_NL_RDMA_CM = 1,
8 RDMA_NL_NES, 8 RDMA_NL_IWCM,
9 RDMA_NL_C4IW, 9 RDMA_NL_RSVD,
10 RDMA_NL_LS, /* RDMA Local Services */ 10 RDMA_NL_LS, /* RDMA Local Services */
11 RDMA_NL_NUM_CLIENTS 11 RDMA_NL_NUM_CLIENTS
12}; 12};
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 52b4a2f993f2..1852e383afd6 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -109,14 +109,13 @@ struct p9_trans_rdma {
109/** 109/**
110 * p9_rdma_context - Keeps track of in-process WR 110 * p9_rdma_context - Keeps track of in-process WR
111 * 111 *
112 * @wc_op: The original WR op for when the CQE completes in error.
113 * @busa: Bus address to unmap when the WR completes 112 * @busa: Bus address to unmap when the WR completes
114 * @req: Keeps track of requests (send) 113 * @req: Keeps track of requests (send)
115 * @rc: Keepts track of replies (receive) 114 * @rc: Keepts track of replies (receive)
116 */ 115 */
117struct p9_rdma_req; 116struct p9_rdma_req;
118struct p9_rdma_context { 117struct p9_rdma_context {
119 enum ib_wc_opcode wc_op; 118 struct ib_cqe cqe;
120 dma_addr_t busa; 119 dma_addr_t busa;
121 union { 120 union {
122 struct p9_req_t *req; 121 struct p9_req_t *req;
@@ -284,9 +283,12 @@ p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
284} 283}
285 284
286static void 285static void
287handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, 286recv_done(struct ib_cq *cq, struct ib_wc *wc)
288 struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
289{ 287{
288 struct p9_client *client = cq->cq_context;
289 struct p9_trans_rdma *rdma = client->trans;
290 struct p9_rdma_context *c =
291 container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
290 struct p9_req_t *req; 292 struct p9_req_t *req;
291 int err = 0; 293 int err = 0;
292 int16_t tag; 294 int16_t tag;
@@ -295,7 +297,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
295 ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, 297 ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
296 DMA_FROM_DEVICE); 298 DMA_FROM_DEVICE);
297 299
298 if (status != IB_WC_SUCCESS) 300 if (wc->status != IB_WC_SUCCESS)
299 goto err_out; 301 goto err_out;
300 302
301 err = p9_parse_header(c->rc, NULL, NULL, &tag, 1); 303 err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
@@ -316,21 +318,32 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
316 req->rc = c->rc; 318 req->rc = c->rc;
317 p9_client_cb(client, req, REQ_STATUS_RCVD); 319 p9_client_cb(client, req, REQ_STATUS_RCVD);
318 320
321 out:
322 up(&rdma->rq_sem);
323 kfree(c);
319 return; 324 return;
320 325
321 err_out: 326 err_out:
322 p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, status); 327 p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n",
328 req, err, wc->status);
323 rdma->state = P9_RDMA_FLUSHING; 329 rdma->state = P9_RDMA_FLUSHING;
324 client->status = Disconnected; 330 client->status = Disconnected;
331 goto out;
325} 332}
326 333
327static void 334static void
328handle_send(struct p9_client *client, struct p9_trans_rdma *rdma, 335send_done(struct ib_cq *cq, struct ib_wc *wc)
329 struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
330{ 336{
337 struct p9_client *client = cq->cq_context;
338 struct p9_trans_rdma *rdma = client->trans;
339 struct p9_rdma_context *c =
340 container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
341
331 ib_dma_unmap_single(rdma->cm_id->device, 342 ib_dma_unmap_single(rdma->cm_id->device,
332 c->busa, c->req->tc->size, 343 c->busa, c->req->tc->size,
333 DMA_TO_DEVICE); 344 DMA_TO_DEVICE);
345 up(&rdma->sq_sem);
346 kfree(c);
334} 347}
335 348
336static void qp_event_handler(struct ib_event *event, void *context) 349static void qp_event_handler(struct ib_event *event, void *context)
@@ -339,42 +352,6 @@ static void qp_event_handler(struct ib_event *event, void *context)
339 event->event, context); 352 event->event, context);
340} 353}
341 354
342static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
343{
344 struct p9_client *client = cq_context;
345 struct p9_trans_rdma *rdma = client->trans;
346 int ret;
347 struct ib_wc wc;
348
349 ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
350 while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
351 struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id;
352
353 switch (c->wc_op) {
354 case IB_WC_RECV:
355 handle_recv(client, rdma, c, wc.status, wc.byte_len);
356 up(&rdma->rq_sem);
357 break;
358
359 case IB_WC_SEND:
360 handle_send(client, rdma, c, wc.status, wc.byte_len);
361 up(&rdma->sq_sem);
362 break;
363
364 default:
365 pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n",
366 c->wc_op, wc.opcode, wc.status);
367 break;
368 }
369 kfree(c);
370 }
371}
372
373static void cq_event_handler(struct ib_event *e, void *v)
374{
375 p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v);
376}
377
378static void rdma_destroy_trans(struct p9_trans_rdma *rdma) 355static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
379{ 356{
380 if (!rdma) 357 if (!rdma)
@@ -387,7 +364,7 @@ static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
387 ib_dealloc_pd(rdma->pd); 364 ib_dealloc_pd(rdma->pd);
388 365
389 if (rdma->cq && !IS_ERR(rdma->cq)) 366 if (rdma->cq && !IS_ERR(rdma->cq))
390 ib_destroy_cq(rdma->cq); 367 ib_free_cq(rdma->cq);
391 368
392 if (rdma->cm_id && !IS_ERR(rdma->cm_id)) 369 if (rdma->cm_id && !IS_ERR(rdma->cm_id))
393 rdma_destroy_id(rdma->cm_id); 370 rdma_destroy_id(rdma->cm_id);
@@ -408,13 +385,14 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
408 if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) 385 if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
409 goto error; 386 goto error;
410 387
388 c->cqe.done = recv_done;
389
411 sge.addr = c->busa; 390 sge.addr = c->busa;
412 sge.length = client->msize; 391 sge.length = client->msize;
413 sge.lkey = rdma->pd->local_dma_lkey; 392 sge.lkey = rdma->pd->local_dma_lkey;
414 393
415 wr.next = NULL; 394 wr.next = NULL;
416 c->wc_op = IB_WC_RECV; 395 wr.wr_cqe = &c->cqe;
417 wr.wr_id = (unsigned long) c;
418 wr.sg_list = &sge; 396 wr.sg_list = &sge;
419 wr.num_sge = 1; 397 wr.num_sge = 1;
420 return ib_post_recv(rdma->qp, &wr, &bad_wr); 398 return ib_post_recv(rdma->qp, &wr, &bad_wr);
@@ -499,13 +477,14 @@ dont_need_post_recv:
499 goto send_error; 477 goto send_error;
500 } 478 }
501 479
480 c->cqe.done = send_done;
481
502 sge.addr = c->busa; 482 sge.addr = c->busa;
503 sge.length = c->req->tc->size; 483 sge.length = c->req->tc->size;
504 sge.lkey = rdma->pd->local_dma_lkey; 484 sge.lkey = rdma->pd->local_dma_lkey;
505 485
506 wr.next = NULL; 486 wr.next = NULL;
507 c->wc_op = IB_WC_SEND; 487 wr.wr_cqe = &c->cqe;
508 wr.wr_id = (unsigned long) c;
509 wr.opcode = IB_WR_SEND; 488 wr.opcode = IB_WR_SEND;
510 wr.send_flags = IB_SEND_SIGNALED; 489 wr.send_flags = IB_SEND_SIGNALED;
511 wr.sg_list = &sge; 490 wr.sg_list = &sge;
@@ -642,7 +621,6 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
642 struct p9_trans_rdma *rdma; 621 struct p9_trans_rdma *rdma;
643 struct rdma_conn_param conn_param; 622 struct rdma_conn_param conn_param;
644 struct ib_qp_init_attr qp_attr; 623 struct ib_qp_init_attr qp_attr;
645 struct ib_cq_init_attr cq_attr = {};
646 624
647 /* Parse the transport specific mount options */ 625 /* Parse the transport specific mount options */
648 err = parse_opts(args, &opts); 626 err = parse_opts(args, &opts);
@@ -695,13 +673,11 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
695 goto error; 673 goto error;
696 674
697 /* Create the Completion Queue */ 675 /* Create the Completion Queue */
698 cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1; 676 rdma->cq = ib_alloc_cq(rdma->cm_id->device, client,
699 rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, 677 opts.sq_depth + opts.rq_depth + 1,
700 cq_event_handler, client, 678 0, IB_POLL_SOFTIRQ);
701 &cq_attr);
702 if (IS_ERR(rdma->cq)) 679 if (IS_ERR(rdma->cq))
703 goto error; 680 goto error;
704 ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
705 681
706 /* Create the Protection Domain */ 682 /* Create the Protection Domain */
707 rdma->pd = ib_alloc_pd(rdma->cm_id->device); 683 rdma->pd = ib_alloc_pd(rdma->cm_id->device);