 Documentation/ABI/stable/sysfs-driver-ib_srp        |  13
 Documentation/ABI/stable/sysfs-transport-srp        |  39
 drivers/infiniband/Kconfig                          |  11
 drivers/infiniband/core/cm.c                        |   5
 drivers/infiniband/core/cma.c                       |  68
 drivers/infiniband/core/netlink.c                   |   2
 drivers/infiniband/core/sysfs.c                     |   1
 drivers/infiniband/core/ucma.c                      |   4
 drivers/infiniband/core/uverbs.h                    |  36
 drivers/infiniband/core/uverbs_cmd.c                | 109
 drivers/infiniband/core/uverbs_main.c               | 128
 drivers/infiniband/core/verbs.c                     |  17
 drivers/infiniband/hw/cxgb4/device.c                |   4
 drivers/infiniband/hw/ipath/ipath_user_sdma.c       |   7
 drivers/infiniband/hw/mlx4/cq.c                     |   9
 drivers/infiniband/hw/mlx4/main.c                   |   8
 drivers/infiniband/hw/mlx5/cq.c                     |  25
 drivers/infiniband/hw/mlx5/main.c                   |   3
 drivers/infiniband/hw/mlx5/mlx5_ib.h                |   6
 drivers/infiniband/hw/mlx5/mr.c                     | 167
 drivers/infiniband/hw/mlx5/qp.c                     |  21
 drivers/infiniband/hw/mlx5/srq.c                    |   8
 drivers/infiniband/hw/nes/nes_verbs.c               |   2
 drivers/infiniband/hw/ocrdma/ocrdma.h               |  53
 drivers/infiniband/hw/ocrdma/ocrdma_hw.c            |   2
 drivers/infiniband/hw/ocrdma/ocrdma_main.c          |   7
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c         |   6
 drivers/infiniband/hw/qib/qib_iba7322.c             |  11
 drivers/infiniband/hw/qib/qib_mad.h                 |  14
 drivers/infiniband/hw/qib/qib_user_sdma.c           |   6
 drivers/infiniband/hw/qib/qib_verbs.h               |  10
 drivers/infiniband/ulp/srp/ib_srp.c                 | 500
 drivers/infiniband/ulp/srp/ib_srp.h                 |  21
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c       | 106
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c   |   8
 drivers/net/ethernet/mellanox/mlx5/core/eq.c        |   2
 drivers/net/ethernet/mellanox/mlx5/core/main.c      |  35
 drivers/net/ethernet/mellanox/mlx5/core/mr.c        |  32
 drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 196
 drivers/scsi/scsi_transport_srp.c                   | 540
 include/linux/mlx5/device.h                         |  13
 include/linux/mlx5/driver.h                         |  18
 include/rdma/ib_verbs.h                             |  20
 include/scsi/scsi_transport_srp.h                   |  83
 include/uapi/rdma/ib_user_verbs.h                   |  95
 45 files changed, 1907 insertions(+), 564 deletions(-)
diff --git a/Documentation/ABI/stable/sysfs-driver-ib_srp b/Documentation/ABI/stable/sysfs-driver-ib_srp
index 5c53d28f775c..b9688de8455b 100644
--- a/Documentation/ABI/stable/sysfs-driver-ib_srp
+++ b/Documentation/ABI/stable/sysfs-driver-ib_srp
@@ -61,6 +61,12 @@ Description:	Interface for making ib_srp connect to a new target.
 		interrupt is handled by a different CPU then the comp_vector
 		parameter can be used to spread the SRP completion workload
 		over multiple CPU's.
+		* tl_retry_count, a number in the range 2..7 specifying the
+		  IB RC retry count.
+		* queue_size, the maximum number of commands that the
+		  initiator is allowed to queue per SCSI host. The default
+		  value for this parameter is 62. The lowest supported value
+		  is 2.
 
 What:		/sys/class/infiniband_srp/srp-<hca>-<port_number>/ibdev
 Date:		January 2, 2006
@@ -153,6 +159,13 @@ Contact:	linux-rdma@vger.kernel.org
 Description:	InfiniBand service ID used for establishing communication with
 		the SRP target.
 
+What:		/sys/class/scsi_host/host<n>/sgid
+Date:		February 1, 2014
+KernelVersion:	3.13
+Contact:	linux-rdma@vger.kernel.org
+Description:	InfiniBand GID of the source port used for communication with
+		the SRP target.
+
 What:		/sys/class/scsi_host/host<n>/zero_req_lim
 Date:		September 20, 2006
 KernelVersion:	2.6.18
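The two new login parameters are simply appended to the comma-separated string that is written to the add_target attribute described above. A minimal userspace sketch in C follows; the HCA name and the target identifiers in the login string are illustrative placeholders, not values taken from this patch.

/* Sketch: log in to an SRP target with the new tl_retry_count and
 * queue_size options appended to the usual login string.  The sysfs
 * path and all IDs below are placeholders. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/class/infiniband_srp/srp-mlx4_0-1/add_target";
	const char *login =
		"id_ext=0x200500A0B81146A1,ioc_guid=0x00051720001F1001,"
		"dgid=FE800000000000000005172000771031,pkey=ffff,"
		"service_id=0x0000494353535250,"
		"tl_retry_count=7,queue_size=128";   /* the two new options */
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, login, strlen(login)) < 0) {
		perror("add_target");
		return 1;
	}
	close(fd);
	return 0;
}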
diff --git a/Documentation/ABI/stable/sysfs-transport-srp b/Documentation/ABI/stable/sysfs-transport-srp
index b36fb0dc13c8..ec7af69fea0a 100644
--- a/Documentation/ABI/stable/sysfs-transport-srp
+++ b/Documentation/ABI/stable/sysfs-transport-srp
@@ -5,6 +5,24 @@ Contact:	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
 Description:	Instructs an SRP initiator to disconnect from a target and to
 		remove all LUNs imported from that target.
 
+What:		/sys/class/srp_remote_ports/port-<h>:<n>/dev_loss_tmo
+Date:		February 1, 2014
+KernelVersion:	3.13
+Contact:	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
+Description:	Number of seconds the SCSI layer will wait after a transport
+		layer error has been observed before removing a target port.
+		Zero means immediate removal. Setting this attribute to "off"
+		will disable the dev_loss timer.
+
+What:		/sys/class/srp_remote_ports/port-<h>:<n>/fast_io_fail_tmo
+Date:		February 1, 2014
+KernelVersion:	3.13
+Contact:	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
+Description:	Number of seconds the SCSI layer will wait after a transport
+		layer error has been observed before failing I/O. Zero means
+		failing I/O immediately. Setting this attribute to "off" will
+		disable the fast_io_fail timer.
+
 What:		/sys/class/srp_remote_ports/port-<h>:<n>/port_id
 Date:		June 27, 2007
 KernelVersion:	2.6.24
@@ -12,8 +30,29 @@ Contact:	linux-scsi@vger.kernel.org
 Description:	16-byte local SRP port identifier in hexadecimal format. An
 		example: 4c:49:4e:55:58:20:56:49:4f:00:00:00:00:00:00:00.
 
+What:		/sys/class/srp_remote_ports/port-<h>:<n>/reconnect_delay
+Date:		February 1, 2014
+KernelVersion:	3.13
+Contact:	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
+Description:	Number of seconds the SCSI layer will wait after a reconnect
+		attempt failed before retrying. Setting this attribute to
+		"off" will disable time-based reconnecting.
+
 What:		/sys/class/srp_remote_ports/port-<h>:<n>/roles
 Date:		June 27, 2007
 KernelVersion:	2.6.24
 Contact:	linux-scsi@vger.kernel.org
 Description:	Role of the remote port. Either "SRP Initiator" or "SRP Target".
+
+What:		/sys/class/srp_remote_ports/port-<h>:<n>/state
+Date:		February 1, 2014
+KernelVersion:	3.13
+Contact:	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
+Description:	State of the transport layer used for communication with the
+		remote port. "running" if the transport layer is operational;
+		"blocked" if a transport layer error has been encountered but
+		the fast_io_fail_tmo timer has not yet fired; "fail-fast"
+		after the fast_io_fail_tmo timer has fired and before the
+		"dev_loss_tmo" timer has fired; "lost" after the
+		"dev_loss_tmo" timer has fired and before the port is finally
+		removed.
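These new attributes are plain sysfs files, so they can be tuned directly from userspace once a remote port exists. A minimal sketch in C; the remote-port name "port-1:1" is a placeholder following the port-<h>:<n> pattern above.

/* Sketch: set the new SRP transport timeouts for one remote port. */
#include <stdio.h>

static int srp_set(const char *attr, const char *val)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/class/srp_remote_ports/port-1:1/%s", attr);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%s\n", val);
	return fclose(f);
}

int main(void)
{
	srp_set("fast_io_fail_tmo", "15");  /* fail I/O after 15 seconds    */
	srp_set("dev_loss_tmo", "600");     /* remove the port after 600 s  */
	srp_set("reconnect_delay", "10");   /* retry a reconnect every 10 s */
	/* srp_set("dev_loss_tmo", "off") would disable the dev_loss timer. */
	return 0;
}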
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index b84791f03a27..5ceda710f516 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -31,17 +31,6 @@ config INFINIBAND_USER_ACCESS
 	  libibverbs, libibcm and a hardware driver library from
 	  <http://www.openfabrics.org/git/>.
 
-config INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-	bool "Experimental and unstable ABI for userspace access to flow steering verbs"
-	depends on INFINIBAND_USER_ACCESS
-	depends on STAGING
-	---help---
-	  The final ABI for userspace access to flow steering verbs
-	  has not been defined. To use the current ABI, *WHICH WILL
-	  CHANGE IN THE FUTURE*, say Y here.
-
-	  If unsure, say N.
-
 config INFINIBAND_USER_MEM
 	bool
 	depends on INFINIBAND_USER_ACCESS != n
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 784b97cb05b0..f2ef7ef0f36f 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -383,14 +383,11 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 {
 	unsigned long flags;
 	int id;
-	static int next_id;
 
 	idr_preload(GFP_KERNEL);
 	spin_lock_irqsave(&cm.lock, flags);
 
-	id = idr_alloc(&cm.local_id_table, cm_id_priv, next_id, 0, GFP_NOWAIT);
-	if (id >= 0)
-		next_id = max(id + 1, 0);
+	id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);
 
 	spin_unlock_irqrestore(&cm.lock, flags);
 	idr_preload_end();
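The idr_alloc_cyclic() call absorbs the open-coded "next_id" cursor: the IDR itself remembers where the previous allocation ended and keeps handing out increasing IDs before wrapping, which avoids immediately reusing a just-freed ID. A minimal sketch of the calling pattern, assuming a caller-provided lock (the "example_*" names are hypothetical):

/* Sketch of the idr_alloc_cyclic() pattern used above. */
#include <linux/idr.h>
#include <linux/spinlock.h>

static DEFINE_IDR(example_idr);
static DEFINE_SPINLOCK(example_lock);

static int example_alloc_id(void *ptr)
{
	int id;

	idr_preload(GFP_KERNEL);		/* preallocate outside the lock */
	spin_lock(&example_lock);
	/* start = 0, end = 0: any non-negative ID, handed out cyclically */
	id = idr_alloc_cyclic(&example_idr, ptr, 0, 0, GFP_NOWAIT);
	spin_unlock(&example_lock);
	idr_preload_end();

	return id;	/* >= 0 on success, negative errno on failure */
}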
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index dab4b41f1715..830c983fdeff 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -328,28 +328,6 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
 	return ret;
 }
 
-static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
-{
-	int i;
-	int err;
-	struct ib_port_attr props;
-	union ib_gid tmp;
-
-	err = ib_query_port(device, port_num, &props);
-	if (err)
-		return err;
-
-	for (i = 0; i < props.gid_tbl_len; ++i) {
-		err = ib_query_gid(device, port_num, i, &tmp);
-		if (err)
-			return err;
-		if (!memcmp(&tmp, gid, sizeof tmp))
-			return 0;
-	}
-
-	return -EADDRNOTAVAIL;
-}
-
 static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
 {
 	dev_addr->dev_type = ARPHRD_INFINIBAND;
@@ -371,13 +349,14 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
 	return ret;
 }
 
-static int cma_acquire_dev(struct rdma_id_private *id_priv)
+static int cma_acquire_dev(struct rdma_id_private *id_priv,
+			   struct rdma_id_private *listen_id_priv)
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	struct cma_device *cma_dev;
 	union ib_gid gid, iboe_gid;
 	int ret = -ENODEV;
-	u8 port;
+	u8 port, found_port;
 	enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
 		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
 
@@ -389,17 +368,39 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
 	iboe_addr_get_sgid(dev_addr, &iboe_gid);
 	memcpy(&gid, dev_addr->src_dev_addr +
 	       rdma_addr_gid_offset(dev_addr), sizeof gid);
+	if (listen_id_priv &&
+	    rdma_port_get_link_layer(listen_id_priv->id.device,
+				     listen_id_priv->id.port_num) == dev_ll) {
+		cma_dev = listen_id_priv->cma_dev;
+		port = listen_id_priv->id.port_num;
+		if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
+		    rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
+			ret = ib_find_cached_gid(cma_dev->device, &iboe_gid,
+						 &found_port, NULL);
+		else
+			ret = ib_find_cached_gid(cma_dev->device, &gid,
+						 &found_port, NULL);
+
+		if (!ret && (port == found_port)) {
+			id_priv->id.port_num = found_port;
+			goto out;
+		}
+	}
 	list_for_each_entry(cma_dev, &dev_list, list) {
 		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
+			if (listen_id_priv &&
+			    listen_id_priv->cma_dev == cma_dev &&
+			    listen_id_priv->id.port_num == port)
+				continue;
 			if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
 				if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
 				    rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
-					ret = find_gid_port(cma_dev->device, &iboe_gid, port);
+					ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL);
 				else
-					ret = find_gid_port(cma_dev->device, &gid, port);
+					ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL);
 
-				if (!ret) {
-					id_priv->id.port_num = port;
+				if (!ret && (port == found_port)) {
+					id_priv->id.port_num = found_port;
 					goto out;
 				}
 			}
@@ -1292,7 +1293,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	}
 
 	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
-	ret = cma_acquire_dev(conn_id);
+	ret = cma_acquire_dev(conn_id, listen_id);
 	if (ret)
 		goto err2;
 
@@ -1451,7 +1452,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 {
 	struct rdma_cm_id *new_cm_id;
 	struct rdma_id_private *listen_id, *conn_id;
-	struct net_device *dev = NULL;
 	struct rdma_cm_event event;
 	int ret;
 	struct ib_device_attr attr;
@@ -1481,7 +1481,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 		goto out;
 	}
 
-	ret = cma_acquire_dev(conn_id);
+	ret = cma_acquire_dev(conn_id, listen_id);
 	if (ret) {
 		mutex_unlock(&conn_id->handler_mutex);
 		rdma_destroy_id(new_cm_id);
@@ -1529,8 +1529,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	cma_deref_id(conn_id);
 
 out:
-	if (dev)
-		dev_put(dev);
 	mutex_unlock(&listen_id->handler_mutex);
 	return ret;
 }
@@ -2050,7 +2048,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 		goto out;
 
 	if (!status && !id_priv->cma_dev)
-		status = cma_acquire_dev(id_priv);
+		status = cma_acquire_dev(id_priv, NULL);
 
 	if (status) {
 		if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
@@ -2547,7 +2545,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 		if (ret)
 			goto err1;
 
-		ret = cma_acquire_dev(id_priv);
+		ret = cma_acquire_dev(id_priv, NULL);
 		if (ret)
 			goto err1;
 	}
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index da06abde9e0d..a1e9cba84944 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -148,7 +148,7 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	list_for_each_entry(client, &client_list, list) {
 		if (client->index == index) {
 			if (op < 0 || op >= client->nops ||
-			    !client->cb_table[RDMA_NL_GET_OP(op)].dump)
+			    !client->cb_table[op].dump)
 				return -EINVAL;
 
 			{
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index cde1e7b5b85d..faad2caf22b1 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -612,6 +612,7 @@ static ssize_t show_node_type(struct device *device,
 	switch (dev->node_type) {
 	case RDMA_NODE_IB_CA:	  return sprintf(buf, "%d: CA\n", dev->node_type);
 	case RDMA_NODE_RNIC:	  return sprintf(buf, "%d: RNIC\n", dev->node_type);
+	case RDMA_NODE_USNIC:	  return sprintf(buf, "%d: usNIC\n", dev->node_type);
 	case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
 	case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
 	default:		  return sprintf(buf, "%d: <unknown>\n", dev->node_type);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index b0f189be543b..ab8b1c30b36b 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -57,7 +57,7 @@ MODULE_LICENSE("Dual BSD/GPL");
 static unsigned int max_backlog = 1024;
 
 static struct ctl_table_header *ucma_ctl_table_hdr;
-static ctl_table ucma_ctl_table[] = {
+static struct ctl_table ucma_ctl_table[] = {
 	{
 		.procname	= "max_backlog",
 		.data		= &max_backlog,
@@ -271,7 +271,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
 			goto out;
 		}
 		ctx->backlog--;
-	} else if (!ctx->uid) {
+	} else if (!ctx->uid || ctx->cm_id != cm_id) {
 		/*
 		 * We ignore events for new connections until userspace has set
 		 * their context. This can only happen if an error occurs on a
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index d8f9c6c272d7..bdc842e9faef 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -47,6 +47,14 @@
 #include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
 
+#define INIT_UDATA(udata, ibuf, obuf, ilen, olen)			\
+	do {								\
+		(udata)->inbuf  = (void __user *) (ibuf);		\
+		(udata)->outbuf = (void __user *) (obuf);		\
+		(udata)->inlen  = (ilen);				\
+		(udata)->outlen = (olen);				\
+	} while (0)
+
 /*
  * Our lifetime rules for these structs are the following:
  *
@@ -178,6 +186,22 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
 			     struct ib_event *event);
 void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
 
+struct ib_uverbs_flow_spec {
+	union {
+		union {
+			struct ib_uverbs_flow_spec_hdr hdr;
+			struct {
+				__u32 type;
+				__u16 size;
+				__u16 reserved;
+			};
+		};
+		struct ib_uverbs_flow_spec_eth     eth;
+		struct ib_uverbs_flow_spec_ipv4    ipv4;
+		struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
+	};
+};
+
 #define IB_UVERBS_DECLARE_CMD(name)					\
 	ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,		\
 				 const char __user *buf, int in_len,	\
@@ -217,9 +241,13 @@ IB_UVERBS_DECLARE_CMD(destroy_srq);
 IB_UVERBS_DECLARE_CMD(create_xsrq);
 IB_UVERBS_DECLARE_CMD(open_xrcd);
 IB_UVERBS_DECLARE_CMD(close_xrcd);
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-IB_UVERBS_DECLARE_CMD(create_flow);
-IB_UVERBS_DECLARE_CMD(destroy_flow);
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+
+#define IB_UVERBS_DECLARE_EX_CMD(name)				\
+	int ib_uverbs_ex_##name(struct ib_uverbs_file *file,	\
+				struct ib_udata *ucore,		\
+				struct ib_udata *uhw)
+
+IB_UVERBS_DECLARE_EX_CMD(create_flow);
+IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
 
 #endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 2f0f01b70e3b..65f6e7dc380c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -54,17 +54,7 @@ static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
 static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
 static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
 static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
 static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
-
-#define INIT_UDATA(udata, ibuf, obuf, ilen, olen)			\
-	do {								\
-		(udata)->inbuf  = (void __user *) (ibuf);		\
-		(udata)->outbuf = (void __user *) (obuf);		\
-		(udata)->inlen  = (ilen);				\
-		(udata)->outlen = (olen);				\
-	} while (0)
 
 /*
  * The ib_uobject locking scheme is as follows:
@@ -939,13 +929,9 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 	if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
 		return -EINVAL;
 
-	/*
-	 * Local write permission is required if remote write or
-	 * remote atomic permission is also requested.
-	 */
-	if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
-	    !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
-		return -EINVAL;
+	ret = ib_check_mr_access(cmd.access_flags);
+	if (ret)
+		return ret;
 
 	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
 	if (!uobj)
@@ -2128,6 +2114,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 			}
 			next->wr.ud.remote_qpn  = user_wr->wr.ud.remote_qpn;
 			next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
+			if (next->opcode == IB_WR_SEND_WITH_IMM)
+				next->ex.imm_data =
+					(__be32 __force) user_wr->ex.imm_data;
 		} else {
 			switch (next->opcode) {
 			case IB_WR_RDMA_WRITE_WITH_IMM:
@@ -2601,8 +2590,7 @@ out_put:
 	return ret ? ret : in_len;
 }
 
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
+static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
 				union ib_flow_spec *ib_spec)
 {
 	ib_spec->type = kern_spec->type;
@@ -2642,28 +2630,31 @@ static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
 	return 0;
 }
 
-ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
-			      const char __user *buf, int in_len,
-			      int out_len)
+int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
+			     struct ib_udata *ucore,
+			     struct ib_udata *uhw)
 {
 	struct ib_uverbs_create_flow	  cmd;
 	struct ib_uverbs_create_flow_resp resp;
 	struct ib_uobject		  *uobj;
 	struct ib_flow			  *flow_id;
-	struct ib_kern_flow_attr	  *kern_flow_attr;
+	struct ib_uverbs_flow_attr	  *kern_flow_attr;
 	struct ib_flow_attr		  *flow_attr;
 	struct ib_qp			  *qp;
 	int err = 0;
 	void *kern_spec;
 	void *ib_spec;
 	int i;
-	int kern_attr_size;
 
-	if (out_len < sizeof(resp))
+	if (ucore->outlen < sizeof(resp))
 		return -ENOSPC;
 
-	if (copy_from_user(&cmd, buf, sizeof(cmd)))
-		return -EFAULT;
+	err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+	if (err)
+		return err;
+
+	ucore->inbuf += sizeof(cmd);
+	ucore->inlen -= sizeof(cmd);
 
 	if (cmd.comp_mask)
 		return -EINVAL;
@@ -2672,32 +2663,27 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
 	    !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
 		return -EPERM;
 
-	if (cmd.flow_attr.num_of_specs < 0 ||
-	    cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
+	if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
 		return -EINVAL;
 
-	kern_attr_size = cmd.flow_attr.size - sizeof(cmd) -
-			 sizeof(struct ib_uverbs_cmd_hdr_ex);
-
-	if (cmd.flow_attr.size < 0 || cmd.flow_attr.size > in_len ||
-	    kern_attr_size < 0 || kern_attr_size >
-	    (cmd.flow_attr.num_of_specs * sizeof(struct ib_kern_spec)))
+	if (cmd.flow_attr.size > ucore->inlen ||
+	    cmd.flow_attr.size >
+	    (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
 		return -EINVAL;
 
 	if (cmd.flow_attr.num_of_specs) {
-		kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+		kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size,
+					 GFP_KERNEL);
 		if (!kern_flow_attr)
 			return -ENOMEM;
 
 		memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
-		if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd),
-				   kern_attr_size)) {
-			err = -EFAULT;
+		err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
+					 cmd.flow_attr.size);
+		if (err)
 			goto err_free_attr;
-		}
 	} else {
 		kern_flow_attr = &cmd.flow_attr;
-		kern_attr_size = sizeof(cmd.flow_attr);
 	}
 
 	uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
@@ -2714,7 +2700,7 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
 		goto err_uobj;
 	}
 
-	flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+	flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL);
 	if (!flow_attr) {
 		err = -ENOMEM;
 		goto err_put;
@@ -2729,19 +2715,22 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
 
 	kern_spec = kern_flow_attr + 1;
 	ib_spec = flow_attr + 1;
-	for (i = 0; i < flow_attr->num_of_specs && kern_attr_size > 0; i++) {
+	for (i = 0; i < flow_attr->num_of_specs &&
+	      cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
+	      cmd.flow_attr.size >=
+	      ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
 		err = kern_spec_to_ib_spec(kern_spec, ib_spec);
 		if (err)
 			goto err_free;
 		flow_attr->size +=
 			((union ib_flow_spec *) ib_spec)->size;
-		kern_attr_size -= ((struct ib_kern_spec *) kern_spec)->size;
-		kern_spec += ((struct ib_kern_spec *) kern_spec)->size;
+		cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
+		kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size;
 		ib_spec += ((union ib_flow_spec *) ib_spec)->size;
 	}
-	if (kern_attr_size) {
-		pr_warn("create flow failed, %d bytes left from uverb cmd\n",
-			kern_attr_size);
+	if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
+		pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n",
+			i, cmd.flow_attr.size);
 		goto err_free;
 	}
 	flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
@@ -2760,11 +2749,10 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
 	memset(&resp, 0, sizeof(resp));
 	resp.flow_handle = uobj->id;
 
-	if (copy_to_user((void __user *)(unsigned long) cmd.response,
-			 &resp, sizeof(resp))) {
-		err = -EFAULT;
+	err = ib_copy_to_udata(ucore,
+			       &resp, sizeof(resp));
+	if (err)
 		goto err_copy;
-	}
 
 	put_qp_read(qp);
 	mutex_lock(&file->mutex);
@@ -2777,7 +2765,7 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
 	kfree(flow_attr);
 	if (cmd.flow_attr.num_of_specs)
 		kfree(kern_flow_attr);
-	return in_len;
+	return 0;
 err_copy:
 	idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
 destroy_flow:
@@ -2794,16 +2782,18 @@ err_free_attr:
 	return err;
 }
 
-ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file,
-			       const char __user *buf, int in_len,
-			       int out_len) {
+int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
+			      struct ib_udata *ucore,
+			      struct ib_udata *uhw)
+{
 	struct ib_uverbs_destroy_flow	cmd;
 	struct ib_flow			*flow_id;
 	struct ib_uobject		*uobj;
 	int				ret;
 
-	if (copy_from_user(&cmd, buf, sizeof(cmd)))
-		return -EFAULT;
+	ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+	if (ret)
+		return ret;
 
 	uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
 			      file->ucontext);
@@ -2825,9 +2815,8 @@ ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file,
 
 	put_uobj(uobj);
 
-	return ret ? ret : in_len;
+	return ret;
 }
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
 
 static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 				struct ib_uverbs_create_xsrq *cmd,
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 2df31f68ea09..34386943ebcf 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -115,10 +115,13 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
 	[IB_USER_VERBS_CMD_CLOSE_XRCD]		= ib_uverbs_close_xrcd,
 	[IB_USER_VERBS_CMD_CREATE_XSRQ]		= ib_uverbs_create_xsrq,
 	[IB_USER_VERBS_CMD_OPEN_QP]		= ib_uverbs_open_qp,
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-	[IB_USER_VERBS_CMD_CREATE_FLOW]		= ib_uverbs_create_flow,
-	[IB_USER_VERBS_CMD_DESTROY_FLOW]	= ib_uverbs_destroy_flow
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+};
+
+static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
+				    struct ib_udata *ucore,
+				    struct ib_udata *uhw) = {
+	[IB_USER_VERBS_EX_CMD_CREATE_FLOW]	= ib_uverbs_ex_create_flow,
+	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow
 };
 
 static void ib_uverbs_add_one(struct ib_device *device);
@@ -589,6 +592,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 {
 	struct ib_uverbs_file *file = filp->private_data;
 	struct ib_uverbs_cmd_hdr hdr;
+	__u32 flags;
 
 	if (count < sizeof hdr)
 		return -EINVAL;
@@ -596,45 +600,105 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 	if (copy_from_user(&hdr, buf, sizeof hdr))
 		return -EFAULT;
 
-	if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
-	    !uverbs_cmd_table[hdr.command])
-		return -EINVAL;
+	flags = (hdr.command &
+		 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
 
-	if (!file->ucontext &&
-	    hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
-		return -EINVAL;
+	if (!flags) {
+		__u32 command;
 
-	if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
-		return -ENOSYS;
+		if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
+					   IB_USER_VERBS_CMD_COMMAND_MASK))
+			return -EINVAL;
 
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-	if (hdr.command >= IB_USER_VERBS_CMD_THRESHOLD) {
-		struct ib_uverbs_cmd_hdr_ex hdr_ex;
-
-		if (copy_from_user(&hdr_ex, buf, sizeof(hdr_ex)))
-			return -EFAULT;
+		command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+
+		if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
+		    !uverbs_cmd_table[command])
+			return -EINVAL;
 
-		if (((hdr_ex.in_words + hdr_ex.provider_in_words) * 4) != count)
+		if (!file->ucontext &&
+		    command != IB_USER_VERBS_CMD_GET_CONTEXT)
 			return -EINVAL;
 
-		return uverbs_cmd_table[hdr.command](file,
-						     buf + sizeof(hdr_ex),
-						     (hdr_ex.in_words +
-						      hdr_ex.provider_in_words) * 4,
-						     (hdr_ex.out_words +
-						      hdr_ex.provider_out_words) * 4);
-	} else {
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+		if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << command)))
+			return -ENOSYS;
+
 		if (hdr.in_words * 4 != count)
 			return -EINVAL;
 
-		return uverbs_cmd_table[hdr.command](file,
+		return uverbs_cmd_table[command](file,
 						 buf + sizeof(hdr),
 						 hdr.in_words * 4,
 						 hdr.out_words * 4);
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
+
+	} else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
+		__u32 command;
+
+		struct ib_uverbs_ex_cmd_hdr ex_hdr;
+		struct ib_udata ucore;
+		struct ib_udata uhw;
+		int err;
+		size_t written_count = count;
+
+		if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
+					   IB_USER_VERBS_CMD_COMMAND_MASK))
+			return -EINVAL;
+
+		command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+
+		if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
+		    !uverbs_ex_cmd_table[command])
+			return -ENOSYS;
+
+		if (!file->ucontext)
+			return -EINVAL;
+
+		if (!(file->device->ib_dev->uverbs_ex_cmd_mask & (1ull << command)))
+			return -ENOSYS;
+
+		if (count < (sizeof(hdr) + sizeof(ex_hdr)))
+			return -EINVAL;
+
+		if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
+			return -EFAULT;
+
+		count -= sizeof(hdr) + sizeof(ex_hdr);
+		buf += sizeof(hdr) + sizeof(ex_hdr);
+
+		if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
+			return -EINVAL;
+
+		if (ex_hdr.response) {
+			if (!hdr.out_words && !ex_hdr.provider_out_words)
+				return -EINVAL;
+		} else {
+			if (hdr.out_words || ex_hdr.provider_out_words)
+				return -EINVAL;
+		}
+
+		INIT_UDATA(&ucore,
+			   (hdr.in_words) ? buf : 0,
+			   (unsigned long)ex_hdr.response,
+			   hdr.in_words * 8,
+			   hdr.out_words * 8);
+
+		INIT_UDATA(&uhw,
+			   (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0,
+			   (ex_hdr.provider_out_words) ? (unsigned long)ex_hdr.response + ucore.outlen : 0,
+			   ex_hdr.provider_in_words * 8,
+			   ex_hdr.provider_out_words * 8);
+
+		err = uverbs_ex_cmd_table[command](file,
+						   &ucore,
+						   &uhw);
+
+		if (err)
+			return err;
+
+		return written_count;
 	}
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+
+	return -ENOSYS;
 }
 
 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
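For an extended command the caller therefore writes a legacy header, then an extended header, then the payload, with all word counts expressed in 8-byte units and the response buffer passed through the extended header rather than inside the payload. A userspace sketch of that layout follows; the exact field names and flag constants live in include/uapi/rdma/ib_user_verbs.h as changed by this series (not shown in this excerpt), so treat them as an assumption here and verify against the header.

/* Sketch of the wire layout consumed by ib_uverbs_write() above when
 * flags == IB_USER_VERBS_CMD_FLAG_EXTENDED. */
#include <stdint.h>

struct uverbs_ex_request {
	struct {
		uint32_t command;	/* IB_USER_VERBS_EX_CMD_* plus the
					 * extended flag in the top bits */
		uint16_t in_words;	/* payload size in 8-byte units   */
		uint16_t out_words;	/* response size in 8-byte units  */
	} hdr;
	struct {
		uint64_t response;		/* user pointer for the response,
						 * required iff any out words != 0 */
		uint16_t provider_in_words;	/* driver-private input, 8-byte units  */
		uint16_t provider_out_words;	/* driver-private output, 8-byte units */
		uint32_t cmd_hdr_reserved;
	} ex_hdr;
	/* core payload (ucore) follows, then the provider payload (uhw) */
};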
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index a321df28bab2..d4f6ddf72ffa 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -114,6 +114,8 @@ rdma_node_get_transport(enum rdma_node_type node_type)
 		return RDMA_TRANSPORT_IB;
 	case RDMA_NODE_RNIC:
 		return RDMA_TRANSPORT_IWARP;
+	case RDMA_NODE_USNIC:
+		return RDMA_TRANSPORT_USNIC;
 	default:
 		BUG();
 		return 0;
@@ -130,6 +132,7 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_
 	case RDMA_TRANSPORT_IB:
 		return IB_LINK_LAYER_INFINIBAND;
 	case RDMA_TRANSPORT_IWARP:
+	case RDMA_TRANSPORT_USNIC:
 		return IB_LINK_LAYER_ETHERNET;
 	default:
 		return IB_LINK_LAYER_UNSPECIFIED;
@@ -958,6 +961,11 @@ EXPORT_SYMBOL(ib_resize_cq);
 struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
 {
 	struct ib_mr *mr;
+	int err;
+
+	err = ib_check_mr_access(mr_access_flags);
+	if (err)
+		return ERR_PTR(err);
 
 	mr = pd->device->get_dma_mr(pd, mr_access_flags);
 
@@ -980,6 +988,11 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
 			     u64 *iova_start)
 {
 	struct ib_mr *mr;
+	int err;
+
+	err = ib_check_mr_access(mr_access_flags);
+	if (err)
+		return ERR_PTR(err);
 
 	if (!pd->device->reg_phys_mr)
 		return ERR_PTR(-ENOSYS);
@@ -1010,6 +1023,10 @@ int ib_rereg_phys_mr(struct ib_mr *mr,
 	struct ib_pd *old_pd;
 	int ret;
 
+	ret = ib_check_mr_access(mr_access_flags);
+	if (ret)
+		return ret;
+
 	if (!mr->device->rereg_phys_mr)
 		return -ENOSYS;
 
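The semantics of ib_check_mr_access() follow directly from the open-coded test that ib_uverbs_reg_mr() used to perform (removed earlier in this series): remote write or remote atomic access is only valid together with local write. The helper itself is added to include/rdma/ib_verbs.h, which appears in the diffstat but not in this excerpt, so the following is a sketch of what it presumably looks like rather than a quote of the patch:

static inline int ib_check_mr_access(int flags)
{
	/*
	 * Local write permission is required if remote write or
	 * remote atomic permission is also requested.
	 */
	if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
	    !(flags & IB_ACCESS_LOCAL_WRITE))
		return -EINVAL;

	return 0;
}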
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 33d2cc6ab562..4a033853312e 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -602,10 +602,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 	     rdev->lldi.vr->qp.size,
 	     rdev->lldi.vr->cq.start,
 	     rdev->lldi.vr->cq.size);
-	PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu "
+	PDBG("udb len 0x%x udb base %llx db_reg %p gts_reg %p qpshift %lu "
 	     "qpmask 0x%x cqshift %lu cqmask 0x%x\n",
 	     (unsigned)pci_resource_len(rdev->lldi.pdev, 2),
-	     (void *)(unsigned long)pci_resource_start(rdev->lldi.pdev, 2),
+	     (u64)pci_resource_start(rdev->lldi.pdev, 2),
 	     rdev->lldi.db_reg,
 	     rdev->lldi.gts_reg,
 	     rdev->qpshift, rdev->qpmask,
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
index f5cb13b21445..cc04b7ba3488 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
@@ -280,9 +280,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
 	int j;
 	int ret;
 
-	ret = get_user_pages(current, current->mm, addr,
-			     npages, 0, 1, pages, NULL);
-
+	ret = get_user_pages_fast(addr, npages, 0, pages);
 	if (ret != npages) {
 		int i;
 
@@ -811,10 +809,7 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd,
 	while (dim) {
 		const int mxp = 8;
 
-		down_write(&current->mm->mmap_sem);
 		ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
-		up_write(&current->mm->mmap_sem);
-
 		if (ret <= 0)
 			goto done_unlock;
 		else {
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index d5e60f44ba5a..66dbf8062374 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -324,7 +324,7 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
 	u32 i;
 
 	i = cq->mcq.cons_index;
-	while (get_sw_cqe(cq, i & cq->ibcq.cqe))
+	while (get_sw_cqe(cq, i))
 		++i;
 
 	return i - cq->mcq.cons_index;
@@ -365,7 +365,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 
 	mutex_lock(&cq->resize_mutex);
 
-	if (entries < 1 || entries > dev->dev->caps.max_cqes) {
+	if (entries < 1) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -376,6 +376,11 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 		goto out;
 	}
 
+	if (entries > dev->dev->caps.max_cqes) {
+		err = -EINVAL;
+		goto out;
+	}
+
 	if (ibcq->uobject) {
 		err = mlx4_alloc_resize_umem(dev, cq, entries, udata);
 		if (err)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index f0612645de99..1aad9b3e6bdd 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1691,11 +1691,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 		ibdev->ib_dev.create_flow	= mlx4_ib_create_flow;
 		ibdev->ib_dev.destroy_flow	= mlx4_ib_destroy_flow;
 
-#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-		ibdev->ib_dev.uverbs_cmd_mask	|=
-			(1ull << IB_USER_VERBS_CMD_CREATE_FLOW) |
-			(1ull << IB_USER_VERBS_CMD_DESTROY_FLOW);
-#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+		ibdev->ib_dev.uverbs_ex_cmd_mask	|=
+			(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
 	}
 
 	mlx4_ib_alloc_eqs(dev, ibdev);
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 344ab03948a3..b72627429745 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -556,7 +556,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 		goto err_db;
 	}
 	mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
-	(*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+	(*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 
 	*index = to_mucontext(context)->uuari.uars[0].index;
 
@@ -620,7 +620,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 	}
 	mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
 
-	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT;
+	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 	*index = dev->mdev.priv.uuari.uars[0].index;
 
 	return 0;
@@ -653,8 +653,11 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
 	int eqn;
 	int err;
 
+	if (entries < 0)
+		return ERR_PTR(-EINVAL);
+
 	entries = roundup_pow_of_two(entries + 1);
-	if (entries < 1 || entries > dev->mdev.caps.max_cqes)
+	if (entries > dev->mdev.caps.max_cqes)
 		return ERR_PTR(-EINVAL);
 
 	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
@@ -747,17 +750,9 @@ int mlx5_ib_destroy_cq(struct ib_cq *cq)
 	return 0;
 }
 
-static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq,
-			u32 rsn)
+static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
 {
-	u32 lrsn;
-
-	if (srq)
-		lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff;
-	else
-		lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff;
-
-	return rsn == lrsn;
+	return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
 }
 
 void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
@@ -787,8 +782,8 @@ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
 	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
 		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
 		cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
-		if (is_equal_rsn(cqe64, srq, rsn)) {
-			if (srq)
+		if (is_equal_rsn(cqe64, rsn)) {
+			if (srq && (ntohl(cqe64->srqn) & 0xffffff))
 				mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
 			++nfreed;
 		} else if (nfreed) {
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b1a6cb3a2809..306534109627 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -745,7 +745,8 @@ static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
 	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 	seg->start_addr = 0;
 
-	err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in));
+	err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in),
+				    NULL, NULL, NULL);
 	if (err) {
 		mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
 		goto err_in;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 836be9157242..4c134d93d4fc 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -262,6 +262,9 @@ struct mlx5_ib_mr {
 	int			npages;
 	struct completion	done;
 	enum ib_wc_status	status;
+	struct mlx5_ib_dev     *dev;
+	struct mlx5_create_mkey_mbox_out out;
+	unsigned long		start;
 };
 
 struct mlx5_ib_fast_reg_page_list {
@@ -323,6 +326,7 @@ struct mlx5_cache_ent {
 	struct mlx5_ib_dev     *dev;
 	struct work_struct	work;
 	struct delayed_work	dwork;
+	int			pending;
 };
 
 struct mlx5_mr_cache {
@@ -358,6 +362,8 @@ struct mlx5_ib_dev {
 	spinlock_t		mr_lock;
 	struct mlx5_ib_resources	devr;
 	struct mlx5_mr_cache		cache;
+	struct timer_list		delay_timer;
+	int				fill_delay;
 };
 
 static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 3453580b1eb2..039c3e40fcb4 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -35,11 +35,12 @@
 #include <linux/random.h>
 #include <linux/debugfs.h>
 #include <linux/export.h>
+#include <linux/delay.h>
 #include <rdma/ib_umem.h>
 #include "mlx5_ib.h"
 
 enum {
-	DEF_CACHE_SIZE	= 10,
+	MAX_PENDING_REG_MR = 8,
 };
 
 enum {
@@ -63,6 +64,51 @@ static int order2idx(struct mlx5_ib_dev *dev, int order)
 	return order - cache->ent[0].order;
 }
 
+static void reg_mr_callback(int status, void *context)
+{
+	struct mlx5_ib_mr *mr = context;
+	struct mlx5_ib_dev *dev = mr->dev;
+	struct mlx5_mr_cache *cache = &dev->cache;
+	int c = order2idx(dev, mr->order);
+	struct mlx5_cache_ent *ent = &cache->ent[c];
+	u8 key;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ent->lock, flags);
+	ent->pending--;
+	spin_unlock_irqrestore(&ent->lock, flags);
+	if (status) {
+		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
+		kfree(mr);
+		dev->fill_delay = 1;
+		mod_timer(&dev->delay_timer, jiffies + HZ);
+		return;
+	}
+
+	if (mr->out.hdr.status) {
+		mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n",
+			     mr->out.hdr.status,
+			     be32_to_cpu(mr->out.hdr.syndrome));
+		kfree(mr);
+		dev->fill_delay = 1;
+		mod_timer(&dev->delay_timer, jiffies + HZ);
+		return;
+	}
+
+	spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags);
+	key = dev->mdev.priv.mkey_key++;
+	spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags);
+	mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
+
+	cache->last_add = jiffies;
+
+	spin_lock_irqsave(&ent->lock, flags);
+	list_add_tail(&mr->list, &ent->head);
+	ent->cur++;
+	ent->size++;
+	spin_unlock_irqrestore(&ent->lock, flags);
+}
+
 static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
@@ -78,36 +124,39 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 		return -ENOMEM;
 
 	for (i = 0; i < num; i++) {
+		if (ent->pending >= MAX_PENDING_REG_MR) {
+			err = -EAGAIN;
+			break;
+		}
+
 		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 		if (!mr) {
 			err = -ENOMEM;
-			goto out;
+			break;
 		}
 		mr->order = ent->order;
 		mr->umred = 1;
+		mr->dev = dev;
 		in->seg.status = 1 << 6;
 		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
 		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
 		in->seg.log2_page_size = 12;
 
+		spin_lock_irq(&ent->lock);
+		ent->pending++;
+		spin_unlock_irq(&ent->lock);
+		mr->start = jiffies;
 		err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
-					    sizeof(*in));
+					    sizeof(*in), reg_mr_callback,
+					    mr, &mr->out);
 		if (err) {
 			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
 			kfree(mr);
-			goto out;
+			break;
 		}
-		cache->last_add = jiffies;
-
-		spin_lock(&ent->lock);
-		list_add_tail(&mr->list, &ent->head);
-		ent->cur++;
-		ent->size++;
-		spin_unlock(&ent->lock);
 	}
 
-out:
 	kfree(in);
 	return err;
 }
@@ -121,16 +170,16 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 	int i;
 
 	for (i = 0; i < num; i++) {
-		spin_lock(&ent->lock);
+		spin_lock_irq(&ent->lock);
 		if (list_empty(&ent->head)) {
-			spin_unlock(&ent->lock);
+			spin_unlock_irq(&ent->lock);
 			return;
 		}
 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 		list_del(&mr->list);
 		ent->cur--;
 		ent->size--;
-		spin_unlock(&ent->lock);
+		spin_unlock_irq(&ent->lock);
 		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
 		if (err)
 			mlx5_ib_warn(dev, "failed destroy mkey\n");
@@ -162,9 +211,13 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
 		return -EINVAL;
 
 	if (var > ent->size) {
-		err = add_keys(dev, c, var - ent->size);
-		if (err)
-			return err;
+		do {
+			err = add_keys(dev, c, var - ent->size);
+			if (err && err != -EAGAIN)
+				return err;
+
+			usleep_range(3000, 5000);
+		} while (err);
 	} else if (var < ent->size) {
 		remove_keys(dev, c, ent->size - var);
 	}
@@ -280,23 +333,37 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 	struct mlx5_ib_dev *dev = ent->dev;
 	struct mlx5_mr_cache *cache = &dev->cache;
 	int i = order2idx(dev, ent->order);
+	int err;
 
 	if (cache->stopped)
 		return;
 
 	ent = &dev->cache.ent[i];
-	if (ent->cur < 2 * ent->limit) {
-		add_keys(dev, i, 1);
-		if (ent->cur < 2 * ent->limit)
-			queue_work(cache->wq, &ent->work);
+	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
+		err = add_keys(dev, i, 1);
+		if (ent->cur < 2 * ent->limit) {
+			if (err == -EAGAIN) {
+				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
+					    i + 2);
+				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
+			} else if (err) {
+				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
+					     i + 2, err);
+				queue_delayed_work(cache->wq, &ent->dwork,
+						   msecs_to_jiffies(1000));
+			} else {
+				queue_work(cache->wq, &ent->work);
+			}
+		}
 	} else if (ent->cur > 2 * ent->limit) {
 		if (!someone_adding(cache) &&
-		    time_after(jiffies, cache->last_add + 60 * HZ)) {
+		    time_after(jiffies, cache->last_add + 300 * HZ)) {
 			remove_keys(dev, i, 1);
 			if (ent->cur > ent->limit)
 				queue_work(cache->wq, &ent->work);
 		} else {
-			queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
+			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
 		}
 	}
 }
@@ -336,18 +403,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
 
 		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
 
-		spin_lock(&ent->lock);
+		spin_lock_irq(&ent->lock);
 		if (!list_empty(&ent->head)) {
 			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
 					      list);
 			list_del(&mr->list);
 			ent->cur--;
-			spin_unlock(&ent->lock);
+			spin_unlock_irq(&ent->lock);
 			if (ent->cur < ent->limit)
 				queue_work(cache->wq, &ent->work);
 			break;
 		}
-		spin_unlock(&ent->lock);
+		spin_unlock_irq(&ent->lock);
 
 		queue_work(cache->wq, &ent->work);
 
@@ -374,12 +441,12 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		return;
 	}
 	ent = &cache->ent[c];
-	spin_lock(&ent->lock);
+	spin_lock_irq(&ent->lock);
 	list_add_tail(&mr->list, &ent->head);
 	ent->cur++;
 	if (ent->cur > 2 * ent->limit)
 		shrink = 1;
-	spin_unlock(&ent->lock);
+	spin_unlock_irq(&ent->lock);
 
 	if (shrink)
 		queue_work(cache->wq, &ent->work);
@@ -394,16 +461,16 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 
 	cancel_delayed_work(&ent->dwork);
 	while (1) {
-		spin_lock(&ent->lock);
+		spin_lock_irq(&ent->lock);
 		if (list_empty(&ent->head)) {
-			spin_unlock(&ent->lock);
+			spin_unlock_irq(&ent->lock);
 			return;
 		}
 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
403 list_del(&mr->list); 470 list_del(&mr->list);
404 ent->cur--; 471 ent->cur--;
405 ent->size--; 472 ent->size--;
406 spin_unlock(&ent->lock); 473 spin_unlock_irq(&ent->lock);
407 err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); 474 err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
408 if (err) 475 if (err)
409 mlx5_ib_warn(dev, "failed destroy mkey\n"); 476 mlx5_ib_warn(dev, "failed destroy mkey\n");
@@ -464,12 +531,18 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
464 debugfs_remove_recursive(dev->cache.root); 531 debugfs_remove_recursive(dev->cache.root);
465} 532}
466 533
534static void delay_time_func(unsigned long ctx)
535{
536 struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
537
538 dev->fill_delay = 0;
539}
540
467int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) 541int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
468{ 542{
469 struct mlx5_mr_cache *cache = &dev->cache; 543 struct mlx5_mr_cache *cache = &dev->cache;
470 struct mlx5_cache_ent *ent; 544 struct mlx5_cache_ent *ent;
471 int limit; 545 int limit;
472 int size;
473 int err; 546 int err;
474 int i; 547 int i;
475 548
@@ -479,6 +552,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
479 return -ENOMEM; 552 return -ENOMEM;
480 } 553 }
481 554
555 setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
482 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { 556 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
483 INIT_LIST_HEAD(&cache->ent[i].head); 557 INIT_LIST_HEAD(&cache->ent[i].head);
484 spin_lock_init(&cache->ent[i].lock); 558 spin_lock_init(&cache->ent[i].lock);
@@ -489,13 +563,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
489 ent->order = i + 2; 563 ent->order = i + 2;
490 ent->dev = dev; 564 ent->dev = dev;
491 565
492 if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) { 566 if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE)
493 size = dev->mdev.profile->mr_cache[i].size;
494 limit = dev->mdev.profile->mr_cache[i].limit; 567 limit = dev->mdev.profile->mr_cache[i].limit;
495 } else { 568 else
496 size = DEF_CACHE_SIZE;
497 limit = 0; 569 limit = 0;
498 } 570
499 INIT_WORK(&ent->work, cache_work_func); 571 INIT_WORK(&ent->work, cache_work_func);
500 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); 572 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
501 ent->limit = limit; 573 ent->limit = limit;
@@ -522,6 +594,7 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
522 clean_keys(dev, i); 594 clean_keys(dev, i);
523 595
524 destroy_workqueue(dev->cache.wq); 596 destroy_workqueue(dev->cache.wq);
597 del_timer_sync(&dev->delay_timer);
525 598
526 return 0; 599 return 0;
527} 600}
@@ -551,7 +624,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
551 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 624 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
552 seg->start_addr = 0; 625 seg->start_addr = 0;
553 626
554 err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in)); 627 err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
628 NULL);
555 if (err) 629 if (err)
556 goto err_in; 630 goto err_in;
557 631
@@ -660,14 +734,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
660 int err; 734 int err;
661 int i; 735 int i;
662 736
663 for (i = 0; i < 10; i++) { 737 for (i = 0; i < 1; i++) {
664 mr = alloc_cached_mr(dev, order); 738 mr = alloc_cached_mr(dev, order);
665 if (mr) 739 if (mr)
666 break; 740 break;
667 741
668 err = add_keys(dev, order2idx(dev, order), 1); 742 err = add_keys(dev, order2idx(dev, order), 1);
669 if (err) { 743 if (err && err != -EAGAIN) {
670 mlx5_ib_warn(dev, "add_keys failed\n"); 744 mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
671 break; 745 break;
672 } 746 }
673 } 747 }
@@ -759,8 +833,10 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
759 in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); 833 in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
760 in->seg.log2_page_size = page_shift; 834 in->seg.log2_page_size = page_shift;
761 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 835 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
762 in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); 836 in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
763 err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen); 837 1 << page_shift));
838 err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL,
839 NULL, NULL);
764 if (err) { 840 if (err) {
765 mlx5_ib_warn(dev, "create mkey failed\n"); 841 mlx5_ib_warn(dev, "create mkey failed\n");
766 goto err_2; 842 goto err_2;
@@ -944,7 +1020,8 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
944 * TBD not needed - issue 197292 */ 1020 * TBD not needed - issue 197292 */
945 in->seg.log2_page_size = PAGE_SHIFT; 1021 in->seg.log2_page_size = PAGE_SHIFT;
946 1022
947 err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in)); 1023 err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
1024 NULL, NULL);
948 kfree(in); 1025 kfree(in);
949 if (err) 1026 if (err)
950 goto err_free; 1027 goto err_free;
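
The mr.c hunks above switch the MR cache to asynchronous mkey creation and throttle it: at most MAX_PENDING_REG_MR creations may be outstanding, add_keys() returns -EAGAIN beyond that limit, and the cache worker retries from delayed work while the completion path (reg_mr_callback) drains the pending count. The userspace sketch below models only that throttling pattern; submit_async(), completion_arrived() and MAX_PENDING are illustrative names, not driver or kernel APIs.

/*
 * Minimal model of the throttled asynchronous submission used above:
 * at most MAX_PENDING operations may be in flight, further requests
 * get -EAGAIN and are retried later (the driver defers the retry to
 * delayed work). Illustrative only.
 */
#include <errno.h>
#include <stdio.h>

#define MAX_PENDING 4

static int pending;			/* operations currently in flight */

static int submit_async(void)
{
	if (pending >= MAX_PENDING)
		return -EAGAIN;		/* caller backs off and retries */
	pending++;			/* request is now outstanding */
	return 0;
}

static void completion_arrived(void)	/* plays the role of the async callback */
{
	if (pending > 0)
		pending--;
}

int main(void)
{
	int i, err;

	for (i = 0; i < 8; i++) {
		err = submit_async();
		if (err == -EAGAIN) {
			printf("request %d throttled, retrying after a completion\n", i);
			completion_arrived();
			err = submit_async();
		}
		if (err)
			return 1;
	}
	printf("done, %d requests still pending\n", pending);
	return 0;
}
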
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 5659ea880741..7c6b4ba49bec 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -551,7 +551,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
551 } 551 }
552 mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0); 552 mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
553 (*in)->ctx.log_pg_sz_remote_qpn = 553 (*in)->ctx.log_pg_sz_remote_qpn =
554 cpu_to_be32((page_shift - PAGE_SHIFT) << 24); 554 cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
555 (*in)->ctx.params2 = cpu_to_be32(offset << 6); 555 (*in)->ctx.params2 = cpu_to_be32(offset << 6);
556 556
557 (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); 557 (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
@@ -648,7 +648,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
648 goto err_buf; 648 goto err_buf;
649 } 649 }
650 (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); 650 (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
651 (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24); 651 (*in)->ctx.log_pg_sz_remote_qpn =
652 cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
652 /* Set "fast registration enabled" for all kernel QPs */ 653 /* Set "fast registration enabled" for all kernel QPs */
653 (*in)->ctx.params1 |= cpu_to_be32(1 << 11); 654 (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
654 (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4); 655 (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
@@ -1317,9 +1318,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
1317 MLX5_QP_OPTPAR_RAE | 1318 MLX5_QP_OPTPAR_RAE |
1318 MLX5_QP_OPTPAR_RWE | 1319 MLX5_QP_OPTPAR_RWE |
1319 MLX5_QP_OPTPAR_RNR_TIMEOUT | 1320 MLX5_QP_OPTPAR_RNR_TIMEOUT |
1320 MLX5_QP_OPTPAR_PM_STATE, 1321 MLX5_QP_OPTPAR_PM_STATE |
1322 MLX5_QP_OPTPAR_ALT_ADDR_PATH,
1321 [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE | 1323 [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
1322 MLX5_QP_OPTPAR_PM_STATE, 1324 MLX5_QP_OPTPAR_PM_STATE |
1325 MLX5_QP_OPTPAR_ALT_ADDR_PATH,
1323 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY | 1326 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY |
1324 MLX5_QP_OPTPAR_SRQN | 1327 MLX5_QP_OPTPAR_SRQN |
1325 MLX5_QP_OPTPAR_CQN_RCV, 1328 MLX5_QP_OPTPAR_CQN_RCV,
@@ -1550,7 +1553,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
1550 mlx5_cur = to_mlx5_state(cur_state); 1553 mlx5_cur = to_mlx5_state(cur_state);
1551 mlx5_new = to_mlx5_state(new_state); 1554 mlx5_new = to_mlx5_state(new_state);
1552 mlx5_st = to_mlx5_st(ibqp->qp_type); 1555 mlx5_st = to_mlx5_st(ibqp->qp_type);
1553 if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0) 1556 if (mlx5_st < 0)
1554 goto out; 1557 goto out;
1555 1558
1556 optpar = ib_mask_to_mlx5_opt(attr_mask); 1559 optpar = ib_mask_to_mlx5_opt(attr_mask);
@@ -1744,6 +1747,7 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
1744 MLX5_MKEY_MASK_PD | 1747 MLX5_MKEY_MASK_PD |
1745 MLX5_MKEY_MASK_LR | 1748 MLX5_MKEY_MASK_LR |
1746 MLX5_MKEY_MASK_LW | 1749 MLX5_MKEY_MASK_LW |
1750 MLX5_MKEY_MASK_KEY |
1747 MLX5_MKEY_MASK_RR | 1751 MLX5_MKEY_MASK_RR |
1748 MLX5_MKEY_MASK_RW | 1752 MLX5_MKEY_MASK_RW |
1749 MLX5_MKEY_MASK_A | 1753 MLX5_MKEY_MASK_A |
@@ -1800,7 +1804,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
1800 seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); 1804 seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
1801 seg->len = cpu_to_be64(wr->wr.fast_reg.length); 1805 seg->len = cpu_to_be64(wr->wr.fast_reg.length);
1802 seg->log2_page_size = wr->wr.fast_reg.page_shift; 1806 seg->log2_page_size = wr->wr.fast_reg.page_shift;
1803 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 1807 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
1808 mlx5_mkey_variant(wr->wr.fast_reg.rkey));
1804} 1809}
1805 1810
1806static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg, 1811static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
@@ -1913,6 +1918,10 @@ static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
1913 if (unlikely((*seg == qp->sq.qend))) 1918 if (unlikely((*seg == qp->sq.qend)))
1914 *seg = mlx5_get_send_wqe(qp, 0); 1919 *seg = mlx5_get_send_wqe(qp, 0);
1915 if (!li) { 1920 if (!li) {
1921 if (unlikely(wr->wr.fast_reg.page_list_len >
1922 wr->wr.fast_reg.page_list->max_page_list_len))
1923 return -ENOMEM;
1924
1916 set_frwr_pages(*seg, wr, mdev, pd, writ); 1925 set_frwr_pages(*seg, wr, mdev, pd, writ);
1917 *seg += sizeof(struct mlx5_wqe_data_seg); 1926 *seg += sizeof(struct mlx5_wqe_data_seg);
1918 *size += (sizeof(struct mlx5_wqe_data_seg) / 16); 1927 *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 0aa478bc291a..210b3eaf188a 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -123,7 +123,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
123 goto err_in; 123 goto err_in;
124 } 124 }
125 125
126 (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; 126 (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
127 (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26); 127 (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
128 128
129 return 0; 129 return 0;
@@ -192,7 +192,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
192 } 192 }
193 srq->wq_sig = !!srq_signature; 193 srq->wq_sig = !!srq_signature;
194 194
195 (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; 195 (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
196 196
197 return 0; 197 return 0;
198 198
@@ -390,9 +390,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq)
390 mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); 390 mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
391 ib_umem_release(msrq->umem); 391 ib_umem_release(msrq->umem);
392 } else { 392 } else {
393 kfree(msrq->wrid); 393 destroy_srq_kernel(dev, msrq);
394 mlx5_buf_free(&dev->mdev, &msrq->buf);
395 mlx5_db_free(&dev->mdev, &msrq->db);
396 } 394 }
397 395
398 kfree(srq); 396 kfree(srq);
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 5b53ca5a2284..8308e3634767 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -2834,7 +2834,7 @@ static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2834 init_attr->qp_context = nesqp->ibqp.qp_context; 2834 init_attr->qp_context = nesqp->ibqp.qp_context;
2835 init_attr->send_cq = nesqp->ibqp.send_cq; 2835 init_attr->send_cq = nesqp->ibqp.send_cq;
2836 init_attr->recv_cq = nesqp->ibqp.recv_cq; 2836 init_attr->recv_cq = nesqp->ibqp.recv_cq;
2837 init_attr->srq = nesqp->ibqp.srq = nesqp->ibqp.srq; 2837 init_attr->srq = nesqp->ibqp.srq;
2838 init_attr->cap = attr->cap; 2838 init_attr->cap = attr->cap;
2839 2839
2840 return 0; 2840 return 0;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index adc11d14f878..294dd27b601e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -122,6 +122,32 @@ struct mqe_ctx {
122 bool cmd_done; 122 bool cmd_done;
123}; 123};
124 124
125struct ocrdma_hw_mr {
126 u32 lkey;
127 u8 fr_mr;
128 u8 remote_atomic;
129 u8 remote_rd;
130 u8 remote_wr;
131 u8 local_rd;
132 u8 local_wr;
133 u8 mw_bind;
134 u8 rsvd;
135 u64 len;
136 struct ocrdma_pbl *pbl_table;
137 u32 num_pbls;
138 u32 num_pbes;
139 u32 pbl_size;
140 u32 pbe_size;
141 u64 fbo;
142 u64 va;
143};
144
145struct ocrdma_mr {
146 struct ib_mr ibmr;
147 struct ib_umem *umem;
148 struct ocrdma_hw_mr hwmr;
149};
150
125struct ocrdma_dev { 151struct ocrdma_dev {
126 struct ib_device ibdev; 152 struct ib_device ibdev;
127 struct ocrdma_dev_attr attr; 153 struct ocrdma_dev_attr attr;
@@ -169,7 +195,7 @@ struct ocrdma_dev {
169 struct list_head entry; 195 struct list_head entry;
170 struct rcu_head rcu; 196 struct rcu_head rcu;
171 int id; 197 int id;
172 u64 stag_arr[OCRDMA_MAX_STAG]; 198 struct ocrdma_mr *stag_arr[OCRDMA_MAX_STAG];
173 u16 pvid; 199 u16 pvid;
174}; 200};
175 201
@@ -294,31 +320,6 @@ struct ocrdma_qp {
294 u16 db_cache; 320 u16 db_cache;
295}; 321};
296 322
297struct ocrdma_hw_mr {
298 u32 lkey;
299 u8 fr_mr;
300 u8 remote_atomic;
301 u8 remote_rd;
302 u8 remote_wr;
303 u8 local_rd;
304 u8 local_wr;
305 u8 mw_bind;
306 u8 rsvd;
307 u64 len;
308 struct ocrdma_pbl *pbl_table;
309 u32 num_pbls;
310 u32 num_pbes;
311 u32 pbl_size;
312 u32 pbe_size;
313 u64 fbo;
314 u64 va;
315};
316
317struct ocrdma_mr {
318 struct ib_mr ibmr;
319 struct ib_umem *umem;
320 struct ocrdma_hw_mr hwmr;
321};
322 323
323struct ocrdma_ucontext { 324struct ocrdma_ucontext {
324 struct ib_ucontext ibucontext; 325 struct ib_ucontext ibucontext;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 50219ab2279d..56bf32fcb62c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1783,7 +1783,7 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
1783 u32 max_sges = attrs->cap.max_send_sge; 1783 u32 max_sges = attrs->cap.max_send_sge;
1784 1784
1785 /* QP1 may exceed 127 */ 1785 /* QP1 may exceed 127 */
1786 max_wqe_allocated = min_t(int, attrs->cap.max_send_wr + 1, 1786 max_wqe_allocated = min_t(u32, attrs->cap.max_send_wr + 1,
1787 dev->attr.max_wqe); 1787 dev->attr.max_wqe);
1788 1788
1789 status = ocrdma_build_q_conf(&max_wqe_allocated, 1789 status = ocrdma_build_q_conf(&max_wqe_allocated,
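
The min_t() change above is a type-correctness fix: clamping an unsigned WQE count through a signed comparison misbehaves once the value no longer fits in an int. The short userspace demonstration below uses a simplified min_t() stand-in (the real kernel macro is more elaborate) and an artificially large request to show the hazard; it is illustrative only.

#include <stdio.h>

/* simplified stand-in for the kernel's min_t() */
#define min_t(type, a, b) ((type)(a) < (type)(b) ? (type)(a) : (type)(b))

int main(void)
{
	unsigned int requested = 0x80000000u;	/* does not fit in an int */
	unsigned int limit = 4096;
	unsigned int via_int = min_t(int, requested + 1, limit);
	unsigned int via_u32 = min_t(unsigned int, requested + 1, limit);

	/* on a typical two's-complement system the int version is not clamped */
	printf("min_t(int, ...):          %u\n", via_int);
	printf("min_t(unsigned int, ...): %u\n", via_u32);	/* 4096, as intended */
	return 0;
}
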
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 0ce7674621ea..91443bcb9e0e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -452,9 +452,6 @@ static void ocrdma_remove_free(struct rcu_head *rcu)
452{ 452{
453 struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu); 453 struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu);
454 454
455 ocrdma_free_resources(dev);
456 ocrdma_cleanup_hw(dev);
457
458 idr_remove(&ocrdma_dev_id, dev->id); 455 idr_remove(&ocrdma_dev_id, dev->id);
459 kfree(dev->mbx_cmd); 456 kfree(dev->mbx_cmd);
460 ib_dealloc_device(&dev->ibdev); 457 ib_dealloc_device(&dev->ibdev);
@@ -470,6 +467,10 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
470 spin_lock(&ocrdma_devlist_lock); 467 spin_lock(&ocrdma_devlist_lock);
471 list_del_rcu(&dev->entry); 468 list_del_rcu(&dev->entry);
472 spin_unlock(&ocrdma_devlist_lock); 469 spin_unlock(&ocrdma_devlist_lock);
470
471 ocrdma_free_resources(dev);
472 ocrdma_cleanup_hw(dev);
473
473 call_rcu(&dev->rcu, ocrdma_remove_free); 474 call_rcu(&dev->rcu, ocrdma_remove_free);
474} 475}
475 476
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 69f1d1221a6b..7686dceadd29 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -1981,9 +1981,7 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1981 1981
1982 wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES); 1982 wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
1983 1983
1984 if ((wr->wr.fast_reg.page_list_len > 1984 if (wr->wr.fast_reg.page_list_len > qp->dev->attr.max_pages_per_frmr)
1985 qp->dev->attr.max_pages_per_frmr) ||
1986 (wr->wr.fast_reg.length > 0xffffffffULL))
1987 return -EINVAL; 1985 return -EINVAL;
1988 1986
1989 hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT); 1987 hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
@@ -2839,7 +2837,7 @@ struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len)
2839 goto mbx_err; 2837 goto mbx_err;
2840 mr->ibmr.rkey = mr->hwmr.lkey; 2838 mr->ibmr.rkey = mr->hwmr.lkey;
2841 mr->ibmr.lkey = mr->hwmr.lkey; 2839 mr->ibmr.lkey = mr->hwmr.lkey;
2842 dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (unsigned long) mr; 2840 dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = mr;
2843 return &mr->ibmr; 2841 return &mr->ibmr;
2844mbx_err: 2842mbx_err:
2845 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); 2843 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 016e7429adf6..5bfc02f450e6 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -6190,21 +6190,20 @@ static int setup_txselect(const char *str, struct kernel_param *kp)
6190{ 6190{
6191 struct qib_devdata *dd; 6191 struct qib_devdata *dd;
6192 unsigned long val; 6192 unsigned long val;
6193 int ret; 6193 char *n;
6194
6195 if (strlen(str) >= MAX_ATTEN_LEN) { 6194 if (strlen(str) >= MAX_ATTEN_LEN) {
6196 pr_info("txselect_values string too long\n"); 6195 pr_info("txselect_values string too long\n");
6197 return -ENOSPC; 6196 return -ENOSPC;
6198 } 6197 }
6199 ret = kstrtoul(str, 0, &val); 6198 val = simple_strtoul(str, &n, 0);
6200 if (ret || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + 6199 if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
6201 TXDDS_MFG_SZ)) { 6200 TXDDS_MFG_SZ)) {
6202 pr_info("txselect_values must start with a number < %d\n", 6201 pr_info("txselect_values must start with a number < %d\n",
6203 TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ); 6202 TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ);
6204 return ret ? ret : -EINVAL; 6203 return -EINVAL;
6205 } 6204 }
6206
6207 strcpy(txselect_list, str); 6205 strcpy(txselect_list, str);
6206
6208 list_for_each_entry(dd, &qib_dev_list, list) 6207 list_for_each_entry(dd, &qib_dev_list, list)
6209 if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322) 6208 if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322)
6210 set_no_qsfp_atten(dd, 1); 6209 set_no_qsfp_atten(dd, 1);
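
The setup_txselect() change above swaps kstrtoul(), which insists on the whole string being a number, for simple_strtoul() plus an end-pointer check, because txselect_values starts with a number but may carry further text. Below is a small userspace sketch of the same pattern using strtoul(3), which behaves like simple_strtoul() for this purpose; leading_number() is an illustrative helper, not a kernel function.

#include <stdio.h>
#include <stdlib.h>

/* Parse a number at the start of str; trailing text is allowed. */
static int leading_number(const char *str, unsigned long *val)
{
	char *n;

	*val = strtoul(str, &n, 0);
	if (n == str)
		return -1;	/* no digits were consumed at all */
	return 0;
}

int main(void)
{
	unsigned long val;
	const char *ok = "23,45,12";	/* number followed by more text */
	const char *bad = "abc";

	if (!leading_number(ok, &val))
		printf("parsed leading value %lu from \"%s\"\n", val, ok);
	if (leading_number(bad, &val))
		printf("\"%s\" rejected: no leading number\n", bad);
	return 0;
}
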
diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h
index 28874f8606f8..941d4d50d8e7 100644
--- a/drivers/infiniband/hw/qib/qib_mad.h
+++ b/drivers/infiniband/hw/qib/qib_mad.h
@@ -54,7 +54,7 @@ struct ib_node_info {
54 __be32 revision; 54 __be32 revision;
55 u8 local_port_num; 55 u8 local_port_num;
56 u8 vendor_id[3]; 56 u8 vendor_id[3];
57} __attribute__ ((packed)); 57} __packed;
58 58
59struct ib_mad_notice_attr { 59struct ib_mad_notice_attr {
60 u8 generic_type; 60 u8 generic_type;
@@ -73,7 +73,7 @@ struct ib_mad_notice_attr {
73 __be16 reserved; 73 __be16 reserved;
74 __be16 lid; /* where violation happened */ 74 __be16 lid; /* where violation happened */
75 u8 port_num; /* where violation happened */ 75 u8 port_num; /* where violation happened */
76 } __attribute__ ((packed)) ntc_129_131; 76 } __packed ntc_129_131;
77 77
78 struct { 78 struct {
79 __be16 reserved; 79 __be16 reserved;
@@ -83,14 +83,14 @@ struct ib_mad_notice_attr {
83 __be32 new_cap_mask; /* new capability mask */ 83 __be32 new_cap_mask; /* new capability mask */
84 u8 reserved3; 84 u8 reserved3;
85 u8 change_flags; /* low 3 bits only */ 85 u8 change_flags; /* low 3 bits only */
86 } __attribute__ ((packed)) ntc_144; 86 } __packed ntc_144;
87 87
88 struct { 88 struct {
89 __be16 reserved; 89 __be16 reserved;
90 __be16 lid; /* lid where sys guid changed */ 90 __be16 lid; /* lid where sys guid changed */
91 __be16 reserved2; 91 __be16 reserved2;
92 __be64 new_sys_guid; 92 __be64 new_sys_guid;
93 } __attribute__ ((packed)) ntc_145; 93 } __packed ntc_145;
94 94
95 struct { 95 struct {
96 __be16 reserved; 96 __be16 reserved;
@@ -104,7 +104,7 @@ struct ib_mad_notice_attr {
104 u8 reserved3; 104 u8 reserved3;
105 u8 dr_trunc_hop; 105 u8 dr_trunc_hop;
106 u8 dr_rtn_path[30]; 106 u8 dr_rtn_path[30];
107 } __attribute__ ((packed)) ntc_256; 107 } __packed ntc_256;
108 108
109 struct { 109 struct {
110 __be16 reserved; 110 __be16 reserved;
@@ -115,7 +115,7 @@ struct ib_mad_notice_attr {
115 __be32 qp2; /* high 8 bits reserved */ 115 __be32 qp2; /* high 8 bits reserved */
116 union ib_gid gid1; 116 union ib_gid gid1;
117 union ib_gid gid2; 117 union ib_gid gid2;
118 } __attribute__ ((packed)) ntc_257_258; 118 } __packed ntc_257_258;
119 119
120 } details; 120 } details;
121}; 121};
@@ -209,7 +209,7 @@ struct ib_pma_portcounters_cong {
209 __be64 port_rcv_packets; 209 __be64 port_rcv_packets;
210 __be64 port_xmit_wait; 210 __be64 port_xmit_wait;
211 __be64 port_adr_events; 211 __be64 port_adr_events;
212} __attribute__ ((packed)); 212} __packed;
213 213
214#define IB_PMA_CONG_HW_CONTROL_TIMER 0x00 214#define IB_PMA_CONG_HW_CONTROL_TIMER 0x00
215#define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01 215#define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index d0a0ea0c14d6..165aee2ca8a0 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -594,8 +594,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
594 else 594 else
595 j = npages; 595 j = npages;
596 596
597 ret = get_user_pages(current, current->mm, addr, 597 ret = get_user_pages_fast(addr, j, 0, pages);
598 j, 0, 1, pages, NULL);
599 if (ret != j) { 598 if (ret != j) {
600 i = 0; 599 i = 0;
601 j = ret; 600 j = ret;
@@ -1294,11 +1293,8 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
1294 int mxp = 8; 1293 int mxp = 8;
1295 int ndesc = 0; 1294 int ndesc = 0;
1296 1295
1297 down_write(&current->mm->mmap_sem);
1298 ret = qib_user_sdma_queue_pkts(dd, ppd, pq, 1296 ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
1299 iov, dim, &list, &mxp, &ndesc); 1297 iov, dim, &list, &mxp, &ndesc);
1300 up_write(&current->mm->mmap_sem);
1301
1302 if (ret < 0) 1298 if (ret < 0)
1303 goto done_unlock; 1299 goto done_unlock;
1304 else { 1300 else {
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 012e2c7575ad..a01c7d2cf541 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -150,14 +150,14 @@ struct ib_reth {
150 __be64 vaddr; 150 __be64 vaddr;
151 __be32 rkey; 151 __be32 rkey;
152 __be32 length; 152 __be32 length;
153} __attribute__ ((packed)); 153} __packed;
154 154
155struct ib_atomic_eth { 155struct ib_atomic_eth {
156 __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ 156 __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
157 __be32 rkey; 157 __be32 rkey;
158 __be64 swap_data; 158 __be64 swap_data;
159 __be64 compare_data; 159 __be64 compare_data;
160} __attribute__ ((packed)); 160} __packed;
161 161
162struct qib_other_headers { 162struct qib_other_headers {
163 __be32 bth[3]; 163 __be32 bth[3];
@@ -178,7 +178,7 @@ struct qib_other_headers {
178 __be32 aeth; 178 __be32 aeth;
179 struct ib_atomic_eth atomic_eth; 179 struct ib_atomic_eth atomic_eth;
180 } u; 180 } u;
181} __attribute__ ((packed)); 181} __packed;
182 182
183/* 183/*
184 * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes 184 * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
@@ -195,12 +195,12 @@ struct qib_ib_header {
195 } l; 195 } l;
196 struct qib_other_headers oth; 196 struct qib_other_headers oth;
197 } u; 197 } u;
198} __attribute__ ((packed)); 198} __packed;
199 199
200struct qib_pio_header { 200struct qib_pio_header {
201 __le32 pbc[2]; 201 __le32 pbc[2];
202 struct qib_ib_header hdr; 202 struct qib_ib_header hdr;
203} __attribute__ ((packed)); 203} __packed;
204 204
205/* 205/*
206 * There is one struct qib_mcast for each multicast GID. 206 * There is one struct qib_mcast for each multicast GID.
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index f93baf8254c4..a88631918e85 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -46,6 +46,7 @@
46#include <scsi/scsi.h> 46#include <scsi/scsi.h>
47#include <scsi/scsi_device.h> 47#include <scsi/scsi_device.h>
48#include <scsi/scsi_dbg.h> 48#include <scsi/scsi_dbg.h>
49#include <scsi/scsi_tcq.h>
49#include <scsi/srp.h> 50#include <scsi/srp.h>
50#include <scsi/scsi_transport_srp.h> 51#include <scsi/scsi_transport_srp.h>
51 52
@@ -86,6 +87,32 @@ module_param(topspin_workarounds, int, 0444);
86MODULE_PARM_DESC(topspin_workarounds, 87MODULE_PARM_DESC(topspin_workarounds,
87 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); 88 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
88 89
90static struct kernel_param_ops srp_tmo_ops;
91
92static int srp_reconnect_delay = 10;
93module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
94 S_IRUGO | S_IWUSR);
95MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
96
97static int srp_fast_io_fail_tmo = 15;
98module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
99 S_IRUGO | S_IWUSR);
100MODULE_PARM_DESC(fast_io_fail_tmo,
101 "Number of seconds between the observation of a transport"
102 " layer error and failing all I/O. \"off\" means that this"
103 " functionality is disabled.");
104
105static int srp_dev_loss_tmo = 600;
106module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
107 S_IRUGO | S_IWUSR);
108MODULE_PARM_DESC(dev_loss_tmo,
109 "Maximum number of seconds that the SRP transport should"
110 " insulate transport layer errors. After this time has been"
111 " exceeded the SCSI host is removed. Should be"
112 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
113 " if fast_io_fail_tmo has not been set. \"off\" means that"
114 " this functionality is disabled.");
115
89static void srp_add_one(struct ib_device *device); 116static void srp_add_one(struct ib_device *device);
90static void srp_remove_one(struct ib_device *device); 117static void srp_remove_one(struct ib_device *device);
91static void srp_recv_completion(struct ib_cq *cq, void *target_ptr); 118static void srp_recv_completion(struct ib_cq *cq, void *target_ptr);
@@ -102,6 +129,48 @@ static struct ib_client srp_client = {
102 129
103static struct ib_sa_client srp_sa_client; 130static struct ib_sa_client srp_sa_client;
104 131
132static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
133{
134 int tmo = *(int *)kp->arg;
135
136 if (tmo >= 0)
137 return sprintf(buffer, "%d", tmo);
138 else
139 return sprintf(buffer, "off");
140}
141
142static int srp_tmo_set(const char *val, const struct kernel_param *kp)
143{
144 int tmo, res;
145
146 if (strncmp(val, "off", 3) != 0) {
147 res = kstrtoint(val, 0, &tmo);
148 if (res)
149 goto out;
150 } else {
151 tmo = -1;
152 }
153 if (kp->arg == &srp_reconnect_delay)
154 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
155 srp_dev_loss_tmo);
156 else if (kp->arg == &srp_fast_io_fail_tmo)
157 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
158 else
159 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
160 tmo);
161 if (res)
162 goto out;
163 *(int *)kp->arg = tmo;
164
165out:
166 return res;
167}
168
169static struct kernel_param_ops srp_tmo_ops = {
170 .get = srp_tmo_get,
171 .set = srp_tmo_set,
172};
173
105static inline struct srp_target_port *host_to_target(struct Scsi_Host *host) 174static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
106{ 175{
107 return (struct srp_target_port *) host->hostdata; 176 return (struct srp_target_port *) host->hostdata;
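
The srp_tmo_get()/srp_tmo_set() handlers added above let reconnect_delay, fast_io_fail_tmo and dev_loss_tmo be written either as a number of seconds or as "off", with "off" stored internally as -1. The userspace sketch below mirrors only that parsing convention; parse_tmo() is an illustrative stand-in and omits the cross-parameter checking the driver performs through srp_tmo_valid().

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int parse_tmo(const char *val, int *tmo)
{
	char *end;
	long v;

	if (strncmp(val, "off", 3) == 0) {
		*tmo = -1;		/* "off": functionality disabled */
		return 0;
	}
	v = strtol(val, &end, 0);
	if (end == val || *end != '\0')
		return -1;		/* not a plain number */
	*tmo = (int)v;
	return 0;
}

int main(void)
{
	int tmo;

	if (!parse_tmo("15", &tmo))
		printf("fast_io_fail_tmo = %d seconds\n", tmo);
	if (!parse_tmo("off", &tmo))
		printf("dev_loss_tmo = %d (disabled)\n", tmo);
	return 0;
}
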
@@ -231,16 +300,16 @@ static int srp_create_target_ib(struct srp_target_port *target)
231 return -ENOMEM; 300 return -ENOMEM;
232 301
233 recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, 302 recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
234 srp_recv_completion, NULL, target, SRP_RQ_SIZE, 303 srp_recv_completion, NULL, target,
235 target->comp_vector); 304 target->queue_size, target->comp_vector);
236 if (IS_ERR(recv_cq)) { 305 if (IS_ERR(recv_cq)) {
237 ret = PTR_ERR(recv_cq); 306 ret = PTR_ERR(recv_cq);
238 goto err; 307 goto err;
239 } 308 }
240 309
241 send_cq = ib_create_cq(target->srp_host->srp_dev->dev, 310 send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
242 srp_send_completion, NULL, target, SRP_SQ_SIZE, 311 srp_send_completion, NULL, target,
243 target->comp_vector); 312 target->queue_size, target->comp_vector);
244 if (IS_ERR(send_cq)) { 313 if (IS_ERR(send_cq)) {
245 ret = PTR_ERR(send_cq); 314 ret = PTR_ERR(send_cq);
246 goto err_recv_cq; 315 goto err_recv_cq;
@@ -249,8 +318,8 @@ static int srp_create_target_ib(struct srp_target_port *target)
249 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP); 318 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
250 319
251 init_attr->event_handler = srp_qp_event; 320 init_attr->event_handler = srp_qp_event;
252 init_attr->cap.max_send_wr = SRP_SQ_SIZE; 321 init_attr->cap.max_send_wr = target->queue_size;
253 init_attr->cap.max_recv_wr = SRP_RQ_SIZE; 322 init_attr->cap.max_recv_wr = target->queue_size;
254 init_attr->cap.max_recv_sge = 1; 323 init_attr->cap.max_recv_sge = 1;
255 init_attr->cap.max_send_sge = 1; 324 init_attr->cap.max_send_sge = 1;
256 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; 325 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
@@ -296,6 +365,10 @@ err:
296 return ret; 365 return ret;
297} 366}
298 367
368/*
369 * Note: this function may be called without srp_alloc_iu_bufs() having been
370 * invoked. Hence the target->[rt]x_ring checks.
371 */
299static void srp_free_target_ib(struct srp_target_port *target) 372static void srp_free_target_ib(struct srp_target_port *target)
300{ 373{
301 int i; 374 int i;
@@ -307,10 +380,18 @@ static void srp_free_target_ib(struct srp_target_port *target)
307 target->qp = NULL; 380 target->qp = NULL;
308 target->send_cq = target->recv_cq = NULL; 381 target->send_cq = target->recv_cq = NULL;
309 382
310 for (i = 0; i < SRP_RQ_SIZE; ++i) 383 if (target->rx_ring) {
311 srp_free_iu(target->srp_host, target->rx_ring[i]); 384 for (i = 0; i < target->queue_size; ++i)
312 for (i = 0; i < SRP_SQ_SIZE; ++i) 385 srp_free_iu(target->srp_host, target->rx_ring[i]);
313 srp_free_iu(target->srp_host, target->tx_ring[i]); 386 kfree(target->rx_ring);
387 target->rx_ring = NULL;
388 }
389 if (target->tx_ring) {
390 for (i = 0; i < target->queue_size; ++i)
391 srp_free_iu(target->srp_host, target->tx_ring[i]);
392 kfree(target->tx_ring);
393 target->tx_ring = NULL;
394 }
314} 395}
315 396
316static void srp_path_rec_completion(int status, 397static void srp_path_rec_completion(int status,
@@ -390,7 +471,7 @@ static int srp_send_req(struct srp_target_port *target)
390 req->param.responder_resources = 4; 471 req->param.responder_resources = 4;
391 req->param.remote_cm_response_timeout = 20; 472 req->param.remote_cm_response_timeout = 20;
392 req->param.local_cm_response_timeout = 20; 473 req->param.local_cm_response_timeout = 20;
393 req->param.retry_count = 7; 474 req->param.retry_count = target->tl_retry_count;
394 req->param.rnr_retry_count = 7; 475 req->param.rnr_retry_count = 7;
395 req->param.max_cm_retries = 15; 476 req->param.max_cm_retries = 15;
396 477
@@ -496,7 +577,11 @@ static void srp_free_req_data(struct srp_target_port *target)
496 struct srp_request *req; 577 struct srp_request *req;
497 int i; 578 int i;
498 579
499 for (i = 0, req = target->req_ring; i < SRP_CMD_SQ_SIZE; ++i, ++req) { 580 if (!target->req_ring)
581 return;
582
583 for (i = 0; i < target->req_ring_size; ++i) {
584 req = &target->req_ring[i];
500 kfree(req->fmr_list); 585 kfree(req->fmr_list);
501 kfree(req->map_page); 586 kfree(req->map_page);
502 if (req->indirect_dma_addr) { 587 if (req->indirect_dma_addr) {
@@ -506,6 +591,50 @@ static void srp_free_req_data(struct srp_target_port *target)
506 } 591 }
507 kfree(req->indirect_desc); 592 kfree(req->indirect_desc);
508 } 593 }
594
595 kfree(target->req_ring);
596 target->req_ring = NULL;
597}
598
599static int srp_alloc_req_data(struct srp_target_port *target)
600{
601 struct srp_device *srp_dev = target->srp_host->srp_dev;
602 struct ib_device *ibdev = srp_dev->dev;
603 struct srp_request *req;
604 dma_addr_t dma_addr;
605 int i, ret = -ENOMEM;
606
607 INIT_LIST_HEAD(&target->free_reqs);
608
609 target->req_ring = kzalloc(target->req_ring_size *
610 sizeof(*target->req_ring), GFP_KERNEL);
611 if (!target->req_ring)
612 goto out;
613
614 for (i = 0; i < target->req_ring_size; ++i) {
615 req = &target->req_ring[i];
616 req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
617 GFP_KERNEL);
618 req->map_page = kmalloc(SRP_FMR_SIZE * sizeof(void *),
619 GFP_KERNEL);
620 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
621 if (!req->fmr_list || !req->map_page || !req->indirect_desc)
622 goto out;
623
624 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
625 target->indirect_size,
626 DMA_TO_DEVICE);
627 if (ib_dma_mapping_error(ibdev, dma_addr))
628 goto out;
629
630 req->indirect_dma_addr = dma_addr;
631 req->index = i;
632 list_add_tail(&req->list, &target->free_reqs);
633 }
634 ret = 0;
635
636out:
637 return ret;
509} 638}
510 639
511/** 640/**
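
srp_alloc_req_data() above sizes the request ring from req_ring_size instead of a compile-time constant, gives every slot its index, and threads all slots onto target->free_reqs so the I/O path can take and return a request in O(1). The sketch below models just that ring-plus-free-list layout in userspace; struct request, ring_init() and friends are illustrative names, and the per-request DMA mapping done by the driver is left out.

#include <stdio.h>
#include <stdlib.h>

struct request {
	struct request *next;	/* free-list linkage */
	int index;
};

struct ring {
	struct request *slots;	/* preallocated request slots */
	struct request *free;	/* head of the free list */
};

static int ring_init(struct ring *r, int size)
{
	int i;

	r->slots = calloc(size, sizeof(*r->slots));
	if (!r->slots)
		return -1;
	r->free = NULL;
	for (i = size - 1; i >= 0; i--) {
		r->slots[i].index = i;
		r->slots[i].next = r->free;
		r->free = &r->slots[i];
	}
	return 0;
}

static struct request *ring_get(struct ring *r)
{
	struct request *req = r->free;

	if (req)
		r->free = req->next;
	return req;
}

static void ring_put(struct ring *r, struct request *req)
{
	req->next = r->free;
	r->free = req;
}

int main(void)
{
	struct ring r;
	struct request *req;

	if (ring_init(&r, 16))		/* arbitrary ring size for the demo */
		return 1;
	req = ring_get(&r);
	printf("got request slot %d\n", req->index);
	ring_put(&r, req);
	free(r.slots);
	return 0;
}
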
@@ -528,12 +657,20 @@ static void srp_remove_target(struct srp_target_port *target)
528 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 657 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
529 658
530 srp_del_scsi_host_attr(target->scsi_host); 659 srp_del_scsi_host_attr(target->scsi_host);
660 srp_rport_get(target->rport);
531 srp_remove_host(target->scsi_host); 661 srp_remove_host(target->scsi_host);
532 scsi_remove_host(target->scsi_host); 662 scsi_remove_host(target->scsi_host);
533 srp_disconnect_target(target); 663 srp_disconnect_target(target);
534 ib_destroy_cm_id(target->cm_id); 664 ib_destroy_cm_id(target->cm_id);
535 srp_free_target_ib(target); 665 srp_free_target_ib(target);
666 cancel_work_sync(&target->tl_err_work);
667 srp_rport_put(target->rport);
536 srp_free_req_data(target); 668 srp_free_req_data(target);
669
670 spin_lock(&target->srp_host->target_lock);
671 list_del(&target->list);
672 spin_unlock(&target->srp_host->target_lock);
673
537 scsi_host_put(target->scsi_host); 674 scsi_host_put(target->scsi_host);
538} 675}
539 676
@@ -545,10 +682,6 @@ static void srp_remove_work(struct work_struct *work)
545 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 682 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
546 683
547 srp_remove_target(target); 684 srp_remove_target(target);
548
549 spin_lock(&target->srp_host->target_lock);
550 list_del(&target->list);
551 spin_unlock(&target->srp_host->target_lock);
552} 685}
553 686
554static void srp_rport_delete(struct srp_rport *rport) 687static void srp_rport_delete(struct srp_rport *rport)
@@ -686,23 +819,42 @@ static void srp_free_req(struct srp_target_port *target,
686 spin_unlock_irqrestore(&target->lock, flags); 819 spin_unlock_irqrestore(&target->lock, flags);
687} 820}
688 821
689static void srp_reset_req(struct srp_target_port *target, struct srp_request *req) 822static void srp_finish_req(struct srp_target_port *target,
823 struct srp_request *req, int result)
690{ 824{
691 struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL); 825 struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL);
692 826
693 if (scmnd) { 827 if (scmnd) {
694 srp_free_req(target, req, scmnd, 0); 828 srp_free_req(target, req, scmnd, 0);
695 scmnd->result = DID_RESET << 16; 829 scmnd->result = result;
696 scmnd->scsi_done(scmnd); 830 scmnd->scsi_done(scmnd);
697 } 831 }
698} 832}
699 833
700static int srp_reconnect_target(struct srp_target_port *target) 834static void srp_terminate_io(struct srp_rport *rport)
701{ 835{
702 struct Scsi_Host *shost = target->scsi_host; 836 struct srp_target_port *target = rport->lld_data;
703 int i, ret; 837 int i;
704 838
705 scsi_target_block(&shost->shost_gendev); 839 for (i = 0; i < target->req_ring_size; ++i) {
840 struct srp_request *req = &target->req_ring[i];
841 srp_finish_req(target, req, DID_TRANSPORT_FAILFAST << 16);
842 }
843}
844
845/*
846 * It is up to the caller to ensure that srp_rport_reconnect() calls are
847 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
848 * srp_reset_device() or srp_reset_host() calls will occur while this function
849 * is in progress. One way to realize that is not to call this function
850 * directly but to call srp_reconnect_rport() instead since that last function
851 * serializes calls of this function via rport->mutex and also blocks
852 * srp_queuecommand() calls before invoking this function.
853 */
854static int srp_rport_reconnect(struct srp_rport *rport)
855{
856 struct srp_target_port *target = rport->lld_data;
857 int i, ret;
706 858
707 srp_disconnect_target(target); 859 srp_disconnect_target(target);
708 /* 860 /*
@@ -721,41 +873,21 @@ static int srp_reconnect_target(struct srp_target_port *target)
721 else 873 else
722 srp_create_target_ib(target); 874 srp_create_target_ib(target);
723 875
724 for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { 876 for (i = 0; i < target->req_ring_size; ++i) {
725 struct srp_request *req = &target->req_ring[i]; 877 struct srp_request *req = &target->req_ring[i];
726 if (req->scmnd) 878 srp_finish_req(target, req, DID_RESET << 16);
727 srp_reset_req(target, req);
728 } 879 }
729 880
730 INIT_LIST_HEAD(&target->free_tx); 881 INIT_LIST_HEAD(&target->free_tx);
731 for (i = 0; i < SRP_SQ_SIZE; ++i) 882 for (i = 0; i < target->queue_size; ++i)
732 list_add(&target->tx_ring[i]->list, &target->free_tx); 883 list_add(&target->tx_ring[i]->list, &target->free_tx);
733 884
734 if (ret == 0) 885 if (ret == 0)
735 ret = srp_connect_target(target); 886 ret = srp_connect_target(target);
736 887
737 scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING : 888 if (ret == 0)
738 SDEV_TRANSPORT_OFFLINE); 889 shost_printk(KERN_INFO, target->scsi_host,
739 target->transport_offline = !!ret; 890 PFX "reconnect succeeded\n");
740
741 if (ret)
742 goto err;
743
744 shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n");
745
746 return ret;
747
748err:
749 shost_printk(KERN_ERR, target->scsi_host,
750 PFX "reconnect failed (%d), removing target port.\n", ret);
751
752 /*
753 * We couldn't reconnect, so kill our target port off.
754 * However, we have to defer the real removal because we
755 * are in the context of the SCSI error handler now, which
756 * will deadlock if we call scsi_remove_host().
757 */
758 srp_queue_remove_work(target);
759 891
760 return ret; 892 return ret;
761} 893}
@@ -1302,15 +1434,30 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
1302 PFX "Recv failed with error code %d\n", res); 1434 PFX "Recv failed with error code %d\n", res);
1303} 1435}
1304 1436
1305static void srp_handle_qp_err(enum ib_wc_status wc_status, 1437/**
1306 enum ib_wc_opcode wc_opcode, 1438 * srp_tl_err_work() - handle a transport layer error
1439 *
1440 * Note: This function may get invoked before the rport has been created,
1441 * hence the target->rport test.
1442 */
1443static void srp_tl_err_work(struct work_struct *work)
1444{
1445 struct srp_target_port *target;
1446
1447 target = container_of(work, struct srp_target_port, tl_err_work);
1448 if (target->rport)
1449 srp_start_tl_fail_timers(target->rport);
1450}
1451
1452static void srp_handle_qp_err(enum ib_wc_status wc_status, bool send_err,
1307 struct srp_target_port *target) 1453 struct srp_target_port *target)
1308{ 1454{
1309 if (target->connected && !target->qp_in_error) { 1455 if (target->connected && !target->qp_in_error) {
1310 shost_printk(KERN_ERR, target->scsi_host, 1456 shost_printk(KERN_ERR, target->scsi_host,
1311 PFX "failed %s status %d\n", 1457 PFX "failed %s status %d\n",
1312 wc_opcode & IB_WC_RECV ? "receive" : "send", 1458 send_err ? "send" : "receive",
1313 wc_status); 1459 wc_status);
1460 queue_work(system_long_wq, &target->tl_err_work);
1314 } 1461 }
1315 target->qp_in_error = true; 1462 target->qp_in_error = true;
1316} 1463}
@@ -1325,7 +1472,7 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
1325 if (likely(wc.status == IB_WC_SUCCESS)) { 1472 if (likely(wc.status == IB_WC_SUCCESS)) {
1326 srp_handle_recv(target, &wc); 1473 srp_handle_recv(target, &wc);
1327 } else { 1474 } else {
1328 srp_handle_qp_err(wc.status, wc.opcode, target); 1475 srp_handle_qp_err(wc.status, false, target);
1329 } 1476 }
1330 } 1477 }
1331} 1478}
@@ -1341,7 +1488,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
1341 iu = (struct srp_iu *) (uintptr_t) wc.wr_id; 1488 iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
1342 list_add(&iu->list, &target->free_tx); 1489 list_add(&iu->list, &target->free_tx);
1343 } else { 1490 } else {
1344 srp_handle_qp_err(wc.status, wc.opcode, target); 1491 srp_handle_qp_err(wc.status, true, target);
1345 } 1492 }
1346 } 1493 }
1347} 1494}
@@ -1349,17 +1496,29 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
1349static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) 1496static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
1350{ 1497{
1351 struct srp_target_port *target = host_to_target(shost); 1498 struct srp_target_port *target = host_to_target(shost);
1499 struct srp_rport *rport = target->rport;
1352 struct srp_request *req; 1500 struct srp_request *req;
1353 struct srp_iu *iu; 1501 struct srp_iu *iu;
1354 struct srp_cmd *cmd; 1502 struct srp_cmd *cmd;
1355 struct ib_device *dev; 1503 struct ib_device *dev;
1356 unsigned long flags; 1504 unsigned long flags;
1357 int len; 1505 int len, result;
1506 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
1507
1508 /*
1509 * The SCSI EH thread is the only context from which srp_queuecommand()
1510 * can get invoked for blocked devices (SDEV_BLOCK /
1511 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
1512 * locking the rport mutex if invoked from inside the SCSI EH.
1513 */
1514 if (in_scsi_eh)
1515 mutex_lock(&rport->mutex);
1358 1516
1359 if (unlikely(target->transport_offline)) { 1517 result = srp_chkready(target->rport);
1360 scmnd->result = DID_NO_CONNECT << 16; 1518 if (unlikely(result)) {
1519 scmnd->result = result;
1361 scmnd->scsi_done(scmnd); 1520 scmnd->scsi_done(scmnd);
1362 return 0; 1521 goto unlock_rport;
1363 } 1522 }
1364 1523
1365 spin_lock_irqsave(&target->lock, flags); 1524 spin_lock_irqsave(&target->lock, flags);
@@ -1404,6 +1563,10 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
1404 goto err_unmap; 1563 goto err_unmap;
1405 } 1564 }
1406 1565
1566unlock_rport:
1567 if (in_scsi_eh)
1568 mutex_unlock(&rport->mutex);
1569
1407 return 0; 1570 return 0;
1408 1571
1409err_unmap: 1572err_unmap:
@@ -1418,14 +1581,30 @@ err_iu:
1418err_unlock: 1581err_unlock:
1419 spin_unlock_irqrestore(&target->lock, flags); 1582 spin_unlock_irqrestore(&target->lock, flags);
1420 1583
1584 if (in_scsi_eh)
1585 mutex_unlock(&rport->mutex);
1586
1421 return SCSI_MLQUEUE_HOST_BUSY; 1587 return SCSI_MLQUEUE_HOST_BUSY;
1422} 1588}
1423 1589
1590/*
1591 * Note: the resources allocated in this function are freed in
1592 * srp_free_target_ib().
1593 */
1424static int srp_alloc_iu_bufs(struct srp_target_port *target) 1594static int srp_alloc_iu_bufs(struct srp_target_port *target)
1425{ 1595{
1426 int i; 1596 int i;
1427 1597
1428 for (i = 0; i < SRP_RQ_SIZE; ++i) { 1598 target->rx_ring = kzalloc(target->queue_size * sizeof(*target->rx_ring),
1599 GFP_KERNEL);
1600 if (!target->rx_ring)
1601 goto err_no_ring;
1602 target->tx_ring = kzalloc(target->queue_size * sizeof(*target->tx_ring),
1603 GFP_KERNEL);
1604 if (!target->tx_ring)
1605 goto err_no_ring;
1606
1607 for (i = 0; i < target->queue_size; ++i) {
1429 target->rx_ring[i] = srp_alloc_iu(target->srp_host, 1608 target->rx_ring[i] = srp_alloc_iu(target->srp_host,
1430 target->max_ti_iu_len, 1609 target->max_ti_iu_len,
1431 GFP_KERNEL, DMA_FROM_DEVICE); 1610 GFP_KERNEL, DMA_FROM_DEVICE);
@@ -1433,7 +1612,7 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)
1433 goto err; 1612 goto err;
1434 } 1613 }
1435 1614
1436 for (i = 0; i < SRP_SQ_SIZE; ++i) { 1615 for (i = 0; i < target->queue_size; ++i) {
1437 target->tx_ring[i] = srp_alloc_iu(target->srp_host, 1616 target->tx_ring[i] = srp_alloc_iu(target->srp_host,
1438 target->max_iu_len, 1617 target->max_iu_len,
1439 GFP_KERNEL, DMA_TO_DEVICE); 1618 GFP_KERNEL, DMA_TO_DEVICE);
@@ -1446,16 +1625,18 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)
1446 return 0; 1625 return 0;
1447 1626
1448err: 1627err:
1449 for (i = 0; i < SRP_RQ_SIZE; ++i) { 1628 for (i = 0; i < target->queue_size; ++i) {
1450 srp_free_iu(target->srp_host, target->rx_ring[i]); 1629 srp_free_iu(target->srp_host, target->rx_ring[i]);
1451 target->rx_ring[i] = NULL;
1452 }
1453
1454 for (i = 0; i < SRP_SQ_SIZE; ++i) {
1455 srp_free_iu(target->srp_host, target->tx_ring[i]); 1630 srp_free_iu(target->srp_host, target->tx_ring[i]);
1456 target->tx_ring[i] = NULL;
1457 } 1631 }
1458 1632
1633
1634err_no_ring:
1635 kfree(target->tx_ring);
1636 target->tx_ring = NULL;
1637 kfree(target->rx_ring);
1638 target->rx_ring = NULL;
1639
1459 return -ENOMEM; 1640 return -ENOMEM;
1460} 1641}
1461 1642
@@ -1506,6 +1687,9 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
1506 target->scsi_host->can_queue 1687 target->scsi_host->can_queue
1507 = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE, 1688 = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE,
1508 target->scsi_host->can_queue); 1689 target->scsi_host->can_queue);
1690 target->scsi_host->cmd_per_lun
1691 = min_t(int, target->scsi_host->can_queue,
1692 target->scsi_host->cmd_per_lun);
1509 } else { 1693 } else {
1510 shost_printk(KERN_WARNING, target->scsi_host, 1694 shost_printk(KERN_WARNING, target->scsi_host,
1511 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode); 1695 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
@@ -1513,7 +1697,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
1513 goto error; 1697 goto error;
1514 } 1698 }
1515 1699
1516 if (!target->rx_ring[0]) { 1700 if (!target->rx_ring) {
1517 ret = srp_alloc_iu_bufs(target); 1701 ret = srp_alloc_iu_bufs(target);
1518 if (ret) 1702 if (ret)
1519 goto error; 1703 goto error;
@@ -1533,7 +1717,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
1533 if (ret) 1717 if (ret)
1534 goto error_free; 1718 goto error_free;
1535 1719
1536 for (i = 0; i < SRP_RQ_SIZE; i++) { 1720 for (i = 0; i < target->queue_size; i++) {
1537 struct srp_iu *iu = target->rx_ring[i]; 1721 struct srp_iu *iu = target->rx_ring[i];
1538 ret = srp_post_recv(target, iu); 1722 ret = srp_post_recv(target, iu);
1539 if (ret) 1723 if (ret)
@@ -1672,6 +1856,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1672 if (ib_send_cm_drep(cm_id, NULL, 0)) 1856 if (ib_send_cm_drep(cm_id, NULL, 0))
1673 shost_printk(KERN_ERR, target->scsi_host, 1857 shost_printk(KERN_ERR, target->scsi_host,
1674 PFX "Sending CM DREP failed\n"); 1858 PFX "Sending CM DREP failed\n");
1859 queue_work(system_long_wq, &target->tl_err_work);
1675 break; 1860 break;
1676 1861
1677 case IB_CM_TIMEWAIT_EXIT: 1862 case IB_CM_TIMEWAIT_EXIT:
@@ -1698,9 +1883,61 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1698 return 0; 1883 return 0;
1699} 1884}
1700 1885
1886/**
1887 * srp_change_queue_type - changing device queue tag type
1888 * @sdev: scsi device struct
1889 * @tag_type: requested tag type
1890 *
1891 * Returns queue tag type.
1892 */
1893static int
1894srp_change_queue_type(struct scsi_device *sdev, int tag_type)
1895{
1896 if (sdev->tagged_supported) {
1897 scsi_set_tag_type(sdev, tag_type);
1898 if (tag_type)
1899 scsi_activate_tcq(sdev, sdev->queue_depth);
1900 else
1901 scsi_deactivate_tcq(sdev, sdev->queue_depth);
1902 } else
1903 tag_type = 0;
1904
1905 return tag_type;
1906}
1907
1908/**
1909 * srp_change_queue_depth - setting device queue depth
1910 * @sdev: scsi device struct
1911 * @qdepth: requested queue depth
1912 * @reason: SCSI_QDEPTH_DEFAULT/SCSI_QDEPTH_QFULL/SCSI_QDEPTH_RAMP_UP
1913 * (see include/scsi/scsi_host.h for definition)
1914 *
1915 * Returns queue depth.
1916 */
1917static int
1918srp_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason)
1919{
1920 struct Scsi_Host *shost = sdev->host;
1921 int max_depth;
1922 if (reason == SCSI_QDEPTH_DEFAULT || reason == SCSI_QDEPTH_RAMP_UP) {
1923 max_depth = shost->can_queue;
1924 if (!sdev->tagged_supported)
1925 max_depth = 1;
1926 if (qdepth > max_depth)
1927 qdepth = max_depth;
1928 scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth);
1929 } else if (reason == SCSI_QDEPTH_QFULL)
1930 scsi_track_queue_full(sdev, qdepth);
1931 else
1932 return -EOPNOTSUPP;
1933
1934 return sdev->queue_depth;
1935}
1936
1701static int srp_send_tsk_mgmt(struct srp_target_port *target, 1937static int srp_send_tsk_mgmt(struct srp_target_port *target,
1702 u64 req_tag, unsigned int lun, u8 func) 1938 u64 req_tag, unsigned int lun, u8 func)
1703{ 1939{
1940 struct srp_rport *rport = target->rport;
1704 struct ib_device *dev = target->srp_host->srp_dev->dev; 1941 struct ib_device *dev = target->srp_host->srp_dev->dev;
1705 struct srp_iu *iu; 1942 struct srp_iu *iu;
1706 struct srp_tsk_mgmt *tsk_mgmt; 1943 struct srp_tsk_mgmt *tsk_mgmt;
@@ -1710,12 +1947,20 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
1710 1947
1711 init_completion(&target->tsk_mgmt_done); 1948 init_completion(&target->tsk_mgmt_done);
1712 1949
1950 /*
1951 * Lock the rport mutex to avoid that srp_create_target_ib() is
1952 * invoked while a task management function is being sent.
1953 */
1954 mutex_lock(&rport->mutex);
1713 spin_lock_irq(&target->lock); 1955 spin_lock_irq(&target->lock);
1714 iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT); 1956 iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT);
1715 spin_unlock_irq(&target->lock); 1957 spin_unlock_irq(&target->lock);
1716 1958
1717 if (!iu) 1959 if (!iu) {
1960 mutex_unlock(&rport->mutex);
1961
1718 return -1; 1962 return -1;
1963 }
1719 1964
1720 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt, 1965 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
1721 DMA_TO_DEVICE); 1966 DMA_TO_DEVICE);
@@ -1732,8 +1977,11 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
1732 DMA_TO_DEVICE); 1977 DMA_TO_DEVICE);
1733 if (srp_post_send(target, iu, sizeof *tsk_mgmt)) { 1978 if (srp_post_send(target, iu, sizeof *tsk_mgmt)) {
1734 srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT); 1979 srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT);
1980 mutex_unlock(&rport->mutex);
1981
1735 return -1; 1982 return -1;
1736 } 1983 }
1984 mutex_unlock(&rport->mutex);
1737 1985
1738 if (!wait_for_completion_timeout(&target->tsk_mgmt_done, 1986 if (!wait_for_completion_timeout(&target->tsk_mgmt_done,
1739 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS))) 1987 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
@@ -1751,11 +1999,11 @@ static int srp_abort(struct scsi_cmnd *scmnd)
1751 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); 1999 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
1752 2000
1753 if (!req || !srp_claim_req(target, req, scmnd)) 2001 if (!req || !srp_claim_req(target, req, scmnd))
1754 return FAILED; 2002 return SUCCESS;
1755 if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, 2003 if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
1756 SRP_TSK_ABORT_TASK) == 0) 2004 SRP_TSK_ABORT_TASK) == 0)
1757 ret = SUCCESS; 2005 ret = SUCCESS;
1758 else if (target->transport_offline) 2006 else if (target->rport->state == SRP_RPORT_LOST)
1759 ret = FAST_IO_FAIL; 2007 ret = FAST_IO_FAIL;
1760 else 2008 else
1761 ret = FAILED; 2009 ret = FAILED;
@@ -1779,10 +2027,10 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
1779 if (target->tsk_mgmt_status) 2027 if (target->tsk_mgmt_status)
1780 return FAILED; 2028 return FAILED;
1781 2029
1782 for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { 2030 for (i = 0; i < target->req_ring_size; ++i) {
1783 struct srp_request *req = &target->req_ring[i]; 2031 struct srp_request *req = &target->req_ring[i];
1784 if (req->scmnd && req->scmnd->device == scmnd->device) 2032 if (req->scmnd && req->scmnd->device == scmnd->device)
1785 srp_reset_req(target, req); 2033 srp_finish_req(target, req, DID_RESET << 16);
1786 } 2034 }
1787 2035
1788 return SUCCESS; 2036 return SUCCESS;
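srp_finish_req() completes each outstanding request on the device being reset with DID_RESET in the host byte of the SCSI result word; the host byte occupies bits 16..23, which is why the value is shifted left by 16. A tiny illustration of that encoding (the completion call is only a stand-in for what srp_finish_req() does internally):

    #include <scsi/scsi.h>
    #include <scsi/scsi_cmnd.h>

    /* Illustration: DID_RESET lands in the host byte of scmnd->result. */
    static void example_fail_with_reset(struct scsi_cmnd *scmnd)
    {
            scmnd->result = DID_RESET << 16;
            WARN_ON(host_byte(scmnd->result) != DID_RESET);
            scmnd->scsi_done(scmnd);
    }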
@@ -1791,14 +2039,10 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
1791static int srp_reset_host(struct scsi_cmnd *scmnd) 2039static int srp_reset_host(struct scsi_cmnd *scmnd)
1792{ 2040{
1793 struct srp_target_port *target = host_to_target(scmnd->device->host); 2041 struct srp_target_port *target = host_to_target(scmnd->device->host);
1794 int ret = FAILED;
1795 2042
1796 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); 2043 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
1797 2044
1798 if (!srp_reconnect_target(target)) 2045 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
1799 ret = SUCCESS;
1800
1801 return ret;
1802} 2046}
1803 2047
1804static int srp_slave_configure(struct scsi_device *sdev) 2048static int srp_slave_configure(struct scsi_device *sdev)
@@ -1851,6 +2095,14 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
1851 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey)); 2095 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey));
1852} 2096}
1853 2097
2098static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2099 char *buf)
2100{
2101 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2102
2103 return sprintf(buf, "%pI6\n", target->path.sgid.raw);
2104}
2105
1854static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, 2106static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
1855 char *buf) 2107 char *buf)
1856{ 2108{
@@ -1907,6 +2159,14 @@ static ssize_t show_comp_vector(struct device *dev,
1907 return sprintf(buf, "%d\n", target->comp_vector); 2159 return sprintf(buf, "%d\n", target->comp_vector);
1908} 2160}
1909 2161
2162static ssize_t show_tl_retry_count(struct device *dev,
2163 struct device_attribute *attr, char *buf)
2164{
2165 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2166
2167 return sprintf(buf, "%d\n", target->tl_retry_count);
2168}
2169
1910static ssize_t show_cmd_sg_entries(struct device *dev, 2170static ssize_t show_cmd_sg_entries(struct device *dev,
1911 struct device_attribute *attr, char *buf) 2171 struct device_attribute *attr, char *buf)
1912{ 2172{
@@ -1927,6 +2187,7 @@ static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
1927static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); 2187static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
1928static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); 2188static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
1929static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); 2189static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
2190static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
1930static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); 2191static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
1931static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL); 2192static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
1932static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); 2193static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
@@ -1934,6 +2195,7 @@ static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
1934static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); 2195static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
1935static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); 2196static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
1936static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL); 2197static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
2198static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
1937static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); 2199static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
1938static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); 2200static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
1939 2201
@@ -1942,6 +2204,7 @@ static struct device_attribute *srp_host_attrs[] = {
1942 &dev_attr_ioc_guid, 2204 &dev_attr_ioc_guid,
1943 &dev_attr_service_id, 2205 &dev_attr_service_id,
1944 &dev_attr_pkey, 2206 &dev_attr_pkey,
2207 &dev_attr_sgid,
1945 &dev_attr_dgid, 2208 &dev_attr_dgid,
1946 &dev_attr_orig_dgid, 2209 &dev_attr_orig_dgid,
1947 &dev_attr_req_lim, 2210 &dev_attr_req_lim,
@@ -1949,6 +2212,7 @@ static struct device_attribute *srp_host_attrs[] = {
1949 &dev_attr_local_ib_port, 2212 &dev_attr_local_ib_port,
1950 &dev_attr_local_ib_device, 2213 &dev_attr_local_ib_device,
1951 &dev_attr_comp_vector, 2214 &dev_attr_comp_vector,
2215 &dev_attr_tl_retry_count,
1952 &dev_attr_cmd_sg_entries, 2216 &dev_attr_cmd_sg_entries,
1953 &dev_attr_allow_ext_sg, 2217 &dev_attr_allow_ext_sg,
1954 NULL 2218 NULL
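Exposing a new read-only value such as sgid or tl_retry_count through the SCSI host's sysfs directory takes exactly the three pieces shown above: a show function, a DEVICE_ATTR() definition, and an entry in srp_host_attrs[]. Condensed into one place, with a hypothetical attribute name:

    /* Hypothetical read-only attribute following the pattern above. */
    static ssize_t show_example_value(struct device *dev,
                                      struct device_attribute *attr, char *buf)
    {
            struct srp_target_port *target = host_to_target(class_to_shost(dev));

            return sprintf(buf, "%d\n", target->tl_retry_count);
    }

    static DEVICE_ATTR(example_value, S_IRUGO, show_example_value, NULL);

    /* The matching &dev_attr_example_value pointer then goes into
     * srp_host_attrs[] ahead of the terminating NULL. */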
@@ -1961,14 +2225,16 @@ static struct scsi_host_template srp_template = {
1961 .slave_configure = srp_slave_configure, 2225 .slave_configure = srp_slave_configure,
1962 .info = srp_target_info, 2226 .info = srp_target_info,
1963 .queuecommand = srp_queuecommand, 2227 .queuecommand = srp_queuecommand,
2228 .change_queue_depth = srp_change_queue_depth,
2229 .change_queue_type = srp_change_queue_type,
1964 .eh_abort_handler = srp_abort, 2230 .eh_abort_handler = srp_abort,
1965 .eh_device_reset_handler = srp_reset_device, 2231 .eh_device_reset_handler = srp_reset_device,
1966 .eh_host_reset_handler = srp_reset_host, 2232 .eh_host_reset_handler = srp_reset_host,
1967 .skip_settle_delay = true, 2233 .skip_settle_delay = true,
1968 .sg_tablesize = SRP_DEF_SG_TABLESIZE, 2234 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
1969 .can_queue = SRP_CMD_SQ_SIZE, 2235 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
1970 .this_id = -1, 2236 .this_id = -1,
1971 .cmd_per_lun = SRP_CMD_SQ_SIZE, 2237 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
1972 .use_clustering = ENABLE_CLUSTERING, 2238 .use_clustering = ENABLE_CLUSTERING,
1973 .shost_attrs = srp_host_attrs 2239 .shost_attrs = srp_host_attrs
1974}; 2240};
@@ -1994,6 +2260,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
1994 } 2260 }
1995 2261
1996 rport->lld_data = target; 2262 rport->lld_data = target;
2263 target->rport = rport;
1997 2264
1998 spin_lock(&host->target_lock); 2265 spin_lock(&host->target_lock);
1999 list_add_tail(&target->list, &host->target_list); 2266 list_add_tail(&target->list, &host->target_list);
@@ -2073,6 +2340,8 @@ enum {
2073 SRP_OPT_ALLOW_EXT_SG = 1 << 10, 2340 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
2074 SRP_OPT_SG_TABLESIZE = 1 << 11, 2341 SRP_OPT_SG_TABLESIZE = 1 << 11,
2075 SRP_OPT_COMP_VECTOR = 1 << 12, 2342 SRP_OPT_COMP_VECTOR = 1 << 12,
2343 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
2344 SRP_OPT_QUEUE_SIZE = 1 << 14,
2076 SRP_OPT_ALL = (SRP_OPT_ID_EXT | 2345 SRP_OPT_ALL = (SRP_OPT_ID_EXT |
2077 SRP_OPT_IOC_GUID | 2346 SRP_OPT_IOC_GUID |
2078 SRP_OPT_DGID | 2347 SRP_OPT_DGID |
@@ -2094,6 +2363,8 @@ static const match_table_t srp_opt_tokens = {
2094 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" }, 2363 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
2095 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" }, 2364 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
2096 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" }, 2365 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
2366 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
2367 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
2097 { SRP_OPT_ERR, NULL } 2368 { SRP_OPT_ERR, NULL }
2098}; 2369};
2099 2370
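The two new tokens are handled by the generic lib/parser.c helpers: each "name=%format" pattern in the match table maps to an SRP_OPT_* bit, match_token() picks the pattern, and match_int() converts the argument. A self-contained sketch of that mechanism with a hypothetical table:

    #include <linux/parser.h>

    enum { EX_OPT_QUEUE_SIZE, EX_OPT_ERR };

    static const match_table_t ex_tokens = {
            { EX_OPT_QUEUE_SIZE, "queue_size=%d" },
            { EX_OPT_ERR,        NULL }
    };

    /* Parse one "key=value" word the same way srp_parse_options() does. */
    static int example_parse_one(char *p, int *queue_size)
    {
            substring_t args[MAX_OPT_ARGS];
            int val;

            switch (match_token(p, ex_tokens, args)) {
            case EX_OPT_QUEUE_SIZE:
                    if (match_int(args, &val) || val < 1)
                            return -EINVAL;
                    *queue_size = val;
                    return 0;
            default:
                    return -EINVAL;
            }
    }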
@@ -2188,13 +2459,25 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2188 target->scsi_host->max_sectors = token; 2459 target->scsi_host->max_sectors = token;
2189 break; 2460 break;
2190 2461
2462 case SRP_OPT_QUEUE_SIZE:
2463 if (match_int(args, &token) || token < 1) {
2464 pr_warn("bad queue_size parameter '%s'\n", p);
2465 goto out;
2466 }
2467 target->scsi_host->can_queue = token;
2468 target->queue_size = token + SRP_RSP_SQ_SIZE +
2469 SRP_TSK_MGMT_SQ_SIZE;
2470 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
2471 target->scsi_host->cmd_per_lun = token;
2472 break;
2473
2191 case SRP_OPT_MAX_CMD_PER_LUN: 2474 case SRP_OPT_MAX_CMD_PER_LUN:
2192 if (match_int(args, &token)) { 2475 if (match_int(args, &token) || token < 1) {
2193 pr_warn("bad max cmd_per_lun parameter '%s'\n", 2476 pr_warn("bad max cmd_per_lun parameter '%s'\n",
2194 p); 2477 p);
2195 goto out; 2478 goto out;
2196 } 2479 }
2197 target->scsi_host->cmd_per_lun = min(token, SRP_CMD_SQ_SIZE); 2480 target->scsi_host->cmd_per_lun = token;
2198 break; 2481 break;
2199 2482
2200 case SRP_OPT_IO_CLASS: 2483 case SRP_OPT_IO_CLASS:
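The queue_size login option becomes the SCSI can_queue value directly, while the internal target->queue_size reserves SRP_RSP_SQ_SIZE (1) and SRP_TSK_MGMT_SQ_SIZE (1) extra send-queue slots on top of the command budget; with the default of 62 commands that adds up to the 64-entry SRP_DEFAULT_QUEUE_SIZE. Restated with placeholder copies of the two constants, for the arithmetic only:

    #define EX_SRP_RSP_SQ_SIZE      1
    #define EX_SRP_TSK_MGMT_SQ_SIZE 1

    static inline int example_internal_queue_size(int can_queue)
    {
            return can_queue + EX_SRP_RSP_SQ_SIZE + EX_SRP_TSK_MGMT_SQ_SIZE;
    }

    /* example_internal_queue_size(62) == 64, i.e. SRP_DEFAULT_QUEUE_SIZE. */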
@@ -2257,6 +2540,15 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2257 target->comp_vector = token; 2540 target->comp_vector = token;
2258 break; 2541 break;
2259 2542
2543 case SRP_OPT_TL_RETRY_COUNT:
2544 if (match_int(args, &token) || token < 2 || token > 7) {
2545 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
2546 p);
2547 goto out;
2548 }
2549 target->tl_retry_count = token;
2550 break;
2551
2260 default: 2552 default:
2261 pr_warn("unknown parameter or missing value '%s' in target creation request\n", 2553 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
2262 p); 2554 p);
@@ -2273,6 +2565,12 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2273 pr_warn("target creation request is missing parameter '%s'\n", 2565 pr_warn("target creation request is missing parameter '%s'\n",
2274 srp_opt_tokens[i].pattern); 2566 srp_opt_tokens[i].pattern);
2275 2567
2568 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
2569 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
2570 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
2571 target->scsi_host->cmd_per_lun,
2572 target->scsi_host->can_queue);
2573
2276out: 2574out:
2277 kfree(options); 2575 kfree(options);
2278 return ret; 2576 return ret;
@@ -2287,8 +2585,7 @@ static ssize_t srp_create_target(struct device *dev,
2287 struct Scsi_Host *target_host; 2585 struct Scsi_Host *target_host;
2288 struct srp_target_port *target; 2586 struct srp_target_port *target;
2289 struct ib_device *ibdev = host->srp_dev->dev; 2587 struct ib_device *ibdev = host->srp_dev->dev;
2290 dma_addr_t dma_addr; 2588 int ret;
2291 int i, ret;
2292 2589
2293 target_host = scsi_host_alloc(&srp_template, 2590 target_host = scsi_host_alloc(&srp_template,
2294 sizeof (struct srp_target_port)); 2591 sizeof (struct srp_target_port));
@@ -2311,11 +2608,15 @@ static ssize_t srp_create_target(struct device *dev,
2311 target->cmd_sg_cnt = cmd_sg_entries; 2608 target->cmd_sg_cnt = cmd_sg_entries;
2312 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries; 2609 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
2313 target->allow_ext_sg = allow_ext_sg; 2610 target->allow_ext_sg = allow_ext_sg;
2611 target->tl_retry_count = 7;
2612 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
2314 2613
2315 ret = srp_parse_options(buf, target); 2614 ret = srp_parse_options(buf, target);
2316 if (ret) 2615 if (ret)
2317 goto err; 2616 goto err;
2318 2617
2618 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
2619
2319 if (!srp_conn_unique(target->srp_host, target)) { 2620 if (!srp_conn_unique(target->srp_host, target)) {
2320 shost_printk(KERN_INFO, target->scsi_host, 2621 shost_printk(KERN_INFO, target->scsi_host,
2321 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", 2622 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
@@ -2339,31 +2640,13 @@ static ssize_t srp_create_target(struct device *dev,
2339 sizeof (struct srp_indirect_buf) + 2640 sizeof (struct srp_indirect_buf) +
2340 target->cmd_sg_cnt * sizeof (struct srp_direct_buf); 2641 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
2341 2642
2643 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
2342 INIT_WORK(&target->remove_work, srp_remove_work); 2644 INIT_WORK(&target->remove_work, srp_remove_work);
2343 spin_lock_init(&target->lock); 2645 spin_lock_init(&target->lock);
2344 INIT_LIST_HEAD(&target->free_tx); 2646 INIT_LIST_HEAD(&target->free_tx);
2345 INIT_LIST_HEAD(&target->free_reqs); 2647 ret = srp_alloc_req_data(target);
2346 for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { 2648 if (ret)
2347 struct srp_request *req = &target->req_ring[i]; 2649 goto err_free_mem;
2348
2349 req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof (void *),
2350 GFP_KERNEL);
2351 req->map_page = kmalloc(SRP_FMR_SIZE * sizeof (void *),
2352 GFP_KERNEL);
2353 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
2354 if (!req->fmr_list || !req->map_page || !req->indirect_desc)
2355 goto err_free_mem;
2356
2357 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
2358 target->indirect_size,
2359 DMA_TO_DEVICE);
2360 if (ib_dma_mapping_error(ibdev, dma_addr))
2361 goto err_free_mem;
2362
2363 req->indirect_dma_addr = dma_addr;
2364 req->index = i;
2365 list_add_tail(&req->list, &target->free_reqs);
2366 }
2367 2650
2368 ib_query_gid(ibdev, host->port, 0, &target->path.sgid); 2651 ib_query_gid(ibdev, host->port, 0, &target->path.sgid);
2369 2652
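The per-request setup that used to run inline in srp_create_target() (the removed loop above) now lives in srp_alloc_req_data(), whose body falls outside this hunk. The sketch below is a reconstruction from the removed lines only, with the ring allocated dynamically and sized by target->req_ring_size; it is an approximation rather than the literal helper from the patch, and error unwinding is left to the caller:

    static int example_alloc_req_data(struct srp_target_port *target)
    {
            struct ib_device *ibdev = target->srp_host->srp_dev->dev;
            struct srp_request *req;
            dma_addr_t dma_addr;
            int i;

            INIT_LIST_HEAD(&target->free_reqs);

            target->req_ring = kzalloc(target->req_ring_size *
                                       sizeof(*target->req_ring), GFP_KERNEL);
            if (!target->req_ring)
                    return -ENOMEM;

            for (i = 0; i < target->req_ring_size; ++i) {
                    req = &target->req_ring[i];
                    req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
                                            GFP_KERNEL);
                    req->map_page = kmalloc(SRP_FMR_SIZE * sizeof(void *),
                                            GFP_KERNEL);
                    req->indirect_desc = kmalloc(target->indirect_size,
                                                 GFP_KERNEL);
                    if (!req->fmr_list || !req->map_page || !req->indirect_desc)
                            return -ENOMEM;

                    dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
                                                 target->indirect_size,
                                                 DMA_TO_DEVICE);
                    if (ib_dma_mapping_error(ibdev, dma_addr))
                            return -ENOMEM;

                    req->indirect_dma_addr = dma_addr;
                    req->index = i;
                    list_add_tail(&req->list, &target->free_reqs);
            }

            return 0;
    }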
@@ -2612,7 +2895,14 @@ static void srp_remove_one(struct ib_device *device)
2612} 2895}
2613 2896
2614static struct srp_function_template ib_srp_transport_functions = { 2897static struct srp_function_template ib_srp_transport_functions = {
2898 .has_rport_state = true,
2899 .reset_timer_if_blocked = true,
2900 .reconnect_delay = &srp_reconnect_delay,
2901 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
2902 .dev_loss_tmo = &srp_dev_loss_tmo,
2903 .reconnect = srp_rport_reconnect,
2615 .rport_delete = srp_rport_delete, 2904 .rport_delete = srp_rport_delete,
2905 .terminate_rport_io = srp_terminate_io,
2616}; 2906};
2617 2907
2618static int __init srp_init_module(void) 2908static int __init srp_init_module(void)
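These new template callbacks hand the reconnect, fast_io_fail and dev_loss policy over to scsi_transport_srp; on the LLD side all that remains is to register the filled-in template once at module load. A minimal sketch of that step using the srp_attach_transport()/srp_release_transport() API (ib_srp does essentially this in srp_init_module(), which is not part of this hunk):

    static struct scsi_transport_template *example_srp_tmpl;

    static int __init example_init(void)
    {
            example_srp_tmpl = srp_attach_transport(&ib_srp_transport_functions);
            if (!example_srp_tmpl)
                    return -ENOMEM;
            /* ... device/client registration would follow here ... */
            return 0;
    }

    static void __exit example_exit(void)
    {
            srp_release_transport(example_srp_tmpl);
    }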
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index e641088c14dc..575681063f38 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -57,14 +57,11 @@ enum {
57 SRP_MAX_LUN = 512, 57 SRP_MAX_LUN = 512,
58 SRP_DEF_SG_TABLESIZE = 12, 58 SRP_DEF_SG_TABLESIZE = 12,
59 59
60 SRP_RQ_SHIFT = 6, 60 SRP_DEFAULT_QUEUE_SIZE = 1 << 6,
61 SRP_RQ_SIZE = 1 << SRP_RQ_SHIFT,
62
63 SRP_SQ_SIZE = SRP_RQ_SIZE,
64 SRP_RSP_SQ_SIZE = 1, 61 SRP_RSP_SQ_SIZE = 1,
65 SRP_REQ_SQ_SIZE = SRP_SQ_SIZE - SRP_RSP_SQ_SIZE,
66 SRP_TSK_MGMT_SQ_SIZE = 1, 62 SRP_TSK_MGMT_SQ_SIZE = 1,
67 SRP_CMD_SQ_SIZE = SRP_REQ_SQ_SIZE - SRP_TSK_MGMT_SQ_SIZE, 63 SRP_DEFAULT_CMD_SQ_SIZE = SRP_DEFAULT_QUEUE_SIZE - SRP_RSP_SQ_SIZE -
64 SRP_TSK_MGMT_SQ_SIZE,
68 65
69 SRP_TAG_NO_REQ = ~0U, 66 SRP_TAG_NO_REQ = ~0U,
70 SRP_TAG_TSK_MGMT = 1U << 31, 67 SRP_TAG_TSK_MGMT = 1U << 31,
@@ -140,7 +137,6 @@ struct srp_target_port {
140 unsigned int cmd_sg_cnt; 137 unsigned int cmd_sg_cnt;
141 unsigned int indirect_size; 138 unsigned int indirect_size;
142 bool allow_ext_sg; 139 bool allow_ext_sg;
143 bool transport_offline;
144 140
145 /* Everything above this point is used in the hot path of 141 /* Everything above this point is used in the hot path of
146 * command processing. Try to keep them packed into cachelines. 142 * command processing. Try to keep them packed into cachelines.
@@ -153,10 +149,14 @@ struct srp_target_port {
153 u16 io_class; 149 u16 io_class;
154 struct srp_host *srp_host; 150 struct srp_host *srp_host;
155 struct Scsi_Host *scsi_host; 151 struct Scsi_Host *scsi_host;
152 struct srp_rport *rport;
156 char target_name[32]; 153 char target_name[32];
157 unsigned int scsi_id; 154 unsigned int scsi_id;
158 unsigned int sg_tablesize; 155 unsigned int sg_tablesize;
156 int queue_size;
157 int req_ring_size;
159 int comp_vector; 158 int comp_vector;
159 int tl_retry_count;
160 160
161 struct ib_sa_path_rec path; 161 struct ib_sa_path_rec path;
162 __be16 orig_dgid[8]; 162 __be16 orig_dgid[8];
@@ -172,10 +172,11 @@ struct srp_target_port {
172 172
173 int zero_req_lim; 173 int zero_req_lim;
174 174
175 struct srp_iu *tx_ring[SRP_SQ_SIZE]; 175 struct srp_iu **tx_ring;
176 struct srp_iu *rx_ring[SRP_RQ_SIZE]; 176 struct srp_iu **rx_ring;
177 struct srp_request req_ring[SRP_CMD_SQ_SIZE]; 177 struct srp_request *req_ring;
178 178
179 struct work_struct tl_err_work;
179 struct work_struct remove_work; 180 struct work_struct remove_work;
180 181
181 struct list_head list; 182 struct list_head list;
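Turning the fixed-size tx_ring/rx_ring/req_ring arrays into pointers is what lets the per-target queue_size option take effect: the rings are sized at login time instead of by compile-time constants. A short sketch of the kind of allocation this implies (the real helpers live outside this hunk):

    /* Sketch only: size the IU rings from the per-target queue_size. */
    static int example_alloc_iu_rings(struct srp_target_port *target)
    {
            target->tx_ring = kcalloc(target->queue_size,
                                      sizeof(*target->tx_ring), GFP_KERNEL);
            target->rx_ring = kcalloc(target->queue_size,
                                      sizeof(*target->rx_ring), GFP_KERNEL);
            if (!target->tx_ring || !target->rx_ring) {
                    kfree(target->tx_ring);
                    kfree(target->rx_ring);
                    target->tx_ring = NULL;
                    target->rx_ring = NULL;
                    return -ENOMEM;
            }
            return 0;
    }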
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 6ca30739625f..8675d26a678b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -98,6 +98,7 @@ enum {
98static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, 98static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
99 struct mlx5_cmd_msg *in, 99 struct mlx5_cmd_msg *in,
100 struct mlx5_cmd_msg *out, 100 struct mlx5_cmd_msg *out,
101 void *uout, int uout_size,
101 mlx5_cmd_cbk_t cbk, 102 mlx5_cmd_cbk_t cbk,
102 void *context, int page_queue) 103 void *context, int page_queue)
103{ 104{
@@ -110,6 +111,8 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
110 111
111 ent->in = in; 112 ent->in = in;
112 ent->out = out; 113 ent->out = out;
114 ent->uout = uout;
115 ent->uout_size = uout_size;
113 ent->callback = cbk; 116 ent->callback = cbk;
114 ent->context = context; 117 ent->context = context;
115 ent->cmd = cmd; 118 ent->cmd = cmd;
@@ -534,6 +537,7 @@ static void cmd_work_handler(struct work_struct *work)
534 ent->lay = lay; 537 ent->lay = lay;
535 memset(lay, 0, sizeof(*lay)); 538 memset(lay, 0, sizeof(*lay));
536 memcpy(lay->in, ent->in->first.data, sizeof(lay->in)); 539 memcpy(lay->in, ent->in->first.data, sizeof(lay->in));
540 ent->op = be32_to_cpu(lay->in[0]) >> 16;
537 if (ent->in->next) 541 if (ent->in->next)
538 lay->in_ptr = cpu_to_be64(ent->in->next->dma); 542 lay->in_ptr = cpu_to_be64(ent->in->next->dma);
539 lay->inlen = cpu_to_be32(ent->in->len); 543 lay->inlen = cpu_to_be32(ent->in->len);
@@ -628,7 +632,8 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
628 * 2. page queue commands do not support asynchrous completion 632 * 2. page queue commands do not support asynchrous completion
629 */ 633 */
630static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, 634static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
631 struct mlx5_cmd_msg *out, mlx5_cmd_cbk_t callback, 635 struct mlx5_cmd_msg *out, void *uout, int uout_size,
636 mlx5_cmd_cbk_t callback,
632 void *context, int page_queue, u8 *status) 637 void *context, int page_queue, u8 *status)
633{ 638{
634 struct mlx5_cmd *cmd = &dev->cmd; 639 struct mlx5_cmd *cmd = &dev->cmd;
@@ -642,7 +647,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
642 if (callback && page_queue) 647 if (callback && page_queue)
643 return -EINVAL; 648 return -EINVAL;
644 649
645 ent = alloc_cmd(cmd, in, out, callback, context, page_queue); 650 ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context,
651 page_queue);
646 if (IS_ERR(ent)) 652 if (IS_ERR(ent))
647 return PTR_ERR(ent); 653 return PTR_ERR(ent);
648 654
@@ -670,10 +676,10 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
670 op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode); 676 op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
671 if (op < ARRAY_SIZE(cmd->stats)) { 677 if (op < ARRAY_SIZE(cmd->stats)) {
672 stats = &cmd->stats[op]; 678 stats = &cmd->stats[op];
673 spin_lock(&stats->lock); 679 spin_lock_irq(&stats->lock);
674 stats->sum += ds; 680 stats->sum += ds;
675 ++stats->n; 681 ++stats->n;
676 spin_unlock(&stats->lock); 682 spin_unlock_irq(&stats->lock);
677 } 683 }
678 mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, 684 mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
679 "fw exec time for %s is %lld nsec\n", 685 "fw exec time for %s is %lld nsec\n",
@@ -826,7 +832,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
826 int n; 832 int n;
827 int i; 833 int i;
828 834
829 msg = kzalloc(sizeof(*msg), GFP_KERNEL); 835 msg = kzalloc(sizeof(*msg), flags);
830 if (!msg) 836 if (!msg)
831 return ERR_PTR(-ENOMEM); 837 return ERR_PTR(-ENOMEM);
832 838
@@ -1109,6 +1115,19 @@ void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
1109 up(&cmd->sem); 1115 up(&cmd->sem);
1110} 1116}
1111 1117
1118static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
1119{
1120 unsigned long flags;
1121
1122 if (msg->cache) {
1123 spin_lock_irqsave(&msg->cache->lock, flags);
1124 list_add_tail(&msg->list, &msg->cache->head);
1125 spin_unlock_irqrestore(&msg->cache->lock, flags);
1126 } else {
1127 mlx5_free_cmd_msg(dev, msg);
1128 }
1129}
1130
1112void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector) 1131void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
1113{ 1132{
1114 struct mlx5_cmd *cmd = &dev->cmd; 1133 struct mlx5_cmd *cmd = &dev->cmd;
@@ -1117,6 +1136,10 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
1117 void *context; 1136 void *context;
1118 int err; 1137 int err;
1119 int i; 1138 int i;
1139 ktime_t t1, t2, delta;
1140 s64 ds;
1141 struct mlx5_cmd_stats *stats;
1142 unsigned long flags;
1120 1143
1121 for (i = 0; i < (1 << cmd->log_sz); i++) { 1144 for (i = 0; i < (1 << cmd->log_sz); i++) {
1122 if (test_bit(i, &vector)) { 1145 if (test_bit(i, &vector)) {
@@ -1141,9 +1164,29 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
1141 } 1164 }
1142 free_ent(cmd, ent->idx); 1165 free_ent(cmd, ent->idx);
1143 if (ent->callback) { 1166 if (ent->callback) {
1167 t1 = timespec_to_ktime(ent->ts1);
1168 t2 = timespec_to_ktime(ent->ts2);
1169 delta = ktime_sub(t2, t1);
1170 ds = ktime_to_ns(delta);
1171 if (ent->op < ARRAY_SIZE(cmd->stats)) {
1172 stats = &cmd->stats[ent->op];
1173 spin_lock_irqsave(&stats->lock, flags);
1174 stats->sum += ds;
1175 ++stats->n;
1176 spin_unlock_irqrestore(&stats->lock, flags);
1177 }
1178
1144 callback = ent->callback; 1179 callback = ent->callback;
1145 context = ent->context; 1180 context = ent->context;
1146 err = ent->ret; 1181 err = ent->ret;
1182 if (!err)
1183 err = mlx5_copy_from_msg(ent->uout,
1184 ent->out,
1185 ent->uout_size);
1186
1187 mlx5_free_cmd_msg(dev, ent->out);
1188 free_msg(dev, ent->in);
1189
1147 free_cmd(ent); 1190 free_cmd(ent);
1148 callback(err, context); 1191 callback(err, context);
1149 } else { 1192 } else {
@@ -1160,7 +1203,8 @@ static int status_to_err(u8 status)
1160 return status ? -1 : 0; /* TBD more meaningful codes */ 1203 return status ? -1 : 0; /* TBD more meaningful codes */
1161} 1204}
1162 1205
1163static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size) 1206static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
1207 gfp_t gfp)
1164{ 1208{
1165 struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM); 1209 struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM);
1166 struct mlx5_cmd *cmd = &dev->cmd; 1210 struct mlx5_cmd *cmd = &dev->cmd;
@@ -1172,7 +1216,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
1172 ent = &cmd->cache.med; 1216 ent = &cmd->cache.med;
1173 1217
1174 if (ent) { 1218 if (ent) {
1175 spin_lock(&ent->lock); 1219 spin_lock_irq(&ent->lock);
1176 if (!list_empty(&ent->head)) { 1220 if (!list_empty(&ent->head)) {
1177 msg = list_entry(ent->head.next, typeof(*msg), list); 1221 msg = list_entry(ent->head.next, typeof(*msg), list);
1178 /* For cached lists, we must explicitly state what is 1222 /* For cached lists, we must explicitly state what is
@@ -1181,43 +1225,34 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
1181 msg->len = in_size; 1225 msg->len = in_size;
1182 list_del(&msg->list); 1226 list_del(&msg->list);
1183 } 1227 }
1184 spin_unlock(&ent->lock); 1228 spin_unlock_irq(&ent->lock);
1185 } 1229 }
1186 1230
1187 if (IS_ERR(msg)) 1231 if (IS_ERR(msg))
1188 msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, in_size); 1232 msg = mlx5_alloc_cmd_msg(dev, gfp, in_size);
1189 1233
1190 return msg; 1234 return msg;
1191} 1235}
1192 1236
1193static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
1194{
1195 if (msg->cache) {
1196 spin_lock(&msg->cache->lock);
1197 list_add_tail(&msg->list, &msg->cache->head);
1198 spin_unlock(&msg->cache->lock);
1199 } else {
1200 mlx5_free_cmd_msg(dev, msg);
1201 }
1202}
1203
1204static int is_manage_pages(struct mlx5_inbox_hdr *in) 1237static int is_manage_pages(struct mlx5_inbox_hdr *in)
1205{ 1238{
1206 return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES; 1239 return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES;
1207} 1240}
1208 1241
1209int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, 1242static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
1210 int out_size) 1243 int out_size, mlx5_cmd_cbk_t callback, void *context)
1211{ 1244{
1212 struct mlx5_cmd_msg *inb; 1245 struct mlx5_cmd_msg *inb;
1213 struct mlx5_cmd_msg *outb; 1246 struct mlx5_cmd_msg *outb;
1214 int pages_queue; 1247 int pages_queue;
1248 gfp_t gfp;
1215 int err; 1249 int err;
1216 u8 status = 0; 1250 u8 status = 0;
1217 1251
1218 pages_queue = is_manage_pages(in); 1252 pages_queue = is_manage_pages(in);
1253 gfp = callback ? GFP_ATOMIC : GFP_KERNEL;
1219 1254
1220 inb = alloc_msg(dev, in_size); 1255 inb = alloc_msg(dev, in_size, gfp);
1221 if (IS_ERR(inb)) { 1256 if (IS_ERR(inb)) {
1222 err = PTR_ERR(inb); 1257 err = PTR_ERR(inb);
1223 return err; 1258 return err;
@@ -1229,13 +1264,14 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
1229 goto out_in; 1264 goto out_in;
1230 } 1265 }
1231 1266
1232 outb = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, out_size); 1267 outb = mlx5_alloc_cmd_msg(dev, gfp, out_size);
1233 if (IS_ERR(outb)) { 1268 if (IS_ERR(outb)) {
1234 err = PTR_ERR(outb); 1269 err = PTR_ERR(outb);
1235 goto out_in; 1270 goto out_in;
1236 } 1271 }
1237 1272
1238 err = mlx5_cmd_invoke(dev, inb, outb, NULL, NULL, pages_queue, &status); 1273 err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
1274 pages_queue, &status);
1239 if (err) 1275 if (err)
1240 goto out_out; 1276 goto out_out;
1241 1277
@@ -1248,14 +1284,30 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
1248 err = mlx5_copy_from_msg(out, outb, out_size); 1284 err = mlx5_copy_from_msg(out, outb, out_size);
1249 1285
1250out_out: 1286out_out:
1251 mlx5_free_cmd_msg(dev, outb); 1287 if (!callback)
1288 mlx5_free_cmd_msg(dev, outb);
1252 1289
1253out_in: 1290out_in:
1254 free_msg(dev, inb); 1291 if (!callback)
1292 free_msg(dev, inb);
1255 return err; 1293 return err;
1256} 1294}
1295
1296int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
1297 int out_size)
1298{
1299 return cmd_exec(dev, in, in_size, out, out_size, NULL, NULL);
1300}
1257EXPORT_SYMBOL(mlx5_cmd_exec); 1301EXPORT_SYMBOL(mlx5_cmd_exec);
1258 1302
1303int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
1304 void *out, int out_size, mlx5_cmd_cbk_t callback,
1305 void *context)
1306{
1307 return cmd_exec(dev, in, in_size, out, out_size, callback, context);
1308}
1309EXPORT_SYMBOL(mlx5_cmd_exec_cb);
1310
1259static void destroy_msg_cache(struct mlx5_core_dev *dev) 1311static void destroy_msg_cache(struct mlx5_core_dev *dev)
1260{ 1312{
1261 struct mlx5_cmd *cmd = &dev->cmd; 1313 struct mlx5_cmd *cmd = &dev->cmd;
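Splitting the old entry point into cmd_exec() plus the mlx5_cmd_exec()/mlx5_cmd_exec_cb() wrappers gives callers an asynchronous mode: the caller-supplied output mailbox is filled in and the callback runs from the command completion path, which is also why message allocation switches to GFP_ATOMIC when a callback is present. A hedged usage sketch with a hypothetical caller; the out buffer and context must stay alive until the callback has run:

    #include <linux/completion.h>
    #include <linux/mlx5/driver.h>

    struct example_ctx {
            u8 out[64];             /* a command-specific mailbox in real callers */
            struct completion done;
    };

    static void example_done(int status, void *context)
    {
            struct example_ctx *ctx = context;

            /* status is the command result; ctx->out has been filled in. */
            complete(&ctx->done);
    }

    static int example_issue_async(struct mlx5_core_dev *dev,
                                   void *in, int in_size,
                                   struct example_ctx *ctx)
    {
            init_completion(&ctx->done);
            return mlx5_cmd_exec_cb(dev, in, in_size, ctx->out,
                                    sizeof(ctx->out), example_done, ctx);
    }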
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 9c7194b26ee2..80f6d127257a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -154,10 +154,10 @@ static ssize_t average_read(struct file *filp, char __user *buf, size_t count,
154 return 0; 154 return 0;
155 155
156 stats = filp->private_data; 156 stats = filp->private_data;
157 spin_lock(&stats->lock); 157 spin_lock_irq(&stats->lock);
158 if (stats->n) 158 if (stats->n)
159 field = div64_u64(stats->sum, stats->n); 159 field = div64_u64(stats->sum, stats->n);
160 spin_unlock(&stats->lock); 160 spin_unlock_irq(&stats->lock);
161 ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field); 161 ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field);
162 if (ret > 0) { 162 if (ret > 0) {
163 if (copy_to_user(buf, tbuf, ret)) 163 if (copy_to_user(buf, tbuf, ret))
@@ -175,10 +175,10 @@ static ssize_t average_write(struct file *filp, const char __user *buf,
175 struct mlx5_cmd_stats *stats; 175 struct mlx5_cmd_stats *stats;
176 176
177 stats = filp->private_data; 177 stats = filp->private_data;
178 spin_lock(&stats->lock); 178 spin_lock_irq(&stats->lock);
179 stats->sum = 0; 179 stats->sum = 0;
180 stats->n = 0; 180 stats->n = 0;
181 spin_unlock(&stats->lock); 181 spin_unlock_irq(&stats->lock);
182 182
183 *pos += count; 183 *pos += count;
184 184
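These debugfs changes belong with the cmd.c conversions above: now that the per-opcode statistics are also updated from mlx5_cmd_comp_handler(), which can run in interrupt context, every process-context user of stats->lock must disable local interrupts, and the completion path uses the irqsave variant. In sketch form:

    /* Process context, e.g. the debugfs handlers or the synchronous path. */
    static void example_stats_reset(struct mlx5_cmd_stats *stats)
    {
            spin_lock_irq(&stats->lock);
            stats->sum = 0;
            stats->n = 0;
            spin_unlock_irq(&stats->lock);
    }

    /* Completion path, which may already run with interrupts disabled. */
    static void example_stats_add(struct mlx5_cmd_stats *stats, s64 ds)
    {
            unsigned long flags;

            spin_lock_irqsave(&stats->lock, flags);
            stats->sum += ds;
            ++stats->n;
            spin_unlock_irqrestore(&stats->lock, flags);
    }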
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 2231d93cc7ad..64a61b286b2c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -354,7 +354,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
354 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ); 354 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
355 in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index); 355 in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
356 in->ctx.intr = vecidx; 356 in->ctx.intr = vecidx;
357 in->ctx.log_page_size = PAGE_SHIFT - 12; 357 in->ctx.log_page_size = eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
358 in->events_mask = cpu_to_be64(mask); 358 in->events_mask = cpu_to_be64(mask);
359 359
360 err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); 360 err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
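The EQ context now advertises the buffer's real page size relative to the device's 4KB unit instead of hard-coding the host PAGE_SHIFT, so the firmware keeps working on architectures with 64KB pages. Assuming MLX5_ADAPTER_PAGE_SHIFT is 12 (the 4KB unit introduced by this series), the computation is simply:

    /* log_page_size written into the EQ context, assuming a 4KB device unit. */
    static inline u8 example_eq_log_page_size(u8 buf_page_shift)
    {
            return buf_page_shift - 12;     /* 12 == MLX5_ADAPTER_PAGE_SHIFT */
    }

    /* example_eq_log_page_size(12) == 0, example_eq_log_page_size(16) == 4. */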
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index bc0f5fb66e24..40a9f5ed814d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -159,6 +159,36 @@ struct mlx5_reg_host_endianess {
159 u8 rsvd[15]; 159 u8 rsvd[15];
160}; 160};
161 161
162
163#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))
164
165enum {
166 MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
167 CAP_MASK(MLX5_CAP_OFF_DCT, 1),
168};
169
170/* selectively copy writable fields clearing any reserved area
171 */
172static void copy_rw_fields(struct mlx5_hca_cap *to, struct mlx5_hca_cap *from)
173{
174 u64 v64;
175
176 to->log_max_qp = from->log_max_qp & 0x1f;
177 to->log_max_ra_req_dc = from->log_max_ra_req_dc & 0x3f;
178 to->log_max_ra_res_dc = from->log_max_ra_res_dc & 0x3f;
179 to->log_max_ra_req_qp = from->log_max_ra_req_qp & 0x3f;
180 to->log_max_ra_res_qp = from->log_max_ra_res_qp & 0x3f;
181 to->log_max_atomic_size_qp = from->log_max_atomic_size_qp;
182 to->log_max_atomic_size_dc = from->log_max_atomic_size_dc;
183 v64 = be64_to_cpu(from->flags) & MLX5_CAP_BITS_RW_MASK;
184 to->flags = cpu_to_be64(v64);
185}
186
187enum {
188 HCA_CAP_OPMOD_GET_MAX = 0,
189 HCA_CAP_OPMOD_GET_CUR = 1,
190};
191
162static int handle_hca_cap(struct mlx5_core_dev *dev) 192static int handle_hca_cap(struct mlx5_core_dev *dev)
163{ 193{
164 struct mlx5_cmd_query_hca_cap_mbox_out *query_out = NULL; 194 struct mlx5_cmd_query_hca_cap_mbox_out *query_out = NULL;
@@ -180,7 +210,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
180 } 210 }
181 211
182 query_ctx.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP); 212 query_ctx.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);
183 query_ctx.hdr.opmod = cpu_to_be16(0x1); 213 query_ctx.hdr.opmod = cpu_to_be16(HCA_CAP_OPMOD_GET_CUR);
184 err = mlx5_cmd_exec(dev, &query_ctx, sizeof(query_ctx), 214 err = mlx5_cmd_exec(dev, &query_ctx, sizeof(query_ctx),
185 query_out, sizeof(*query_out)); 215 query_out, sizeof(*query_out));
186 if (err) 216 if (err)
@@ -192,8 +222,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
192 goto query_ex; 222 goto query_ex;
193 } 223 }
194 224
195 memcpy(&set_ctx->hca_cap, &query_out->hca_cap, 225 copy_rw_fields(&set_ctx->hca_cap, &query_out->hca_cap);
196 sizeof(set_ctx->hca_cap));
197 226
198 if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE) 227 if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE)
199 set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp; 228 set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp;
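copy_rw_fields() only echoes back the capability bits the driver is allowed to modify; CAP_MASK(pos, size) builds a contiguous mask of size bits starting at bit pos, and MLX5_CAP_BITS_RW_MASK ORs those masks together. A tiny numeric illustration with placeholder offsets (the real MLX5_CAP_OFF_* values are defined elsewhere in this series):

    #include <linux/types.h>

    #define EX_CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))

    /* Two writable bits at offset 3 plus one bit at offset 9 -> 0x218. */
    static inline u64 example_rw_mask(void)
    {
            return EX_CAP_MASK(3, 2) | EX_CAP_MASK(9, 1);
    }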
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 5b44e2e46daf..35e514dc7b7d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -37,31 +37,41 @@
37#include "mlx5_core.h" 37#include "mlx5_core.h"
38 38
39int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, 39int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
40 struct mlx5_create_mkey_mbox_in *in, int inlen) 40 struct mlx5_create_mkey_mbox_in *in, int inlen,
41 mlx5_cmd_cbk_t callback, void *context,
42 struct mlx5_create_mkey_mbox_out *out)
41{ 43{
42 struct mlx5_create_mkey_mbox_out out; 44 struct mlx5_create_mkey_mbox_out lout;
43 int err; 45 int err;
44 u8 key; 46 u8 key;
45 47
46 memset(&out, 0, sizeof(out)); 48 memset(&lout, 0, sizeof(lout));
47 spin_lock(&dev->priv.mkey_lock); 49 spin_lock_irq(&dev->priv.mkey_lock);
48 key = dev->priv.mkey_key++; 50 key = dev->priv.mkey_key++;
49 spin_unlock(&dev->priv.mkey_lock); 51 spin_unlock_irq(&dev->priv.mkey_lock);
50 in->seg.qpn_mkey7_0 |= cpu_to_be32(key); 52 in->seg.qpn_mkey7_0 |= cpu_to_be32(key);
51 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY); 53 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY);
52 err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); 54 if (callback) {
55 err = mlx5_cmd_exec_cb(dev, in, inlen, out, sizeof(*out),
56 callback, context);
57 return err;
58 } else {
59 err = mlx5_cmd_exec(dev, in, inlen, &lout, sizeof(lout));
60 }
61
53 if (err) { 62 if (err) {
54 mlx5_core_dbg(dev, "cmd exec faile %d\n", err); 63 mlx5_core_dbg(dev, "cmd exec faile %d\n", err);
55 return err; 64 return err;
56 } 65 }
57 66
58 if (out.hdr.status) { 67 if (lout.hdr.status) {
59 mlx5_core_dbg(dev, "status %d\n", out.hdr.status); 68 mlx5_core_dbg(dev, "status %d\n", lout.hdr.status);
60 return mlx5_cmd_status_to_err(&out.hdr); 69 return mlx5_cmd_status_to_err(&lout.hdr);
61 } 70 }
62 71
63 mr->key = mlx5_idx_to_mkey(be32_to_cpu(out.mkey) & 0xffffff) | key; 72 mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key;
64 mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", be32_to_cpu(out.mkey), key, mr->key); 73 mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
74 be32_to_cpu(lout.mkey), key, mr->key);
65 75
66 return err; 76 return err;
67} 77}
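mlx5_core_create_mkey() now supports two calling conventions: synchronous (callback, context and out all NULL, the result parsed into mr->key as before) and asynchronous (a callback plus a caller-owned out mailbox that must outlive the command). Both shapes spelled out as hedged wrappers; the asynchronous path is consumed elsewhere in this series, not in this hunk:

    static int example_create_mkey_sync(struct mlx5_core_dev *dev,
                                        struct mlx5_core_mr *mr,
                                        struct mlx5_create_mkey_mbox_in *in,
                                        int inlen)
    {
            return mlx5_core_create_mkey(dev, mr, in, inlen, NULL, NULL, NULL);
    }

    static int example_create_mkey_async(struct mlx5_core_dev *dev,
                                         struct mlx5_core_mr *mr,
                                         struct mlx5_create_mkey_mbox_in *in,
                                         int inlen,
                                         struct mlx5_create_mkey_mbox_out *out,
                                         mlx5_cmd_cbk_t done, void *ctx)
    {
            return mlx5_core_create_mkey(dev, mr, in, inlen, done, ctx, out);
    }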
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 7b12acf210f8..37b6ad1f9a1b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -57,10 +57,13 @@ struct mlx5_pages_req {
57}; 57};
58 58
59struct fw_page { 59struct fw_page {
60 struct rb_node rb_node; 60 struct rb_node rb_node;
61 u64 addr; 61 u64 addr;
62 struct page *page; 62 struct page *page;
63 u16 func_id; 63 u16 func_id;
64 unsigned long bitmask;
65 struct list_head list;
66 unsigned free_count;
64}; 67};
65 68
66struct mlx5_query_pages_inbox { 69struct mlx5_query_pages_inbox {
@@ -94,6 +97,11 @@ enum {
94 MAX_RECLAIM_TIME_MSECS = 5000, 97 MAX_RECLAIM_TIME_MSECS = 5000,
95}; 98};
96 99
100enum {
101 MLX5_MAX_RECLAIM_TIME_MILI = 5000,
102 MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / 4096,
103};
104
97static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id) 105static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
98{ 106{
99 struct rb_root *root = &dev->priv.page_root; 107 struct rb_root *root = &dev->priv.page_root;
@@ -101,6 +109,7 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
101 struct rb_node *parent = NULL; 109 struct rb_node *parent = NULL;
102 struct fw_page *nfp; 110 struct fw_page *nfp;
103 struct fw_page *tfp; 111 struct fw_page *tfp;
112 int i;
104 113
105 while (*new) { 114 while (*new) {
106 parent = *new; 115 parent = *new;
@@ -113,25 +122,29 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
113 return -EEXIST; 122 return -EEXIST;
114 } 123 }
115 124
116 nfp = kmalloc(sizeof(*nfp), GFP_KERNEL); 125 nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
117 if (!nfp) 126 if (!nfp)
118 return -ENOMEM; 127 return -ENOMEM;
119 128
120 nfp->addr = addr; 129 nfp->addr = addr;
121 nfp->page = page; 130 nfp->page = page;
122 nfp->func_id = func_id; 131 nfp->func_id = func_id;
132 nfp->free_count = MLX5_NUM_4K_IN_PAGE;
133 for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++)
134 set_bit(i, &nfp->bitmask);
123 135
124 rb_link_node(&nfp->rb_node, parent, new); 136 rb_link_node(&nfp->rb_node, parent, new);
125 rb_insert_color(&nfp->rb_node, root); 137 rb_insert_color(&nfp->rb_node, root);
138 list_add(&nfp->list, &dev->priv.free_list);
126 139
127 return 0; 140 return 0;
128} 141}
129 142
130static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr) 143static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
131{ 144{
132 struct rb_root *root = &dev->priv.page_root; 145 struct rb_root *root = &dev->priv.page_root;
133 struct rb_node *tmp = root->rb_node; 146 struct rb_node *tmp = root->rb_node;
134 struct page *result = NULL; 147 struct fw_page *result = NULL;
135 struct fw_page *tfp; 148 struct fw_page *tfp;
136 149
137 while (tmp) { 150 while (tmp) {
@@ -141,9 +154,7 @@ static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
141 } else if (tfp->addr > addr) { 154 } else if (tfp->addr > addr) {
142 tmp = tmp->rb_right; 155 tmp = tmp->rb_right;
143 } else { 156 } else {
144 rb_erase(&tfp->rb_node, root); 157 result = tfp;
145 result = tfp->page;
146 kfree(tfp);
147 break; 158 break;
148 } 159 }
149 } 160 }
@@ -176,12 +187,98 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
176 return err; 187 return err;
177} 188}
178 189
190static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
191{
192 struct fw_page *fp;
193 unsigned n;
194
195 if (list_empty(&dev->priv.free_list)) {
 196		mlx5_core_warn(dev, "\n");
 197		return -ENOMEM;
198 }
199
200 fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
201 n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
202 if (n >= MLX5_NUM_4K_IN_PAGE) {
203 mlx5_core_warn(dev, "alloc 4k bug\n");
204 return -ENOENT;
205 }
206 clear_bit(n, &fp->bitmask);
207 fp->free_count--;
208 if (!fp->free_count)
209 list_del(&fp->list);
210
211 *addr = fp->addr + n * 4096;
212
213 return 0;
214}
215
216static void free_4k(struct mlx5_core_dev *dev, u64 addr)
217{
218 struct fw_page *fwp;
219 int n;
220
221 fwp = find_fw_page(dev, addr & PAGE_MASK);
222 if (!fwp) {
223 mlx5_core_warn(dev, "page not found\n");
224 return;
225 }
226
227 n = (addr & ~PAGE_MASK) % 4096;
228 fwp->free_count++;
229 set_bit(n, &fwp->bitmask);
230 if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
231 rb_erase(&fwp->rb_node, &dev->priv.page_root);
232 if (fwp->free_count != 1)
233 list_del(&fwp->list);
234 dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
235 __free_page(fwp->page);
236 kfree(fwp);
237 } else if (fwp->free_count == 1) {
238 list_add(&fwp->list, &dev->priv.free_list);
239 }
240}
241
242static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
243{
244 struct page *page;
245 u64 addr;
246 int err;
247
248 page = alloc_page(GFP_HIGHUSER);
249 if (!page) {
250 mlx5_core_warn(dev, "failed to allocate page\n");
251 return -ENOMEM;
252 }
253 addr = dma_map_page(&dev->pdev->dev, page, 0,
254 PAGE_SIZE, DMA_BIDIRECTIONAL);
255 if (dma_mapping_error(&dev->pdev->dev, addr)) {
256 mlx5_core_warn(dev, "failed dma mapping page\n");
257 err = -ENOMEM;
258 goto out_alloc;
259 }
260 err = insert_page(dev, addr, page, func_id);
261 if (err) {
262 mlx5_core_err(dev, "failed to track allocated page\n");
263 goto out_mapping;
264 }
265
266 return 0;
267
268out_mapping:
269 dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
270
271out_alloc:
272 __free_page(page);
273
274 return err;
275}
179static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, 276static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
180 int notify_fail) 277 int notify_fail)
181{ 278{
182 struct mlx5_manage_pages_inbox *in; 279 struct mlx5_manage_pages_inbox *in;
183 struct mlx5_manage_pages_outbox out; 280 struct mlx5_manage_pages_outbox out;
184 struct page *page; 281 struct mlx5_manage_pages_inbox *nin;
185 int inlen; 282 int inlen;
186 u64 addr; 283 u64 addr;
187 int err; 284 int err;
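On architectures whose PAGE_SIZE exceeds 4KB, a single system page is now carved into MLX5_NUM_4K_IN_PAGE firmware pages: fw_page tracks free 4KB chunks in bitmask/free_count, alloc_4k() hands out one chunk at a time, and free_4k() only returns the system page to the kernel once every chunk has come back. An illustration of the address/chunk mapping that scheme relies on, assuming the chunk index is the in-page offset divided by 4096:

    #include <linux/mm.h>

    /* Address handed to firmware for chunk n of a mapped system page. */
    static inline u64 example_chunk_addr(u64 page_addr, unsigned int n)
    {
            return page_addr + n * 4096;
    }

    /* Which bitmask bit to set again when that address is reclaimed. */
    static inline unsigned int example_chunk_index(u64 addr)
    {
            return (addr & ~PAGE_MASK) / 4096;
    }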
@@ -196,27 +293,15 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
196 memset(&out, 0, sizeof(out)); 293 memset(&out, 0, sizeof(out));
197 294
198 for (i = 0; i < npages; i++) { 295 for (i = 0; i < npages; i++) {
199 page = alloc_page(GFP_HIGHUSER); 296retry:
200 if (!page) { 297 err = alloc_4k(dev, &addr);
201 err = -ENOMEM;
202 mlx5_core_warn(dev, "failed to allocate page\n");
203 goto out_alloc;
204 }
205 addr = dma_map_page(&dev->pdev->dev, page, 0,
206 PAGE_SIZE, DMA_BIDIRECTIONAL);
207 if (dma_mapping_error(&dev->pdev->dev, addr)) {
208 mlx5_core_warn(dev, "failed dma mapping page\n");
209 __free_page(page);
210 err = -ENOMEM;
211 goto out_alloc;
212 }
213 err = insert_page(dev, addr, page, func_id);
214 if (err) { 298 if (err) {
215 mlx5_core_err(dev, "failed to track allocated page\n"); 299 if (err == -ENOMEM)
216 dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); 300 err = alloc_system_page(dev, func_id);
217 __free_page(page); 301 if (err)
218 err = -ENOMEM; 302 goto out_4k;
219 goto out_alloc; 303
304 goto retry;
220 } 305 }
221 in->pas[i] = cpu_to_be64(addr); 306 in->pas[i] = cpu_to_be64(addr);
222 } 307 }
@@ -226,7 +311,6 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
226 in->func_id = cpu_to_be16(func_id); 311 in->func_id = cpu_to_be16(func_id);
227 in->num_entries = cpu_to_be32(npages); 312 in->num_entries = cpu_to_be32(npages);
228 err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); 313 err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
229 mlx5_core_dbg(dev, "err %d\n", err);
230 if (err) { 314 if (err) {
231 mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err); 315 mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err);
232 goto out_alloc; 316 goto out_alloc;
@@ -247,25 +331,22 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
247 331
248out_alloc: 332out_alloc:
249 if (notify_fail) { 333 if (notify_fail) {
250 memset(in, 0, inlen); 334 nin = kzalloc(sizeof(*nin), GFP_KERNEL);
251 memset(&out, 0, sizeof(out)); 335 if (!nin) {
252 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); 336 mlx5_core_warn(dev, "allocation failed\n");
253 in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE); 337 goto out_4k;
254 if (mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)))
255 mlx5_core_warn(dev, "\n");
256 }
257 for (i--; i >= 0; i--) {
258 addr = be64_to_cpu(in->pas[i]);
259 page = remove_page(dev, addr);
260 if (!page) {
261 mlx5_core_err(dev, "BUG: can't remove page at addr 0x%llx\n",
262 addr);
263 continue;
264 } 338 }
265 dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); 339 memset(&out, 0, sizeof(out));
266 __free_page(page); 340 nin->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
341 nin->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
342 if (mlx5_cmd_exec(dev, nin, sizeof(*nin), &out, sizeof(out)))
343 mlx5_core_warn(dev, "page notify failed\n");
344 kfree(nin);
267 } 345 }
268 346
347out_4k:
348 for (i--; i >= 0; i--)
349 free_4k(dev, be64_to_cpu(in->pas[i]));
269out_free: 350out_free:
270 mlx5_vfree(in); 351 mlx5_vfree(in);
271 return err; 352 return err;
@@ -276,7 +357,6 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
276{ 357{
277 struct mlx5_manage_pages_inbox in; 358 struct mlx5_manage_pages_inbox in;
278 struct mlx5_manage_pages_outbox *out; 359 struct mlx5_manage_pages_outbox *out;
279 struct page *page;
280 int num_claimed; 360 int num_claimed;
281 int outlen; 361 int outlen;
282 u64 addr; 362 u64 addr;
@@ -315,13 +395,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
315 395
316 for (i = 0; i < num_claimed; i++) { 396 for (i = 0; i < num_claimed; i++) {
317 addr = be64_to_cpu(out->pas[i]); 397 addr = be64_to_cpu(out->pas[i]);
318 page = remove_page(dev, addr); 398 free_4k(dev, addr);
319 if (!page) {
320 mlx5_core_warn(dev, "FW reported unknown DMA address 0x%llx\n", addr);
321 } else {
322 dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
323 __free_page(page);
324 }
325 } 399 }
326 400
327out_free: 401out_free:
@@ -381,14 +455,19 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
381 return give_pages(dev, func_id, npages, 0); 455 return give_pages(dev, func_id, npages, 0);
382} 456}
383 457
458enum {
459 MLX5_BLKS_FOR_RECLAIM_PAGES = 12
460};
461
384static int optimal_reclaimed_pages(void) 462static int optimal_reclaimed_pages(void)
385{ 463{
386 struct mlx5_cmd_prot_block *block; 464 struct mlx5_cmd_prot_block *block;
387 struct mlx5_cmd_layout *lay; 465 struct mlx5_cmd_layout *lay;
388 int ret; 466 int ret;
389 467
390 ret = (sizeof(lay->in) + sizeof(block->data) - 468 ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) -
391 sizeof(struct mlx5_manage_pages_outbox)) / 8; 469 sizeof(struct mlx5_manage_pages_outbox)) /
470 FIELD_SIZEOF(struct mlx5_manage_pages_outbox, pas[0]);
392 471
393 return ret; 472 return ret;
394} 473}
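optimal_reclaimed_pages() now answers how many 4KB page addresses fit into one reclaim command: the inline output layout plus MLX5_BLKS_FOR_RECLAIM_PAGES mailbox data blocks, minus the fixed part of the outbox, divided by the size of one 64-bit address entry. The same computation with the sizes passed in explicitly, for clarity only:

    #include <linux/types.h>

    static int example_optimal_reclaimed_pages(size_t lay_out_size,
                                               size_t block_data_size,
                                               size_t outbox_fixed_size)
    {
            size_t room = lay_out_size + 12 * block_data_size -
                          outbox_fixed_size;

            return room / sizeof(__be64);   /* one address per reclaimed page */
    }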
@@ -427,6 +506,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
427void mlx5_pagealloc_init(struct mlx5_core_dev *dev) 506void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
428{ 507{
429 dev->priv.page_root = RB_ROOT; 508 dev->priv.page_root = RB_ROOT;
509 INIT_LIST_HEAD(&dev->priv.free_list);
430} 510}
431 511
432void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) 512void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c
index f379c7f3034c..2700a5a09bd4 100644
--- a/drivers/scsi/scsi_transport_srp.c
+++ b/drivers/scsi/scsi_transport_srp.c
@@ -24,12 +24,15 @@
24#include <linux/err.h> 24#include <linux/err.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/string.h> 26#include <linux/string.h>
27#include <linux/delay.h>
27 28
28#include <scsi/scsi.h> 29#include <scsi/scsi.h>
30#include <scsi/scsi_cmnd.h>
29#include <scsi/scsi_device.h> 31#include <scsi/scsi_device.h>
30#include <scsi/scsi_host.h> 32#include <scsi/scsi_host.h>
31#include <scsi/scsi_transport.h> 33#include <scsi/scsi_transport.h>
32#include <scsi/scsi_transport_srp.h> 34#include <scsi/scsi_transport_srp.h>
35#include "scsi_priv.h"
33#include "scsi_transport_srp_internal.h" 36#include "scsi_transport_srp_internal.h"
34 37
35struct srp_host_attrs { 38struct srp_host_attrs {
@@ -38,7 +41,7 @@ struct srp_host_attrs {
38#define to_srp_host_attrs(host) ((struct srp_host_attrs *)(host)->shost_data) 41#define to_srp_host_attrs(host) ((struct srp_host_attrs *)(host)->shost_data)
39 42
40#define SRP_HOST_ATTRS 0 43#define SRP_HOST_ATTRS 0
41#define SRP_RPORT_ATTRS 3 44#define SRP_RPORT_ATTRS 8
42 45
43struct srp_internal { 46struct srp_internal {
44 struct scsi_transport_template t; 47 struct scsi_transport_template t;
@@ -54,6 +57,36 @@ struct srp_internal {
54 57
55#define dev_to_rport(d) container_of(d, struct srp_rport, dev) 58#define dev_to_rport(d) container_of(d, struct srp_rport, dev)
56#define transport_class_to_srp_rport(dev) dev_to_rport((dev)->parent) 59#define transport_class_to_srp_rport(dev) dev_to_rport((dev)->parent)
60static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r)
61{
62 return dev_to_shost(r->dev.parent);
63}
64
65/**
66 * srp_tmo_valid() - check timeout combination validity
67 *
68 * The combination of the timeout parameters must be such that SCSI commands
69 * are finished in a reasonable time. Hence do not allow the fast I/O fail
70 * timeout to exceed SCSI_DEVICE_BLOCK_MAX_TIMEOUT. Furthermore, these
71 * parameters must be such that multipath can detect failed paths timely.
72 * Hence do not allow all three parameters to be disabled simultaneously.
73 */
74int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo, int dev_loss_tmo)
75{
76 if (reconnect_delay < 0 && fast_io_fail_tmo < 0 && dev_loss_tmo < 0)
77 return -EINVAL;
78 if (reconnect_delay == 0)
79 return -EINVAL;
80 if (fast_io_fail_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
81 return -EINVAL;
82 if (dev_loss_tmo >= LONG_MAX / HZ)
83 return -EINVAL;
84 if (fast_io_fail_tmo >= 0 && dev_loss_tmo >= 0 &&
85 fast_io_fail_tmo >= dev_loss_tmo)
86 return -EINVAL;
87 return 0;
88}
89EXPORT_SYMBOL_GPL(srp_tmo_valid);
57 90
58static int srp_host_setup(struct transport_container *tc, struct device *dev, 91static int srp_host_setup(struct transport_container *tc, struct device *dev,
59 struct device *cdev) 92 struct device *cdev)
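srp_tmo_valid() is exported so an LLD can apply the same rules to its module parameters: a negative value means the mechanism is disabled, reconnect_delay may not be zero, fast_io_fail_tmo is capped by the SCSI device-block limit, and when both are enabled fast_io_fail_tmo has to be smaller than dev_loss_tmo. A few sample calls and the verdicts they should produce:

    static void example_tmo_checks(void)
    {
            WARN_ON(srp_tmo_valid(10, 15, 600) != 0);  /* sane combination */
            WARN_ON(srp_tmo_valid(-1, -1, -1) == 0);   /* everything disabled */
            WARN_ON(srp_tmo_valid(0, 15, 600) == 0);   /* zero reconnect_delay */
            WARN_ON(srp_tmo_valid(10, 600, 15) == 0);  /* fast fail >= dev loss */
    }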
@@ -134,10 +167,465 @@ static ssize_t store_srp_rport_delete(struct device *dev,
134 167
135static DEVICE_ATTR(delete, S_IWUSR, NULL, store_srp_rport_delete); 168static DEVICE_ATTR(delete, S_IWUSR, NULL, store_srp_rport_delete);
136 169
170static ssize_t show_srp_rport_state(struct device *dev,
171 struct device_attribute *attr,
172 char *buf)
173{
174 static const char *const state_name[] = {
175 [SRP_RPORT_RUNNING] = "running",
176 [SRP_RPORT_BLOCKED] = "blocked",
177 [SRP_RPORT_FAIL_FAST] = "fail-fast",
178 [SRP_RPORT_LOST] = "lost",
179 };
180 struct srp_rport *rport = transport_class_to_srp_rport(dev);
181 enum srp_rport_state state = rport->state;
182
183 return sprintf(buf, "%s\n",
184 (unsigned)state < ARRAY_SIZE(state_name) ?
185 state_name[state] : "???");
186}
187
188static DEVICE_ATTR(state, S_IRUGO, show_srp_rport_state, NULL);
189
190static ssize_t srp_show_tmo(char *buf, int tmo)
191{
192 return tmo >= 0 ? sprintf(buf, "%d\n", tmo) : sprintf(buf, "off\n");
193}
194
195static int srp_parse_tmo(int *tmo, const char *buf)
196{
197 int res = 0;
198
199 if (strncmp(buf, "off", 3) != 0)
200 res = kstrtoint(buf, 0, tmo);
201 else
202 *tmo = -1;
203
204 return res;
205}
206
207static ssize_t show_reconnect_delay(struct device *dev,
208 struct device_attribute *attr, char *buf)
209{
210 struct srp_rport *rport = transport_class_to_srp_rport(dev);
211
212 return srp_show_tmo(buf, rport->reconnect_delay);
213}
214
215static ssize_t store_reconnect_delay(struct device *dev,
216 struct device_attribute *attr,
217 const char *buf, const size_t count)
218{
219 struct srp_rport *rport = transport_class_to_srp_rport(dev);
220 int res, delay;
221
222 res = srp_parse_tmo(&delay, buf);
223 if (res)
224 goto out;
225 res = srp_tmo_valid(delay, rport->fast_io_fail_tmo,
226 rport->dev_loss_tmo);
227 if (res)
228 goto out;
229
230 if (rport->reconnect_delay <= 0 && delay > 0 &&
231 rport->state != SRP_RPORT_RUNNING) {
232 queue_delayed_work(system_long_wq, &rport->reconnect_work,
233 delay * HZ);
234 } else if (delay <= 0) {
235 cancel_delayed_work(&rport->reconnect_work);
236 }
237 rport->reconnect_delay = delay;
238 res = count;
239
240out:
241 return res;
242}
243
244static DEVICE_ATTR(reconnect_delay, S_IRUGO | S_IWUSR, show_reconnect_delay,
245 store_reconnect_delay);
246
247static ssize_t show_failed_reconnects(struct device *dev,
248 struct device_attribute *attr, char *buf)
249{
250 struct srp_rport *rport = transport_class_to_srp_rport(dev);
251
252 return sprintf(buf, "%d\n", rport->failed_reconnects);
253}
254
255static DEVICE_ATTR(failed_reconnects, S_IRUGO, show_failed_reconnects, NULL);
256
257static ssize_t show_srp_rport_fast_io_fail_tmo(struct device *dev,
258 struct device_attribute *attr,
259 char *buf)
260{
261 struct srp_rport *rport = transport_class_to_srp_rport(dev);
262
263 return srp_show_tmo(buf, rport->fast_io_fail_tmo);
264}
265
266static ssize_t store_srp_rport_fast_io_fail_tmo(struct device *dev,
267 struct device_attribute *attr,
268 const char *buf, size_t count)
269{
270 struct srp_rport *rport = transport_class_to_srp_rport(dev);
271 int res;
272 int fast_io_fail_tmo;
273
274 res = srp_parse_tmo(&fast_io_fail_tmo, buf);
275 if (res)
276 goto out;
277 res = srp_tmo_valid(rport->reconnect_delay, fast_io_fail_tmo,
278 rport->dev_loss_tmo);
279 if (res)
280 goto out;
281 rport->fast_io_fail_tmo = fast_io_fail_tmo;
282 res = count;
283
284out:
285 return res;
286}
287
288static DEVICE_ATTR(fast_io_fail_tmo, S_IRUGO | S_IWUSR,
289 show_srp_rport_fast_io_fail_tmo,
290 store_srp_rport_fast_io_fail_tmo);
291
292static ssize_t show_srp_rport_dev_loss_tmo(struct device *dev,
293 struct device_attribute *attr,
294 char *buf)
295{
296 struct srp_rport *rport = transport_class_to_srp_rport(dev);
297
298 return srp_show_tmo(buf, rport->dev_loss_tmo);
299}
300
301static ssize_t store_srp_rport_dev_loss_tmo(struct device *dev,
302 struct device_attribute *attr,
303 const char *buf, size_t count)
304{
305 struct srp_rport *rport = transport_class_to_srp_rport(dev);
306 int res;
307 int dev_loss_tmo;
308
309 res = srp_parse_tmo(&dev_loss_tmo, buf);
310 if (res)
311 goto out;
312 res = srp_tmo_valid(rport->reconnect_delay, rport->fast_io_fail_tmo,
313 dev_loss_tmo);
314 if (res)
315 goto out;
316 rport->dev_loss_tmo = dev_loss_tmo;
317 res = count;
318
319out:
320 return res;
321}
322
323static DEVICE_ATTR(dev_loss_tmo, S_IRUGO | S_IWUSR,
324 show_srp_rport_dev_loss_tmo,
325 store_srp_rport_dev_loss_tmo);
326
327static int srp_rport_set_state(struct srp_rport *rport,
328 enum srp_rport_state new_state)
329{
330 enum srp_rport_state old_state = rport->state;
331
332 lockdep_assert_held(&rport->mutex);
333
334 switch (new_state) {
335 case SRP_RPORT_RUNNING:
336 switch (old_state) {
337 case SRP_RPORT_LOST:
338 goto invalid;
339 default:
340 break;
341 }
342 break;
343 case SRP_RPORT_BLOCKED:
344 switch (old_state) {
345 case SRP_RPORT_RUNNING:
346 break;
347 default:
348 goto invalid;
349 }
350 break;
351 case SRP_RPORT_FAIL_FAST:
352 switch (old_state) {
353 case SRP_RPORT_LOST:
354 goto invalid;
355 default:
356 break;
357 }
358 break;
359 case SRP_RPORT_LOST:
360 break;
361 }
362 rport->state = new_state;
363 return 0;
364
365invalid:
366 return -EINVAL;
367}
368
369/**
370 * srp_reconnect_work() - reconnect and schedule a new attempt if necessary
371 */
372static void srp_reconnect_work(struct work_struct *work)
373{
374 struct srp_rport *rport = container_of(to_delayed_work(work),
375 struct srp_rport, reconnect_work);
376 struct Scsi_Host *shost = rport_to_shost(rport);
377 int delay, res;
378
379 res = srp_reconnect_rport(rport);
380 if (res != 0) {
381 shost_printk(KERN_ERR, shost,
382 "reconnect attempt %d failed (%d)\n",
383 ++rport->failed_reconnects, res);
384 delay = rport->reconnect_delay *
385 min(100, max(1, rport->failed_reconnects - 10));
386 if (delay > 0)
387 queue_delayed_work(system_long_wq,
388 &rport->reconnect_work, delay * HZ);
389 }
390}
391
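On a failed attempt the reconnect work re-queues itself with reconnect_delay multiplied by min(100, max(1, failed_reconnects - 10)): the first ten retries keep the base delay, later ones back off linearly, and the multiplier is capped at 100. The same expression with a few worked values:

    static int example_backoff_delay(int reconnect_delay, int failed_reconnects)
    {
            return reconnect_delay * min(100, max(1, failed_reconnects - 10));
    }

    /* example_backoff_delay(10, 1)   == 10    (clamped to the base delay)
     * example_backoff_delay(10, 15)  == 50    (15 - 10 = 5 times the base)
     * example_backoff_delay(10, 200) == 1000  (multiplier capped at 100) */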
392static void __rport_fail_io_fast(struct srp_rport *rport)
393{
394 struct Scsi_Host *shost = rport_to_shost(rport);
395 struct srp_internal *i;
396
397 lockdep_assert_held(&rport->mutex);
398
399 if (srp_rport_set_state(rport, SRP_RPORT_FAIL_FAST))
400 return;
401 scsi_target_unblock(rport->dev.parent, SDEV_TRANSPORT_OFFLINE);
402
403 /* Involve the LLD if possible to terminate all I/O on the rport. */
404 i = to_srp_internal(shost->transportt);
405 if (i->f->terminate_rport_io)
406 i->f->terminate_rport_io(rport);
407}
408
409/**
410 * rport_fast_io_fail_timedout() - fast I/O failure timeout handler
411 */
412static void rport_fast_io_fail_timedout(struct work_struct *work)
413{
414 struct srp_rport *rport = container_of(to_delayed_work(work),
415 struct srp_rport, fast_io_fail_work);
416 struct Scsi_Host *shost = rport_to_shost(rport);
417
418 pr_info("fast_io_fail_tmo expired for SRP %s / %s.\n",
419 dev_name(&rport->dev), dev_name(&shost->shost_gendev));
420
421 mutex_lock(&rport->mutex);
422 if (rport->state == SRP_RPORT_BLOCKED)
423 __rport_fail_io_fast(rport);
424 mutex_unlock(&rport->mutex);
425}
426
427/**
428 * rport_dev_loss_timedout() - device loss timeout handler
429 */
430static void rport_dev_loss_timedout(struct work_struct *work)
431{
432 struct srp_rport *rport = container_of(to_delayed_work(work),
433 struct srp_rport, dev_loss_work);
434 struct Scsi_Host *shost = rport_to_shost(rport);
435 struct srp_internal *i = to_srp_internal(shost->transportt);
436
437 pr_info("dev_loss_tmo expired for SRP %s / %s.\n",
438 dev_name(&rport->dev), dev_name(&shost->shost_gendev));
439
440 mutex_lock(&rport->mutex);
441 WARN_ON(srp_rport_set_state(rport, SRP_RPORT_LOST) != 0);
442 scsi_target_unblock(rport->dev.parent, SDEV_TRANSPORT_OFFLINE);
443 mutex_unlock(&rport->mutex);
444
445 i->f->rport_delete(rport);
446}
447
448static void __srp_start_tl_fail_timers(struct srp_rport *rport)
449{
450 struct Scsi_Host *shost = rport_to_shost(rport);
451 int delay, fast_io_fail_tmo, dev_loss_tmo;
452
453 lockdep_assert_held(&rport->mutex);
454
455 if (!rport->deleted) {
456 delay = rport->reconnect_delay;
457 fast_io_fail_tmo = rport->fast_io_fail_tmo;
458 dev_loss_tmo = rport->dev_loss_tmo;
459 pr_debug("%s current state: %d\n",
460 dev_name(&shost->shost_gendev), rport->state);
461
462 if (delay > 0)
463 queue_delayed_work(system_long_wq,
464 &rport->reconnect_work,
465 1UL * delay * HZ);
466 if (fast_io_fail_tmo >= 0 &&
467 srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) {
468 pr_debug("%s new state: %d\n",
469 dev_name(&shost->shost_gendev),
470 rport->state);
471 scsi_target_block(&shost->shost_gendev);
472 queue_delayed_work(system_long_wq,
473 &rport->fast_io_fail_work,
474 1UL * fast_io_fail_tmo * HZ);
475 }
476 if (dev_loss_tmo >= 0)
477 queue_delayed_work(system_long_wq,
478 &rport->dev_loss_work,
479 1UL * dev_loss_tmo * HZ);
480 } else {
481 pr_debug("%s has already been deleted\n",
482 dev_name(&shost->shost_gendev));
483 srp_rport_set_state(rport, SRP_RPORT_FAIL_FAST);
484 scsi_target_unblock(&shost->shost_gendev,
485 SDEV_TRANSPORT_OFFLINE);
486 }
487}
488
489/**
490 * srp_start_tl_fail_timers() - start the transport layer failure timers
491 *
492 * Start the transport layer fast I/O failure and device loss timers. Do not
493 * modify a timer that was already started.
494 */
495void srp_start_tl_fail_timers(struct srp_rport *rport)
496{
497 mutex_lock(&rport->mutex);
498 __srp_start_tl_fail_timers(rport);
499 mutex_unlock(&rport->mutex);
500}
501EXPORT_SYMBOL(srp_start_tl_fail_timers);
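
A minimal sketch of how an initiator driver might use this export, assuming a hypothetical per-target structure and error work item (the my_srp_* names are illustrative, not taken from this patch):

struct my_srp_target {
	struct srp_rport	*rport;
	struct work_struct	tl_err_work;
};

/* Runs after the LLD has observed a transport error, e.g. a QP in error. */
static void my_srp_tl_err_work(struct work_struct *work)
{
	struct my_srp_target *target =
		container_of(work, struct my_srp_target, tl_err_work);

	/* Arm the reconnect_delay, fast_io_fail_tmo and dev_loss_tmo handling. */
	srp_start_tl_fail_timers(target->rport);
}
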
502
503/**
504 * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn()
505 */
506static int scsi_request_fn_active(struct Scsi_Host *shost)
507{
508 struct scsi_device *sdev;
509 struct request_queue *q;
510 int request_fn_active = 0;
511
512 shost_for_each_device(sdev, shost) {
513 q = sdev->request_queue;
514
515 spin_lock_irq(q->queue_lock);
516 request_fn_active += q->request_fn_active;
517 spin_unlock_irq(q->queue_lock);
518 }
519
520 return request_fn_active;
521}
522
523/**
524 * srp_reconnect_rport() - reconnect to an SRP target port
525 *
526 * Blocks SCSI command queueing before invoking reconnect() such that
527 * queuecommand() won't be invoked concurrently with reconnect() from outside
528 * the SCSI EH. This is important since a reconnect() implementation may
529 * reallocate resources needed by queuecommand().
530 *
531 * Notes:
532 * - This function neither waits until outstanding requests have finished nor
533 * tries to abort them. It is the responsibility of the reconnect()
534 * function to finish outstanding commands before reconnecting to the target
535 * port.
536 * - It is the responsibility of the caller to ensure that the resources
537 * reallocated by the reconnect() function won't be used while this function
538 * is in progress. One possible strategy is to invoke this function from
539 * the context of the SCSI EH thread only. Another possible strategy is to
540 * lock the rport mutex inside each SCSI LLD callback that can be invoked by
541 * the SCSI EH (the scsi_host_template.eh_*() functions and also the
542 * scsi_host_template.queuecommand() function).
543 */
544int srp_reconnect_rport(struct srp_rport *rport)
545{
546 struct Scsi_Host *shost = rport_to_shost(rport);
547 struct srp_internal *i = to_srp_internal(shost->transportt);
548 struct scsi_device *sdev;
549 int res;
550
551 pr_debug("SCSI host %s\n", dev_name(&shost->shost_gendev));
552
553 res = mutex_lock_interruptible(&rport->mutex);
554 if (res)
555 goto out;
556 scsi_target_block(&shost->shost_gendev);
557 while (scsi_request_fn_active(shost))
558 msleep(20);
559 res = i->f->reconnect(rport);
560 pr_debug("%s (state %d): transport.reconnect() returned %d\n",
561 dev_name(&shost->shost_gendev), rport->state, res);
562 if (res == 0) {
563 cancel_delayed_work(&rport->fast_io_fail_work);
564 cancel_delayed_work(&rport->dev_loss_work);
565
566 rport->failed_reconnects = 0;
567 srp_rport_set_state(rport, SRP_RPORT_RUNNING);
568 scsi_target_unblock(&shost->shost_gendev, SDEV_RUNNING);
569 /*
570 * If the SCSI error handler has offlined one or more devices,
571 * invoking scsi_target_unblock() won't change the state of
572 * these devices to running, so do that explicitly.
573 */
574 spin_lock_irq(shost->host_lock);
575 __shost_for_each_device(sdev, shost)
576 if (sdev->sdev_state == SDEV_OFFLINE)
577 sdev->sdev_state = SDEV_RUNNING;
578 spin_unlock_irq(shost->host_lock);
579 } else if (rport->state == SRP_RPORT_RUNNING) {
580 /*
581 * srp_reconnect_rport() was invoked with fast_io_fail
582 * off. Mark the port as failed and start the TL failure
583 * timers if these had not yet been started.
584 */
585 __rport_fail_io_fast(rport);
586 scsi_target_unblock(&shost->shost_gendev,
587 SDEV_TRANSPORT_OFFLINE);
588 __srp_start_tl_fail_timers(rport);
589 } else if (rport->state != SRP_RPORT_BLOCKED) {
590 scsi_target_unblock(&shost->shost_gendev,
591 SDEV_TRANSPORT_OFFLINE);
592 }
593 mutex_unlock(&rport->mutex);
594
595out:
596 return res;
597}
598EXPORT_SYMBOL(srp_reconnect_rport);
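
One way to honor the caller requirements spelled out above is to invoke this function from the SCSI EH only; a sketch of a hypothetical eh_host_reset_handler, where my_host_to_rport() is an assumed helper:

static int my_srp_reset_host(struct scsi_cmnd *scmnd)
{
	struct srp_rport *rport = my_host_to_rport(scmnd->device->host);

	/* SUCCESS lets the SCSI EH retry the outstanding commands. */
	return srp_reconnect_rport(rport) == 0 ? SUCCESS : FAILED;
}
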
599
600/**
601 * srp_timed_out() - SRP transport intercept of the SCSI timeout EH
602 *
603 * If a timeout occurs while an rport is in the blocked state, ask the SCSI
604 * EH to continue waiting (BLK_EH_RESET_TIMER). Otherwise let the SCSI core
605 * handle the timeout (BLK_EH_NOT_HANDLED).
606 *
607 * Note: This function is called from soft-IRQ context and with the request
608 * queue lock held.
609 */
610static enum blk_eh_timer_return srp_timed_out(struct scsi_cmnd *scmd)
611{
612 struct scsi_device *sdev = scmd->device;
613 struct Scsi_Host *shost = sdev->host;
614 struct srp_internal *i = to_srp_internal(shost->transportt);
615
616 pr_debug("timeout for sdev %s\n", dev_name(&sdev->sdev_gendev));
617 return i->f->reset_timer_if_blocked && scsi_device_blocked(sdev) ?
618 BLK_EH_RESET_TIMER : BLK_EH_NOT_HANDLED;
619}
620
137static void srp_rport_release(struct device *dev) 621static void srp_rport_release(struct device *dev)
138{ 622{
139 struct srp_rport *rport = dev_to_rport(dev); 623 struct srp_rport *rport = dev_to_rport(dev);
140 624
625 cancel_delayed_work_sync(&rport->reconnect_work);
626 cancel_delayed_work_sync(&rport->fast_io_fail_work);
627 cancel_delayed_work_sync(&rport->dev_loss_work);
628
141 put_device(dev->parent); 629 put_device(dev->parent);
142 kfree(rport); 630 kfree(rport);
143} 631}
@@ -185,6 +673,24 @@ static int srp_host_match(struct attribute_container *cont, struct device *dev)
185} 673}
186 674
187/** 675/**
676 * srp_rport_get() - increment rport reference count
677 */
678void srp_rport_get(struct srp_rport *rport)
679{
680 get_device(&rport->dev);
681}
682EXPORT_SYMBOL(srp_rport_get);
683
684/**
685 * srp_rport_put() - decrement rport reference count
686 */
687void srp_rport_put(struct srp_rport *rport)
688{
689 put_device(&rport->dev);
690}
691EXPORT_SYMBOL(srp_rport_put);
692
693/**
188 * srp_rport_add - add a SRP remote port to the device hierarchy 694 * srp_rport_add - add a SRP remote port to the device hierarchy
189 * @shost: scsi host the remote port is connected to. 695 * @shost: scsi host the remote port is connected to.
190 * @ids: The port id for the remote port. 696 * @ids: The port id for the remote port.
@@ -196,12 +702,15 @@ struct srp_rport *srp_rport_add(struct Scsi_Host *shost,
196{ 702{
197 struct srp_rport *rport; 703 struct srp_rport *rport;
198 struct device *parent = &shost->shost_gendev; 704 struct device *parent = &shost->shost_gendev;
705 struct srp_internal *i = to_srp_internal(shost->transportt);
199 int id, ret; 706 int id, ret;
200 707
201 rport = kzalloc(sizeof(*rport), GFP_KERNEL); 708 rport = kzalloc(sizeof(*rport), GFP_KERNEL);
202 if (!rport) 709 if (!rport)
203 return ERR_PTR(-ENOMEM); 710 return ERR_PTR(-ENOMEM);
204 711
712 mutex_init(&rport->mutex);
713
205 device_initialize(&rport->dev); 714 device_initialize(&rport->dev);
206 715
207 rport->dev.parent = get_device(parent); 716 rport->dev.parent = get_device(parent);
@@ -210,6 +719,17 @@ struct srp_rport *srp_rport_add(struct Scsi_Host *shost,
210 memcpy(rport->port_id, ids->port_id, sizeof(rport->port_id)); 719 memcpy(rport->port_id, ids->port_id, sizeof(rport->port_id));
211 rport->roles = ids->roles; 720 rport->roles = ids->roles;
212 721
722 if (i->f->reconnect)
723 rport->reconnect_delay = i->f->reconnect_delay ?
724 *i->f->reconnect_delay : 10;
725 INIT_DELAYED_WORK(&rport->reconnect_work, srp_reconnect_work);
726 rport->fast_io_fail_tmo = i->f->fast_io_fail_tmo ?
727 *i->f->fast_io_fail_tmo : 15;
728 rport->dev_loss_tmo = i->f->dev_loss_tmo ? *i->f->dev_loss_tmo : 60;
729 INIT_DELAYED_WORK(&rport->fast_io_fail_work,
730 rport_fast_io_fail_timedout);
731 INIT_DELAYED_WORK(&rport->dev_loss_work, rport_dev_loss_timedout);
732
213 id = atomic_inc_return(&to_srp_host_attrs(shost)->next_port_id); 733 id = atomic_inc_return(&to_srp_host_attrs(shost)->next_port_id);
214 dev_set_name(&rport->dev, "port-%d:%d", shost->host_no, id); 734 dev_set_name(&rport->dev, "port-%d:%d", shost->host_no, id);
215 735
@@ -259,6 +779,13 @@ void srp_rport_del(struct srp_rport *rport)
259 transport_remove_device(dev); 779 transport_remove_device(dev);
260 device_del(dev); 780 device_del(dev);
261 transport_destroy_device(dev); 781 transport_destroy_device(dev);
782
783 mutex_lock(&rport->mutex);
784 if (rport->state == SRP_RPORT_BLOCKED)
785 __rport_fail_io_fast(rport);
786 rport->deleted = true;
787 mutex_unlock(&rport->mutex);
788
262 put_device(dev); 789 put_device(dev);
263} 790}
264EXPORT_SYMBOL_GPL(srp_rport_del); 791EXPORT_SYMBOL_GPL(srp_rport_del);
@@ -310,6 +837,8 @@ srp_attach_transport(struct srp_function_template *ft)
310 if (!i) 837 if (!i)
311 return NULL; 838 return NULL;
312 839
840 i->t.eh_timed_out = srp_timed_out;
841
313 i->t.tsk_mgmt_response = srp_tsk_mgmt_response; 842 i->t.tsk_mgmt_response = srp_tsk_mgmt_response;
314 i->t.it_nexus_response = srp_it_nexus_response; 843 i->t.it_nexus_response = srp_it_nexus_response;
315 844
@@ -327,6 +856,15 @@ srp_attach_transport(struct srp_function_template *ft)
327 count = 0; 856 count = 0;
328 i->rport_attrs[count++] = &dev_attr_port_id; 857 i->rport_attrs[count++] = &dev_attr_port_id;
329 i->rport_attrs[count++] = &dev_attr_roles; 858 i->rport_attrs[count++] = &dev_attr_roles;
859 if (ft->has_rport_state) {
860 i->rport_attrs[count++] = &dev_attr_state;
861 i->rport_attrs[count++] = &dev_attr_fast_io_fail_tmo;
862 i->rport_attrs[count++] = &dev_attr_dev_loss_tmo;
863 }
864 if (ft->reconnect) {
865 i->rport_attrs[count++] = &dev_attr_reconnect_delay;
866 i->rport_attrs[count++] = &dev_attr_failed_reconnects;
867 }
330 if (ft->rport_delete) 868 if (ft->rport_delete)
331 i->rport_attrs[count++] = &dev_attr_delete; 869 i->rport_attrs[count++] = &dev_attr_delete;
332 i->rport_attrs[count++] = NULL; 870 i->rport_attrs[count++] = NULL;
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 5eb4e31af22b..da78875807fc 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -230,6 +230,15 @@ enum {
230 MLX5_MAX_PAGE_SHIFT = 31 230 MLX5_MAX_PAGE_SHIFT = 31
231}; 231};
232 232
233enum {
234 MLX5_ADAPTER_PAGE_SHIFT = 12
235};
236
237enum {
238 MLX5_CAP_OFF_DCT = 41,
239 MLX5_CAP_OFF_CMDIF_CSUM = 46,
240};
241
233struct mlx5_inbox_hdr { 242struct mlx5_inbox_hdr {
234 __be16 opcode; 243 __be16 opcode;
235 u8 rsvd[4]; 244 u8 rsvd[4];
@@ -319,9 +328,9 @@ struct mlx5_hca_cap {
319 u8 rsvd25[42]; 328 u8 rsvd25[42];
320 __be16 log_uar_page_sz; 329 __be16 log_uar_page_sz;
321 u8 rsvd26[28]; 330 u8 rsvd26[28];
322 u8 log_msx_atomic_size_qp; 331 u8 log_max_atomic_size_qp;
323 u8 rsvd27[2]; 332 u8 rsvd27[2];
324 u8 log_msx_atomic_size_dc; 333 u8 log_max_atomic_size_dc;
325 u8 rsvd28[76]; 334 u8 rsvd28[76];
326}; 335};
327 336
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6b8c496572c8..554548cd3dd4 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -483,6 +483,7 @@ struct mlx5_priv {
483 struct rb_root page_root; 483 struct rb_root page_root;
484 int fw_pages; 484 int fw_pages;
485 int reg_pages; 485 int reg_pages;
486 struct list_head free_list;
486 487
487 struct mlx5_core_health health; 488 struct mlx5_core_health health;
488 489
@@ -557,9 +558,11 @@ typedef void (*mlx5_cmd_cbk_t)(int status, void *context);
557struct mlx5_cmd_work_ent { 558struct mlx5_cmd_work_ent {
558 struct mlx5_cmd_msg *in; 559 struct mlx5_cmd_msg *in;
559 struct mlx5_cmd_msg *out; 560 struct mlx5_cmd_msg *out;
561 void *uout;
562 int uout_size;
560 mlx5_cmd_cbk_t callback; 563 mlx5_cmd_cbk_t callback;
561 void *context; 564 void *context;
562 int idx; 565 int idx;
563 struct completion done; 566 struct completion done;
564 struct mlx5_cmd *cmd; 567 struct mlx5_cmd *cmd;
565 struct work_struct work; 568 struct work_struct work;
@@ -570,6 +573,7 @@ struct mlx5_cmd_work_ent {
570 u8 token; 573 u8 token;
571 struct timespec ts1; 574 struct timespec ts1;
572 struct timespec ts2; 575 struct timespec ts2;
576 u16 op;
573}; 577};
574 578
575struct mlx5_pas { 579struct mlx5_pas {
@@ -653,6 +657,9 @@ void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
653int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr); 657int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
654int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, 658int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
655 int out_size); 659 int out_size);
660int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
661 void *out, int out_size, mlx5_cmd_cbk_t callback,
662 void *context);
656int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); 663int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
657int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); 664int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
658int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); 665int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
@@ -676,7 +683,9 @@ int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
676int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, 683int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
677 u16 lwm, int is_srq); 684 u16 lwm, int is_srq);
678int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, 685int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
679 struct mlx5_create_mkey_mbox_in *in, int inlen); 686 struct mlx5_create_mkey_mbox_in *in, int inlen,
687 mlx5_cmd_cbk_t callback, void *context,
688 struct mlx5_create_mkey_mbox_out *out);
680int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr); 689int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr);
681int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, 690int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
682 struct mlx5_query_mkey_mbox_out *out, int outlen); 691 struct mlx5_query_mkey_mbox_out *out, int outlen);
@@ -745,6 +754,11 @@ static inline u32 mlx5_idx_to_mkey(u32 mkey_idx)
745 return mkey_idx << 8; 754 return mkey_idx << 8;
746} 755}
747 756
757static inline u8 mlx5_mkey_variant(u32 mkey)
758{
759 return mkey & 0xff;
760}
761
748enum { 762enum {
749 MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, 763 MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0,
750 MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1, 764 MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1,
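
The new callback/context arguments make mlx5_core_create_mkey() optionally asynchronous. A rough sketch of an asynchronous caller, assuming a driver-private context; my_mkey_ctx, my_create_mkey_done and the mailbox setup are illustrative only:

struct my_mkey_ctx {
	struct mlx5_create_mkey_mbox_out	out;
	struct completion			done;
};

static void my_create_mkey_done(int status, void *context)
{
	struct my_mkey_ctx *ctx = context;

	if (status)
		pr_warn("async CREATE_MKEY failed (%d)\n", status);
	complete(&ctx->done);
}

	/* ... in the caller, with 'in'/'inlen' already filled in ... */
	err = mlx5_core_create_mkey(dev, mr, in, inlen,
				    my_create_mkey_done, ctx, &ctx->out);
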
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e393171e2fac..979874c627ee 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -67,12 +67,14 @@ enum rdma_node_type {
67 RDMA_NODE_IB_CA = 1, 67 RDMA_NODE_IB_CA = 1,
68 RDMA_NODE_IB_SWITCH, 68 RDMA_NODE_IB_SWITCH,
69 RDMA_NODE_IB_ROUTER, 69 RDMA_NODE_IB_ROUTER,
70 RDMA_NODE_RNIC 70 RDMA_NODE_RNIC,
71 RDMA_NODE_USNIC,
71}; 72};
72 73
73enum rdma_transport_type { 74enum rdma_transport_type {
74 RDMA_TRANSPORT_IB, 75 RDMA_TRANSPORT_IB,
75 RDMA_TRANSPORT_IWARP 76 RDMA_TRANSPORT_IWARP,
77 RDMA_TRANSPORT_USNIC
76}; 78};
77 79
78enum rdma_transport_type 80enum rdma_transport_type
@@ -1436,6 +1438,7 @@ struct ib_device {
1436 1438
1437 int uverbs_abi_ver; 1439 int uverbs_abi_ver;
1438 u64 uverbs_cmd_mask; 1440 u64 uverbs_cmd_mask;
1441 u64 uverbs_ex_cmd_mask;
1439 1442
1440 char node_desc[64]; 1443 char node_desc[64];
1441 __be64 node_guid; 1444 __be64 node_guid;
@@ -2384,4 +2387,17 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp,
2384 struct ib_flow_attr *flow_attr, int domain); 2387 struct ib_flow_attr *flow_attr, int domain);
2385int ib_destroy_flow(struct ib_flow *flow_id); 2388int ib_destroy_flow(struct ib_flow *flow_id);
2386 2389
2390static inline int ib_check_mr_access(int flags)
2391{
2392 /*
2393 * Local write permission is required if remote write or
2394 * remote atomic permission is also requested.
2395 */
2396 if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
2397 !(flags & IB_ACCESS_LOCAL_WRITE))
2398 return -EINVAL;
2399
2400 return 0;
2401}
2402
2387#endif /* IB_VERBS_H */ 2403#endif /* IB_VERBS_H */
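
A sketch of how a verbs provider could use the new helper in its memory-registration path; the my_reg_user_mr() body is a placeholder and not taken from any driver in this series:

static struct ib_mr *my_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				    u64 virt_addr, int access_flags,
				    struct ib_udata *udata)
{
	int ret = ib_check_mr_access(access_flags);

	/* Reject remote write/atomic access requested without local write. */
	if (ret)
		return ERR_PTR(ret);

	/* ... pin the user memory and program the HCA ... */
	return ERR_PTR(-ENOSYS);
}
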
diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h
index ff0f04ac91aa..4ebf6913b7b2 100644
--- a/include/scsi/scsi_transport_srp.h
+++ b/include/scsi/scsi_transport_srp.h
@@ -13,6 +13,27 @@ struct srp_rport_identifiers {
13 u8 roles; 13 u8 roles;
14}; 14};
15 15
16/**
17 * enum srp_rport_state - SRP transport layer state
18 * @SRP_RPORT_RUNNING: Transport layer operational.
19 * @SRP_RPORT_BLOCKED: Transport layer not operational; fast I/O fail timer
20 * is running and I/O has been blocked.
21 * @SRP_RPORT_FAIL_FAST: Fast I/O fail timer has expired; fail I/O fast.
22 * @SRP_RPORT_LOST: Device loss timer has expired; port is being removed.
23 */
24enum srp_rport_state {
25 SRP_RPORT_RUNNING,
26 SRP_RPORT_BLOCKED,
27 SRP_RPORT_FAIL_FAST,
28 SRP_RPORT_LOST,
29};
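
As an illustration, with the defaults applied by srp_rport_add() in this series (reconnect_delay = 10 s, fast_io_fail_tmo = 15 s, dev_loss_tmo = 60 s): a transport failure at t = 0 moves the rport from SRP_RPORT_RUNNING to SRP_RPORT_BLOCKED and blocks the SCSI target; a reconnect attempt is scheduled for t = 10 s; if the port is still blocked at t = 15 s it enters SRP_RPORT_FAIL_FAST and pending I/O is failed fast; at t = 60 s it enters SRP_RPORT_LOST and rport_delete() removes the port. A successful reconnect before that cancels both timers and returns the rport to SRP_RPORT_RUNNING.
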
30
31/**
32 * struct srp_rport
33 * @lld_data: LLD private data.
34 * @mutex: Protects against concurrent rport reconnect / fast_io_fail /
35 * dev_loss_tmo activity.
36 */
16struct srp_rport { 37struct srp_rport {
17 /* for initiator and target drivers */ 38 /* for initiator and target drivers */
18 39
@@ -23,11 +44,43 @@ struct srp_rport {
23 44
24 /* for initiator drivers */ 45 /* for initiator drivers */
25 46
26 void *lld_data; /* LLD private data */ 47 void *lld_data;
48
49 struct mutex mutex;
50 enum srp_rport_state state;
51 bool deleted;
52 int reconnect_delay;
53 int failed_reconnects;
54 struct delayed_work reconnect_work;
55 int fast_io_fail_tmo;
56 int dev_loss_tmo;
57 struct delayed_work fast_io_fail_work;
58 struct delayed_work dev_loss_work;
27}; 59};
28 60
61/**
62 * struct srp_function_template
63 * @has_rport_state: Whether or not to create the state, fast_io_fail_tmo and
64 * dev_loss_tmo sysfs attributes for an rport.
65 * @reset_timer_if_blocked: Whether or not srp_timed_out() should reset the command
66 * timer if the device on which it has been queued is blocked.
67 * @reconnect_delay: If not NULL, points to the default reconnect_delay value.
68 * @fast_io_fail_tmo: If not NULL, points to the default fast_io_fail_tmo value.
69 * @dev_loss_tmo: If not NULL, points to the default dev_loss_tmo value.
70 * @reconnect: Callback function for reconnecting to the target. See also
71 * srp_reconnect_rport().
72 * @terminate_rport_io: Callback function for terminating all outstanding I/O
73 * requests for an rport.
74 */
29struct srp_function_template { 75struct srp_function_template {
30 /* for initiator drivers */ 76 /* for initiator drivers */
77 bool has_rport_state;
78 bool reset_timer_if_blocked;
79 int *reconnect_delay;
80 int *fast_io_fail_tmo;
81 int *dev_loss_tmo;
82 int (*reconnect)(struct srp_rport *rport);
83 void (*terminate_rport_io)(struct srp_rport *rport);
31 void (*rport_delete)(struct srp_rport *rport); 84 void (*rport_delete)(struct srp_rport *rport);
32 /* for target drivers */ 85 /* for target drivers */
33 int (* tsk_mgmt_response)(struct Scsi_Host *, u64, u64, int); 86 int (* tsk_mgmt_response)(struct Scsi_Host *, u64, u64, int);
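
A minimal sketch of an initiator driver filling in the new fields before calling srp_attach_transport(); the my_srp_* names and values are illustrative, not part of this patch:

static int my_fast_io_fail_tmo = 15;
static int my_dev_loss_tmo = 600;

static struct srp_function_template my_srp_transport_functions = {
	.has_rport_state	= true,
	.reset_timer_if_blocked	= true,
	.fast_io_fail_tmo	= &my_fast_io_fail_tmo,
	.dev_loss_tmo		= &my_dev_loss_tmo,
	.reconnect		= my_srp_rport_reconnect,
	.terminate_rport_io	= my_srp_terminate_io,
	.rport_delete		= my_srp_rport_delete,
};

Leaving .reconnect_delay NULL makes srp_rport_add() fall back to its 10 second default.
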
@@ -38,10 +91,36 @@ extern struct scsi_transport_template *
38srp_attach_transport(struct srp_function_template *); 91srp_attach_transport(struct srp_function_template *);
39extern void srp_release_transport(struct scsi_transport_template *); 92extern void srp_release_transport(struct scsi_transport_template *);
40 93
94extern void srp_rport_get(struct srp_rport *rport);
95extern void srp_rport_put(struct srp_rport *rport);
41extern struct srp_rport *srp_rport_add(struct Scsi_Host *, 96extern struct srp_rport *srp_rport_add(struct Scsi_Host *,
42 struct srp_rport_identifiers *); 97 struct srp_rport_identifiers *);
43extern void srp_rport_del(struct srp_rport *); 98extern void srp_rport_del(struct srp_rport *);
44 99extern int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo,
100 int dev_loss_tmo);
101extern int srp_reconnect_rport(struct srp_rport *rport);
102extern void srp_start_tl_fail_timers(struct srp_rport *rport);
45extern void srp_remove_host(struct Scsi_Host *); 103extern void srp_remove_host(struct Scsi_Host *);
46 104
105/**
106 * srp_chkready() - evaluate the transport layer state before I/O
107 *
108 * Returns a SCSI result code that can be returned by the LLD queuecommand()
109 * implementation. The role of this function is similar to that of
110 * fc_remote_port_chkready().
111 */
112static inline int srp_chkready(struct srp_rport *rport)
113{
114 switch (rport->state) {
115 case SRP_RPORT_RUNNING:
116 case SRP_RPORT_BLOCKED:
117 default:
118 return 0;
119 case SRP_RPORT_FAIL_FAST:
120 return DID_TRANSPORT_FAILFAST << 16;
121 case SRP_RPORT_LOST:
122 return DID_NO_CONNECT << 16;
123 }
124}
125
47#endif 126#endif
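
A sketch of srp_chkready() in an LLD queuecommand() implementation; my_target_to_rport() and the SRP_CMD build step are placeholders:

static int my_srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
{
	struct srp_rport *rport = my_target_to_rport(scsi_target(scmnd->device));
	int result = srp_chkready(rport);

	if (result) {
		/* Fail fast / no-connect without touching the transport. */
		scmnd->result = result;
		scmnd->scsi_done(scmnd);
		return 0;
	}

	/* ... build and post the SRP_CMD information unit ... */
	return 0;
}
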
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index e3ddd86c90a6..cbfdd4ca9510 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -87,10 +87,11 @@ enum {
87 IB_USER_VERBS_CMD_CLOSE_XRCD, 87 IB_USER_VERBS_CMD_CLOSE_XRCD,
88 IB_USER_VERBS_CMD_CREATE_XSRQ, 88 IB_USER_VERBS_CMD_CREATE_XSRQ,
89 IB_USER_VERBS_CMD_OPEN_QP, 89 IB_USER_VERBS_CMD_OPEN_QP,
90#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING 90};
91 IB_USER_VERBS_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, 91
92 IB_USER_VERBS_CMD_DESTROY_FLOW 92enum {
93#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ 93 IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
94 IB_USER_VERBS_EX_CMD_DESTROY_FLOW
94}; 95};
95 96
96/* 97/*
@@ -122,22 +123,24 @@ struct ib_uverbs_comp_event_desc {
122 * the rest of the command struct based on these value. 123 * the rest of the command struct based on these value.
123 */ 124 */
124 125
126#define IB_USER_VERBS_CMD_COMMAND_MASK 0xff
127#define IB_USER_VERBS_CMD_FLAGS_MASK 0xff000000u
128#define IB_USER_VERBS_CMD_FLAGS_SHIFT 24
129
130#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80
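
For illustration only, this is how a 32-bit command word for an extended command would be composed by user space and decoded again with the masks above:

	__u32 command = IB_USER_VERBS_EX_CMD_CREATE_FLOW |
			((__u32)IB_USER_VERBS_CMD_FLAG_EXTENDED <<
			 IB_USER_VERBS_CMD_FLAGS_SHIFT);

	/*
	 * The kernel recovers the opcode as
	 *   command & IB_USER_VERBS_CMD_COMMAND_MASK
	 * and the flags as
	 *   (command & IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT,
	 * which equals IB_USER_VERBS_CMD_FLAG_EXTENDED here.
	 */
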
131
125struct ib_uverbs_cmd_hdr { 132struct ib_uverbs_cmd_hdr {
126 __u32 command; 133 __u32 command;
127 __u16 in_words; 134 __u16 in_words;
128 __u16 out_words; 135 __u16 out_words;
129}; 136};
130 137
131#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING 138struct ib_uverbs_ex_cmd_hdr {
132struct ib_uverbs_cmd_hdr_ex { 139 __u64 response;
133 __u32 command;
134 __u16 in_words;
135 __u16 out_words;
136 __u16 provider_in_words; 140 __u16 provider_in_words;
137 __u16 provider_out_words; 141 __u16 provider_out_words;
138 __u32 cmd_hdr_reserved; 142 __u32 cmd_hdr_reserved;
139}; 143};
140#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
141 144
142struct ib_uverbs_get_context { 145struct ib_uverbs_get_context {
143 __u64 response; 146 __u64 response;
@@ -700,62 +703,71 @@ struct ib_uverbs_detach_mcast {
700 __u64 driver_data[0]; 703 __u64 driver_data[0];
701}; 704};
702 705
703#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING 706struct ib_uverbs_flow_spec_hdr {
704struct ib_kern_eth_filter { 707 __u32 type;
708 __u16 size;
709 __u16 reserved;
710 /* followed by flow_spec */
711 __u64 flow_spec_data[0];
712};
713
714struct ib_uverbs_flow_eth_filter {
705 __u8 dst_mac[6]; 715 __u8 dst_mac[6];
706 __u8 src_mac[6]; 716 __u8 src_mac[6];
707 __be16 ether_type; 717 __be16 ether_type;
708 __be16 vlan_tag; 718 __be16 vlan_tag;
709}; 719};
710 720
711struct ib_kern_spec_eth { 721struct ib_uverbs_flow_spec_eth {
712 __u32 type; 722 union {
713 __u16 size; 723 struct ib_uverbs_flow_spec_hdr hdr;
714 __u16 reserved; 724 struct {
715 struct ib_kern_eth_filter val; 725 __u32 type;
716 struct ib_kern_eth_filter mask; 726 __u16 size;
727 __u16 reserved;
728 };
729 };
730 struct ib_uverbs_flow_eth_filter val;
731 struct ib_uverbs_flow_eth_filter mask;
717}; 732};
718 733
719struct ib_kern_ipv4_filter { 734struct ib_uverbs_flow_ipv4_filter {
720 __be32 src_ip; 735 __be32 src_ip;
721 __be32 dst_ip; 736 __be32 dst_ip;
722}; 737};
723 738
724struct ib_kern_spec_ipv4 { 739struct ib_uverbs_flow_spec_ipv4 {
725 __u32 type; 740 union {
726 __u16 size; 741 struct ib_uverbs_flow_spec_hdr hdr;
727 __u16 reserved; 742 struct {
728 struct ib_kern_ipv4_filter val; 743 __u32 type;
729 struct ib_kern_ipv4_filter mask; 744 __u16 size;
745 __u16 reserved;
746 };
747 };
748 struct ib_uverbs_flow_ipv4_filter val;
749 struct ib_uverbs_flow_ipv4_filter mask;
730}; 750};
731 751
732struct ib_kern_tcp_udp_filter { 752struct ib_uverbs_flow_tcp_udp_filter {
733 __be16 dst_port; 753 __be16 dst_port;
734 __be16 src_port; 754 __be16 src_port;
735}; 755};
736 756
737struct ib_kern_spec_tcp_udp { 757struct ib_uverbs_flow_spec_tcp_udp {
738 __u32 type;
739 __u16 size;
740 __u16 reserved;
741 struct ib_kern_tcp_udp_filter val;
742 struct ib_kern_tcp_udp_filter mask;
743};
744
745struct ib_kern_spec {
746 union { 758 union {
759 struct ib_uverbs_flow_spec_hdr hdr;
747 struct { 760 struct {
748 __u32 type; 761 __u32 type;
749 __u16 size; 762 __u16 size;
750 __u16 reserved; 763 __u16 reserved;
751 }; 764 };
752 struct ib_kern_spec_eth eth;
753 struct ib_kern_spec_ipv4 ipv4;
754 struct ib_kern_spec_tcp_udp tcp_udp;
755 }; 765 };
766 struct ib_uverbs_flow_tcp_udp_filter val;
767 struct ib_uverbs_flow_tcp_udp_filter mask;
756}; 768};
757 769
758struct ib_kern_flow_attr { 770struct ib_uverbs_flow_attr {
759 __u32 type; 771 __u32 type;
760 __u16 size; 772 __u16 size;
761 __u16 priority; 773 __u16 priority;
@@ -767,13 +779,13 @@ struct ib_kern_flow_attr {
767 * struct ib_flow_spec_xxx 779 * struct ib_flow_spec_xxx
768 * struct ib_flow_spec_yyy 780 * struct ib_flow_spec_yyy
769 */ 781 */
782 struct ib_uverbs_flow_spec_hdr flow_specs[0];
770}; 783};
771 784
772struct ib_uverbs_create_flow { 785struct ib_uverbs_create_flow {
773 __u32 comp_mask; 786 __u32 comp_mask;
774 __u64 response;
775 __u32 qp_handle; 787 __u32 qp_handle;
776 struct ib_kern_flow_attr flow_attr; 788 struct ib_uverbs_flow_attr flow_attr;
777}; 789};
778 790
779struct ib_uverbs_create_flow_resp { 791struct ib_uverbs_create_flow_resp {
@@ -785,7 +797,6 @@ struct ib_uverbs_destroy_flow {
785 __u32 comp_mask; 797 __u32 comp_mask;
786 __u32 flow_handle; 798 __u32 flow_handle;
787}; 799};
788#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
789 800
790struct ib_uverbs_create_srq { 801struct ib_uverbs_create_srq {
791 __u64 response; 802 __u64 response;