-rw-r--r--  Documentation/ABI/stable/sysfs-driver-ib_srp    |  13
-rw-r--r--  Documentation/ABI/stable/sysfs-transport-srp    |  39
-rw-r--r--  drivers/infiniband/Kconfig                      |  11
-rw-r--r--  drivers/infiniband/core/cm.c                    |   5
-rw-r--r--  drivers/infiniband/core/cma.c                   |  68
-rw-r--r--  drivers/infiniband/core/netlink.c               |   2
-rw-r--r--  drivers/infiniband/core/sysfs.c                 |   1
-rw-r--r--  drivers/infiniband/core/ucma.c                  |   4
-rw-r--r--  drivers/infiniband/core/uverbs.h                |  36
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c            | 109
-rw-r--r--  drivers/infiniband/core/uverbs_main.c           | 128
-rw-r--r--  drivers/infiniband/core/verbs.c                 |  17
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c            |   4
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_user_sdma.c   |   7
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c                 |   2
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c               |   8
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.c           |   2
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma.h           |  53
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_hw.c        |   2
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_main.c      |   7
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c     |   6
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7322.c         |  11
-rw-r--r--  drivers/infiniband/hw/qib/qib_mad.h             |  14
-rw-r--r--  drivers/infiniband/hw/qib/qib_user_sdma.c       |   6
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.h           |  10
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h            |   4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c         |  14
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c         |  24
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c       |  12
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c  |  29
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_netlink.c    |   4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_vlan.c       |  10
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c             | 500
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h             |  21
-rw-r--r--  drivers/scsi/scsi_transport_srp.c               | 540
-rw-r--r--  include/rdma/ib_verbs.h                         |  20
-rw-r--r--  include/scsi/scsi_transport_srp.h               |  83
-rw-r--r--  include/uapi/rdma/ib_user_verbs.h               |  95
38 files changed, 1507 insertions(+), 414 deletions(-)
diff --git a/Documentation/ABI/stable/sysfs-driver-ib_srp b/Documentation/ABI/stable/sysfs-driver-ib_srp
index 5c53d28f775c..b9688de8455b 100644
--- a/Documentation/ABI/stable/sysfs-driver-ib_srp
+++ b/Documentation/ABI/stable/sysfs-driver-ib_srp
@@ -61,6 +61,12 @@ Description: Interface for making ib_srp connect to a new target.
61 interrupt is handled by a different CPU then the comp_vector 61 interrupt is handled by a different CPU then the comp_vector
62 parameter can be used to spread the SRP completion workload 62 parameter can be used to spread the SRP completion workload
63 over multiple CPU's. 63 over multiple CPU's.
64 * tl_retry_count, a number in the range 2..7 specifying the
65 IB RC retry count.
66 * queue_size, the maximum number of commands that the
67 initiator is allowed to queue per SCSI host. The default
68 value for this parameter is 62. The lowest supported value
69 is 2.
64 70
65What: /sys/class/infiniband_srp/srp-<hca>-<port_number>/ibdev 71What: /sys/class/infiniband_srp/srp-<hca>-<port_number>/ibdev
66Date: January 2, 2006 72Date: January 2, 2006
@@ -153,6 +159,13 @@ Contact: linux-rdma@vger.kernel.org
153Description: InfiniBand service ID used for establishing communication with 159Description: InfiniBand service ID used for establishing communication with
154 the SRP target. 160 the SRP target.
155 161
162What: /sys/class/scsi_host/host<n>/sgid
163Date: February 1, 2014
164KernelVersion: 3.13
165Contact: linux-rdma@vger.kernel.org
166Description: InfiniBand GID of the source port used for communication with
167 the SRP target.
168
156What: /sys/class/scsi_host/host<n>/zero_req_lim 169What: /sys/class/scsi_host/host<n>/zero_req_lim
157Date: September 20, 2006 170Date: September 20, 2006
158KernelVersion: 2.6.18 171KernelVersion: 2.6.18
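Usage sketch for the new login parameters documented above, using placeholder target identifiers and an example HCA/port name (mlx4_0, port 1); only tl_retry_count and queue_size are introduced by this series, the remaining options already exist:

    echo "id_ext=0x...,ioc_guid=0x...,dgid=fe80:...,pkey=ffff,service_id=0x...,tl_retry_count=7,queue_size=128" \
        > /sys/class/infiniband_srp/srp-mlx4_0-1/add_target

Here tl_retry_count=7 selects the largest permitted IB RC retry count (range 2..7) and queue_size=128 raises the per-SCSI-host command limit above its default of 62.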
diff --git a/Documentation/ABI/stable/sysfs-transport-srp b/Documentation/ABI/stable/sysfs-transport-srp
index b36fb0dc13c8..ec7af69fea0a 100644
--- a/Documentation/ABI/stable/sysfs-transport-srp
+++ b/Documentation/ABI/stable/sysfs-transport-srp
@@ -5,6 +5,24 @@ Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
5Description: Instructs an SRP initiator to disconnect from a target and to 5Description: Instructs an SRP initiator to disconnect from a target and to
6 remove all LUNs imported from that target. 6 remove all LUNs imported from that target.
7 7
8What: /sys/class/srp_remote_ports/port-<h>:<n>/dev_loss_tmo
9Date: February 1, 2014
10KernelVersion: 3.13
11Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
12Description: Number of seconds the SCSI layer will wait after a transport
13 layer error has been observed before removing a target port.
14 Zero means immediate removal. Setting this attribute to "off"
15 will disable the dev_loss timer.
16
17What: /sys/class/srp_remote_ports/port-<h>:<n>/fast_io_fail_tmo
18Date: February 1, 2014
19KernelVersion: 3.13
20Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
21Description: Number of seconds the SCSI layer will wait after a transport
22 layer error has been observed before failing I/O. Zero means
23 failing I/O immediately. Setting this attribute to "off" will
24 disable the fast_io_fail timer.
25
8What: /sys/class/srp_remote_ports/port-<h>:<n>/port_id 26What: /sys/class/srp_remote_ports/port-<h>:<n>/port_id
9Date: June 27, 2007 27Date: June 27, 2007
10KernelVersion: 2.6.24 28KernelVersion: 2.6.24
@@ -12,8 +30,29 @@ Contact: linux-scsi@vger.kernel.org
12Description: 16-byte local SRP port identifier in hexadecimal format. An 30Description: 16-byte local SRP port identifier in hexadecimal format. An
13 example: 4c:49:4e:55:58:20:56:49:4f:00:00:00:00:00:00:00. 31 example: 4c:49:4e:55:58:20:56:49:4f:00:00:00:00:00:00:00.
14 32
33What: /sys/class/srp_remote_ports/port-<h>:<n>/reconnect_delay
34Date: February 1, 2014
35KernelVersion: 3.13
36Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
37Description: Number of seconds the SCSI layer will wait after a reconnect
38 attempt failed before retrying. Setting this attribute to
39 "off" will disable time-based reconnecting.
40
15What: /sys/class/srp_remote_ports/port-<h>:<n>/roles 41What: /sys/class/srp_remote_ports/port-<h>:<n>/roles
16Date: June 27, 2007 42Date: June 27, 2007
17KernelVersion: 2.6.24 43KernelVersion: 2.6.24
18Contact: linux-scsi@vger.kernel.org 44Contact: linux-scsi@vger.kernel.org
19Description: Role of the remote port. Either "SRP Initiator" or "SRP Target". 45Description: Role of the remote port. Either "SRP Initiator" or "SRP Target".
46
47What: /sys/class/srp_remote_ports/port-<h>:<n>/state
48Date: February 1, 2014
49KernelVersion: 3.13
50Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
51Description: State of the transport layer used for communication with the
52 remote port. "running" if the transport layer is operational;
53 "blocked" if a transport layer error has been encountered but
54 the fast_io_fail_tmo timer has not yet fired; "fail-fast"
55 after the fast_io_fail_tmo timer has fired and before the
56 "dev_loss_tmo" timer has fired; "lost" after the
57 "dev_loss_tmo" timer has fired and before the port is finally
58 removed.
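Usage sketch for the new remote-port attributes documented above (the port name port-1:1 is only an example); values are chosen so that the fast_io_fail timer fires before dev_loss_tmo, matching the state progression described for the state attribute:

    cat /sys/class/srp_remote_ports/port-1:1/state
    echo 15  > /sys/class/srp_remote_ports/port-1:1/fast_io_fail_tmo
    echo 60  > /sys/class/srp_remote_ports/port-1:1/dev_loss_tmo
    echo off > /sys/class/srp_remote_ports/port-1:1/reconnect_delay

Writing "off" disables the corresponding timer, as described in each attribute's entry above.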
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index b84791f03a27..5ceda710f516 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -31,17 +31,6 @@ config INFINIBAND_USER_ACCESS
31 libibverbs, libibcm and a hardware driver library from 31 libibverbs, libibcm and a hardware driver library from
32 <http://www.openfabrics.org/git/>. 32 <http://www.openfabrics.org/git/>.
33 33
34config INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
35 bool "Experimental and unstable ABI for userspace access to flow steering verbs"
36 depends on INFINIBAND_USER_ACCESS
37 depends on STAGING
38 ---help---
39 The final ABI for userspace access to flow steering verbs
40 has not been defined. To use the current ABI, *WHICH WILL
41 CHANGE IN THE FUTURE*, say Y here.
42
43 If unsure, say N.
44
45config INFINIBAND_USER_MEM 34config INFINIBAND_USER_MEM
46 bool 35 bool
47 depends on INFINIBAND_USER_ACCESS != n 36 depends on INFINIBAND_USER_ACCESS != n
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 784b97cb05b0..f2ef7ef0f36f 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -383,14 +383,11 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
383{ 383{
384 unsigned long flags; 384 unsigned long flags;
385 int id; 385 int id;
386 static int next_id;
387 386
388 idr_preload(GFP_KERNEL); 387 idr_preload(GFP_KERNEL);
389 spin_lock_irqsave(&cm.lock, flags); 388 spin_lock_irqsave(&cm.lock, flags);
390 389
391 id = idr_alloc(&cm.local_id_table, cm_id_priv, next_id, 0, GFP_NOWAIT); 390 id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);
392 if (id >= 0)
393 next_id = max(id + 1, 0);
394 391
395 spin_unlock_irqrestore(&cm.lock, flags); 392 spin_unlock_irqrestore(&cm.lock, flags);
396 idr_preload_end(); 393 idr_preload_end();
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index dab4b41f1715..830c983fdeff 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -328,28 +328,6 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
328 return ret; 328 return ret;
329} 329}
330 330
331static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
332{
333 int i;
334 int err;
335 struct ib_port_attr props;
336 union ib_gid tmp;
337
338 err = ib_query_port(device, port_num, &props);
339 if (err)
340 return err;
341
342 for (i = 0; i < props.gid_tbl_len; ++i) {
343 err = ib_query_gid(device, port_num, i, &tmp);
344 if (err)
345 return err;
346 if (!memcmp(&tmp, gid, sizeof tmp))
347 return 0;
348 }
349
350 return -EADDRNOTAVAIL;
351}
352
353static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) 331static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
354{ 332{
355 dev_addr->dev_type = ARPHRD_INFINIBAND; 333 dev_addr->dev_type = ARPHRD_INFINIBAND;
@@ -371,13 +349,14 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
371 return ret; 349 return ret;
372} 350}
373 351
374static int cma_acquire_dev(struct rdma_id_private *id_priv) 352static int cma_acquire_dev(struct rdma_id_private *id_priv,
353 struct rdma_id_private *listen_id_priv)
375{ 354{
376 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 355 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
377 struct cma_device *cma_dev; 356 struct cma_device *cma_dev;
378 union ib_gid gid, iboe_gid; 357 union ib_gid gid, iboe_gid;
379 int ret = -ENODEV; 358 int ret = -ENODEV;
380 u8 port; 359 u8 port, found_port;
381 enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ? 360 enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
382 IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; 361 IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
383 362
@@ -389,17 +368,39 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
389 iboe_addr_get_sgid(dev_addr, &iboe_gid); 368 iboe_addr_get_sgid(dev_addr, &iboe_gid);
390 memcpy(&gid, dev_addr->src_dev_addr + 369 memcpy(&gid, dev_addr->src_dev_addr +
391 rdma_addr_gid_offset(dev_addr), sizeof gid); 370 rdma_addr_gid_offset(dev_addr), sizeof gid);
371 if (listen_id_priv &&
372 rdma_port_get_link_layer(listen_id_priv->id.device,
373 listen_id_priv->id.port_num) == dev_ll) {
374 cma_dev = listen_id_priv->cma_dev;
375 port = listen_id_priv->id.port_num;
376 if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
377 rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
378 ret = ib_find_cached_gid(cma_dev->device, &iboe_gid,
379 &found_port, NULL);
380 else
381 ret = ib_find_cached_gid(cma_dev->device, &gid,
382 &found_port, NULL);
383
384 if (!ret && (port == found_port)) {
385 id_priv->id.port_num = found_port;
386 goto out;
387 }
388 }
392 list_for_each_entry(cma_dev, &dev_list, list) { 389 list_for_each_entry(cma_dev, &dev_list, list) {
393 for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { 390 for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
391 if (listen_id_priv &&
392 listen_id_priv->cma_dev == cma_dev &&
393 listen_id_priv->id.port_num == port)
394 continue;
394 if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) { 395 if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
395 if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && 396 if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
396 rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) 397 rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
397 ret = find_gid_port(cma_dev->device, &iboe_gid, port); 398 ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL);
398 else 399 else
399 ret = find_gid_port(cma_dev->device, &gid, port); 400 ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL);
400 401
401 if (!ret) { 402 if (!ret && (port == found_port)) {
402 id_priv->id.port_num = port; 403 id_priv->id.port_num = found_port;
403 goto out; 404 goto out;
404 } 405 }
405 } 406 }
@@ -1292,7 +1293,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1292 } 1293 }
1293 1294
1294 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 1295 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1295 ret = cma_acquire_dev(conn_id); 1296 ret = cma_acquire_dev(conn_id, listen_id);
1296 if (ret) 1297 if (ret)
1297 goto err2; 1298 goto err2;
1298 1299
@@ -1451,7 +1452,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1451{ 1452{
1452 struct rdma_cm_id *new_cm_id; 1453 struct rdma_cm_id *new_cm_id;
1453 struct rdma_id_private *listen_id, *conn_id; 1454 struct rdma_id_private *listen_id, *conn_id;
1454 struct net_device *dev = NULL;
1455 struct rdma_cm_event event; 1455 struct rdma_cm_event event;
1456 int ret; 1456 int ret;
1457 struct ib_device_attr attr; 1457 struct ib_device_attr attr;
@@ -1481,7 +1481,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1481 goto out; 1481 goto out;
1482 } 1482 }
1483 1483
1484 ret = cma_acquire_dev(conn_id); 1484 ret = cma_acquire_dev(conn_id, listen_id);
1485 if (ret) { 1485 if (ret) {
1486 mutex_unlock(&conn_id->handler_mutex); 1486 mutex_unlock(&conn_id->handler_mutex);
1487 rdma_destroy_id(new_cm_id); 1487 rdma_destroy_id(new_cm_id);
@@ -1529,8 +1529,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1529 cma_deref_id(conn_id); 1529 cma_deref_id(conn_id);
1530 1530
1531out: 1531out:
1532 if (dev)
1533 dev_put(dev);
1534 mutex_unlock(&listen_id->handler_mutex); 1532 mutex_unlock(&listen_id->handler_mutex);
1535 return ret; 1533 return ret;
1536} 1534}
@@ -2050,7 +2048,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
2050 goto out; 2048 goto out;
2051 2049
2052 if (!status && !id_priv->cma_dev) 2050 if (!status && !id_priv->cma_dev)
2053 status = cma_acquire_dev(id_priv); 2051 status = cma_acquire_dev(id_priv, NULL);
2054 2052
2055 if (status) { 2053 if (status) {
2056 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2054 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
@@ -2547,7 +2545,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2547 if (ret) 2545 if (ret)
2548 goto err1; 2546 goto err1;
2549 2547
2550 ret = cma_acquire_dev(id_priv); 2548 ret = cma_acquire_dev(id_priv, NULL);
2551 if (ret) 2549 if (ret)
2552 goto err1; 2550 goto err1;
2553 } 2551 }
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index da06abde9e0d..a1e9cba84944 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -148,7 +148,7 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
148 list_for_each_entry(client, &client_list, list) { 148 list_for_each_entry(client, &client_list, list) {
149 if (client->index == index) { 149 if (client->index == index) {
150 if (op < 0 || op >= client->nops || 150 if (op < 0 || op >= client->nops ||
151 !client->cb_table[RDMA_NL_GET_OP(op)].dump) 151 !client->cb_table[op].dump)
152 return -EINVAL; 152 return -EINVAL;
153 153
154 { 154 {
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index cde1e7b5b85d..faad2caf22b1 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -612,6 +612,7 @@ static ssize_t show_node_type(struct device *device,
612 switch (dev->node_type) { 612 switch (dev->node_type) {
613 case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); 613 case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
614 case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); 614 case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
615 case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type);
615 case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); 616 case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
616 case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); 617 case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
617 default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); 618 default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index b0f189be543b..ab8b1c30b36b 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -57,7 +57,7 @@ MODULE_LICENSE("Dual BSD/GPL");
57static unsigned int max_backlog = 1024; 57static unsigned int max_backlog = 1024;
58 58
59static struct ctl_table_header *ucma_ctl_table_hdr; 59static struct ctl_table_header *ucma_ctl_table_hdr;
60static ctl_table ucma_ctl_table[] = { 60static struct ctl_table ucma_ctl_table[] = {
61 { 61 {
62 .procname = "max_backlog", 62 .procname = "max_backlog",
63 .data = &max_backlog, 63 .data = &max_backlog,
@@ -271,7 +271,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
271 goto out; 271 goto out;
272 } 272 }
273 ctx->backlog--; 273 ctx->backlog--;
274 } else if (!ctx->uid) { 274 } else if (!ctx->uid || ctx->cm_id != cm_id) {
275 /* 275 /*
276 * We ignore events for new connections until userspace has set 276 * We ignore events for new connections until userspace has set
277 * their context. This can only happen if an error occurs on a 277 * their context. This can only happen if an error occurs on a
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index d8f9c6c272d7..bdc842e9faef 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -47,6 +47,14 @@
47#include <rdma/ib_umem.h> 47#include <rdma/ib_umem.h>
48#include <rdma/ib_user_verbs.h> 48#include <rdma/ib_user_verbs.h>
49 49
50#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
51 do { \
52 (udata)->inbuf = (void __user *) (ibuf); \
53 (udata)->outbuf = (void __user *) (obuf); \
54 (udata)->inlen = (ilen); \
55 (udata)->outlen = (olen); \
56 } while (0)
57
50/* 58/*
51 * Our lifetime rules for these structs are the following: 59 * Our lifetime rules for these structs are the following:
52 * 60 *
@@ -178,6 +186,22 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
178 struct ib_event *event); 186 struct ib_event *event);
179void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd); 187void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
180 188
189struct ib_uverbs_flow_spec {
190 union {
191 union {
192 struct ib_uverbs_flow_spec_hdr hdr;
193 struct {
194 __u32 type;
195 __u16 size;
196 __u16 reserved;
197 };
198 };
199 struct ib_uverbs_flow_spec_eth eth;
200 struct ib_uverbs_flow_spec_ipv4 ipv4;
201 struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
202 };
203};
204
181#define IB_UVERBS_DECLARE_CMD(name) \ 205#define IB_UVERBS_DECLARE_CMD(name) \
182 ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ 206 ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
183 const char __user *buf, int in_len, \ 207 const char __user *buf, int in_len, \
@@ -217,9 +241,13 @@ IB_UVERBS_DECLARE_CMD(destroy_srq);
217IB_UVERBS_DECLARE_CMD(create_xsrq); 241IB_UVERBS_DECLARE_CMD(create_xsrq);
218IB_UVERBS_DECLARE_CMD(open_xrcd); 242IB_UVERBS_DECLARE_CMD(open_xrcd);
219IB_UVERBS_DECLARE_CMD(close_xrcd); 243IB_UVERBS_DECLARE_CMD(close_xrcd);
220#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING 244
221IB_UVERBS_DECLARE_CMD(create_flow); 245#define IB_UVERBS_DECLARE_EX_CMD(name) \
222IB_UVERBS_DECLARE_CMD(destroy_flow); 246 int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \
223#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ 247 struct ib_udata *ucore, \
248 struct ib_udata *uhw)
249
250IB_UVERBS_DECLARE_EX_CMD(create_flow);
251IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
224 252
225#endif /* UVERBS_H */ 253#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 2f0f01b70e3b..65f6e7dc380c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -54,17 +54,7 @@ static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
54static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; 54static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
55static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; 55static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
56static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; 56static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
57#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
58static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; 57static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
59#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
60
61#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
62 do { \
63 (udata)->inbuf = (void __user *) (ibuf); \
64 (udata)->outbuf = (void __user *) (obuf); \
65 (udata)->inlen = (ilen); \
66 (udata)->outlen = (olen); \
67 } while (0)
68 58
69/* 59/*
70 * The ib_uobject locking scheme is as follows: 60 * The ib_uobject locking scheme is as follows:
@@ -939,13 +929,9 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
939 if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) 929 if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
940 return -EINVAL; 930 return -EINVAL;
941 931
942 /* 932 ret = ib_check_mr_access(cmd.access_flags);
943 * Local write permission is required if remote write or 933 if (ret)
944 * remote atomic permission is also requested. 934 return ret;
945 */
946 if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
947 !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
948 return -EINVAL;
949 935
950 uobj = kmalloc(sizeof *uobj, GFP_KERNEL); 936 uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
951 if (!uobj) 937 if (!uobj)
@@ -2128,6 +2114,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
2128 } 2114 }
2129 next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; 2115 next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn;
2130 next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; 2116 next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
2117 if (next->opcode == IB_WR_SEND_WITH_IMM)
2118 next->ex.imm_data =
2119 (__be32 __force) user_wr->ex.imm_data;
2131 } else { 2120 } else {
2132 switch (next->opcode) { 2121 switch (next->opcode) {
2133 case IB_WR_RDMA_WRITE_WITH_IMM: 2122 case IB_WR_RDMA_WRITE_WITH_IMM:
@@ -2601,8 +2590,7 @@ out_put:
2601 return ret ? ret : in_len; 2590 return ret ? ret : in_len;
2602} 2591}
2603 2592
2604#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING 2593static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
2605static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
2606 union ib_flow_spec *ib_spec) 2594 union ib_flow_spec *ib_spec)
2607{ 2595{
2608 ib_spec->type = kern_spec->type; 2596 ib_spec->type = kern_spec->type;
@@ -2642,28 +2630,31 @@ static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
2642 return 0; 2630 return 0;
2643} 2631}
2644 2632
2645ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file, 2633int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
2646 const char __user *buf, int in_len, 2634 struct ib_udata *ucore,
2647 int out_len) 2635 struct ib_udata *uhw)
2648{ 2636{
2649 struct ib_uverbs_create_flow cmd; 2637 struct ib_uverbs_create_flow cmd;
2650 struct ib_uverbs_create_flow_resp resp; 2638 struct ib_uverbs_create_flow_resp resp;
2651 struct ib_uobject *uobj; 2639 struct ib_uobject *uobj;
2652 struct ib_flow *flow_id; 2640 struct ib_flow *flow_id;
2653 struct ib_kern_flow_attr *kern_flow_attr; 2641 struct ib_uverbs_flow_attr *kern_flow_attr;
2654 struct ib_flow_attr *flow_attr; 2642 struct ib_flow_attr *flow_attr;
2655 struct ib_qp *qp; 2643 struct ib_qp *qp;
2656 int err = 0; 2644 int err = 0;
2657 void *kern_spec; 2645 void *kern_spec;
2658 void *ib_spec; 2646 void *ib_spec;
2659 int i; 2647 int i;
2660 int kern_attr_size;
2661 2648
2662 if (out_len < sizeof(resp)) 2649 if (ucore->outlen < sizeof(resp))
2663 return -ENOSPC; 2650 return -ENOSPC;
2664 2651
2665 if (copy_from_user(&cmd, buf, sizeof(cmd))) 2652 err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
2666 return -EFAULT; 2653 if (err)
2654 return err;
2655
2656 ucore->inbuf += sizeof(cmd);
2657 ucore->inlen -= sizeof(cmd);
2667 2658
2668 if (cmd.comp_mask) 2659 if (cmd.comp_mask)
2669 return -EINVAL; 2660 return -EINVAL;
@@ -2672,32 +2663,27 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
2672 !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW)) 2663 !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
2673 return -EPERM; 2664 return -EPERM;
2674 2665
2675 if (cmd.flow_attr.num_of_specs < 0 || 2666 if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
2676 cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
2677 return -EINVAL; 2667 return -EINVAL;
2678 2668
2679 kern_attr_size = cmd.flow_attr.size - sizeof(cmd) - 2669 if (cmd.flow_attr.size > ucore->inlen ||
2680 sizeof(struct ib_uverbs_cmd_hdr_ex); 2670 cmd.flow_attr.size >
2681 2671 (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
2682 if (cmd.flow_attr.size < 0 || cmd.flow_attr.size > in_len ||
2683 kern_attr_size < 0 || kern_attr_size >
2684 (cmd.flow_attr.num_of_specs * sizeof(struct ib_kern_spec)))
2685 return -EINVAL; 2672 return -EINVAL;
2686 2673
2687 if (cmd.flow_attr.num_of_specs) { 2674 if (cmd.flow_attr.num_of_specs) {
2688 kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL); 2675 kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size,
2676 GFP_KERNEL);
2689 if (!kern_flow_attr) 2677 if (!kern_flow_attr)
2690 return -ENOMEM; 2678 return -ENOMEM;
2691 2679
2692 memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr)); 2680 memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
2693 if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd), 2681 err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
2694 kern_attr_size)) { 2682 cmd.flow_attr.size);
2695 err = -EFAULT; 2683 if (err)
2696 goto err_free_attr; 2684 goto err_free_attr;
2697 }
2698 } else { 2685 } else {
2699 kern_flow_attr = &cmd.flow_attr; 2686 kern_flow_attr = &cmd.flow_attr;
2700 kern_attr_size = sizeof(cmd.flow_attr);
2701 } 2687 }
2702 2688
2703 uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); 2689 uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
@@ -2714,7 +2700,7 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
2714 goto err_uobj; 2700 goto err_uobj;
2715 } 2701 }
2716 2702
2717 flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL); 2703 flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL);
2718 if (!flow_attr) { 2704 if (!flow_attr) {
2719 err = -ENOMEM; 2705 err = -ENOMEM;
2720 goto err_put; 2706 goto err_put;
@@ -2729,19 +2715,22 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
2729 2715
2730 kern_spec = kern_flow_attr + 1; 2716 kern_spec = kern_flow_attr + 1;
2731 ib_spec = flow_attr + 1; 2717 ib_spec = flow_attr + 1;
2732 for (i = 0; i < flow_attr->num_of_specs && kern_attr_size > 0; i++) { 2718 for (i = 0; i < flow_attr->num_of_specs &&
2719 cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
2720 cmd.flow_attr.size >=
2721 ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
2733 err = kern_spec_to_ib_spec(kern_spec, ib_spec); 2722 err = kern_spec_to_ib_spec(kern_spec, ib_spec);
2734 if (err) 2723 if (err)
2735 goto err_free; 2724 goto err_free;
2736 flow_attr->size += 2725 flow_attr->size +=
2737 ((union ib_flow_spec *) ib_spec)->size; 2726 ((union ib_flow_spec *) ib_spec)->size;
2738 kern_attr_size -= ((struct ib_kern_spec *) kern_spec)->size; 2727 cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
2739 kern_spec += ((struct ib_kern_spec *) kern_spec)->size; 2728 kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size;
2740 ib_spec += ((union ib_flow_spec *) ib_spec)->size; 2729 ib_spec += ((union ib_flow_spec *) ib_spec)->size;
2741 } 2730 }
2742 if (kern_attr_size) { 2731 if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
2743 pr_warn("create flow failed, %d bytes left from uverb cmd\n", 2732 pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n",
2744 kern_attr_size); 2733 i, cmd.flow_attr.size);
2745 goto err_free; 2734 goto err_free;
2746 } 2735 }
2747 flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER); 2736 flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
@@ -2760,11 +2749,10 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
2760 memset(&resp, 0, sizeof(resp)); 2749 memset(&resp, 0, sizeof(resp));
2761 resp.flow_handle = uobj->id; 2750 resp.flow_handle = uobj->id;
2762 2751
2763 if (copy_to_user((void __user *)(unsigned long) cmd.response, 2752 err = ib_copy_to_udata(ucore,
2764 &resp, sizeof(resp))) { 2753 &resp, sizeof(resp));
2765 err = -EFAULT; 2754 if (err)
2766 goto err_copy; 2755 goto err_copy;
2767 }
2768 2756
2769 put_qp_read(qp); 2757 put_qp_read(qp);
2770 mutex_lock(&file->mutex); 2758 mutex_lock(&file->mutex);
@@ -2777,7 +2765,7 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
2777 kfree(flow_attr); 2765 kfree(flow_attr);
2778 if (cmd.flow_attr.num_of_specs) 2766 if (cmd.flow_attr.num_of_specs)
2779 kfree(kern_flow_attr); 2767 kfree(kern_flow_attr);
2780 return in_len; 2768 return 0;
2781err_copy: 2769err_copy:
2782 idr_remove_uobj(&ib_uverbs_rule_idr, uobj); 2770 idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
2783destroy_flow: 2771destroy_flow:
@@ -2794,16 +2782,18 @@ err_free_attr:
2794 return err; 2782 return err;
2795} 2783}
2796 2784
2797ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file, 2785int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
2798 const char __user *buf, int in_len, 2786 struct ib_udata *ucore,
2799 int out_len) { 2787 struct ib_udata *uhw)
2788{
2800 struct ib_uverbs_destroy_flow cmd; 2789 struct ib_uverbs_destroy_flow cmd;
2801 struct ib_flow *flow_id; 2790 struct ib_flow *flow_id;
2802 struct ib_uobject *uobj; 2791 struct ib_uobject *uobj;
2803 int ret; 2792 int ret;
2804 2793
2805 if (copy_from_user(&cmd, buf, sizeof(cmd))) 2794 ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
2806 return -EFAULT; 2795 if (ret)
2796 return ret;
2807 2797
2808 uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle, 2798 uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
2809 file->ucontext); 2799 file->ucontext);
@@ -2825,9 +2815,8 @@ ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file,
2825 2815
2826 put_uobj(uobj); 2816 put_uobj(uobj);
2827 2817
2828 return ret ? ret : in_len; 2818 return ret;
2829} 2819}
2830#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
2831 2820
2832static int __uverbs_create_xsrq(struct ib_uverbs_file *file, 2821static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
2833 struct ib_uverbs_create_xsrq *cmd, 2822 struct ib_uverbs_create_xsrq *cmd,
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 2df31f68ea09..34386943ebcf 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -115,10 +115,13 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
115 [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, 115 [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
116 [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, 116 [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
117 [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, 117 [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
118#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING 118};
119 [IB_USER_VERBS_CMD_CREATE_FLOW] = ib_uverbs_create_flow, 119
120 [IB_USER_VERBS_CMD_DESTROY_FLOW] = ib_uverbs_destroy_flow 120static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
121#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ 121 struct ib_udata *ucore,
122 struct ib_udata *uhw) = {
123 [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
124 [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow
122}; 125};
123 126
124static void ib_uverbs_add_one(struct ib_device *device); 127static void ib_uverbs_add_one(struct ib_device *device);
@@ -589,6 +592,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
589{ 592{
590 struct ib_uverbs_file *file = filp->private_data; 593 struct ib_uverbs_file *file = filp->private_data;
591 struct ib_uverbs_cmd_hdr hdr; 594 struct ib_uverbs_cmd_hdr hdr;
595 __u32 flags;
592 596
593 if (count < sizeof hdr) 597 if (count < sizeof hdr)
594 return -EINVAL; 598 return -EINVAL;
@@ -596,45 +600,105 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
596 if (copy_from_user(&hdr, buf, sizeof hdr)) 600 if (copy_from_user(&hdr, buf, sizeof hdr))
597 return -EFAULT; 601 return -EFAULT;
598 602
599 if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || 603 flags = (hdr.command &
600 !uverbs_cmd_table[hdr.command]) 604 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
601 return -EINVAL;
602 605
603 if (!file->ucontext && 606 if (!flags) {
604 hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) 607 __u32 command;
605 return -EINVAL;
606 608
607 if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command))) 609 if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
608 return -ENOSYS; 610 IB_USER_VERBS_CMD_COMMAND_MASK))
611 return -EINVAL;
609 612
610#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING 613 command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
611 if (hdr.command >= IB_USER_VERBS_CMD_THRESHOLD) {
612 struct ib_uverbs_cmd_hdr_ex hdr_ex;
613 614
614 if (copy_from_user(&hdr_ex, buf, sizeof(hdr_ex))) 615 if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
615 return -EFAULT; 616 !uverbs_cmd_table[command])
617 return -EINVAL;
616 618
617 if (((hdr_ex.in_words + hdr_ex.provider_in_words) * 4) != count) 619 if (!file->ucontext &&
620 command != IB_USER_VERBS_CMD_GET_CONTEXT)
618 return -EINVAL; 621 return -EINVAL;
619 622
620 return uverbs_cmd_table[hdr.command](file, 623 if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << command)))
621 buf + sizeof(hdr_ex), 624 return -ENOSYS;
622 (hdr_ex.in_words + 625
623 hdr_ex.provider_in_words) * 4,
624 (hdr_ex.out_words +
625 hdr_ex.provider_out_words) * 4);
626 } else {
627#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
628 if (hdr.in_words * 4 != count) 626 if (hdr.in_words * 4 != count)
629 return -EINVAL; 627 return -EINVAL;
630 628
631 return uverbs_cmd_table[hdr.command](file, 629 return uverbs_cmd_table[command](file,
632 buf + sizeof(hdr), 630 buf + sizeof(hdr),
633 hdr.in_words * 4, 631 hdr.in_words * 4,
634 hdr.out_words * 4); 632 hdr.out_words * 4);
635#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING 633
634 } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
635 __u32 command;
636
637 struct ib_uverbs_ex_cmd_hdr ex_hdr;
638 struct ib_udata ucore;
639 struct ib_udata uhw;
640 int err;
641 size_t written_count = count;
642
643 if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
644 IB_USER_VERBS_CMD_COMMAND_MASK))
645 return -EINVAL;
646
647 command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
648
649 if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
650 !uverbs_ex_cmd_table[command])
651 return -ENOSYS;
652
653 if (!file->ucontext)
654 return -EINVAL;
655
656 if (!(file->device->ib_dev->uverbs_ex_cmd_mask & (1ull << command)))
657 return -ENOSYS;
658
659 if (count < (sizeof(hdr) + sizeof(ex_hdr)))
660 return -EINVAL;
661
662 if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
663 return -EFAULT;
664
665 count -= sizeof(hdr) + sizeof(ex_hdr);
666 buf += sizeof(hdr) + sizeof(ex_hdr);
667
668 if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
669 return -EINVAL;
670
671 if (ex_hdr.response) {
672 if (!hdr.out_words && !ex_hdr.provider_out_words)
673 return -EINVAL;
674 } else {
675 if (hdr.out_words || ex_hdr.provider_out_words)
676 return -EINVAL;
677 }
678
679 INIT_UDATA(&ucore,
680 (hdr.in_words) ? buf : 0,
681 (unsigned long)ex_hdr.response,
682 hdr.in_words * 8,
683 hdr.out_words * 8);
684
685 INIT_UDATA(&uhw,
686 (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0,
687 (ex_hdr.provider_out_words) ? (unsigned long)ex_hdr.response + ucore.outlen : 0,
688 ex_hdr.provider_in_words * 8,
689 ex_hdr.provider_out_words * 8);
690
691 err = uverbs_ex_cmd_table[command](file,
692 &ucore,
693 &uhw);
694
695 if (err)
696 return err;
697
698 return written_count;
636 } 699 }
637#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ 700
701 return -ENOSYS;
638} 702}
639 703
640static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) 704static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
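The extended-command path added above changes the layout userspace writes to the uverbs file. A sketch of the framing implied by this hunk follows; the authoritative definitions live in include/uapi/rdma/ib_user_verbs.h, the struct name below is illustrative, and the trailing reserved member is an assumption:

    #include <linux/types.h>

    /*
     * write(uverbs_fd) payload for an extended command, per the checks above:
     *
     *   struct ib_uverbs_cmd_hdr  hdr;     legacy header; hdr.command carries the
     *                                      command number in the COMMAND_MASK bits
     *                                      and IB_USER_VERBS_CMD_FLAG_EXTENDED in
     *                                      the IB_USER_VERBS_CMD_FLAGS_MASK bits
     *   extended header (sketched below)
     *   core payload:      hdr.in_words * 8 bytes
     *   provider payload:  ex_hdr.provider_in_words * 8 bytes
     *
     * Responses go to ex_hdr.response: hdr.out_words * 8 bytes of core response,
     * followed by ex_hdr.provider_out_words * 8 bytes of provider response.
     * Note the 8-byte word size here versus 4-byte words in the legacy path.
     */
    struct ib_uverbs_ex_cmd_hdr_sketch {
    	__u64 response;            /* user address for the response; 0 if none expected */
    	__u16 provider_in_words;   /* driver-private input size, in 8-byte words */
    	__u16 provider_out_words;  /* driver-private output size, in 8-byte words */
    	__u32 cmd_hdr_reserved;    /* assumed reserved/padding field */
    };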
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index a321df28bab2..d4f6ddf72ffa 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -114,6 +114,8 @@ rdma_node_get_transport(enum rdma_node_type node_type)
114 return RDMA_TRANSPORT_IB; 114 return RDMA_TRANSPORT_IB;
115 case RDMA_NODE_RNIC: 115 case RDMA_NODE_RNIC:
116 return RDMA_TRANSPORT_IWARP; 116 return RDMA_TRANSPORT_IWARP;
117 case RDMA_NODE_USNIC:
118 return RDMA_TRANSPORT_USNIC;
117 default: 119 default:
118 BUG(); 120 BUG();
119 return 0; 121 return 0;
@@ -130,6 +132,7 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_
130 case RDMA_TRANSPORT_IB: 132 case RDMA_TRANSPORT_IB:
131 return IB_LINK_LAYER_INFINIBAND; 133 return IB_LINK_LAYER_INFINIBAND;
132 case RDMA_TRANSPORT_IWARP: 134 case RDMA_TRANSPORT_IWARP:
135 case RDMA_TRANSPORT_USNIC:
133 return IB_LINK_LAYER_ETHERNET; 136 return IB_LINK_LAYER_ETHERNET;
134 default: 137 default:
135 return IB_LINK_LAYER_UNSPECIFIED; 138 return IB_LINK_LAYER_UNSPECIFIED;
@@ -958,6 +961,11 @@ EXPORT_SYMBOL(ib_resize_cq);
958struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags) 961struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
959{ 962{
960 struct ib_mr *mr; 963 struct ib_mr *mr;
964 int err;
965
966 err = ib_check_mr_access(mr_access_flags);
967 if (err)
968 return ERR_PTR(err);
961 969
962 mr = pd->device->get_dma_mr(pd, mr_access_flags); 970 mr = pd->device->get_dma_mr(pd, mr_access_flags);
963 971
@@ -980,6 +988,11 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
980 u64 *iova_start) 988 u64 *iova_start)
981{ 989{
982 struct ib_mr *mr; 990 struct ib_mr *mr;
991 int err;
992
993 err = ib_check_mr_access(mr_access_flags);
994 if (err)
995 return ERR_PTR(err);
983 996
984 if (!pd->device->reg_phys_mr) 997 if (!pd->device->reg_phys_mr)
985 return ERR_PTR(-ENOSYS); 998 return ERR_PTR(-ENOSYS);
@@ -1010,6 +1023,10 @@ int ib_rereg_phys_mr(struct ib_mr *mr,
1010 struct ib_pd *old_pd; 1023 struct ib_pd *old_pd;
1011 int ret; 1024 int ret;
1012 1025
1026 ret = ib_check_mr_access(mr_access_flags);
1027 if (ret)
1028 return ret;
1029
1013 if (!mr->device->rereg_phys_mr) 1030 if (!mr->device->rereg_phys_mr)
1014 return -ENOSYS; 1031 return -ENOSYS;
1015 1032
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 33d2cc6ab562..4a033853312e 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -602,10 +602,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
602 rdev->lldi.vr->qp.size, 602 rdev->lldi.vr->qp.size,
603 rdev->lldi.vr->cq.start, 603 rdev->lldi.vr->cq.start,
604 rdev->lldi.vr->cq.size); 604 rdev->lldi.vr->cq.size);
605 PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu " 605 PDBG("udb len 0x%x udb base %llx db_reg %p gts_reg %p qpshift %lu "
606 "qpmask 0x%x cqshift %lu cqmask 0x%x\n", 606 "qpmask 0x%x cqshift %lu cqmask 0x%x\n",
607 (unsigned)pci_resource_len(rdev->lldi.pdev, 2), 607 (unsigned)pci_resource_len(rdev->lldi.pdev, 2),
608 (void *)(unsigned long)pci_resource_start(rdev->lldi.pdev, 2), 608 (u64)pci_resource_start(rdev->lldi.pdev, 2),
609 rdev->lldi.db_reg, 609 rdev->lldi.db_reg,
610 rdev->lldi.gts_reg, 610 rdev->lldi.gts_reg,
611 rdev->qpshift, rdev->qpmask, 611 rdev->qpshift, rdev->qpmask,
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
index f5cb13b21445..cc04b7ba3488 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
@@ -280,9 +280,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
280 int j; 280 int j;
281 int ret; 281 int ret;
282 282
283 ret = get_user_pages(current, current->mm, addr, 283 ret = get_user_pages_fast(addr, npages, 0, pages);
284 npages, 0, 1, pages, NULL);
285
286 if (ret != npages) { 284 if (ret != npages) {
287 int i; 285 int i;
288 286
@@ -811,10 +809,7 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd,
811 while (dim) { 809 while (dim) {
812 const int mxp = 8; 810 const int mxp = 8;
813 811
814 down_write(&current->mm->mmap_sem);
815 ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp); 812 ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
816 up_write(&current->mm->mmap_sem);
817
818 if (ret <= 0) 813 if (ret <= 0)
819 goto done_unlock; 814 goto done_unlock;
820 else { 815 else {
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index ea299515ecb2..66dbf8062374 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -324,7 +324,7 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
324 u32 i; 324 u32 i;
325 325
326 i = cq->mcq.cons_index; 326 i = cq->mcq.cons_index;
327 while (get_sw_cqe(cq, i & cq->ibcq.cqe)) 327 while (get_sw_cqe(cq, i))
328 ++i; 328 ++i;
329 329
330 return i - cq->mcq.cons_index; 330 return i - cq->mcq.cons_index;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index f0612645de99..1aad9b3e6bdd 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1691,11 +1691,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1691 ibdev->ib_dev.create_flow = mlx4_ib_create_flow; 1691 ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
1692 ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow; 1692 ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;
1693 1693
1694#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING 1694 ibdev->ib_dev.uverbs_ex_cmd_mask |=
1695 ibdev->ib_dev.uverbs_cmd_mask |= 1695 (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
1696 (1ull << IB_USER_VERBS_CMD_CREATE_FLOW) | 1696 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
1697 (1ull << IB_USER_VERBS_CMD_DESTROY_FLOW);
1698#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
1699 } 1697 }
1700 1698
1701 mlx4_ib_alloc_eqs(dev, ibdev); 1699 mlx4_ib_alloc_eqs(dev, ibdev);
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 5b53ca5a2284..8308e3634767 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -2834,7 +2834,7 @@ static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2834 init_attr->qp_context = nesqp->ibqp.qp_context; 2834 init_attr->qp_context = nesqp->ibqp.qp_context;
2835 init_attr->send_cq = nesqp->ibqp.send_cq; 2835 init_attr->send_cq = nesqp->ibqp.send_cq;
2836 init_attr->recv_cq = nesqp->ibqp.recv_cq; 2836 init_attr->recv_cq = nesqp->ibqp.recv_cq;
2837 init_attr->srq = nesqp->ibqp.srq = nesqp->ibqp.srq; 2837 init_attr->srq = nesqp->ibqp.srq;
2838 init_attr->cap = attr->cap; 2838 init_attr->cap = attr->cap;
2839 2839
2840 return 0; 2840 return 0;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index adc11d14f878..294dd27b601e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -122,6 +122,32 @@ struct mqe_ctx {
122 bool cmd_done; 122 bool cmd_done;
123}; 123};
124 124
125struct ocrdma_hw_mr {
126 u32 lkey;
127 u8 fr_mr;
128 u8 remote_atomic;
129 u8 remote_rd;
130 u8 remote_wr;
131 u8 local_rd;
132 u8 local_wr;
133 u8 mw_bind;
134 u8 rsvd;
135 u64 len;
136 struct ocrdma_pbl *pbl_table;
137 u32 num_pbls;
138 u32 num_pbes;
139 u32 pbl_size;
140 u32 pbe_size;
141 u64 fbo;
142 u64 va;
143};
144
145struct ocrdma_mr {
146 struct ib_mr ibmr;
147 struct ib_umem *umem;
148 struct ocrdma_hw_mr hwmr;
149};
150
125struct ocrdma_dev { 151struct ocrdma_dev {
126 struct ib_device ibdev; 152 struct ib_device ibdev;
127 struct ocrdma_dev_attr attr; 153 struct ocrdma_dev_attr attr;
@@ -169,7 +195,7 @@ struct ocrdma_dev {
169 struct list_head entry; 195 struct list_head entry;
170 struct rcu_head rcu; 196 struct rcu_head rcu;
171 int id; 197 int id;
172 u64 stag_arr[OCRDMA_MAX_STAG]; 198 struct ocrdma_mr *stag_arr[OCRDMA_MAX_STAG];
173 u16 pvid; 199 u16 pvid;
174}; 200};
175 201
@@ -294,31 +320,6 @@ struct ocrdma_qp {
294 u16 db_cache; 320 u16 db_cache;
295}; 321};
296 322
297struct ocrdma_hw_mr {
298 u32 lkey;
299 u8 fr_mr;
300 u8 remote_atomic;
301 u8 remote_rd;
302 u8 remote_wr;
303 u8 local_rd;
304 u8 local_wr;
305 u8 mw_bind;
306 u8 rsvd;
307 u64 len;
308 struct ocrdma_pbl *pbl_table;
309 u32 num_pbls;
310 u32 num_pbes;
311 u32 pbl_size;
312 u32 pbe_size;
313 u64 fbo;
314 u64 va;
315};
316
317struct ocrdma_mr {
318 struct ib_mr ibmr;
319 struct ib_umem *umem;
320 struct ocrdma_hw_mr hwmr;
321};
322 323
323struct ocrdma_ucontext { 324struct ocrdma_ucontext {
324 struct ib_ucontext ibucontext; 325 struct ib_ucontext ibucontext;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 50219ab2279d..56bf32fcb62c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1783,7 +1783,7 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
1783 u32 max_sges = attrs->cap.max_send_sge; 1783 u32 max_sges = attrs->cap.max_send_sge;
1784 1784
1785 /* QP1 may exceed 127 */ 1785 /* QP1 may exceed 127 */
1786 max_wqe_allocated = min_t(int, attrs->cap.max_send_wr + 1, 1786 max_wqe_allocated = min_t(u32, attrs->cap.max_send_wr + 1,
1787 dev->attr.max_wqe); 1787 dev->attr.max_wqe);
1788 1788
1789 status = ocrdma_build_q_conf(&max_wqe_allocated, 1789 status = ocrdma_build_q_conf(&max_wqe_allocated,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 0ce7674621ea..91443bcb9e0e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -452,9 +452,6 @@ static void ocrdma_remove_free(struct rcu_head *rcu)
452{ 452{
453 struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu); 453 struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu);
454 454
455 ocrdma_free_resources(dev);
456 ocrdma_cleanup_hw(dev);
457
458 idr_remove(&ocrdma_dev_id, dev->id); 455 idr_remove(&ocrdma_dev_id, dev->id);
459 kfree(dev->mbx_cmd); 456 kfree(dev->mbx_cmd);
460 ib_dealloc_device(&dev->ibdev); 457 ib_dealloc_device(&dev->ibdev);
@@ -470,6 +467,10 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
470 spin_lock(&ocrdma_devlist_lock); 467 spin_lock(&ocrdma_devlist_lock);
471 list_del_rcu(&dev->entry); 468 list_del_rcu(&dev->entry);
472 spin_unlock(&ocrdma_devlist_lock); 469 spin_unlock(&ocrdma_devlist_lock);
470
471 ocrdma_free_resources(dev);
472 ocrdma_cleanup_hw(dev);
473
473 call_rcu(&dev->rcu, ocrdma_remove_free); 474 call_rcu(&dev->rcu, ocrdma_remove_free);
474} 475}
475 476
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 69f1d1221a6b..7686dceadd29 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -1981,9 +1981,7 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1981 1981
1982 wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES); 1982 wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
1983 1983
1984 if ((wr->wr.fast_reg.page_list_len > 1984 if (wr->wr.fast_reg.page_list_len > qp->dev->attr.max_pages_per_frmr)
1985 qp->dev->attr.max_pages_per_frmr) ||
1986 (wr->wr.fast_reg.length > 0xffffffffULL))
1987 return -EINVAL; 1985 return -EINVAL;
1988 1986
1989 hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT); 1987 hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
@@ -2839,7 +2837,7 @@ struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len)
2839 goto mbx_err; 2837 goto mbx_err;
2840 mr->ibmr.rkey = mr->hwmr.lkey; 2838 mr->ibmr.rkey = mr->hwmr.lkey;
2841 mr->ibmr.lkey = mr->hwmr.lkey; 2839 mr->ibmr.lkey = mr->hwmr.lkey;
2842 dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (unsigned long) mr; 2840 dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = mr;
2843 return &mr->ibmr; 2841 return &mr->ibmr;
2844mbx_err: 2842mbx_err:
2845 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); 2843 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 016e7429adf6..5bfc02f450e6 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -6190,21 +6190,20 @@ static int setup_txselect(const char *str, struct kernel_param *kp)
6190{ 6190{
6191 struct qib_devdata *dd; 6191 struct qib_devdata *dd;
6192 unsigned long val; 6192 unsigned long val;
6193 int ret; 6193 char *n;
6194
6195 if (strlen(str) >= MAX_ATTEN_LEN) { 6194 if (strlen(str) >= MAX_ATTEN_LEN) {
6196 pr_info("txselect_values string too long\n"); 6195 pr_info("txselect_values string too long\n");
6197 return -ENOSPC; 6196 return -ENOSPC;
6198 } 6197 }
6199 ret = kstrtoul(str, 0, &val); 6198 val = simple_strtoul(str, &n, 0);
6200 if (ret || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + 6199 if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
6201 TXDDS_MFG_SZ)) { 6200 TXDDS_MFG_SZ)) {
6202 pr_info("txselect_values must start with a number < %d\n", 6201 pr_info("txselect_values must start with a number < %d\n",
6203 TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ); 6202 TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ);
6204 return ret ? ret : -EINVAL; 6203 return -EINVAL;
6205 } 6204 }
6206
6207 strcpy(txselect_list, str); 6205 strcpy(txselect_list, str);
6206
6208 list_for_each_entry(dd, &qib_dev_list, list) 6207 list_for_each_entry(dd, &qib_dev_list, list)
6209 if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322) 6208 if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322)
6210 set_no_qsfp_atten(dd, 1); 6209 set_no_qsfp_atten(dd, 1);
diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h
index 28874f8606f8..941d4d50d8e7 100644
--- a/drivers/infiniband/hw/qib/qib_mad.h
+++ b/drivers/infiniband/hw/qib/qib_mad.h
@@ -54,7 +54,7 @@ struct ib_node_info {
54 __be32 revision; 54 __be32 revision;
55 u8 local_port_num; 55 u8 local_port_num;
56 u8 vendor_id[3]; 56 u8 vendor_id[3];
57} __attribute__ ((packed)); 57} __packed;
58 58
59struct ib_mad_notice_attr { 59struct ib_mad_notice_attr {
60 u8 generic_type; 60 u8 generic_type;
@@ -73,7 +73,7 @@ struct ib_mad_notice_attr {
73 __be16 reserved; 73 __be16 reserved;
74 __be16 lid; /* where violation happened */ 74 __be16 lid; /* where violation happened */
75 u8 port_num; /* where violation happened */ 75 u8 port_num; /* where violation happened */
76 } __attribute__ ((packed)) ntc_129_131; 76 } __packed ntc_129_131;
77 77
78 struct { 78 struct {
79 __be16 reserved; 79 __be16 reserved;
@@ -83,14 +83,14 @@ struct ib_mad_notice_attr {
83 __be32 new_cap_mask; /* new capability mask */ 83 __be32 new_cap_mask; /* new capability mask */
84 u8 reserved3; 84 u8 reserved3;
85 u8 change_flags; /* low 3 bits only */ 85 u8 change_flags; /* low 3 bits only */
86 } __attribute__ ((packed)) ntc_144; 86 } __packed ntc_144;
87 87
88 struct { 88 struct {
89 __be16 reserved; 89 __be16 reserved;
90 __be16 lid; /* lid where sys guid changed */ 90 __be16 lid; /* lid where sys guid changed */
91 __be16 reserved2; 91 __be16 reserved2;
92 __be64 new_sys_guid; 92 __be64 new_sys_guid;
93 } __attribute__ ((packed)) ntc_145; 93 } __packed ntc_145;
94 94
95 struct { 95 struct {
96 __be16 reserved; 96 __be16 reserved;
@@ -104,7 +104,7 @@ struct ib_mad_notice_attr {
104 u8 reserved3; 104 u8 reserved3;
105 u8 dr_trunc_hop; 105 u8 dr_trunc_hop;
106 u8 dr_rtn_path[30]; 106 u8 dr_rtn_path[30];
107 } __attribute__ ((packed)) ntc_256; 107 } __packed ntc_256;
108 108
109 struct { 109 struct {
110 __be16 reserved; 110 __be16 reserved;
@@ -115,7 +115,7 @@ struct ib_mad_notice_attr {
115 __be32 qp2; /* high 8 bits reserved */ 115 __be32 qp2; /* high 8 bits reserved */
116 union ib_gid gid1; 116 union ib_gid gid1;
117 union ib_gid gid2; 117 union ib_gid gid2;
118 } __attribute__ ((packed)) ntc_257_258; 118 } __packed ntc_257_258;
119 119
120 } details; 120 } details;
121}; 121};
@@ -209,7 +209,7 @@ struct ib_pma_portcounters_cong {
209 __be64 port_rcv_packets; 209 __be64 port_rcv_packets;
210 __be64 port_xmit_wait; 210 __be64 port_xmit_wait;
211 __be64 port_adr_events; 211 __be64 port_adr_events;
212} __attribute__ ((packed)); 212} __packed;
213 213
214#define IB_PMA_CONG_HW_CONTROL_TIMER 0x00 214#define IB_PMA_CONG_HW_CONTROL_TIMER 0x00
215#define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01 215#define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index d0a0ea0c14d6..165aee2ca8a0 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -594,8 +594,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
594 else 594 else
595 j = npages; 595 j = npages;
596 596
597 ret = get_user_pages(current, current->mm, addr, 597 ret = get_user_pages_fast(addr, j, 0, pages);
598 j, 0, 1, pages, NULL);
599 if (ret != j) { 598 if (ret != j) {
600 i = 0; 599 i = 0;
601 j = ret; 600 j = ret;
@@ -1294,11 +1293,8 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
1294 int mxp = 8; 1293 int mxp = 8;
1295 int ndesc = 0; 1294 int ndesc = 0;
1296 1295
1297 down_write(&current->mm->mmap_sem);
1298 ret = qib_user_sdma_queue_pkts(dd, ppd, pq, 1296 ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
1299 iov, dim, &list, &mxp, &ndesc); 1297 iov, dim, &list, &mxp, &ndesc);
1300 up_write(&current->mm->mmap_sem);
1301
1302 if (ret < 0) 1298 if (ret < 0)
1303 goto done_unlock; 1299 goto done_unlock;
1304 else { 1300 else {
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 012e2c7575ad..a01c7d2cf541 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -150,14 +150,14 @@ struct ib_reth {
150 __be64 vaddr; 150 __be64 vaddr;
151 __be32 rkey; 151 __be32 rkey;
152 __be32 length; 152 __be32 length;
153} __attribute__ ((packed)); 153} __packed;
154 154
155struct ib_atomic_eth { 155struct ib_atomic_eth {
156 __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ 156 __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
157 __be32 rkey; 157 __be32 rkey;
158 __be64 swap_data; 158 __be64 swap_data;
159 __be64 compare_data; 159 __be64 compare_data;
160} __attribute__ ((packed)); 160} __packed;
161 161
162struct qib_other_headers { 162struct qib_other_headers {
163 __be32 bth[3]; 163 __be32 bth[3];
@@ -178,7 +178,7 @@ struct qib_other_headers {
178 __be32 aeth; 178 __be32 aeth;
179 struct ib_atomic_eth atomic_eth; 179 struct ib_atomic_eth atomic_eth;
180 } u; 180 } u;
181} __attribute__ ((packed)); 181} __packed;
182 182
183/* 183/*
184 * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes 184 * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
@@ -195,12 +195,12 @@ struct qib_ib_header {
195 } l; 195 } l;
196 struct qib_other_headers oth; 196 struct qib_other_headers oth;
197 } u; 197 } u;
198} __attribute__ ((packed)); 198} __packed;
199 199
200struct qib_pio_header { 200struct qib_pio_header {
201 __le32 pbc[2]; 201 __le32 pbc[2];
202 struct qib_ib_header hdr; 202 struct qib_ib_header hdr;
203} __attribute__ ((packed)); 203} __packed;
204 204
205/* 205/*
206 * There is one struct qib_mcast for each multicast GID. 206 * There is one struct qib_mcast for each multicast GID.
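
The __attribute__ ((packed)) to __packed conversions in qib_mad.h and qib_verbs.h above are purely cosmetic: __packed is the kernel's shorthand for the same compiler attribute, so the on-wire layout of these headers does not change. A standalone illustration, with a struct name invented for the example:

#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/bug.h>

struct example_reth {
	__be64 vaddr;
	__be32 rkey;
	__be32 length;
} __packed;			/* identical layout to __attribute__ ((packed)) */

static inline void example_reth_layout_check(void)
{
	/* 8 + 4 + 4 bytes with no padding, exactly as before the rename */
	BUILD_BUG_ON(sizeof(struct example_reth) != 16);
}
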
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index eb71aaa26a9a..c639f90cfda4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -101,6 +101,7 @@ enum {
101 IPOIB_MCAST_FLAG_SENDONLY = 1, 101 IPOIB_MCAST_FLAG_SENDONLY = 1,
102 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ 102 IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
103 IPOIB_MCAST_FLAG_ATTACHED = 3, 103 IPOIB_MCAST_FLAG_ATTACHED = 3,
104 IPOIB_MCAST_JOIN_STARTED = 4,
104 105
105 MAX_SEND_CQE = 16, 106 MAX_SEND_CQE = 16,
106 IPOIB_CM_COPYBREAK = 256, 107 IPOIB_CM_COPYBREAK = 256,
@@ -151,6 +152,7 @@ struct ipoib_mcast {
151 struct sk_buff_head pkt_queue; 152 struct sk_buff_head pkt_queue;
152 153
153 struct net_device *dev; 154 struct net_device *dev;
155 struct completion done;
154}; 156};
155 157
156struct ipoib_rx_buf { 158struct ipoib_rx_buf {
@@ -299,7 +301,7 @@ struct ipoib_dev_priv {
299 301
300 unsigned long flags; 302 unsigned long flags;
301 303
302 struct mutex vlan_mutex; 304 struct rw_semaphore vlan_rwsem;
303 305
304 struct rb_root path_tree; 306 struct rb_root path_tree;
305 struct list_head path_list; 307 struct list_head path_list;
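
The vlan_mutex to vlan_rwsem conversion introduced here, and continued in ipoib_main.c, ipoib_netlink.c and ipoib_vlan.c below, lets paths that only walk priv->child_intfs (open, stop, flush) run concurrently, while paths that add or remove child interfaces still get exclusive access. A hedged sketch of that split, with illustrative names only:

#include <linux/rwsem.h>
#include <linux/list.h>

static DECLARE_RWSEM(example_vlan_rwsem);
static LIST_HEAD(example_child_intfs);

struct example_child {
	struct list_head list;
};

static int example_count_children(void)
{
	struct example_child *c;
	int n = 0;

	down_read(&example_vlan_rwsem);		/* many readers may iterate */
	list_for_each_entry(c, &example_child_intfs, list)
		n++;
	up_read(&example_vlan_rwsem);
	return n;
}

static void example_add_child(struct example_child *c)
{
	down_write(&example_vlan_rwsem);	/* exclusive while mutating the list */
	list_add_tail(&c->list, &example_child_intfs);
	up_write(&example_vlan_rwsem);
}
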
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 7a3175400b2a..1377f85911c2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -140,7 +140,8 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
140static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, 140static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
141 struct ipoib_cm_rx_buf *rx_ring, 141 struct ipoib_cm_rx_buf *rx_ring,
142 int id, int frags, 142 int id, int frags,
143 u64 mapping[IPOIB_CM_RX_SG]) 143 u64 mapping[IPOIB_CM_RX_SG],
144 gfp_t gfp)
144{ 145{
145 struct ipoib_dev_priv *priv = netdev_priv(dev); 146 struct ipoib_dev_priv *priv = netdev_priv(dev);
146 struct sk_buff *skb; 147 struct sk_buff *skb;
@@ -164,7 +165,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
164 } 165 }
165 166
166 for (i = 0; i < frags; i++) { 167 for (i = 0; i < frags; i++) {
167 struct page *page = alloc_page(GFP_ATOMIC); 168 struct page *page = alloc_page(gfp);
168 169
169 if (!page) 170 if (!page)
170 goto partial_error; 171 goto partial_error;
@@ -382,7 +383,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
382 383
383 for (i = 0; i < ipoib_recvq_size; ++i) { 384 for (i = 0; i < ipoib_recvq_size; ++i) {
384 if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1, 385 if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1,
385 rx->rx_ring[i].mapping)) { 386 rx->rx_ring[i].mapping,
387 GFP_KERNEL)) {
386 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); 388 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
387 ret = -ENOMEM; 389 ret = -ENOMEM;
388 goto err_count; 390 goto err_count;
@@ -639,7 +641,8 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
639 frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, 641 frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
640 (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; 642 (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
641 643
642 newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping); 644 newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags,
645 mapping, GFP_ATOMIC);
643 if (unlikely(!newskb)) { 646 if (unlikely(!newskb)) {
644 /* 647 /*
645 * If we can't allocate a new RX buffer, dump 648 * If we can't allocate a new RX buffer, dump
@@ -1556,7 +1559,8 @@ int ipoib_cm_dev_init(struct net_device *dev)
1556 for (i = 0; i < ipoib_recvq_size; ++i) { 1559 for (i = 0; i < ipoib_recvq_size; ++i) {
1557 if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i, 1560 if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
1558 priv->cm.num_frags - 1, 1561 priv->cm.num_frags - 1,
1559 priv->cm.srq_ring[i].mapping)) { 1562 priv->cm.srq_ring[i].mapping,
1563 GFP_KERNEL)) {
1560 ipoib_warn(priv, "failed to allocate " 1564 ipoib_warn(priv, "failed to allocate "
1561 "receive buffer %d\n", i); 1565 "receive buffer %d\n", i);
1562 ipoib_cm_dev_cleanup(dev); 1566 ipoib_cm_dev_cleanup(dev);
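
Passing gfp_t into ipoib_cm_alloc_rx_skb(), as the hunks above do, lets ring initialisation sleep with GFP_KERNEL while the completion handler, which may run in atomic context, refills buffers with GFP_ATOMIC. A simplified sketch of the same pattern; the real allocator also DMA-maps the buffers and accounts fragment lengths, which is omitted here:

#include <linux/gfp.h>
#include <linux/skbuff.h>

static struct sk_buff *example_alloc_rx_skb(unsigned int head_len, gfp_t gfp)
{
	struct sk_buff *skb;
	struct page *page;

	skb = alloc_skb(head_len, gfp);		/* may sleep only if gfp allows it */
	if (!skb)
		return NULL;

	page = alloc_page(gfp);			/* frag pages honour the same context */
	if (!page) {
		dev_kfree_skb_any(skb);
		return NULL;
	}
	skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE);
	return skb;
}

/* init path:		example_alloc_rx_skb(size, GFP_KERNEL)
 * completion path:	example_alloc_rx_skb(size, GFP_ATOMIC) */
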
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 196b1d13cbcb..6a7003ddb0be 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -685,15 +685,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
685 ret = ipoib_ib_post_receives(dev); 685 ret = ipoib_ib_post_receives(dev);
686 if (ret) { 686 if (ret) {
687 ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); 687 ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
688 ipoib_ib_dev_stop(dev, 1); 688 goto dev_stop;
689 return -1;
690 } 689 }
691 690
692 ret = ipoib_cm_dev_open(dev); 691 ret = ipoib_cm_dev_open(dev);
693 if (ret) { 692 if (ret) {
694 ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret); 693 ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
695 ipoib_ib_dev_stop(dev, 1); 694 goto dev_stop;
696 return -1;
697 } 695 }
698 696
699 clear_bit(IPOIB_STOP_REAPER, &priv->flags); 697 clear_bit(IPOIB_STOP_REAPER, &priv->flags);
@@ -704,6 +702,11 @@ int ipoib_ib_dev_open(struct net_device *dev)
704 napi_enable(&priv->napi); 702 napi_enable(&priv->napi);
705 703
706 return 0; 704 return 0;
705dev_stop:
706 if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
707 napi_enable(&priv->napi);
708 ipoib_ib_dev_stop(dev, 1);
709 return -1;
707} 710}
708 711
709static void ipoib_pkey_dev_check_presence(struct net_device *dev) 712static void ipoib_pkey_dev_check_presence(struct net_device *dev)
@@ -746,10 +749,8 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
746 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { 749 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
747 mutex_lock(&pkey_mutex); 750 mutex_lock(&pkey_mutex);
748 set_bit(IPOIB_PKEY_STOP, &priv->flags); 751 set_bit(IPOIB_PKEY_STOP, &priv->flags);
749 cancel_delayed_work(&priv->pkey_poll_task); 752 cancel_delayed_work_sync(&priv->pkey_poll_task);
750 mutex_unlock(&pkey_mutex); 753 mutex_unlock(&pkey_mutex);
751 if (flush)
752 flush_workqueue(ipoib_workqueue);
753 } 754 }
754 755
755 ipoib_mcast_stop_thread(dev, flush); 756 ipoib_mcast_stop_thread(dev, flush);
@@ -974,7 +975,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
974 u16 new_index; 975 u16 new_index;
975 int result; 976 int result;
976 977
977 mutex_lock(&priv->vlan_mutex); 978 down_read(&priv->vlan_rwsem);
978 979
979 /* 980 /*
980 * Flush any child interfaces too -- they might be up even if 981 * Flush any child interfaces too -- they might be up even if
@@ -983,7 +984,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
983 list_for_each_entry(cpriv, &priv->child_intfs, list) 984 list_for_each_entry(cpriv, &priv->child_intfs, list)
984 __ipoib_ib_dev_flush(cpriv, level); 985 __ipoib_ib_dev_flush(cpriv, level);
985 986
986 mutex_unlock(&priv->vlan_mutex); 987 up_read(&priv->vlan_rwsem);
987 988
988 if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) { 989 if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
989 /* for non-child devices must check/update the pkey value here */ 990 /* for non-child devices must check/update the pkey value here */
@@ -1081,6 +1082,11 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
1081 struct ipoib_dev_priv *priv = netdev_priv(dev); 1082 struct ipoib_dev_priv *priv = netdev_priv(dev);
1082 1083
1083 ipoib_dbg(priv, "cleaning up ib_dev\n"); 1084 ipoib_dbg(priv, "cleaning up ib_dev\n");
1085 /*
1086 * We must make sure there are no more (path) completions
1087 * that may wish to touch priv fields that are no longer valid
1088 */
1089 ipoib_flush_paths(dev);
1084 1090
1085 ipoib_mcast_stop_thread(dev, 1); 1091 ipoib_mcast_stop_thread(dev, 1);
1086 ipoib_mcast_dev_flush(dev); 1092 ipoib_mcast_dev_flush(dev);
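
The pkey_poll_task change above relies on cancel_delayed_work_sync() both cancelling a pending timer and waiting for an already-running handler, which is the guarantee the removed flush_workqueue() call used to provide. A minimal illustration, with names invented for the example:

#include <linux/workqueue.h>

static void example_poll(struct work_struct *work)
{
}
static DECLARE_DELAYED_WORK(example_poll_task, example_poll);

static void example_stop_polling(void)
{
	cancel_delayed_work_sync(&example_poll_task);
	/* no flush_workqueue() needed: the handler cannot still be running */
}
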
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 82cec1af902c..d64ed05fb082 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -119,7 +119,7 @@ int ipoib_open(struct net_device *dev)
119 struct ipoib_dev_priv *cpriv; 119 struct ipoib_dev_priv *cpriv;
120 120
121 /* Bring up any child interfaces too */ 121 /* Bring up any child interfaces too */
122 mutex_lock(&priv->vlan_mutex); 122 down_read(&priv->vlan_rwsem);
123 list_for_each_entry(cpriv, &priv->child_intfs, list) { 123 list_for_each_entry(cpriv, &priv->child_intfs, list) {
124 int flags; 124 int flags;
125 125
@@ -129,7 +129,7 @@ int ipoib_open(struct net_device *dev)
129 129
130 dev_change_flags(cpriv->dev, flags | IFF_UP); 130 dev_change_flags(cpriv->dev, flags | IFF_UP);
131 } 131 }
132 mutex_unlock(&priv->vlan_mutex); 132 up_read(&priv->vlan_rwsem);
133 } 133 }
134 134
135 netif_start_queue(dev); 135 netif_start_queue(dev);
@@ -162,7 +162,7 @@ static int ipoib_stop(struct net_device *dev)
162 struct ipoib_dev_priv *cpriv; 162 struct ipoib_dev_priv *cpriv;
163 163
164 /* Bring down any child interfaces too */ 164 /* Bring down any child interfaces too */
165 mutex_lock(&priv->vlan_mutex); 165 down_read(&priv->vlan_rwsem);
166 list_for_each_entry(cpriv, &priv->child_intfs, list) { 166 list_for_each_entry(cpriv, &priv->child_intfs, list) {
167 int flags; 167 int flags;
168 168
@@ -172,7 +172,7 @@ static int ipoib_stop(struct net_device *dev)
172 172
173 dev_change_flags(cpriv->dev, flags & ~IFF_UP); 173 dev_change_flags(cpriv->dev, flags & ~IFF_UP);
174 } 174 }
175 mutex_unlock(&priv->vlan_mutex); 175 up_read(&priv->vlan_rwsem);
176 } 176 }
177 177
178 return 0; 178 return 0;
@@ -1350,7 +1350,7 @@ void ipoib_setup(struct net_device *dev)
1350 1350
1351 ipoib_set_ethtool_ops(dev); 1351 ipoib_set_ethtool_ops(dev);
1352 1352
1353 netif_napi_add(dev, &priv->napi, ipoib_poll, 100); 1353 netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
1354 1354
1355 dev->watchdog_timeo = HZ; 1355 dev->watchdog_timeo = HZ;
1356 1356
@@ -1372,7 +1372,7 @@ void ipoib_setup(struct net_device *dev)
1372 1372
1373 spin_lock_init(&priv->lock); 1373 spin_lock_init(&priv->lock);
1374 1374
1375 mutex_init(&priv->vlan_mutex); 1375 init_rwsem(&priv->vlan_rwsem);
1376 1376
1377 INIT_LIST_HEAD(&priv->path_list); 1377 INIT_LIST_HEAD(&priv->path_list);
1378 INIT_LIST_HEAD(&priv->child_intfs); 1378 INIT_LIST_HEAD(&priv->child_intfs);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index cecb98a4c662..d4e005720d01 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -386,8 +386,10 @@ static int ipoib_mcast_join_complete(int status,
386 mcast->mcmember.mgid.raw, status); 386 mcast->mcmember.mgid.raw, status);
387 387
388 /* We trap for port events ourselves. */ 388 /* We trap for port events ourselves. */
389 if (status == -ENETRESET) 389 if (status == -ENETRESET) {
390 return 0; 390 status = 0;
391 goto out;
392 }
391 393
392 if (!status) 394 if (!status)
393 status = ipoib_mcast_join_finish(mcast, &multicast->rec); 395 status = ipoib_mcast_join_finish(mcast, &multicast->rec);
@@ -407,7 +409,8 @@ static int ipoib_mcast_join_complete(int status,
407 if (mcast == priv->broadcast) 409 if (mcast == priv->broadcast)
408 queue_work(ipoib_workqueue, &priv->carrier_on_task); 410 queue_work(ipoib_workqueue, &priv->carrier_on_task);
409 411
410 return 0; 412 status = 0;
413 goto out;
411 } 414 }
412 415
413 if (mcast->logcount++ < 20) { 416 if (mcast->logcount++ < 20) {
@@ -434,7 +437,8 @@ static int ipoib_mcast_join_complete(int status,
434 mcast->backoff * HZ); 437 mcast->backoff * HZ);
435 spin_unlock_irq(&priv->lock); 438 spin_unlock_irq(&priv->lock);
436 mutex_unlock(&mcast_mutex); 439 mutex_unlock(&mcast_mutex);
437 440out:
441 complete(&mcast->done);
438 return status; 442 return status;
439} 443}
440 444
@@ -484,11 +488,15 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
484 } 488 }
485 489
486 set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 490 set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
491 init_completion(&mcast->done);
492 set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
493
487 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, 494 mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
488 &rec, comp_mask, GFP_KERNEL, 495 &rec, comp_mask, GFP_KERNEL,
489 ipoib_mcast_join_complete, mcast); 496 ipoib_mcast_join_complete, mcast);
490 if (IS_ERR(mcast->mc)) { 497 if (IS_ERR(mcast->mc)) {
491 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 498 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
499 complete(&mcast->done);
492 ret = PTR_ERR(mcast->mc); 500 ret = PTR_ERR(mcast->mc);
493 ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); 501 ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
494 502
@@ -510,10 +518,18 @@ void ipoib_mcast_join_task(struct work_struct *work)
510 struct ipoib_dev_priv *priv = 518 struct ipoib_dev_priv *priv =
511 container_of(work, struct ipoib_dev_priv, mcast_task.work); 519 container_of(work, struct ipoib_dev_priv, mcast_task.work);
512 struct net_device *dev = priv->dev; 520 struct net_device *dev = priv->dev;
521 struct ib_port_attr port_attr;
513 522
514 if (!test_bit(IPOIB_MCAST_RUN, &priv->flags)) 523 if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
515 return; 524 return;
516 525
526 if (ib_query_port(priv->ca, priv->port, &port_attr) ||
527 port_attr.state != IB_PORT_ACTIVE) {
528 ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n",
529 port_attr.state);
530 return;
531 }
532
517 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) 533 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
518 ipoib_warn(priv, "ib_query_gid() failed\n"); 534 ipoib_warn(priv, "ib_query_gid() failed\n");
519 else 535 else
@@ -751,6 +767,11 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
751 767
752 spin_unlock_irqrestore(&priv->lock, flags); 768 spin_unlock_irqrestore(&priv->lock, flags);
753 769
 770 /* wait for pending join completions before issuing the leave */
771 list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
772 if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
773 wait_for_completion(&mcast->done);
774
754 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 775 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
755 ipoib_mcast_leave(dev, mcast); 776 ipoib_mcast_leave(dev, mcast);
756 ipoib_mcast_free(mcast); 777 ipoib_mcast_free(mcast);
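
The ipoib_multicast.c hunks above add a completion per multicast group: every exit path of ipoib_mcast_join_complete() signals it (as does an immediate ib_sa_join_multicast() failure), and ipoib_mcast_dev_flush() waits for it before leaving and freeing the group. A hedged sketch of that handshake with simplified types; the names below stand in for the driver's real ones:

#include <linux/completion.h>
#include <linux/bitops.h>

#define EXAMPLE_JOIN_STARTED	4	/* stands in for IPOIB_MCAST_JOIN_STARTED */

struct example_mcast {
	struct completion done;
	unsigned long flags;
};

static void example_start_join(struct example_mcast *m)
{
	init_completion(&m->done);
	set_bit(EXAMPLE_JOIN_STARTED, &m->flags);
	/* ib_sa_join_multicast(...); if that fails immediately, the caller
	 * signals the completion itself, as the error path above does */
}

static int example_join_callback(struct example_mcast *m, int status)
{
	/* every exit path of the real callback now funnels through this */
	complete(&m->done);
	return status;
}

static void example_flush(struct example_mcast *m)
{
	if (test_bit(EXAMPLE_JOIN_STARTED, &m->flags))
		wait_for_completion(&m->done);
	/* only now is it safe to leave the group and free *m */
}
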
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index f81abe16cf09..c29b5c838833 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -142,10 +142,10 @@ static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head
142 priv = netdev_priv(dev); 142 priv = netdev_priv(dev);
143 ppriv = netdev_priv(priv->parent); 143 ppriv = netdev_priv(priv->parent);
144 144
145 mutex_lock(&ppriv->vlan_mutex); 145 down_write(&ppriv->vlan_rwsem);
146 unregister_netdevice_queue(dev, head); 146 unregister_netdevice_queue(dev, head);
147 list_del(&priv->list); 147 list_del(&priv->list);
148 mutex_unlock(&ppriv->vlan_mutex); 148 up_write(&ppriv->vlan_rwsem);
149} 149}
150 150
151static size_t ipoib_get_size(const struct net_device *dev) 151static size_t ipoib_get_size(const struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 8292554bccb5..9fad7b5ac8b9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -140,7 +140,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
140 if (!rtnl_trylock()) 140 if (!rtnl_trylock())
141 return restart_syscall(); 141 return restart_syscall();
142 142
143 mutex_lock(&ppriv->vlan_mutex); 143 down_write(&ppriv->vlan_rwsem);
144 144
145 /* 145 /*
146 * First ensure this isn't a duplicate. We check the parent device and 146 * First ensure this isn't a duplicate. We check the parent device and
@@ -163,7 +163,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
163 result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD); 163 result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
164 164
165out: 165out:
166 mutex_unlock(&ppriv->vlan_mutex); 166 up_write(&ppriv->vlan_rwsem);
167 167
168 if (result) 168 if (result)
169 free_netdev(priv->dev); 169 free_netdev(priv->dev);
@@ -185,7 +185,8 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
185 185
186 if (!rtnl_trylock()) 186 if (!rtnl_trylock())
187 return restart_syscall(); 187 return restart_syscall();
188 mutex_lock(&ppriv->vlan_mutex); 188
189 down_write(&ppriv->vlan_rwsem);
189 list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { 190 list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
190 if (priv->pkey == pkey && 191 if (priv->pkey == pkey &&
191 priv->child_type == IPOIB_LEGACY_CHILD) { 192 priv->child_type == IPOIB_LEGACY_CHILD) {
@@ -195,7 +196,8 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
195 break; 196 break;
196 } 197 }
197 } 198 }
198 mutex_unlock(&ppriv->vlan_mutex); 199 up_write(&ppriv->vlan_rwsem);
200
199 rtnl_unlock(); 201 rtnl_unlock();
200 202
201 if (dev) { 203 if (dev) {
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index f93baf8254c4..a88631918e85 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -46,6 +46,7 @@
46#include <scsi/scsi.h> 46#include <scsi/scsi.h>
47#include <scsi/scsi_device.h> 47#include <scsi/scsi_device.h>
48#include <scsi/scsi_dbg.h> 48#include <scsi/scsi_dbg.h>
49#include <scsi/scsi_tcq.h>
49#include <scsi/srp.h> 50#include <scsi/srp.h>
50#include <scsi/scsi_transport_srp.h> 51#include <scsi/scsi_transport_srp.h>
51 52
@@ -86,6 +87,32 @@ module_param(topspin_workarounds, int, 0444);
86MODULE_PARM_DESC(topspin_workarounds, 87MODULE_PARM_DESC(topspin_workarounds,
87 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); 88 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
88 89
90static struct kernel_param_ops srp_tmo_ops;
91
92static int srp_reconnect_delay = 10;
93module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
94 S_IRUGO | S_IWUSR);
95MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
96
97static int srp_fast_io_fail_tmo = 15;
98module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
99 S_IRUGO | S_IWUSR);
100MODULE_PARM_DESC(fast_io_fail_tmo,
101 "Number of seconds between the observation of a transport"
102 " layer error and failing all I/O. \"off\" means that this"
103 " functionality is disabled.");
104
105static int srp_dev_loss_tmo = 600;
106module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
107 S_IRUGO | S_IWUSR);
108MODULE_PARM_DESC(dev_loss_tmo,
109 "Maximum number of seconds that the SRP transport should"
110 " insulate transport layer errors. After this time has been"
111 " exceeded the SCSI host is removed. Should be"
112 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
113 " if fast_io_fail_tmo has not been set. \"off\" means that"
114 " this functionality is disabled.");
115
89static void srp_add_one(struct ib_device *device); 116static void srp_add_one(struct ib_device *device);
90static void srp_remove_one(struct ib_device *device); 117static void srp_remove_one(struct ib_device *device);
91static void srp_recv_completion(struct ib_cq *cq, void *target_ptr); 118static void srp_recv_completion(struct ib_cq *cq, void *target_ptr);
@@ -102,6 +129,48 @@ static struct ib_client srp_client = {
102 129
103static struct ib_sa_client srp_sa_client; 130static struct ib_sa_client srp_sa_client;
104 131
132static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
133{
134 int tmo = *(int *)kp->arg;
135
136 if (tmo >= 0)
137 return sprintf(buffer, "%d", tmo);
138 else
139 return sprintf(buffer, "off");
140}
141
142static int srp_tmo_set(const char *val, const struct kernel_param *kp)
143{
144 int tmo, res;
145
146 if (strncmp(val, "off", 3) != 0) {
147 res = kstrtoint(val, 0, &tmo);
148 if (res)
149 goto out;
150 } else {
151 tmo = -1;
152 }
153 if (kp->arg == &srp_reconnect_delay)
154 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
155 srp_dev_loss_tmo);
156 else if (kp->arg == &srp_fast_io_fail_tmo)
157 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
158 else
159 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
160 tmo);
161 if (res)
162 goto out;
163 *(int *)kp->arg = tmo;
164
165out:
166 return res;
167}
168
169static struct kernel_param_ops srp_tmo_ops = {
170 .get = srp_tmo_get,
171 .set = srp_tmo_set,
172};
173
105static inline struct srp_target_port *host_to_target(struct Scsi_Host *host) 174static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
106{ 175{
107 return (struct srp_target_port *) host->hostdata; 176 return (struct srp_target_port *) host->hostdata;
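
The reconnect_delay, fast_io_fail_tmo and dev_loss_tmo parameters above use module_param_cb() so that writes to /sys/module/ib_srp/parameters/ go through srp_tmo_get()/srp_tmo_set(), letting "off" stand for a disabled timeout and letting the three values be cross-checked by srp_tmo_valid(). A minimal example of the same mechanism; the parameter name and bounds below are invented for the illustration:

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>

static int example_tmo = 15;

static int example_tmo_set(const char *val, const struct kernel_param *kp)
{
	int tmo, res;

	res = kstrtoint(val, 0, &tmo);
	if (res)
		return res;
	if (tmo < 0 || tmo > 600)	/* invented bounds, for illustration */
		return -EINVAL;
	*(int *)kp->arg = tmo;
	return 0;
}

static int example_tmo_get(char *buffer, const struct kernel_param *kp)
{
	return sprintf(buffer, "%d", *(int *)kp->arg);
}

static struct kernel_param_ops example_tmo_ops = {
	.set = example_tmo_set,
	.get = example_tmo_get,
};
module_param_cb(example_tmo, &example_tmo_ops, &example_tmo, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(example_tmo, "Example timeout in seconds (0..600)");
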
@@ -231,16 +300,16 @@ static int srp_create_target_ib(struct srp_target_port *target)
231 return -ENOMEM; 300 return -ENOMEM;
232 301
233 recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, 302 recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
234 srp_recv_completion, NULL, target, SRP_RQ_SIZE, 303 srp_recv_completion, NULL, target,
235 target->comp_vector); 304 target->queue_size, target->comp_vector);
236 if (IS_ERR(recv_cq)) { 305 if (IS_ERR(recv_cq)) {
237 ret = PTR_ERR(recv_cq); 306 ret = PTR_ERR(recv_cq);
238 goto err; 307 goto err;
239 } 308 }
240 309
241 send_cq = ib_create_cq(target->srp_host->srp_dev->dev, 310 send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
242 srp_send_completion, NULL, target, SRP_SQ_SIZE, 311 srp_send_completion, NULL, target,
243 target->comp_vector); 312 target->queue_size, target->comp_vector);
244 if (IS_ERR(send_cq)) { 313 if (IS_ERR(send_cq)) {
245 ret = PTR_ERR(send_cq); 314 ret = PTR_ERR(send_cq);
246 goto err_recv_cq; 315 goto err_recv_cq;
@@ -249,8 +318,8 @@ static int srp_create_target_ib(struct srp_target_port *target)
249 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP); 318 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
250 319
251 init_attr->event_handler = srp_qp_event; 320 init_attr->event_handler = srp_qp_event;
252 init_attr->cap.max_send_wr = SRP_SQ_SIZE; 321 init_attr->cap.max_send_wr = target->queue_size;
253 init_attr->cap.max_recv_wr = SRP_RQ_SIZE; 322 init_attr->cap.max_recv_wr = target->queue_size;
254 init_attr->cap.max_recv_sge = 1; 323 init_attr->cap.max_recv_sge = 1;
255 init_attr->cap.max_send_sge = 1; 324 init_attr->cap.max_send_sge = 1;
256 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; 325 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
@@ -296,6 +365,10 @@ err:
296 return ret; 365 return ret;
297} 366}
298 367
368/*
369 * Note: this function may be called without srp_alloc_iu_bufs() having been
370 * invoked. Hence the target->[rt]x_ring checks.
371 */
299static void srp_free_target_ib(struct srp_target_port *target) 372static void srp_free_target_ib(struct srp_target_port *target)
300{ 373{
301 int i; 374 int i;
@@ -307,10 +380,18 @@ static void srp_free_target_ib(struct srp_target_port *target)
307 target->qp = NULL; 380 target->qp = NULL;
308 target->send_cq = target->recv_cq = NULL; 381 target->send_cq = target->recv_cq = NULL;
309 382
310 for (i = 0; i < SRP_RQ_SIZE; ++i) 383 if (target->rx_ring) {
311 srp_free_iu(target->srp_host, target->rx_ring[i]); 384 for (i = 0; i < target->queue_size; ++i)
312 for (i = 0; i < SRP_SQ_SIZE; ++i) 385 srp_free_iu(target->srp_host, target->rx_ring[i]);
313 srp_free_iu(target->srp_host, target->tx_ring[i]); 386 kfree(target->rx_ring);
387 target->rx_ring = NULL;
388 }
389 if (target->tx_ring) {
390 for (i = 0; i < target->queue_size; ++i)
391 srp_free_iu(target->srp_host, target->tx_ring[i]);
392 kfree(target->tx_ring);
393 target->tx_ring = NULL;
394 }
314} 395}
315 396
316static void srp_path_rec_completion(int status, 397static void srp_path_rec_completion(int status,
@@ -390,7 +471,7 @@ static int srp_send_req(struct srp_target_port *target)
390 req->param.responder_resources = 4; 471 req->param.responder_resources = 4;
391 req->param.remote_cm_response_timeout = 20; 472 req->param.remote_cm_response_timeout = 20;
392 req->param.local_cm_response_timeout = 20; 473 req->param.local_cm_response_timeout = 20;
393 req->param.retry_count = 7; 474 req->param.retry_count = target->tl_retry_count;
394 req->param.rnr_retry_count = 7; 475 req->param.rnr_retry_count = 7;
395 req->param.max_cm_retries = 15; 476 req->param.max_cm_retries = 15;
396 477
@@ -496,7 +577,11 @@ static void srp_free_req_data(struct srp_target_port *target)
496 struct srp_request *req; 577 struct srp_request *req;
497 int i; 578 int i;
498 579
499 for (i = 0, req = target->req_ring; i < SRP_CMD_SQ_SIZE; ++i, ++req) { 580 if (!target->req_ring)
581 return;
582
583 for (i = 0; i < target->req_ring_size; ++i) {
584 req = &target->req_ring[i];
500 kfree(req->fmr_list); 585 kfree(req->fmr_list);
501 kfree(req->map_page); 586 kfree(req->map_page);
502 if (req->indirect_dma_addr) { 587 if (req->indirect_dma_addr) {
@@ -506,6 +591,50 @@ static void srp_free_req_data(struct srp_target_port *target)
506 } 591 }
507 kfree(req->indirect_desc); 592 kfree(req->indirect_desc);
508 } 593 }
594
595 kfree(target->req_ring);
596 target->req_ring = NULL;
597}
598
599static int srp_alloc_req_data(struct srp_target_port *target)
600{
601 struct srp_device *srp_dev = target->srp_host->srp_dev;
602 struct ib_device *ibdev = srp_dev->dev;
603 struct srp_request *req;
604 dma_addr_t dma_addr;
605 int i, ret = -ENOMEM;
606
607 INIT_LIST_HEAD(&target->free_reqs);
608
609 target->req_ring = kzalloc(target->req_ring_size *
610 sizeof(*target->req_ring), GFP_KERNEL);
611 if (!target->req_ring)
612 goto out;
613
614 for (i = 0; i < target->req_ring_size; ++i) {
615 req = &target->req_ring[i];
616 req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
617 GFP_KERNEL);
618 req->map_page = kmalloc(SRP_FMR_SIZE * sizeof(void *),
619 GFP_KERNEL);
620 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
621 if (!req->fmr_list || !req->map_page || !req->indirect_desc)
622 goto out;
623
624 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
625 target->indirect_size,
626 DMA_TO_DEVICE);
627 if (ib_dma_mapping_error(ibdev, dma_addr))
628 goto out;
629
630 req->indirect_dma_addr = dma_addr;
631 req->index = i;
632 list_add_tail(&req->list, &target->free_reqs);
633 }
634 ret = 0;
635
636out:
637 return ret;
509} 638}
510 639
511/** 640/**
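
srp_free_req_data() and the new srp_alloc_req_data() above move the request ring from a fixed-size array inside struct srp_target_port to an allocation sized by req_ring_size at login time. A reduced sketch of that allocate-then-unwind shape; the types keep only what the example needs, and the error path is safe for partially built rings because kfree(NULL) is a no-op:

#include <linux/slab.h>

struct example_req {
	void *indirect_desc;
};

static void example_free_ring(struct example_req *ring, int n)
{
	int i;

	if (!ring)
		return;
	for (i = 0; i < n; i++)
		kfree(ring[i].indirect_desc);
	kfree(ring);
}

static struct example_req *example_alloc_ring(int n, size_t desc_size)
{
	struct example_req *ring;
	int i;

	ring = kcalloc(n, sizeof(*ring), GFP_KERNEL);
	if (!ring)
		return NULL;

	for (i = 0; i < n; i++) {
		ring[i].indirect_desc = kmalloc(desc_size, GFP_KERNEL);
		if (!ring[i].indirect_desc)
			goto err;
	}
	return ring;

err:
	example_free_ring(ring, n);
	return NULL;
}
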
@@ -528,12 +657,20 @@ static void srp_remove_target(struct srp_target_port *target)
528 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 657 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
529 658
530 srp_del_scsi_host_attr(target->scsi_host); 659 srp_del_scsi_host_attr(target->scsi_host);
660 srp_rport_get(target->rport);
531 srp_remove_host(target->scsi_host); 661 srp_remove_host(target->scsi_host);
532 scsi_remove_host(target->scsi_host); 662 scsi_remove_host(target->scsi_host);
533 srp_disconnect_target(target); 663 srp_disconnect_target(target);
534 ib_destroy_cm_id(target->cm_id); 664 ib_destroy_cm_id(target->cm_id);
535 srp_free_target_ib(target); 665 srp_free_target_ib(target);
666 cancel_work_sync(&target->tl_err_work);
667 srp_rport_put(target->rport);
536 srp_free_req_data(target); 668 srp_free_req_data(target);
669
670 spin_lock(&target->srp_host->target_lock);
671 list_del(&target->list);
672 spin_unlock(&target->srp_host->target_lock);
673
537 scsi_host_put(target->scsi_host); 674 scsi_host_put(target->scsi_host);
538} 675}
539 676
@@ -545,10 +682,6 @@ static void srp_remove_work(struct work_struct *work)
545 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 682 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
546 683
547 srp_remove_target(target); 684 srp_remove_target(target);
548
549 spin_lock(&target->srp_host->target_lock);
550 list_del(&target->list);
551 spin_unlock(&target->srp_host->target_lock);
552} 685}
553 686
554static void srp_rport_delete(struct srp_rport *rport) 687static void srp_rport_delete(struct srp_rport *rport)
@@ -686,23 +819,42 @@ static void srp_free_req(struct srp_target_port *target,
686 spin_unlock_irqrestore(&target->lock, flags); 819 spin_unlock_irqrestore(&target->lock, flags);
687} 820}
688 821
689static void srp_reset_req(struct srp_target_port *target, struct srp_request *req) 822static void srp_finish_req(struct srp_target_port *target,
823 struct srp_request *req, int result)
690{ 824{
691 struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL); 825 struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL);
692 826
693 if (scmnd) { 827 if (scmnd) {
694 srp_free_req(target, req, scmnd, 0); 828 srp_free_req(target, req, scmnd, 0);
695 scmnd->result = DID_RESET << 16; 829 scmnd->result = result;
696 scmnd->scsi_done(scmnd); 830 scmnd->scsi_done(scmnd);
697 } 831 }
698} 832}
699 833
700static int srp_reconnect_target(struct srp_target_port *target) 834static void srp_terminate_io(struct srp_rport *rport)
701{ 835{
702 struct Scsi_Host *shost = target->scsi_host; 836 struct srp_target_port *target = rport->lld_data;
703 int i, ret; 837 int i;
704 838
705 scsi_target_block(&shost->shost_gendev); 839 for (i = 0; i < target->req_ring_size; ++i) {
840 struct srp_request *req = &target->req_ring[i];
841 srp_finish_req(target, req, DID_TRANSPORT_FAILFAST << 16);
842 }
843}
844
845/*
846 * It is up to the caller to ensure that srp_rport_reconnect() calls are
847 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
848 * srp_reset_device() or srp_reset_host() calls will occur while this function
849 * is in progress. One way to realize that is not to call this function
850 * directly but to call srp_reconnect_rport() instead since that last function
851 * serializes calls of this function via rport->mutex and also blocks
852 * srp_queuecommand() calls before invoking this function.
853 */
854static int srp_rport_reconnect(struct srp_rport *rport)
855{
856 struct srp_target_port *target = rport->lld_data;
857 int i, ret;
706 858
707 srp_disconnect_target(target); 859 srp_disconnect_target(target);
708 /* 860 /*
@@ -721,41 +873,21 @@ static int srp_reconnect_target(struct srp_target_port *target)
721 else 873 else
722 srp_create_target_ib(target); 874 srp_create_target_ib(target);
723 875
724 for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { 876 for (i = 0; i < target->req_ring_size; ++i) {
725 struct srp_request *req = &target->req_ring[i]; 877 struct srp_request *req = &target->req_ring[i];
726 if (req->scmnd) 878 srp_finish_req(target, req, DID_RESET << 16);
727 srp_reset_req(target, req);
728 } 879 }
729 880
730 INIT_LIST_HEAD(&target->free_tx); 881 INIT_LIST_HEAD(&target->free_tx);
731 for (i = 0; i < SRP_SQ_SIZE; ++i) 882 for (i = 0; i < target->queue_size; ++i)
732 list_add(&target->tx_ring[i]->list, &target->free_tx); 883 list_add(&target->tx_ring[i]->list, &target->free_tx);
733 884
734 if (ret == 0) 885 if (ret == 0)
735 ret = srp_connect_target(target); 886 ret = srp_connect_target(target);
736 887
737 scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING : 888 if (ret == 0)
738 SDEV_TRANSPORT_OFFLINE); 889 shost_printk(KERN_INFO, target->scsi_host,
739 target->transport_offline = !!ret; 890 PFX "reconnect succeeded\n");
740
741 if (ret)
742 goto err;
743
744 shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n");
745
746 return ret;
747
748err:
749 shost_printk(KERN_ERR, target->scsi_host,
750 PFX "reconnect failed (%d), removing target port.\n", ret);
751
752 /*
753 * We couldn't reconnect, so kill our target port off.
754 * However, we have to defer the real removal because we
755 * are in the context of the SCSI error handler now, which
756 * will deadlock if we call scsi_remove_host().
757 */
758 srp_queue_remove_work(target);
759 891
760 return ret; 892 return ret;
761} 893}
@@ -1302,15 +1434,30 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
1302 PFX "Recv failed with error code %d\n", res); 1434 PFX "Recv failed with error code %d\n", res);
1303} 1435}
1304 1436
1305static void srp_handle_qp_err(enum ib_wc_status wc_status, 1437/**
1306 enum ib_wc_opcode wc_opcode, 1438 * srp_tl_err_work() - handle a transport layer error
1439 *
1440 * Note: This function may get invoked before the rport has been created,
1441 * hence the target->rport test.
1442 */
1443static void srp_tl_err_work(struct work_struct *work)
1444{
1445 struct srp_target_port *target;
1446
1447 target = container_of(work, struct srp_target_port, tl_err_work);
1448 if (target->rport)
1449 srp_start_tl_fail_timers(target->rport);
1450}
1451
1452static void srp_handle_qp_err(enum ib_wc_status wc_status, bool send_err,
1307 struct srp_target_port *target) 1453 struct srp_target_port *target)
1308{ 1454{
1309 if (target->connected && !target->qp_in_error) { 1455 if (target->connected && !target->qp_in_error) {
1310 shost_printk(KERN_ERR, target->scsi_host, 1456 shost_printk(KERN_ERR, target->scsi_host,
1311 PFX "failed %s status %d\n", 1457 PFX "failed %s status %d\n",
1312 wc_opcode & IB_WC_RECV ? "receive" : "send", 1458 send_err ? "send" : "receive",
1313 wc_status); 1459 wc_status);
1460 queue_work(system_long_wq, &target->tl_err_work);
1314 } 1461 }
1315 target->qp_in_error = true; 1462 target->qp_in_error = true;
1316} 1463}
@@ -1325,7 +1472,7 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
1325 if (likely(wc.status == IB_WC_SUCCESS)) { 1472 if (likely(wc.status == IB_WC_SUCCESS)) {
1326 srp_handle_recv(target, &wc); 1473 srp_handle_recv(target, &wc);
1327 } else { 1474 } else {
1328 srp_handle_qp_err(wc.status, wc.opcode, target); 1475 srp_handle_qp_err(wc.status, false, target);
1329 } 1476 }
1330 } 1477 }
1331} 1478}
@@ -1341,7 +1488,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
1341 iu = (struct srp_iu *) (uintptr_t) wc.wr_id; 1488 iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
1342 list_add(&iu->list, &target->free_tx); 1489 list_add(&iu->list, &target->free_tx);
1343 } else { 1490 } else {
1344 srp_handle_qp_err(wc.status, wc.opcode, target); 1491 srp_handle_qp_err(wc.status, true, target);
1345 } 1492 }
1346 } 1493 }
1347} 1494}
@@ -1349,17 +1496,29 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
1349static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) 1496static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
1350{ 1497{
1351 struct srp_target_port *target = host_to_target(shost); 1498 struct srp_target_port *target = host_to_target(shost);
1499 struct srp_rport *rport = target->rport;
1352 struct srp_request *req; 1500 struct srp_request *req;
1353 struct srp_iu *iu; 1501 struct srp_iu *iu;
1354 struct srp_cmd *cmd; 1502 struct srp_cmd *cmd;
1355 struct ib_device *dev; 1503 struct ib_device *dev;
1356 unsigned long flags; 1504 unsigned long flags;
1357 int len; 1505 int len, result;
1506 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
1507
1508 /*
1509 * The SCSI EH thread is the only context from which srp_queuecommand()
1510 * can get invoked for blocked devices (SDEV_BLOCK /
1511 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
1512 * locking the rport mutex if invoked from inside the SCSI EH.
1513 */
1514 if (in_scsi_eh)
1515 mutex_lock(&rport->mutex);
1358 1516
1359 if (unlikely(target->transport_offline)) { 1517 result = srp_chkready(target->rport);
1360 scmnd->result = DID_NO_CONNECT << 16; 1518 if (unlikely(result)) {
1519 scmnd->result = result;
1361 scmnd->scsi_done(scmnd); 1520 scmnd->scsi_done(scmnd);
1362 return 0; 1521 goto unlock_rport;
1363 } 1522 }
1364 1523
1365 spin_lock_irqsave(&target->lock, flags); 1524 spin_lock_irqsave(&target->lock, flags);
@@ -1404,6 +1563,10 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
1404 goto err_unmap; 1563 goto err_unmap;
1405 } 1564 }
1406 1565
1566unlock_rport:
1567 if (in_scsi_eh)
1568 mutex_unlock(&rport->mutex);
1569
1407 return 0; 1570 return 0;
1408 1571
1409err_unmap: 1572err_unmap:
@@ -1418,14 +1581,30 @@ err_iu:
1418err_unlock: 1581err_unlock:
1419 spin_unlock_irqrestore(&target->lock, flags); 1582 spin_unlock_irqrestore(&target->lock, flags);
1420 1583
1584 if (in_scsi_eh)
1585 mutex_unlock(&rport->mutex);
1586
1421 return SCSI_MLQUEUE_HOST_BUSY; 1587 return SCSI_MLQUEUE_HOST_BUSY;
1422} 1588}
1423 1589
1590/*
1591 * Note: the resources allocated in this function are freed in
1592 * srp_free_target_ib().
1593 */
1424static int srp_alloc_iu_bufs(struct srp_target_port *target) 1594static int srp_alloc_iu_bufs(struct srp_target_port *target)
1425{ 1595{
1426 int i; 1596 int i;
1427 1597
1428 for (i = 0; i < SRP_RQ_SIZE; ++i) { 1598 target->rx_ring = kzalloc(target->queue_size * sizeof(*target->rx_ring),
1599 GFP_KERNEL);
1600 if (!target->rx_ring)
1601 goto err_no_ring;
1602 target->tx_ring = kzalloc(target->queue_size * sizeof(*target->tx_ring),
1603 GFP_KERNEL);
1604 if (!target->tx_ring)
1605 goto err_no_ring;
1606
1607 for (i = 0; i < target->queue_size; ++i) {
1429 target->rx_ring[i] = srp_alloc_iu(target->srp_host, 1608 target->rx_ring[i] = srp_alloc_iu(target->srp_host,
1430 target->max_ti_iu_len, 1609 target->max_ti_iu_len,
1431 GFP_KERNEL, DMA_FROM_DEVICE); 1610 GFP_KERNEL, DMA_FROM_DEVICE);
@@ -1433,7 +1612,7 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)
1433 goto err; 1612 goto err;
1434 } 1613 }
1435 1614
1436 for (i = 0; i < SRP_SQ_SIZE; ++i) { 1615 for (i = 0; i < target->queue_size; ++i) {
1437 target->tx_ring[i] = srp_alloc_iu(target->srp_host, 1616 target->tx_ring[i] = srp_alloc_iu(target->srp_host,
1438 target->max_iu_len, 1617 target->max_iu_len,
1439 GFP_KERNEL, DMA_TO_DEVICE); 1618 GFP_KERNEL, DMA_TO_DEVICE);
@@ -1446,16 +1625,18 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)
1446 return 0; 1625 return 0;
1447 1626
1448err: 1627err:
1449 for (i = 0; i < SRP_RQ_SIZE; ++i) { 1628 for (i = 0; i < target->queue_size; ++i) {
1450 srp_free_iu(target->srp_host, target->rx_ring[i]); 1629 srp_free_iu(target->srp_host, target->rx_ring[i]);
1451 target->rx_ring[i] = NULL;
1452 }
1453
1454 for (i = 0; i < SRP_SQ_SIZE; ++i) {
1455 srp_free_iu(target->srp_host, target->tx_ring[i]); 1630 srp_free_iu(target->srp_host, target->tx_ring[i]);
1456 target->tx_ring[i] = NULL;
1457 } 1631 }
1458 1632
1633
1634err_no_ring:
1635 kfree(target->tx_ring);
1636 target->tx_ring = NULL;
1637 kfree(target->rx_ring);
1638 target->rx_ring = NULL;
1639
1459 return -ENOMEM; 1640 return -ENOMEM;
1460} 1641}
1461 1642
@@ -1506,6 +1687,9 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
1506 target->scsi_host->can_queue 1687 target->scsi_host->can_queue
1507 = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE, 1688 = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE,
1508 target->scsi_host->can_queue); 1689 target->scsi_host->can_queue);
1690 target->scsi_host->cmd_per_lun
1691 = min_t(int, target->scsi_host->can_queue,
1692 target->scsi_host->cmd_per_lun);
1509 } else { 1693 } else {
1510 shost_printk(KERN_WARNING, target->scsi_host, 1694 shost_printk(KERN_WARNING, target->scsi_host,
1511 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode); 1695 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
@@ -1513,7 +1697,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
1513 goto error; 1697 goto error;
1514 } 1698 }
1515 1699
1516 if (!target->rx_ring[0]) { 1700 if (!target->rx_ring) {
1517 ret = srp_alloc_iu_bufs(target); 1701 ret = srp_alloc_iu_bufs(target);
1518 if (ret) 1702 if (ret)
1519 goto error; 1703 goto error;
@@ -1533,7 +1717,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
1533 if (ret) 1717 if (ret)
1534 goto error_free; 1718 goto error_free;
1535 1719
1536 for (i = 0; i < SRP_RQ_SIZE; i++) { 1720 for (i = 0; i < target->queue_size; i++) {
1537 struct srp_iu *iu = target->rx_ring[i]; 1721 struct srp_iu *iu = target->rx_ring[i];
1538 ret = srp_post_recv(target, iu); 1722 ret = srp_post_recv(target, iu);
1539 if (ret) 1723 if (ret)
@@ -1672,6 +1856,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1672 if (ib_send_cm_drep(cm_id, NULL, 0)) 1856 if (ib_send_cm_drep(cm_id, NULL, 0))
1673 shost_printk(KERN_ERR, target->scsi_host, 1857 shost_printk(KERN_ERR, target->scsi_host,
1674 PFX "Sending CM DREP failed\n"); 1858 PFX "Sending CM DREP failed\n");
1859 queue_work(system_long_wq, &target->tl_err_work);
1675 break; 1860 break;
1676 1861
1677 case IB_CM_TIMEWAIT_EXIT: 1862 case IB_CM_TIMEWAIT_EXIT:
@@ -1698,9 +1883,61 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1698 return 0; 1883 return 0;
1699} 1884}
1700 1885
1886/**
1887 * srp_change_queue_type - changing device queue tag type
1888 * @sdev: scsi device struct
1889 * @tag_type: requested tag type
1890 *
1891 * Returns queue tag type.
1892 */
1893static int
1894srp_change_queue_type(struct scsi_device *sdev, int tag_type)
1895{
1896 if (sdev->tagged_supported) {
1897 scsi_set_tag_type(sdev, tag_type);
1898 if (tag_type)
1899 scsi_activate_tcq(sdev, sdev->queue_depth);
1900 else
1901 scsi_deactivate_tcq(sdev, sdev->queue_depth);
1902 } else
1903 tag_type = 0;
1904
1905 return tag_type;
1906}
1907
1908/**
1909 * srp_change_queue_depth - setting device queue depth
1910 * @sdev: scsi device struct
1911 * @qdepth: requested queue depth
1912 * @reason: SCSI_QDEPTH_DEFAULT/SCSI_QDEPTH_QFULL/SCSI_QDEPTH_RAMP_UP
1913 * (see include/scsi/scsi_host.h for definition)
1914 *
1915 * Returns queue depth.
1916 */
1917static int
1918srp_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason)
1919{
1920 struct Scsi_Host *shost = sdev->host;
1921 int max_depth;
1922 if (reason == SCSI_QDEPTH_DEFAULT || reason == SCSI_QDEPTH_RAMP_UP) {
1923 max_depth = shost->can_queue;
1924 if (!sdev->tagged_supported)
1925 max_depth = 1;
1926 if (qdepth > max_depth)
1927 qdepth = max_depth;
1928 scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth);
1929 } else if (reason == SCSI_QDEPTH_QFULL)
1930 scsi_track_queue_full(sdev, qdepth);
1931 else
1932 return -EOPNOTSUPP;
1933
1934 return sdev->queue_depth;
1935}
1936
1701static int srp_send_tsk_mgmt(struct srp_target_port *target, 1937static int srp_send_tsk_mgmt(struct srp_target_port *target,
1702 u64 req_tag, unsigned int lun, u8 func) 1938 u64 req_tag, unsigned int lun, u8 func)
1703{ 1939{
1940 struct srp_rport *rport = target->rport;
1704 struct ib_device *dev = target->srp_host->srp_dev->dev; 1941 struct ib_device *dev = target->srp_host->srp_dev->dev;
1705 struct srp_iu *iu; 1942 struct srp_iu *iu;
1706 struct srp_tsk_mgmt *tsk_mgmt; 1943 struct srp_tsk_mgmt *tsk_mgmt;
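
srp_change_queue_depth() above clamps the requested depth to the host's can_queue (or to 1 for devices without tagged queuing) before applying it. The clamp itself is just the following arithmetic; the helper below is an illustration, not driver code:

#include <linux/types.h>

static int example_clamp_qdepth(int requested, int can_queue, bool tagged)
{
	int max_depth = tagged ? can_queue : 1;

	return requested > max_depth ? max_depth : requested;
}

/* example_clamp_qdepth(128, 62, true) == 62, and any request on an
 * untagged device collapses to a depth of 1. */
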
@@ -1710,12 +1947,20 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
1710 1947
1711 init_completion(&target->tsk_mgmt_done); 1948 init_completion(&target->tsk_mgmt_done);
1712 1949
1950 /*
1951 * Lock the rport mutex to avoid that srp_create_target_ib() is
1952 * invoked while a task management function is being sent.
1953 */
1954 mutex_lock(&rport->mutex);
1713 spin_lock_irq(&target->lock); 1955 spin_lock_irq(&target->lock);
1714 iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT); 1956 iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT);
1715 spin_unlock_irq(&target->lock); 1957 spin_unlock_irq(&target->lock);
1716 1958
1717 if (!iu) 1959 if (!iu) {
1960 mutex_unlock(&rport->mutex);
1961
1718 return -1; 1962 return -1;
1963 }
1719 1964
1720 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt, 1965 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
1721 DMA_TO_DEVICE); 1966 DMA_TO_DEVICE);
@@ -1732,8 +1977,11 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
1732 DMA_TO_DEVICE); 1977 DMA_TO_DEVICE);
1733 if (srp_post_send(target, iu, sizeof *tsk_mgmt)) { 1978 if (srp_post_send(target, iu, sizeof *tsk_mgmt)) {
1734 srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT); 1979 srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT);
1980 mutex_unlock(&rport->mutex);
1981
1735 return -1; 1982 return -1;
1736 } 1983 }
1984 mutex_unlock(&rport->mutex);
1737 1985
1738 if (!wait_for_completion_timeout(&target->tsk_mgmt_done, 1986 if (!wait_for_completion_timeout(&target->tsk_mgmt_done,
1739 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS))) 1987 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
@@ -1751,11 +1999,11 @@ static int srp_abort(struct scsi_cmnd *scmnd)
1751 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); 1999 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
1752 2000
1753 if (!req || !srp_claim_req(target, req, scmnd)) 2001 if (!req || !srp_claim_req(target, req, scmnd))
1754 return FAILED; 2002 return SUCCESS;
1755 if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, 2003 if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
1756 SRP_TSK_ABORT_TASK) == 0) 2004 SRP_TSK_ABORT_TASK) == 0)
1757 ret = SUCCESS; 2005 ret = SUCCESS;
1758 else if (target->transport_offline) 2006 else if (target->rport->state == SRP_RPORT_LOST)
1759 ret = FAST_IO_FAIL; 2007 ret = FAST_IO_FAIL;
1760 else 2008 else
1761 ret = FAILED; 2009 ret = FAILED;
@@ -1779,10 +2027,10 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
1779 if (target->tsk_mgmt_status) 2027 if (target->tsk_mgmt_status)
1780 return FAILED; 2028 return FAILED;
1781 2029
1782 for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { 2030 for (i = 0; i < target->req_ring_size; ++i) {
1783 struct srp_request *req = &target->req_ring[i]; 2031 struct srp_request *req = &target->req_ring[i];
1784 if (req->scmnd && req->scmnd->device == scmnd->device) 2032 if (req->scmnd && req->scmnd->device == scmnd->device)
1785 srp_reset_req(target, req); 2033 srp_finish_req(target, req, DID_RESET << 16);
1786 } 2034 }
1787 2035
1788 return SUCCESS; 2036 return SUCCESS;
@@ -1791,14 +2039,10 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
1791static int srp_reset_host(struct scsi_cmnd *scmnd) 2039static int srp_reset_host(struct scsi_cmnd *scmnd)
1792{ 2040{
1793 struct srp_target_port *target = host_to_target(scmnd->device->host); 2041 struct srp_target_port *target = host_to_target(scmnd->device->host);
1794 int ret = FAILED;
1795 2042
1796 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); 2043 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
1797 2044
1798 if (!srp_reconnect_target(target)) 2045 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
1799 ret = SUCCESS;
1800
1801 return ret;
1802} 2046}
1803 2047
1804static int srp_slave_configure(struct scsi_device *sdev) 2048static int srp_slave_configure(struct scsi_device *sdev)
@@ -1851,6 +2095,14 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
1851 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey)); 2095 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey));
1852} 2096}
1853 2097
2098static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2099 char *buf)
2100{
2101 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2102
2103 return sprintf(buf, "%pI6\n", target->path.sgid.raw);
2104}
2105
1854static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, 2106static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
1855 char *buf) 2107 char *buf)
1856{ 2108{
@@ -1907,6 +2159,14 @@ static ssize_t show_comp_vector(struct device *dev,
1907 return sprintf(buf, "%d\n", target->comp_vector); 2159 return sprintf(buf, "%d\n", target->comp_vector);
1908} 2160}
1909 2161
2162static ssize_t show_tl_retry_count(struct device *dev,
2163 struct device_attribute *attr, char *buf)
2164{
2165 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2166
2167 return sprintf(buf, "%d\n", target->tl_retry_count);
2168}
2169
1910static ssize_t show_cmd_sg_entries(struct device *dev, 2170static ssize_t show_cmd_sg_entries(struct device *dev,
1911 struct device_attribute *attr, char *buf) 2171 struct device_attribute *attr, char *buf)
1912{ 2172{
@@ -1927,6 +2187,7 @@ static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
1927static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); 2187static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
1928static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); 2188static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
1929static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); 2189static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
2190static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
1930static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); 2191static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
1931static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL); 2192static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
1932static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); 2193static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
@@ -1934,6 +2195,7 @@ static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
1934static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); 2195static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
1935static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); 2196static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
1936static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL); 2197static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
2198static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
1937static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); 2199static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
1938static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); 2200static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
1939 2201
@@ -1942,6 +2204,7 @@ static struct device_attribute *srp_host_attrs[] = {
1942 &dev_attr_ioc_guid, 2204 &dev_attr_ioc_guid,
1943 &dev_attr_service_id, 2205 &dev_attr_service_id,
1944 &dev_attr_pkey, 2206 &dev_attr_pkey,
2207 &dev_attr_sgid,
1945 &dev_attr_dgid, 2208 &dev_attr_dgid,
1946 &dev_attr_orig_dgid, 2209 &dev_attr_orig_dgid,
1947 &dev_attr_req_lim, 2210 &dev_attr_req_lim,
@@ -1949,6 +2212,7 @@ static struct device_attribute *srp_host_attrs[] = {
1949 &dev_attr_local_ib_port, 2212 &dev_attr_local_ib_port,
1950 &dev_attr_local_ib_device, 2213 &dev_attr_local_ib_device,
1951 &dev_attr_comp_vector, 2214 &dev_attr_comp_vector,
2215 &dev_attr_tl_retry_count,
1952 &dev_attr_cmd_sg_entries, 2216 &dev_attr_cmd_sg_entries,
1953 &dev_attr_allow_ext_sg, 2217 &dev_attr_allow_ext_sg,
1954 NULL 2218 NULL
@@ -1961,14 +2225,16 @@ static struct scsi_host_template srp_template = {
1961 .slave_configure = srp_slave_configure, 2225 .slave_configure = srp_slave_configure,
1962 .info = srp_target_info, 2226 .info = srp_target_info,
1963 .queuecommand = srp_queuecommand, 2227 .queuecommand = srp_queuecommand,
2228 .change_queue_depth = srp_change_queue_depth,
2229 .change_queue_type = srp_change_queue_type,
1964 .eh_abort_handler = srp_abort, 2230 .eh_abort_handler = srp_abort,
1965 .eh_device_reset_handler = srp_reset_device, 2231 .eh_device_reset_handler = srp_reset_device,
1966 .eh_host_reset_handler = srp_reset_host, 2232 .eh_host_reset_handler = srp_reset_host,
1967 .skip_settle_delay = true, 2233 .skip_settle_delay = true,
1968 .sg_tablesize = SRP_DEF_SG_TABLESIZE, 2234 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
1969 .can_queue = SRP_CMD_SQ_SIZE, 2235 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
1970 .this_id = -1, 2236 .this_id = -1,
1971 .cmd_per_lun = SRP_CMD_SQ_SIZE, 2237 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
1972 .use_clustering = ENABLE_CLUSTERING, 2238 .use_clustering = ENABLE_CLUSTERING,
1973 .shost_attrs = srp_host_attrs 2239 .shost_attrs = srp_host_attrs
1974}; 2240};
@@ -1994,6 +2260,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
1994 } 2260 }
1995 2261
1996 rport->lld_data = target; 2262 rport->lld_data = target;
2263 target->rport = rport;
1997 2264
1998 spin_lock(&host->target_lock); 2265 spin_lock(&host->target_lock);
1999 list_add_tail(&target->list, &host->target_list); 2266 list_add_tail(&target->list, &host->target_list);
@@ -2073,6 +2340,8 @@ enum {
2073 SRP_OPT_ALLOW_EXT_SG = 1 << 10, 2340 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
2074 SRP_OPT_SG_TABLESIZE = 1 << 11, 2341 SRP_OPT_SG_TABLESIZE = 1 << 11,
2075 SRP_OPT_COMP_VECTOR = 1 << 12, 2342 SRP_OPT_COMP_VECTOR = 1 << 12,
2343 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
2344 SRP_OPT_QUEUE_SIZE = 1 << 14,
2076 SRP_OPT_ALL = (SRP_OPT_ID_EXT | 2345 SRP_OPT_ALL = (SRP_OPT_ID_EXT |
2077 SRP_OPT_IOC_GUID | 2346 SRP_OPT_IOC_GUID |
2078 SRP_OPT_DGID | 2347 SRP_OPT_DGID |
@@ -2094,6 +2363,8 @@ static const match_table_t srp_opt_tokens = {
2094 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" }, 2363 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
2095 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" }, 2364 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
2096 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" }, 2365 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
2366 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
2367 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
2097 { SRP_OPT_ERR, NULL } 2368 { SRP_OPT_ERR, NULL }
2098}; 2369};
2099 2370
@@ -2188,13 +2459,25 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2188 target->scsi_host->max_sectors = token; 2459 target->scsi_host->max_sectors = token;
2189 break; 2460 break;
2190 2461
2462 case SRP_OPT_QUEUE_SIZE:
2463 if (match_int(args, &token) || token < 1) {
2464 pr_warn("bad queue_size parameter '%s'\n", p);
2465 goto out;
2466 }
2467 target->scsi_host->can_queue = token;
2468 target->queue_size = token + SRP_RSP_SQ_SIZE +
2469 SRP_TSK_MGMT_SQ_SIZE;
2470 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
2471 target->scsi_host->cmd_per_lun = token;
2472 break;
2473
2191 case SRP_OPT_MAX_CMD_PER_LUN: 2474 case SRP_OPT_MAX_CMD_PER_LUN:
2192 if (match_int(args, &token)) { 2475 if (match_int(args, &token) || token < 1) {
2193 pr_warn("bad max cmd_per_lun parameter '%s'\n", 2476 pr_warn("bad max cmd_per_lun parameter '%s'\n",
2194 p); 2477 p);
2195 goto out; 2478 goto out;
2196 } 2479 }
2197 target->scsi_host->cmd_per_lun = min(token, SRP_CMD_SQ_SIZE); 2480 target->scsi_host->cmd_per_lun = token;
2198 break; 2481 break;
2199 2482
2200 case SRP_OPT_IO_CLASS: 2483 case SRP_OPT_IO_CLASS:
@@ -2257,6 +2540,15 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2257 target->comp_vector = token; 2540 target->comp_vector = token;
2258 break; 2541 break;
2259 2542
2543 case SRP_OPT_TL_RETRY_COUNT:
2544 if (match_int(args, &token) || token < 2 || token > 7) {
2545 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
2546 p);
2547 goto out;
2548 }
2549 target->tl_retry_count = token;
2550 break;
2551
2260 default: 2552 default:
2261 pr_warn("unknown parameter or missing value '%s' in target creation request\n", 2553 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
2262 p); 2554 p);
@@ -2273,6 +2565,12 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2273 pr_warn("target creation request is missing parameter '%s'\n", 2565 pr_warn("target creation request is missing parameter '%s'\n",
2274 srp_opt_tokens[i].pattern); 2566 srp_opt_tokens[i].pattern);
2275 2567
2568 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
2569 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
2570 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
2571 target->scsi_host->cmd_per_lun,
2572 target->scsi_host->can_queue);
2573
2276out: 2574out:
2277 kfree(options); 2575 kfree(options);
2278 return ret; 2576 return ret;
@@ -2287,8 +2585,7 @@ static ssize_t srp_create_target(struct device *dev,
2287 struct Scsi_Host *target_host; 2585 struct Scsi_Host *target_host;
2288 struct srp_target_port *target; 2586 struct srp_target_port *target;
2289 struct ib_device *ibdev = host->srp_dev->dev; 2587 struct ib_device *ibdev = host->srp_dev->dev;
2290 dma_addr_t dma_addr; 2588 int ret;
2291 int i, ret;
2292 2589
2293 target_host = scsi_host_alloc(&srp_template, 2590 target_host = scsi_host_alloc(&srp_template,
2294 sizeof (struct srp_target_port)); 2591 sizeof (struct srp_target_port));
@@ -2311,11 +2608,15 @@ static ssize_t srp_create_target(struct device *dev,
2311 target->cmd_sg_cnt = cmd_sg_entries; 2608 target->cmd_sg_cnt = cmd_sg_entries;
2312 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries; 2609 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
2313 target->allow_ext_sg = allow_ext_sg; 2610 target->allow_ext_sg = allow_ext_sg;
2611 target->tl_retry_count = 7;
2612 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
2314 2613
2315 ret = srp_parse_options(buf, target); 2614 ret = srp_parse_options(buf, target);
2316 if (ret) 2615 if (ret)
2317 goto err; 2616 goto err;
2318 2617
2618 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
2619
2319 if (!srp_conn_unique(target->srp_host, target)) { 2620 if (!srp_conn_unique(target->srp_host, target)) {
2320 shost_printk(KERN_INFO, target->scsi_host, 2621 shost_printk(KERN_INFO, target->scsi_host,
2321 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", 2622 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
@@ -2339,31 +2640,13 @@ static ssize_t srp_create_target(struct device *dev,
2339 sizeof (struct srp_indirect_buf) + 2640 sizeof (struct srp_indirect_buf) +
2340 target->cmd_sg_cnt * sizeof (struct srp_direct_buf); 2641 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
2341 2642
2643 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
2342 INIT_WORK(&target->remove_work, srp_remove_work); 2644 INIT_WORK(&target->remove_work, srp_remove_work);
2343 spin_lock_init(&target->lock); 2645 spin_lock_init(&target->lock);
2344 INIT_LIST_HEAD(&target->free_tx); 2646 INIT_LIST_HEAD(&target->free_tx);
2345 INIT_LIST_HEAD(&target->free_reqs); 2647 ret = srp_alloc_req_data(target);
2346 for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { 2648 if (ret)
2347 struct srp_request *req = &target->req_ring[i]; 2649 goto err_free_mem;
2348
2349 req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof (void *),
2350 GFP_KERNEL);
2351 req->map_page = kmalloc(SRP_FMR_SIZE * sizeof (void *),
2352 GFP_KERNEL);
2353 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
2354 if (!req->fmr_list || !req->map_page || !req->indirect_desc)
2355 goto err_free_mem;
2356
2357 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
2358 target->indirect_size,
2359 DMA_TO_DEVICE);
2360 if (ib_dma_mapping_error(ibdev, dma_addr))
2361 goto err_free_mem;
2362
2363 req->indirect_dma_addr = dma_addr;
2364 req->index = i;
2365 list_add_tail(&req->list, &target->free_reqs);
2366 }
2367 2650
2368 ib_query_gid(ibdev, host->port, 0, &target->path.sgid); 2651 ib_query_gid(ibdev, host->port, 0, &target->path.sgid);
2369 2652
@@ -2612,7 +2895,14 @@ static void srp_remove_one(struct ib_device *device)
2612} 2895}
2613 2896
2614static struct srp_function_template ib_srp_transport_functions = { 2897static struct srp_function_template ib_srp_transport_functions = {
2898 .has_rport_state = true,
2899 .reset_timer_if_blocked = true,
2900 .reconnect_delay = &srp_reconnect_delay,
2901 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
2902 .dev_loss_tmo = &srp_dev_loss_tmo,
2903 .reconnect = srp_rport_reconnect,
2615 .rport_delete = srp_rport_delete, 2904 .rport_delete = srp_rport_delete,
2905 .terminate_rport_io = srp_terminate_io,
2616}; 2906};
2617 2907
2618static int __init srp_init_module(void) 2908static int __init srp_init_module(void)
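For reference, the queue_size handling above feeds one user-supplied value into several limits; a small illustrative sketch (not part of the patch) of the arithmetic, using the SRP_RSP_SQ_SIZE and SRP_TSK_MGMT_SQ_SIZE values from the ib_srp.h hunk below:

/* Illustration only: queue_size accounting as done in srp_parse_options()
 * and srp_create_target() above. */
static void example_queue_size_accounting(int queue_size_opt)
{
	const int rsp_sq_size = 1;		/* SRP_RSP_SQ_SIZE */
	const int tsk_mgmt_sq_size = 1;		/* SRP_TSK_MGMT_SQ_SIZE */
	int can_queue = queue_size_opt;		/* scsi_host->can_queue */
	int queue_size = queue_size_opt + rsp_sq_size + tsk_mgmt_sq_size;
	int req_ring_size = queue_size - tsk_mgmt_sq_size;

	/* e.g. queue_size=62 in the add_target string yields a 64-slot queue
	 * and a 63-entry srp_request ring. */
	(void)can_queue;
	(void)req_ring_size;
}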
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index e641088c14dc..575681063f38 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -57,14 +57,11 @@ enum {
57 SRP_MAX_LUN = 512, 57 SRP_MAX_LUN = 512,
58 SRP_DEF_SG_TABLESIZE = 12, 58 SRP_DEF_SG_TABLESIZE = 12,
59 59
60 SRP_RQ_SHIFT = 6, 60 SRP_DEFAULT_QUEUE_SIZE = 1 << 6,
61 SRP_RQ_SIZE = 1 << SRP_RQ_SHIFT,
62
63 SRP_SQ_SIZE = SRP_RQ_SIZE,
64 SRP_RSP_SQ_SIZE = 1, 61 SRP_RSP_SQ_SIZE = 1,
65 SRP_REQ_SQ_SIZE = SRP_SQ_SIZE - SRP_RSP_SQ_SIZE,
66 SRP_TSK_MGMT_SQ_SIZE = 1, 62 SRP_TSK_MGMT_SQ_SIZE = 1,
67 SRP_CMD_SQ_SIZE = SRP_REQ_SQ_SIZE - SRP_TSK_MGMT_SQ_SIZE, 63 SRP_DEFAULT_CMD_SQ_SIZE = SRP_DEFAULT_QUEUE_SIZE - SRP_RSP_SQ_SIZE -
64 SRP_TSK_MGMT_SQ_SIZE,
68 65
69 SRP_TAG_NO_REQ = ~0U, 66 SRP_TAG_NO_REQ = ~0U,
70 SRP_TAG_TSK_MGMT = 1U << 31, 67 SRP_TAG_TSK_MGMT = 1U << 31,
@@ -140,7 +137,6 @@ struct srp_target_port {
140 unsigned int cmd_sg_cnt; 137 unsigned int cmd_sg_cnt;
141 unsigned int indirect_size; 138 unsigned int indirect_size;
142 bool allow_ext_sg; 139 bool allow_ext_sg;
143 bool transport_offline;
144 140
145 /* Everything above this point is used in the hot path of 141 /* Everything above this point is used in the hot path of
146 * command processing. Try to keep them packed into cachelines. 142 * command processing. Try to keep them packed into cachelines.
@@ -153,10 +149,14 @@ struct srp_target_port {
153 u16 io_class; 149 u16 io_class;
154 struct srp_host *srp_host; 150 struct srp_host *srp_host;
155 struct Scsi_Host *scsi_host; 151 struct Scsi_Host *scsi_host;
152 struct srp_rport *rport;
156 char target_name[32]; 153 char target_name[32];
157 unsigned int scsi_id; 154 unsigned int scsi_id;
158 unsigned int sg_tablesize; 155 unsigned int sg_tablesize;
156 int queue_size;
157 int req_ring_size;
159 int comp_vector; 158 int comp_vector;
159 int tl_retry_count;
160 160
161 struct ib_sa_path_rec path; 161 struct ib_sa_path_rec path;
162 __be16 orig_dgid[8]; 162 __be16 orig_dgid[8];
@@ -172,10 +172,11 @@ struct srp_target_port {
172 172
173 int zero_req_lim; 173 int zero_req_lim;
174 174
175 struct srp_iu *tx_ring[SRP_SQ_SIZE]; 175 struct srp_iu **tx_ring;
176 struct srp_iu *rx_ring[SRP_RQ_SIZE]; 176 struct srp_iu **rx_ring;
177 struct srp_request req_ring[SRP_CMD_SQ_SIZE]; 177 struct srp_request *req_ring;
178 178
179 struct work_struct tl_err_work;
179 struct work_struct remove_work; 180 struct work_struct remove_work;
180 181
181 struct list_head list; 182 struct list_head list;
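With tx_ring, rx_ring and req_ring turned into plain pointers, the rings must be sized from target->queue_size at target creation time. The real allocation is done by srp_alloc_req_data(), called from srp_create_target() above; a simplified, hypothetical sketch that omits the per-request fmr_list, map_page and indirect-descriptor buffers:

/* Hypothetical sketch only; the in-tree helper also allocates and DMA-maps
 * the per-request FMR and indirect-descriptor buffers. */
static int example_alloc_req_ring(struct srp_target_port *target)
{
	int i;

	target->req_ring = kcalloc(target->req_ring_size,
				   sizeof(*target->req_ring), GFP_KERNEL);
	if (!target->req_ring)
		return -ENOMEM;

	INIT_LIST_HEAD(&target->free_reqs);
	for (i = 0; i < target->req_ring_size; ++i) {
		target->req_ring[i].index = i;
		list_add_tail(&target->req_ring[i].list, &target->free_reqs);
	}
	return 0;
}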
diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c
index f379c7f3034c..2700a5a09bd4 100644
--- a/drivers/scsi/scsi_transport_srp.c
+++ b/drivers/scsi/scsi_transport_srp.c
@@ -24,12 +24,15 @@
24#include <linux/err.h> 24#include <linux/err.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/string.h> 26#include <linux/string.h>
27#include <linux/delay.h>
27 28
28#include <scsi/scsi.h> 29#include <scsi/scsi.h>
30#include <scsi/scsi_cmnd.h>
29#include <scsi/scsi_device.h> 31#include <scsi/scsi_device.h>
30#include <scsi/scsi_host.h> 32#include <scsi/scsi_host.h>
31#include <scsi/scsi_transport.h> 33#include <scsi/scsi_transport.h>
32#include <scsi/scsi_transport_srp.h> 34#include <scsi/scsi_transport_srp.h>
35#include "scsi_priv.h"
33#include "scsi_transport_srp_internal.h" 36#include "scsi_transport_srp_internal.h"
34 37
35struct srp_host_attrs { 38struct srp_host_attrs {
@@ -38,7 +41,7 @@ struct srp_host_attrs {
38#define to_srp_host_attrs(host) ((struct srp_host_attrs *)(host)->shost_data) 41#define to_srp_host_attrs(host) ((struct srp_host_attrs *)(host)->shost_data)
39 42
40#define SRP_HOST_ATTRS 0 43#define SRP_HOST_ATTRS 0
41#define SRP_RPORT_ATTRS 3 44#define SRP_RPORT_ATTRS 8
42 45
43struct srp_internal { 46struct srp_internal {
44 struct scsi_transport_template t; 47 struct scsi_transport_template t;
@@ -54,6 +57,36 @@ struct srp_internal {
54 57
55#define dev_to_rport(d) container_of(d, struct srp_rport, dev) 58#define dev_to_rport(d) container_of(d, struct srp_rport, dev)
56#define transport_class_to_srp_rport(dev) dev_to_rport((dev)->parent) 59#define transport_class_to_srp_rport(dev) dev_to_rport((dev)->parent)
60static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r)
61{
62 return dev_to_shost(r->dev.parent);
63}
64
65/**
66 * srp_tmo_valid() - check timeout combination validity
67 *
68 * The combination of the timeout parameters must be such that SCSI commands
69 * are finished in a reasonable time. Hence do not allow the fast I/O fail
70 * timeout to exceed SCSI_DEVICE_BLOCK_MAX_TIMEOUT. Furthermore, these
71 * parameters must be such that multipath can detect failed paths timely.
72 * Hence do not allow all three parameters to be disabled simultaneously.
73 */
74int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo, int dev_loss_tmo)
75{
76 if (reconnect_delay < 0 && fast_io_fail_tmo < 0 && dev_loss_tmo < 0)
77 return -EINVAL;
78 if (reconnect_delay == 0)
79 return -EINVAL;
80 if (fast_io_fail_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
81 return -EINVAL;
82 if (dev_loss_tmo >= LONG_MAX / HZ)
83 return -EINVAL;
84 if (fast_io_fail_tmo >= 0 && dev_loss_tmo >= 0 &&
85 fast_io_fail_tmo >= dev_loss_tmo)
86 return -EINVAL;
87 return 0;
88}
89EXPORT_SYMBOL_GPL(srp_tmo_valid);
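To make the constraints concrete, a hypothetical caller checking the defaults that srp_rport_add() installs later in this file (10 s reconnect delay, 15 s fast_io_fail_tmo, 60 s dev_loss_tmo) and two rejected combinations:

/* Illustration only: valid and invalid timeout combinations. */
static void example_tmo_check(void)
{
	/* Defaults used by srp_rport_add(): accepted. */
	WARN_ON(srp_tmo_valid(10, 15, 60) != 0);

	/* fast_io_fail_tmo >= dev_loss_tmo: rejected. */
	WARN_ON(srp_tmo_valid(10, 60, 15) != -EINVAL);

	/* All three mechanisms disabled at once: rejected. */
	WARN_ON(srp_tmo_valid(-1, -1, -1) != -EINVAL);
}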
57 90
58static int srp_host_setup(struct transport_container *tc, struct device *dev, 91static int srp_host_setup(struct transport_container *tc, struct device *dev,
59 struct device *cdev) 92 struct device *cdev)
@@ -134,10 +167,465 @@ static ssize_t store_srp_rport_delete(struct device *dev,
134 167
135static DEVICE_ATTR(delete, S_IWUSR, NULL, store_srp_rport_delete); 168static DEVICE_ATTR(delete, S_IWUSR, NULL, store_srp_rport_delete);
136 169
170static ssize_t show_srp_rport_state(struct device *dev,
171 struct device_attribute *attr,
172 char *buf)
173{
174 static const char *const state_name[] = {
175 [SRP_RPORT_RUNNING] = "running",
176 [SRP_RPORT_BLOCKED] = "blocked",
177 [SRP_RPORT_FAIL_FAST] = "fail-fast",
178 [SRP_RPORT_LOST] = "lost",
179 };
180 struct srp_rport *rport = transport_class_to_srp_rport(dev);
181 enum srp_rport_state state = rport->state;
182
183 return sprintf(buf, "%s\n",
184 (unsigned)state < ARRAY_SIZE(state_name) ?
185 state_name[state] : "???");
186}
187
188static DEVICE_ATTR(state, S_IRUGO, show_srp_rport_state, NULL);
189
190static ssize_t srp_show_tmo(char *buf, int tmo)
191{
192 return tmo >= 0 ? sprintf(buf, "%d\n", tmo) : sprintf(buf, "off\n");
193}
194
195static int srp_parse_tmo(int *tmo, const char *buf)
196{
197 int res = 0;
198
199 if (strncmp(buf, "off", 3) != 0)
200 res = kstrtoint(buf, 0, tmo);
201 else
202 *tmo = -1;
203
204 return res;
205}
206
207static ssize_t show_reconnect_delay(struct device *dev,
208 struct device_attribute *attr, char *buf)
209{
210 struct srp_rport *rport = transport_class_to_srp_rport(dev);
211
212 return srp_show_tmo(buf, rport->reconnect_delay);
213}
214
215static ssize_t store_reconnect_delay(struct device *dev,
216 struct device_attribute *attr,
217 const char *buf, const size_t count)
218{
219 struct srp_rport *rport = transport_class_to_srp_rport(dev);
220 int res, delay;
221
222 res = srp_parse_tmo(&delay, buf);
223 if (res)
224 goto out;
225 res = srp_tmo_valid(delay, rport->fast_io_fail_tmo,
226 rport->dev_loss_tmo);
227 if (res)
228 goto out;
229
230 if (rport->reconnect_delay <= 0 && delay > 0 &&
231 rport->state != SRP_RPORT_RUNNING) {
232 queue_delayed_work(system_long_wq, &rport->reconnect_work,
233 delay * HZ);
234 } else if (delay <= 0) {
235 cancel_delayed_work(&rport->reconnect_work);
236 }
237 rport->reconnect_delay = delay;
238 res = count;
239
240out:
241 return res;
242}
243
244static DEVICE_ATTR(reconnect_delay, S_IRUGO | S_IWUSR, show_reconnect_delay,
245 store_reconnect_delay);
246
247static ssize_t show_failed_reconnects(struct device *dev,
248 struct device_attribute *attr, char *buf)
249{
250 struct srp_rport *rport = transport_class_to_srp_rport(dev);
251
252 return sprintf(buf, "%d\n", rport->failed_reconnects);
253}
254
255static DEVICE_ATTR(failed_reconnects, S_IRUGO, show_failed_reconnects, NULL);
256
257static ssize_t show_srp_rport_fast_io_fail_tmo(struct device *dev,
258 struct device_attribute *attr,
259 char *buf)
260{
261 struct srp_rport *rport = transport_class_to_srp_rport(dev);
262
263 return srp_show_tmo(buf, rport->fast_io_fail_tmo);
264}
265
266static ssize_t store_srp_rport_fast_io_fail_tmo(struct device *dev,
267 struct device_attribute *attr,
268 const char *buf, size_t count)
269{
270 struct srp_rport *rport = transport_class_to_srp_rport(dev);
271 int res;
272 int fast_io_fail_tmo;
273
274 res = srp_parse_tmo(&fast_io_fail_tmo, buf);
275 if (res)
276 goto out;
277 res = srp_tmo_valid(rport->reconnect_delay, fast_io_fail_tmo,
278 rport->dev_loss_tmo);
279 if (res)
280 goto out;
281 rport->fast_io_fail_tmo = fast_io_fail_tmo;
282 res = count;
283
284out:
285 return res;
286}
287
288static DEVICE_ATTR(fast_io_fail_tmo, S_IRUGO | S_IWUSR,
289 show_srp_rport_fast_io_fail_tmo,
290 store_srp_rport_fast_io_fail_tmo);
291
292static ssize_t show_srp_rport_dev_loss_tmo(struct device *dev,
293 struct device_attribute *attr,
294 char *buf)
295{
296 struct srp_rport *rport = transport_class_to_srp_rport(dev);
297
298 return srp_show_tmo(buf, rport->dev_loss_tmo);
299}
300
301static ssize_t store_srp_rport_dev_loss_tmo(struct device *dev,
302 struct device_attribute *attr,
303 const char *buf, size_t count)
304{
305 struct srp_rport *rport = transport_class_to_srp_rport(dev);
306 int res;
307 int dev_loss_tmo;
308
309 res = srp_parse_tmo(&dev_loss_tmo, buf);
310 if (res)
311 goto out;
312 res = srp_tmo_valid(rport->reconnect_delay, rport->fast_io_fail_tmo,
313 dev_loss_tmo);
314 if (res)
315 goto out;
316 rport->dev_loss_tmo = dev_loss_tmo;
317 res = count;
318
319out:
320 return res;
321}
322
323static DEVICE_ATTR(dev_loss_tmo, S_IRUGO | S_IWUSR,
324 show_srp_rport_dev_loss_tmo,
325 store_srp_rport_dev_loss_tmo);
326
327static int srp_rport_set_state(struct srp_rport *rport,
328 enum srp_rport_state new_state)
329{
330 enum srp_rport_state old_state = rport->state;
331
332 lockdep_assert_held(&rport->mutex);
333
334 switch (new_state) {
335 case SRP_RPORT_RUNNING:
336 switch (old_state) {
337 case SRP_RPORT_LOST:
338 goto invalid;
339 default:
340 break;
341 }
342 break;
343 case SRP_RPORT_BLOCKED:
344 switch (old_state) {
345 case SRP_RPORT_RUNNING:
346 break;
347 default:
348 goto invalid;
349 }
350 break;
351 case SRP_RPORT_FAIL_FAST:
352 switch (old_state) {
353 case SRP_RPORT_LOST:
354 goto invalid;
355 default:
356 break;
357 }
358 break;
359 case SRP_RPORT_LOST:
360 break;
361 }
362 rport->state = new_state;
363 return 0;
364
365invalid:
366 return -EINVAL;
367}
368
369/**
370 * srp_reconnect_work() - reconnect and schedule a new attempt if necessary
371 */
372static void srp_reconnect_work(struct work_struct *work)
373{
374 struct srp_rport *rport = container_of(to_delayed_work(work),
375 struct srp_rport, reconnect_work);
376 struct Scsi_Host *shost = rport_to_shost(rport);
377 int delay, res;
378
379 res = srp_reconnect_rport(rport);
380 if (res != 0) {
381 shost_printk(KERN_ERR, shost,
382 "reconnect attempt %d failed (%d)\n",
383 ++rport->failed_reconnects, res);
384 delay = rport->reconnect_delay *
385 min(100, max(1, rport->failed_reconnects - 10));
386 if (delay > 0)
387 queue_delayed_work(system_long_wq,
388 &rport->reconnect_work, delay * HZ);
389 }
390}
391
392static void __rport_fail_io_fast(struct srp_rport *rport)
393{
394 struct Scsi_Host *shost = rport_to_shost(rport);
395 struct srp_internal *i;
396
397 lockdep_assert_held(&rport->mutex);
398
399 if (srp_rport_set_state(rport, SRP_RPORT_FAIL_FAST))
400 return;
401 scsi_target_unblock(rport->dev.parent, SDEV_TRANSPORT_OFFLINE);
402
403 /* Involve the LLD if possible to terminate all I/O on the rport. */
404 i = to_srp_internal(shost->transportt);
405 if (i->f->terminate_rport_io)
406 i->f->terminate_rport_io(rport);
407}
408
409/**
410 * rport_fast_io_fail_timedout() - fast I/O failure timeout handler
411 */
412static void rport_fast_io_fail_timedout(struct work_struct *work)
413{
414 struct srp_rport *rport = container_of(to_delayed_work(work),
415 struct srp_rport, fast_io_fail_work);
416 struct Scsi_Host *shost = rport_to_shost(rport);
417
418 pr_info("fast_io_fail_tmo expired for SRP %s / %s.\n",
419 dev_name(&rport->dev), dev_name(&shost->shost_gendev));
420
421 mutex_lock(&rport->mutex);
422 if (rport->state == SRP_RPORT_BLOCKED)
423 __rport_fail_io_fast(rport);
424 mutex_unlock(&rport->mutex);
425}
426
427/**
428 * rport_dev_loss_timedout() - device loss timeout handler
429 */
430static void rport_dev_loss_timedout(struct work_struct *work)
431{
432 struct srp_rport *rport = container_of(to_delayed_work(work),
433 struct srp_rport, dev_loss_work);
434 struct Scsi_Host *shost = rport_to_shost(rport);
435 struct srp_internal *i = to_srp_internal(shost->transportt);
436
437 pr_info("dev_loss_tmo expired for SRP %s / %s.\n",
438 dev_name(&rport->dev), dev_name(&shost->shost_gendev));
439
440 mutex_lock(&rport->mutex);
441 WARN_ON(srp_rport_set_state(rport, SRP_RPORT_LOST) != 0);
442 scsi_target_unblock(rport->dev.parent, SDEV_TRANSPORT_OFFLINE);
443 mutex_unlock(&rport->mutex);
444
445 i->f->rport_delete(rport);
446}
447
448static void __srp_start_tl_fail_timers(struct srp_rport *rport)
449{
450 struct Scsi_Host *shost = rport_to_shost(rport);
451 int delay, fast_io_fail_tmo, dev_loss_tmo;
452
453 lockdep_assert_held(&rport->mutex);
454
455 if (!rport->deleted) {
456 delay = rport->reconnect_delay;
457 fast_io_fail_tmo = rport->fast_io_fail_tmo;
458 dev_loss_tmo = rport->dev_loss_tmo;
459 pr_debug("%s current state: %d\n",
460 dev_name(&shost->shost_gendev), rport->state);
461
462 if (delay > 0)
463 queue_delayed_work(system_long_wq,
464 &rport->reconnect_work,
465 1UL * delay * HZ);
466 if (fast_io_fail_tmo >= 0 &&
467 srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) {
468 pr_debug("%s new state: %d\n",
469 dev_name(&shost->shost_gendev),
470 rport->state);
471 scsi_target_block(&shost->shost_gendev);
472 queue_delayed_work(system_long_wq,
473 &rport->fast_io_fail_work,
474 1UL * fast_io_fail_tmo * HZ);
475 }
476 if (dev_loss_tmo >= 0)
477 queue_delayed_work(system_long_wq,
478 &rport->dev_loss_work,
479 1UL * dev_loss_tmo * HZ);
480 } else {
481 pr_debug("%s has already been deleted\n",
482 dev_name(&shost->shost_gendev));
483 srp_rport_set_state(rport, SRP_RPORT_FAIL_FAST);
484 scsi_target_unblock(&shost->shost_gendev,
485 SDEV_TRANSPORT_OFFLINE);
486 }
487}
488
489/**
490 * srp_start_tl_fail_timers() - start the transport layer failure timers
491 *
492 * Start the transport layer fast I/O failure and device loss timers. Do not
493 * modify a timer that was already started.
494 */
495void srp_start_tl_fail_timers(struct srp_rport *rport)
496{
497 mutex_lock(&rport->mutex);
498 __srp_start_tl_fail_timers(rport);
499 mutex_unlock(&rport->mutex);
500}
501EXPORT_SYMBOL(srp_start_tl_fail_timers);
502
503/**
504 * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn()
505 */
506static int scsi_request_fn_active(struct Scsi_Host *shost)
507{
508 struct scsi_device *sdev;
509 struct request_queue *q;
510 int request_fn_active = 0;
511
512 shost_for_each_device(sdev, shost) {
513 q = sdev->request_queue;
514
515 spin_lock_irq(q->queue_lock);
516 request_fn_active += q->request_fn_active;
517 spin_unlock_irq(q->queue_lock);
518 }
519
520 return request_fn_active;
521}
522
523/**
524 * srp_reconnect_rport() - reconnect to an SRP target port
525 *
526 * Blocks SCSI command queueing before invoking reconnect() such that
527 * queuecommand() won't be invoked concurrently with reconnect() from outside
528 * the SCSI EH. This is important since a reconnect() implementation may
529 * reallocate resources needed by queuecommand().
530 *
531 * Notes:
532 * - This function neither waits until outstanding requests have finished nor
 533 * tries to abort them. It is the responsibility of the reconnect()
534 * function to finish outstanding commands before reconnecting to the target
535 * port.
536 * - It is the responsibility of the caller to ensure that the resources
537 * reallocated by the reconnect() function won't be used while this function
538 * is in progress. One possible strategy is to invoke this function from
539 * the context of the SCSI EH thread only. Another possible strategy is to
540 * lock the rport mutex inside each SCSI LLD callback that can be invoked by
541 * the SCSI EH (the scsi_host_template.eh_*() functions and also the
542 * scsi_host_template.queuecommand() function).
543 */
544int srp_reconnect_rport(struct srp_rport *rport)
545{
546 struct Scsi_Host *shost = rport_to_shost(rport);
547 struct srp_internal *i = to_srp_internal(shost->transportt);
548 struct scsi_device *sdev;
549 int res;
550
551 pr_debug("SCSI host %s\n", dev_name(&shost->shost_gendev));
552
553 res = mutex_lock_interruptible(&rport->mutex);
554 if (res)
555 goto out;
556 scsi_target_block(&shost->shost_gendev);
557 while (scsi_request_fn_active(shost))
558 msleep(20);
559 res = i->f->reconnect(rport);
560 pr_debug("%s (state %d): transport.reconnect() returned %d\n",
561 dev_name(&shost->shost_gendev), rport->state, res);
562 if (res == 0) {
563 cancel_delayed_work(&rport->fast_io_fail_work);
564 cancel_delayed_work(&rport->dev_loss_work);
565
566 rport->failed_reconnects = 0;
567 srp_rport_set_state(rport, SRP_RPORT_RUNNING);
568 scsi_target_unblock(&shost->shost_gendev, SDEV_RUNNING);
569 /*
570 * If the SCSI error handler has offlined one or more devices,
571 * invoking scsi_target_unblock() won't change the state of
572 * these devices into running so do that explicitly.
573 */
574 spin_lock_irq(shost->host_lock);
575 __shost_for_each_device(sdev, shost)
576 if (sdev->sdev_state == SDEV_OFFLINE)
577 sdev->sdev_state = SDEV_RUNNING;
578 spin_unlock_irq(shost->host_lock);
579 } else if (rport->state == SRP_RPORT_RUNNING) {
580 /*
581 * srp_reconnect_rport() was invoked with fast_io_fail
582 * off. Mark the port as failed and start the TL failure
583 * timers if these had not yet been started.
584 */
585 __rport_fail_io_fast(rport);
586 scsi_target_unblock(&shost->shost_gendev,
587 SDEV_TRANSPORT_OFFLINE);
588 __srp_start_tl_fail_timers(rport);
589 } else if (rport->state != SRP_RPORT_BLOCKED) {
590 scsi_target_unblock(&shost->shost_gendev,
591 SDEV_TRANSPORT_OFFLINE);
592 }
593 mutex_unlock(&rport->mutex);
594
595out:
596 return res;
597}
598EXPORT_SYMBOL(srp_reconnect_rport);
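One way to honour the locking notes above is to drive reconnects from the SCSI error handler; a hypothetical LLD host-reset handler, where host_to_rport() stands in for an assumed driver-private accessor and is not part of this patch:

/* Hypothetical sketch: reconnect from SCSI EH context so that it cannot
 * race with queuecommand(). host_to_rport() is an assumed LLD helper. */
static int example_eh_host_reset_handler(struct scsi_cmnd *scmnd)
{
	struct srp_rport *rport = host_to_rport(scmnd->device->host);

	return srp_reconnect_rport(rport) == 0 ? SUCCESS : FAILED;
}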
599
600/**
601 * srp_timed_out() - SRP transport intercept of the SCSI timeout EH
602 *
603 * If a timeout occurs while an rport is in the blocked state, ask the SCSI
604 * EH to continue waiting (BLK_EH_RESET_TIMER). Otherwise let the SCSI core
605 * handle the timeout (BLK_EH_NOT_HANDLED).
606 *
607 * Note: This function is called from soft-IRQ context and with the request
608 * queue lock held.
609 */
610static enum blk_eh_timer_return srp_timed_out(struct scsi_cmnd *scmd)
611{
612 struct scsi_device *sdev = scmd->device;
613 struct Scsi_Host *shost = sdev->host;
614 struct srp_internal *i = to_srp_internal(shost->transportt);
615
616 pr_debug("timeout for sdev %s\n", dev_name(&sdev->sdev_gendev));
617 return i->f->reset_timer_if_blocked && scsi_device_blocked(sdev) ?
618 BLK_EH_RESET_TIMER : BLK_EH_NOT_HANDLED;
619}
620
137static void srp_rport_release(struct device *dev) 621static void srp_rport_release(struct device *dev)
138{ 622{
139 struct srp_rport *rport = dev_to_rport(dev); 623 struct srp_rport *rport = dev_to_rport(dev);
140 624
625 cancel_delayed_work_sync(&rport->reconnect_work);
626 cancel_delayed_work_sync(&rport->fast_io_fail_work);
627 cancel_delayed_work_sync(&rport->dev_loss_work);
628
141 put_device(dev->parent); 629 put_device(dev->parent);
142 kfree(rport); 630 kfree(rport);
143} 631}
@@ -185,6 +673,24 @@ static int srp_host_match(struct attribute_container *cont, struct device *dev)
185} 673}
186 674
187/** 675/**
676 * srp_rport_get() - increment rport reference count
677 */
678void srp_rport_get(struct srp_rport *rport)
679{
680 get_device(&rport->dev);
681}
682EXPORT_SYMBOL(srp_rport_get);
683
684/**
685 * srp_rport_put() - decrement rport reference count
686 */
687void srp_rport_put(struct srp_rport *rport)
688{
689 put_device(&rport->dev);
690}
691EXPORT_SYMBOL(srp_rport_put);
692
693/**
188 * srp_rport_add - add a SRP remote port to the device hierarchy 694 * srp_rport_add - add a SRP remote port to the device hierarchy
189 * @shost: scsi host the remote port is connected to. 695 * @shost: scsi host the remote port is connected to.
190 * @ids: The port id for the remote port. 696 * @ids: The port id for the remote port.
@@ -196,12 +702,15 @@ struct srp_rport *srp_rport_add(struct Scsi_Host *shost,
196{ 702{
197 struct srp_rport *rport; 703 struct srp_rport *rport;
198 struct device *parent = &shost->shost_gendev; 704 struct device *parent = &shost->shost_gendev;
705 struct srp_internal *i = to_srp_internal(shost->transportt);
199 int id, ret; 706 int id, ret;
200 707
201 rport = kzalloc(sizeof(*rport), GFP_KERNEL); 708 rport = kzalloc(sizeof(*rport), GFP_KERNEL);
202 if (!rport) 709 if (!rport)
203 return ERR_PTR(-ENOMEM); 710 return ERR_PTR(-ENOMEM);
204 711
712 mutex_init(&rport->mutex);
713
205 device_initialize(&rport->dev); 714 device_initialize(&rport->dev);
206 715
207 rport->dev.parent = get_device(parent); 716 rport->dev.parent = get_device(parent);
@@ -210,6 +719,17 @@ struct srp_rport *srp_rport_add(struct Scsi_Host *shost,
210 memcpy(rport->port_id, ids->port_id, sizeof(rport->port_id)); 719 memcpy(rport->port_id, ids->port_id, sizeof(rport->port_id));
211 rport->roles = ids->roles; 720 rport->roles = ids->roles;
212 721
722 if (i->f->reconnect)
723 rport->reconnect_delay = i->f->reconnect_delay ?
724 *i->f->reconnect_delay : 10;
725 INIT_DELAYED_WORK(&rport->reconnect_work, srp_reconnect_work);
726 rport->fast_io_fail_tmo = i->f->fast_io_fail_tmo ?
727 *i->f->fast_io_fail_tmo : 15;
728 rport->dev_loss_tmo = i->f->dev_loss_tmo ? *i->f->dev_loss_tmo : 60;
729 INIT_DELAYED_WORK(&rport->fast_io_fail_work,
730 rport_fast_io_fail_timedout);
731 INIT_DELAYED_WORK(&rport->dev_loss_work, rport_dev_loss_timedout);
732
213 id = atomic_inc_return(&to_srp_host_attrs(shost)->next_port_id); 733 id = atomic_inc_return(&to_srp_host_attrs(shost)->next_port_id);
214 dev_set_name(&rport->dev, "port-%d:%d", shost->host_no, id); 734 dev_set_name(&rport->dev, "port-%d:%d", shost->host_no, id);
215 735
@@ -259,6 +779,13 @@ void srp_rport_del(struct srp_rport *rport)
259 transport_remove_device(dev); 779 transport_remove_device(dev);
260 device_del(dev); 780 device_del(dev);
261 transport_destroy_device(dev); 781 transport_destroy_device(dev);
782
783 mutex_lock(&rport->mutex);
784 if (rport->state == SRP_RPORT_BLOCKED)
785 __rport_fail_io_fast(rport);
786 rport->deleted = true;
787 mutex_unlock(&rport->mutex);
788
262 put_device(dev); 789 put_device(dev);
263} 790}
264EXPORT_SYMBOL_GPL(srp_rport_del); 791EXPORT_SYMBOL_GPL(srp_rport_del);
@@ -310,6 +837,8 @@ srp_attach_transport(struct srp_function_template *ft)
310 if (!i) 837 if (!i)
311 return NULL; 838 return NULL;
312 839
840 i->t.eh_timed_out = srp_timed_out;
841
313 i->t.tsk_mgmt_response = srp_tsk_mgmt_response; 842 i->t.tsk_mgmt_response = srp_tsk_mgmt_response;
314 i->t.it_nexus_response = srp_it_nexus_response; 843 i->t.it_nexus_response = srp_it_nexus_response;
315 844
@@ -327,6 +856,15 @@ srp_attach_transport(struct srp_function_template *ft)
327 count = 0; 856 count = 0;
328 i->rport_attrs[count++] = &dev_attr_port_id; 857 i->rport_attrs[count++] = &dev_attr_port_id;
329 i->rport_attrs[count++] = &dev_attr_roles; 858 i->rport_attrs[count++] = &dev_attr_roles;
859 if (ft->has_rport_state) {
860 i->rport_attrs[count++] = &dev_attr_state;
861 i->rport_attrs[count++] = &dev_attr_fast_io_fail_tmo;
862 i->rport_attrs[count++] = &dev_attr_dev_loss_tmo;
863 }
864 if (ft->reconnect) {
865 i->rport_attrs[count++] = &dev_attr_reconnect_delay;
866 i->rport_attrs[count++] = &dev_attr_failed_reconnects;
867 }
330 if (ft->rport_delete) 868 if (ft->rport_delete)
331 i->rport_attrs[count++] = &dev_attr_delete; 869 i->rport_attrs[count++] = &dev_attr_delete;
332 i->rport_attrs[count++] = NULL; 870 i->rport_attrs[count++] = NULL;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e393171e2fac..979874c627ee 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -67,12 +67,14 @@ enum rdma_node_type {
67 RDMA_NODE_IB_CA = 1, 67 RDMA_NODE_IB_CA = 1,
68 RDMA_NODE_IB_SWITCH, 68 RDMA_NODE_IB_SWITCH,
69 RDMA_NODE_IB_ROUTER, 69 RDMA_NODE_IB_ROUTER,
70 RDMA_NODE_RNIC 70 RDMA_NODE_RNIC,
71 RDMA_NODE_USNIC,
71}; 72};
72 73
73enum rdma_transport_type { 74enum rdma_transport_type {
74 RDMA_TRANSPORT_IB, 75 RDMA_TRANSPORT_IB,
75 RDMA_TRANSPORT_IWARP 76 RDMA_TRANSPORT_IWARP,
77 RDMA_TRANSPORT_USNIC
76}; 78};
77 79
78enum rdma_transport_type 80enum rdma_transport_type
@@ -1436,6 +1438,7 @@ struct ib_device {
1436 1438
1437 int uverbs_abi_ver; 1439 int uverbs_abi_ver;
1438 u64 uverbs_cmd_mask; 1440 u64 uverbs_cmd_mask;
1441 u64 uverbs_ex_cmd_mask;
1439 1442
1440 char node_desc[64]; 1443 char node_desc[64];
1441 __be64 node_guid; 1444 __be64 node_guid;
@@ -2384,4 +2387,17 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp,
2384 struct ib_flow_attr *flow_attr, int domain); 2387 struct ib_flow_attr *flow_attr, int domain);
2385int ib_destroy_flow(struct ib_flow *flow_id); 2388int ib_destroy_flow(struct ib_flow *flow_id);
2386 2389
2390static inline int ib_check_mr_access(int flags)
2391{
2392 /*
2393 * Local write permission is required if remote write or
2394 * remote atomic permission is also requested.
2395 */
2396 if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
2397 !(flags & IB_ACCESS_LOCAL_WRITE))
2398 return -EINVAL;
2399
2400 return 0;
2401}
2402
2387#endif /* IB_VERBS_H */ 2403#endif /* IB_VERBS_H */
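A hypothetical use of the new helper in a driver's memory-registration path; the function name and the ERR_PTR placeholder are illustrative and not taken from any in-tree driver:

/* Hypothetical sketch: reject inconsistent access flags up front. */
struct ib_mr *example_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags)
{
	int ret = ib_check_mr_access(access_flags);

	if (ret)
		return ERR_PTR(ret);	/* remote write/atomic without local write */

	/* ... pin the user pages and program the HCA translation tables ... */
	return ERR_PTR(-ENOSYS);	/* placeholder in this sketch */
}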
diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h
index ff0f04ac91aa..4ebf6913b7b2 100644
--- a/include/scsi/scsi_transport_srp.h
+++ b/include/scsi/scsi_transport_srp.h
@@ -13,6 +13,27 @@ struct srp_rport_identifiers {
13 u8 roles; 13 u8 roles;
14}; 14};
15 15
16/**
17 * enum srp_rport_state - SRP transport layer state
18 * @SRP_RPORT_RUNNING: Transport layer operational.
19 * @SRP_RPORT_BLOCKED: Transport layer not operational; fast I/O fail timer
20 * is running and I/O has been blocked.
21 * @SRP_RPORT_FAIL_FAST: Fast I/O fail timer has expired; fail I/O fast.
22 * @SRP_RPORT_LOST: Device loss timer has expired; port is being removed.
23 */
24enum srp_rport_state {
25 SRP_RPORT_RUNNING,
26 SRP_RPORT_BLOCKED,
27 SRP_RPORT_FAIL_FAST,
28 SRP_RPORT_LOST,
29};
30
31/**
32 * struct srp_rport
33 * @lld_data: LLD private data.
34 * @mutex: Protects against concurrent rport reconnect / fast_io_fail /
35 * dev_loss_tmo activity.
36 */
16struct srp_rport { 37struct srp_rport {
17 /* for initiator and target drivers */ 38 /* for initiator and target drivers */
18 39
@@ -23,11 +44,43 @@ struct srp_rport {
23 44
24 /* for initiator drivers */ 45 /* for initiator drivers */
25 46
26 void *lld_data; /* LLD private data */ 47 void *lld_data;
48
49 struct mutex mutex;
50 enum srp_rport_state state;
51 bool deleted;
52 int reconnect_delay;
53 int failed_reconnects;
54 struct delayed_work reconnect_work;
55 int fast_io_fail_tmo;
56 int dev_loss_tmo;
57 struct delayed_work fast_io_fail_work;
58 struct delayed_work dev_loss_work;
27}; 59};
28 60
61/**
62 * struct srp_function_template
63 * @has_rport_state: Whether or not to create the state, fast_io_fail_tmo and
64 * dev_loss_tmo sysfs attribute for an rport.
65 * @reset_timer_if_blocked: Whether or srp_timed_out() should reset the command
66 * timer if the device on which it has been queued is blocked.
67 * @reconnect_delay: If not NULL, points to the default reconnect_delay value.
68 * @fast_io_fail_tmo: If not NULL, points to the default fast_io_fail_tmo value.
69 * @dev_loss_tmo: If not NULL, points to the default dev_loss_tmo value.
70 * @reconnect: Callback function for reconnecting to the target. See also
71 * srp_reconnect_rport().
72 * @terminate_rport_io: Callback function for terminating all outstanding I/O
73 * requests for an rport.
74 */
29struct srp_function_template { 75struct srp_function_template {
30 /* for initiator drivers */ 76 /* for initiator drivers */
77 bool has_rport_state;
78 bool reset_timer_if_blocked;
79 int *reconnect_delay;
80 int *fast_io_fail_tmo;
81 int *dev_loss_tmo;
82 int (*reconnect)(struct srp_rport *rport);
83 void (*terminate_rport_io)(struct srp_rport *rport);
31 void (*rport_delete)(struct srp_rport *rport); 84 void (*rport_delete)(struct srp_rport *rport);
32 /* for target drivers */ 85 /* for target drivers */
33 int (* tsk_mgmt_response)(struct Scsi_Host *, u64, u64, int); 86 int (* tsk_mgmt_response)(struct Scsi_Host *, u64, u64, int);
@@ -38,10 +91,36 @@ extern struct scsi_transport_template *
38srp_attach_transport(struct srp_function_template *); 91srp_attach_transport(struct srp_function_template *);
39extern void srp_release_transport(struct scsi_transport_template *); 92extern void srp_release_transport(struct scsi_transport_template *);
40 93
94extern void srp_rport_get(struct srp_rport *rport);
95extern void srp_rport_put(struct srp_rport *rport);
41extern struct srp_rport *srp_rport_add(struct Scsi_Host *, 96extern struct srp_rport *srp_rport_add(struct Scsi_Host *,
42 struct srp_rport_identifiers *); 97 struct srp_rport_identifiers *);
43extern void srp_rport_del(struct srp_rport *); 98extern void srp_rport_del(struct srp_rport *);
44 99extern int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo,
100 int dev_loss_tmo);
101extern int srp_reconnect_rport(struct srp_rport *rport);
102extern void srp_start_tl_fail_timers(struct srp_rport *rport);
45extern void srp_remove_host(struct Scsi_Host *); 103extern void srp_remove_host(struct Scsi_Host *);
46 104
105/**
106 * srp_chkready() - evaluate the transport layer state before I/O
107 *
108 * Returns a SCSI result code that can be returned by the LLD queuecommand()
109 * implementation. The role of this function is similar to that of
110 * fc_remote_port_chkready().
111 */
112static inline int srp_chkready(struct srp_rport *rport)
113{
114 switch (rport->state) {
115 case SRP_RPORT_RUNNING:
116 case SRP_RPORT_BLOCKED:
117 default:
118 return 0;
119 case SRP_RPORT_FAIL_FAST:
120 return DID_TRANSPORT_FAILFAST << 16;
121 case SRP_RPORT_LOST:
122 return DID_NO_CONNECT << 16;
123 }
124}
125
47#endif 126#endif
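A hypothetical queuecommand() fragment showing how an initiator driver can consume srp_chkready(); example_target and its rport field stand in for the driver's own per-host private data:

/* Hypothetical sketch: fail or pass commands according to the rport state. */
struct example_target {
	struct srp_rport *rport;	/* assumed per-host private data */
};

static int example_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
{
	struct example_target *target = shost_priv(shost);
	int result = srp_chkready(target->rport);

	if (unlikely(result)) {
		scmnd->result = result;
		scmnd->scsi_done(scmnd);
		return 0;	/* command completed with a transport error */
	}

	/* ... map the data buffer and post the SRP_CMD work request ... */
	return 0;
}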
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index e3ddd86c90a6..cbfdd4ca9510 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -87,10 +87,11 @@ enum {
87 IB_USER_VERBS_CMD_CLOSE_XRCD, 87 IB_USER_VERBS_CMD_CLOSE_XRCD,
88 IB_USER_VERBS_CMD_CREATE_XSRQ, 88 IB_USER_VERBS_CMD_CREATE_XSRQ,
89 IB_USER_VERBS_CMD_OPEN_QP, 89 IB_USER_VERBS_CMD_OPEN_QP,
90#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING 90};
91 IB_USER_VERBS_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, 91
92 IB_USER_VERBS_CMD_DESTROY_FLOW 92enum {
93#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ 93 IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
94 IB_USER_VERBS_EX_CMD_DESTROY_FLOW
94}; 95};
95 96
96/* 97/*
@@ -122,22 +123,24 @@ struct ib_uverbs_comp_event_desc {
122 * the rest of the command struct based on these value. 123 * the rest of the command struct based on these value.
123 */ 124 */
124 125
126#define IB_USER_VERBS_CMD_COMMAND_MASK 0xff
127#define IB_USER_VERBS_CMD_FLAGS_MASK 0xff000000u
128#define IB_USER_VERBS_CMD_FLAGS_SHIFT 24
129
130#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80
131
125struct ib_uverbs_cmd_hdr { 132struct ib_uverbs_cmd_hdr {
126 __u32 command; 133 __u32 command;
127 __u16 in_words; 134 __u16 in_words;
128 __u16 out_words; 135 __u16 out_words;
129}; 136};
130 137
131#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING 138struct ib_uverbs_ex_cmd_hdr {
132struct ib_uverbs_cmd_hdr_ex { 139 __u64 response;
133 __u32 command;
134 __u16 in_words;
135 __u16 out_words;
136 __u16 provider_in_words; 140 __u16 provider_in_words;
137 __u16 provider_out_words; 141 __u16 provider_out_words;
138 __u32 cmd_hdr_reserved; 142 __u32 cmd_hdr_reserved;
139}; 143};
140#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
141 144
142struct ib_uverbs_get_context { 145struct ib_uverbs_get_context {
143 __u64 response; 146 __u64 response;
@@ -700,62 +703,71 @@ struct ib_uverbs_detach_mcast {
700 __u64 driver_data[0]; 703 __u64 driver_data[0];
701}; 704};
702 705
703#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING 706struct ib_uverbs_flow_spec_hdr {
704struct ib_kern_eth_filter { 707 __u32 type;
708 __u16 size;
709 __u16 reserved;
710 /* followed by flow_spec */
711 __u64 flow_spec_data[0];
712};
713
714struct ib_uverbs_flow_eth_filter {
705 __u8 dst_mac[6]; 715 __u8 dst_mac[6];
706 __u8 src_mac[6]; 716 __u8 src_mac[6];
707 __be16 ether_type; 717 __be16 ether_type;
708 __be16 vlan_tag; 718 __be16 vlan_tag;
709}; 719};
710 720
711struct ib_kern_spec_eth { 721struct ib_uverbs_flow_spec_eth {
712 __u32 type; 722 union {
713 __u16 size; 723 struct ib_uverbs_flow_spec_hdr hdr;
714 __u16 reserved; 724 struct {
715 struct ib_kern_eth_filter val; 725 __u32 type;
716 struct ib_kern_eth_filter mask; 726 __u16 size;
727 __u16 reserved;
728 };
729 };
730 struct ib_uverbs_flow_eth_filter val;
731 struct ib_uverbs_flow_eth_filter mask;
717}; 732};
718 733
719struct ib_kern_ipv4_filter { 734struct ib_uverbs_flow_ipv4_filter {
720 __be32 src_ip; 735 __be32 src_ip;
721 __be32 dst_ip; 736 __be32 dst_ip;
722}; 737};
723 738
724struct ib_kern_spec_ipv4 { 739struct ib_uverbs_flow_spec_ipv4 {
725 __u32 type; 740 union {
726 __u16 size; 741 struct ib_uverbs_flow_spec_hdr hdr;
727 __u16 reserved; 742 struct {
728 struct ib_kern_ipv4_filter val; 743 __u32 type;
729 struct ib_kern_ipv4_filter mask; 744 __u16 size;
745 __u16 reserved;
746 };
747 };
748 struct ib_uverbs_flow_ipv4_filter val;
749 struct ib_uverbs_flow_ipv4_filter mask;
730}; 750};
731 751
732struct ib_kern_tcp_udp_filter { 752struct ib_uverbs_flow_tcp_udp_filter {
733 __be16 dst_port; 753 __be16 dst_port;
734 __be16 src_port; 754 __be16 src_port;
735}; 755};
736 756
737struct ib_kern_spec_tcp_udp { 757struct ib_uverbs_flow_spec_tcp_udp {
738 __u32 type;
739 __u16 size;
740 __u16 reserved;
741 struct ib_kern_tcp_udp_filter val;
742 struct ib_kern_tcp_udp_filter mask;
743};
744
745struct ib_kern_spec {
746 union { 758 union {
759 struct ib_uverbs_flow_spec_hdr hdr;
747 struct { 760 struct {
748 __u32 type; 761 __u32 type;
749 __u16 size; 762 __u16 size;
750 __u16 reserved; 763 __u16 reserved;
751 }; 764 };
752 struct ib_kern_spec_eth eth;
753 struct ib_kern_spec_ipv4 ipv4;
754 struct ib_kern_spec_tcp_udp tcp_udp;
755 }; 765 };
766 struct ib_uverbs_flow_tcp_udp_filter val;
767 struct ib_uverbs_flow_tcp_udp_filter mask;
756}; 768};
757 769
758struct ib_kern_flow_attr { 770struct ib_uverbs_flow_attr {
759 __u32 type; 771 __u32 type;
760 __u16 size; 772 __u16 size;
761 __u16 priority; 773 __u16 priority;
@@ -767,13 +779,13 @@ struct ib_kern_flow_attr {
767 * struct ib_flow_spec_xxx 779 * struct ib_flow_spec_xxx
768 * struct ib_flow_spec_yyy 780 * struct ib_flow_spec_yyy
769 */ 781 */
782 struct ib_uverbs_flow_spec_hdr flow_specs[0];
770}; 783};
771 784
772struct ib_uverbs_create_flow { 785struct ib_uverbs_create_flow {
773 __u32 comp_mask; 786 __u32 comp_mask;
774 __u64 response;
775 __u32 qp_handle; 787 __u32 qp_handle;
776 struct ib_kern_flow_attr flow_attr; 788 struct ib_uverbs_flow_attr flow_attr;
777}; 789};
778 790
779struct ib_uverbs_create_flow_resp { 791struct ib_uverbs_create_flow_resp {
@@ -785,7 +797,6 @@ struct ib_uverbs_destroy_flow {
785 __u32 comp_mask; 797 __u32 comp_mask;
786 __u32 flow_handle; 798 __u32 flow_handle;
787}; 799};
788#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
789 800
790struct ib_uverbs_create_srq { 801struct ib_uverbs_create_srq {
791 __u64 response; 802 __u64 response;