author	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-13 22:19:09 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-13 22:19:09 -0500
commit	f132c54e3ab25b305a1e368ad413a417052c966e (patch)
tree	e34ef551cd3e544f9b8599728e56cce292703179
parent	d3b43e12b2c8c69f79ab76dcdc5956f47c376378 (diff)
parent	01e03365981ebd99f1b4027dbf7c215d1c136f71 (diff)
Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
Pull infiniband update from Roland Dreier:
 "First batch of InfiniBand/RDMA changes for the 3.8 merge window:

  - A good chunk of Bart Van Assche's SRP fixes
  - UAPI disintegration from David Howells
  - mlx4 support for "64-byte CQE" hardware feature from Or Gerlitz
  - Other miscellaneous fixes"

Fix up trivial conflict in mellanox/mlx4 driver.

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (33 commits)
  RDMA/nes: Fix for crash when registering zero length MR for CQ
  RDMA/nes: Fix for terminate timer crash
  RDMA/nes: Fix for BUG_ON due to adding already-pending timer
  IB/srp: Allow SRP disconnect through sysfs
  srp_transport: Document sysfs attributes
  srp_transport: Simplify attribute initialization code
  srp_transport: Fix attribute registration
  IB/srp: Document sysfs attributes
  IB/srp: send disconnect request without waiting for CM timewait exit
  IB/srp: destroy and recreate QP and CQs when reconnecting
  IB/srp: Eliminate state SRP_TARGET_DEAD
  IB/srp: Introduce the helper function srp_remove_target()
  IB/srp: Suppress superfluous error messages
  IB/srp: Process all error completions
  IB/srp: Introduce srp_handle_qp_err()
  IB/srp: Simplify SCSI error handling
  IB/srp: Keep processing commands during host removal
  IB/srp: Eliminate state SRP_TARGET_CONNECTING
  IB/srp: Increase block layer timeout
  RDMA/cm: Change return value from find_gid_port()
  ...
-rw-r--r--	Documentation/ABI/stable/sysfs-driver-ib_srp | 156
-rw-r--r--	Documentation/ABI/stable/sysfs-transport-srp | 19
-rw-r--r--	drivers/infiniband/core/cma.c | 9
-rw-r--r--	drivers/infiniband/hw/amso1100/c2_ae.c | 1
-rw-r--r--	drivers/infiniband/hw/cxgb3/iwch_cm.c | 6
-rw-r--r--	drivers/infiniband/hw/cxgb4/cm.c | 6
-rw-r--r--	drivers/infiniband/hw/ipath/ipath_init_chip.c | 10
-rw-r--r--	drivers/infiniband/hw/mlx4/cm.c | 4
-rw-r--r--	drivers/infiniband/hw/mlx4/cq.c | 34
-rw-r--r--	drivers/infiniband/hw/mlx4/main.c | 27
-rw-r--r--	drivers/infiniband/hw/mlx4/mlx4_ib.h | 1
-rw-r--r--	drivers/infiniband/hw/mlx4/user.h | 12
-rw-r--r--	drivers/infiniband/hw/nes/nes.h | 1
-rw-r--r--	drivers/infiniband/hw/nes/nes_cm.c | 32
-rw-r--r--	drivers/infiniband/hw/nes/nes_hw.c | 9
-rw-r--r--	drivers/infiniband/hw/nes/nes_mgt.c | 42
-rw-r--r--	drivers/infiniband/hw/nes/nes_nic.c | 13
-rw-r--r--	drivers/infiniband/hw/nes/nes_verbs.c | 9
-rw-r--r--	drivers/infiniband/ulp/srp/ib_srp.c | 314
-rw-r--r--	drivers/infiniband/ulp/srp/ib_srp.h | 11
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/cmd.c | 11
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/en_cq.c | 2
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/en_rx.c | 5
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/en_tx.c | 5
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/eq.c | 36
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/fw.c | 30
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/fw.h | 1
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/main.c | 38
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1
-rw-r--r--	drivers/scsi/scsi_transport_srp.c | 51
-rw-r--r--	include/linux/mlx4/device.h | 21
-rw-r--r--	include/rdma/Kbuild | 6
-rw-r--r--	include/rdma/rdma_netlink.h | 36
-rw-r--r--	include/scsi/scsi_transport_srp.h | 8
-rw-r--r--	include/uapi/rdma/Kbuild | 6
-rw-r--r--	include/uapi/rdma/ib_user_cm.h (renamed from include/rdma/ib_user_cm.h) | 0
-rw-r--r--	include/uapi/rdma/ib_user_mad.h (renamed from include/rdma/ib_user_mad.h) | 0
-rw-r--r--	include/uapi/rdma/ib_user_sa.h (renamed from include/rdma/ib_user_sa.h) | 0
-rw-r--r--	include/uapi/rdma/ib_user_verbs.h (renamed from include/rdma/ib_user_verbs.h) | 0
-rw-r--r--	include/uapi/rdma/rdma_netlink.h | 37
-rw-r--r--	include/uapi/rdma/rdma_user_cm.h (renamed from include/rdma/rdma_user_cm.h) | 0
42 files changed, 689 insertions, 322 deletions
diff --git a/Documentation/ABI/stable/sysfs-driver-ib_srp b/Documentation/ABI/stable/sysfs-driver-ib_srp
new file mode 100644
index 000000000000..481aae95c7d1
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-driver-ib_srp
@@ -0,0 +1,156 @@
+What:		/sys/class/infiniband_srp/srp-<hca>-<port_number>/add_target
+Date:		January 2, 2006
+KernelVersion:	2.6.15
+Contact:	linux-rdma@vger.kernel.org
+Description:	Interface for making ib_srp connect to a new target.
+		One can request ib_srp to connect to a new target by writing
+		a comma-separated list of login parameters to this sysfs
+		attribute. The supported parameters are:
+		* id_ext, a 16-digit hexadecimal number specifying the eight
+		  byte identifier extension in the 16-byte SRP target port
+		  identifier. The target port identifier is sent by ib_srp
+		  to the target in the SRP_LOGIN_REQ request.
+		* ioc_guid, a 16-digit hexadecimal number specifying the eight
+		  byte I/O controller GUID portion of the 16-byte target port
+		  identifier.
+		* dgid, a 32-digit hexadecimal number specifying the
+		  destination GID.
+		* pkey, a four-digit hexadecimal number specifying the
+		  InfiniBand partition key.
+		* service_id, a 16-digit hexadecimal number specifying the
+		  InfiniBand service ID used to establish communication with
+		  the SRP target. How to find out the value of the service ID
+		  is specified in the documentation of the SRP target.
+		* max_sect, a decimal number specifying the maximum number of
+		  512-byte sectors to be transferred via a single SCSI command.
+		* max_cmd_per_lun, a decimal number specifying the maximum
+		  number of outstanding commands for a single LUN.
+		* io_class, a hexadecimal number specifying the SRP I/O class.
+		  Must be either 0xff00 (rev 10) or 0x0100 (rev 16a). The I/O
+		  class defines the format of the SRP initiator and target
+		  port identifiers.
+		* initiator_ext, a 16-digit hexadecimal number specifying the
+		  identifier extension portion of the SRP initiator port
+		  identifier. This data is sent by the initiator to the target
+		  in the SRP_LOGIN_REQ request.
+		* cmd_sg_entries, a number in the range 1..255 that specifies
+		  the maximum number of data buffer descriptors stored in the
+		  SRP_CMD information unit itself. With allow_ext_sg=0 the
+		  parameter cmd_sg_entries defines the maximum S/G list length
+		  for a single SRP_CMD, and commands whose S/G list length
+		  exceeds this limit after S/G list collapsing will fail.
+		* allow_ext_sg, whether ib_srp is allowed to include a partial
+		  memory descriptor list in an SRP_CMD instead of the entire
+		  list. If a partial memory descriptor list has been included
+		  in an SRP_CMD the remaining memory descriptors are
+		  communicated from initiator to target via an additional RDMA
+		  transfer. Setting allow_ext_sg to 1 increases the maximum
+		  amount of data that can be transferred between initiator and
+		  target via a single SCSI command. Since not all SRP target
+		  implementations support partial memory descriptor lists the
+		  default value for this option is 0.
+		* sg_tablesize, a number in the range 1..2048 specifying the
+		  maximum S/G list length the SCSI layer is allowed to pass to
+		  ib_srp. Specifying a value that exceeds cmd_sg_entries is
+		  only safe with partial memory descriptor list support enabled
+		  (allow_ext_sg=1).
+
+What:		/sys/class/infiniband_srp/srp-<hca>-<port_number>/ibdev
+Date:		January 2, 2006
+KernelVersion:	2.6.15
+Contact:	linux-rdma@vger.kernel.org
+Description:	HCA name (<hca>).
+
+What:		/sys/class/infiniband_srp/srp-<hca>-<port_number>/port
+Date:		January 2, 2006
+KernelVersion:	2.6.15
+Contact:	linux-rdma@vger.kernel.org
+Description:	HCA port number (<port_number>).
+
+What:		/sys/class/scsi_host/host<n>/allow_ext_sg
+Date:		May 19, 2011
+KernelVersion:	2.6.39
+Contact:	linux-rdma@vger.kernel.org
+Description:	Whether ib_srp is allowed to include a partial memory
+		descriptor list in an SRP_CMD when communicating with an SRP
+		target.
+
+What:		/sys/class/scsi_host/host<n>/cmd_sg_entries
+Date:		May 19, 2011
+KernelVersion:	2.6.39
+Contact:	linux-rdma@vger.kernel.org
+Description:	Maximum number of data buffer descriptors that may be sent to
+		the target in a single SRP_CMD request.
+
+What:		/sys/class/scsi_host/host<n>/dgid
+Date:		June 17, 2006
+KernelVersion:	2.6.17
+Contact:	linux-rdma@vger.kernel.org
+Description:	InfiniBand destination GID used for communication with the SRP
+		target. Differs from orig_dgid if port redirection has happened.
+
+What:		/sys/class/scsi_host/host<n>/id_ext
+Date:		June 17, 2006
+KernelVersion:	2.6.17
+Contact:	linux-rdma@vger.kernel.org
+Description:	Eight-byte identifier extension portion of the 16-byte target
+		port identifier.
+
+What:		/sys/class/scsi_host/host<n>/ioc_guid
+Date:		June 17, 2006
+KernelVersion:	2.6.17
+Contact:	linux-rdma@vger.kernel.org
+Description:	Eight-byte I/O controller GUID portion of the 16-byte target
+		port identifier.
+
+What:		/sys/class/scsi_host/host<n>/local_ib_device
+Date:		November 29, 2006
+KernelVersion:	2.6.19
+Contact:	linux-rdma@vger.kernel.org
+Description:	Name of the InfiniBand HCA used for communicating with the
+		SRP target.
+
+What:		/sys/class/scsi_host/host<n>/local_ib_port
+Date:		November 29, 2006
+KernelVersion:	2.6.19
+Contact:	linux-rdma@vger.kernel.org
+Description:	Number of the HCA port used for communicating with the
+		SRP target.
+
+What:		/sys/class/scsi_host/host<n>/orig_dgid
+Date:		June 17, 2006
+KernelVersion:	2.6.17
+Contact:	linux-rdma@vger.kernel.org
+Description:	InfiniBand destination GID specified in the parameters
+		written to the add_target sysfs attribute.
+
+What:		/sys/class/scsi_host/host<n>/pkey
+Date:		June 17, 2006
+KernelVersion:	2.6.17
+Contact:	linux-rdma@vger.kernel.org
+Description:	A 16-bit number representing the InfiniBand partition key used
+		for communication with the SRP target.
+
+What:		/sys/class/scsi_host/host<n>/req_lim
+Date:		October 20, 2010
+KernelVersion:	2.6.36
+Contact:	linux-rdma@vger.kernel.org
+Description:	Number of requests ib_srp can send to the target before it has
+		to wait for more credits. For more information see also the
+		SRP credit algorithm in the SRP specification.
+
+What:		/sys/class/scsi_host/host<n>/service_id
+Date:		June 17, 2006
+KernelVersion:	2.6.17
+Contact:	linux-rdma@vger.kernel.org
+Description:	InfiniBand service ID used for establishing communication with
+		the SRP target.
+
+What:		/sys/class/scsi_host/host<n>/zero_req_lim
+Date:		September 20, 2006
+KernelVersion:	2.6.18
+Contact:	linux-rdma@vger.kernel.org
+Description:	Number of times the initiator had to wait before sending a
+		request to the target because it ran out of credits. For more
+		information see also the SRP credit algorithm in the SRP
+		specification.
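[Editorial note] The add_target attribute documented in the new file above is driven entirely by a parameter string written from user space. As a minimal, hedged sketch of that usage: the program below opens the attribute and writes a comma-separated login string. The HCA/port name "srp-mlx4_0-1" and every parameter value are hypothetical placeholders, not taken from this patch; real values come from the local fabric and the target's own documentation.

/*
 * Editorial sketch (not part of the patch): log in to an SRP target by
 * writing a comma-separated parameter string to the add_target attribute
 * described above.  The device name "srp-mlx4_0-1" and all parameter
 * values below are hypothetical placeholders.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *attr =
		"/sys/class/infiniband_srp/srp-mlx4_0-1/add_target";
	const char *params =
		"id_ext=200100a0b8020031,ioc_guid=00a0b8020031e4a2,"
		"dgid=fe800000000000000002c903000e8acd,pkey=ffff,"
		"service_id=0002c903000e8acd,max_cmd_per_lun=32";
	int fd;
	ssize_t ret;

	fd = open(attr, O_WRONLY);
	if (fd < 0) {
		perror("open add_target");
		return 1;
	}
	ret = write(fd, params, strlen(params));
	if (ret < 0)
		perror("write add_target");
	close(fd);
	return ret < 0;
}

On success, ib_srp creates a new SCSI host whose per-connection attributes then appear under /sys/class/scsi_host/host<n>/ as listed in the entries above.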
diff --git a/Documentation/ABI/stable/sysfs-transport-srp b/Documentation/ABI/stable/sysfs-transport-srp
new file mode 100644
index 000000000000..b36fb0dc13c8
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-transport-srp
@@ -0,0 +1,19 @@
+What:		/sys/class/srp_remote_ports/port-<h>:<n>/delete
+Date:		June 1, 2012
+KernelVersion:	3.7
+Contact:	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
+Description:	Instructs an SRP initiator to disconnect from a target and to
+		remove all LUNs imported from that target.
+
+What:		/sys/class/srp_remote_ports/port-<h>:<n>/port_id
+Date:		June 27, 2007
+KernelVersion:	2.6.24
+Contact:	linux-scsi@vger.kernel.org
+Description:	16-byte local SRP port identifier in hexadecimal format. An
+		example: 4c:49:4e:55:58:20:56:49:4f:00:00:00:00:00:00:00.
+
+What:		/sys/class/srp_remote_ports/port-<h>:<n>/roles
+Date:		June 27, 2007
+KernelVersion:	2.6.24
+Contact:	linux-scsi@vger.kernel.org
+Description:	Role of the remote port. Either "SRP Initiator" or "SRP Target".
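[Editorial note] The delete attribute added above ("IB/srp: Allow SRP disconnect through sysfs") is also triggered by a plain write from user space. The sketch below is an illustration only: the port name "port-1:1" is a hypothetical placeholder, and writing the token "1" assumes that any written value triggers the disconnect, which the documentation above does not spell out.

/*
 * Editorial sketch (not part of the patch): ask the SRP transport layer to
 * disconnect a remote port by writing to its delete attribute.  The port
 * name "port-1:1" is a hypothetical placeholder; writing "1" assumes any
 * value triggers the disconnect.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *attr = "/sys/class/srp_remote_ports/port-1:1/delete";
	int fd = open(attr, O_WRONLY);

	if (fd < 0) {
		perror("open delete");
		return 1;
	}
	if (write(fd, "1", 1) != 1)
		perror("write delete");
	close(fd);
	return 0;
}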
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index a7568c34a1aa..d789eea32168 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -345,17 +345,17 @@ static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_nu
 
 	err = ib_query_port(device, port_num, &props);
 	if (err)
-		return 1;
+		return err;
 
 	for (i = 0; i < props.gid_tbl_len; ++i) {
 		err = ib_query_gid(device, port_num, i, &tmp);
 		if (err)
-			return 1;
+			return err;
 		if (!memcmp(&tmp, gid, sizeof tmp))
 			return 0;
 	}
 
-	return -EAGAIN;
+	return -EADDRNOTAVAIL;
 }
 
 static int cma_acquire_dev(struct rdma_id_private *id_priv)
@@ -388,8 +388,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
 				if (!ret) {
 					id_priv->id.port_num = port;
 					goto out;
-				} else if (ret == 1)
-					break;
+				}
 			}
 		}
 	}
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
index 32d34e88d5cf..706cf97cbe8f 100644
--- a/drivers/infiniband/hw/amso1100/c2_ae.c
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -311,6 +311,7 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
 		if (cq->ibcq.event_handler)
 			cq->ibcq.event_handler(&ib_event,
 					       cq->ibcq.cq_context);
+		break;
 	}
 
 	default:
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index aaf88ef9409c..3e094cd6a0e3 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -128,9 +128,8 @@ static void stop_ep_timer(struct iwch_ep *ep)
 {
 	PDBG("%s ep %p\n", __func__, ep);
 	if (!timer_pending(&ep->timer)) {
-		printk(KERN_ERR "%s timer stopped when its not running! ep %p state %u\n",
+		WARN(1, "%s timer stopped when its not running! ep %p state %u\n",
 			__func__, ep, ep->com.state);
-		WARN_ON(1);
 		return;
 	}
 	del_timer_sync(&ep->timer);
@@ -1756,9 +1755,8 @@ static void ep_timeout(unsigned long arg)
 		__state_set(&ep->com, ABORTING);
 		break;
 	default:
-		printk(KERN_ERR "%s unexpected state ep %p state %u\n",
+		WARN(1, "%s unexpected state ep %p state %u\n",
 			__func__, ep, ep->com.state);
-		WARN_ON(1);
 		abort = 0;
 	}
 	spin_unlock_irqrestore(&ep->com.lock, flags);
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 6cfd4d8fd0bd..5de86968379d 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -151,9 +151,8 @@ static void stop_ep_timer(struct c4iw_ep *ep)
 {
 	PDBG("%s ep %p\n", __func__, ep);
 	if (!timer_pending(&ep->timer)) {
-		printk(KERN_ERR "%s timer stopped when its not running! "
+		WARN(1, "%s timer stopped when its not running! "
 		       "ep %p state %u\n", __func__, ep, ep->com.state);
-		WARN_ON(1);
 		return;
 	}
 	del_timer_sync(&ep->timer);
@@ -2551,9 +2550,8 @@ static void process_timeout(struct c4iw_ep *ep)
 		__state_set(&ep->com, ABORTING);
 		break;
 	default:
-		printk(KERN_ERR "%s unexpected state ep %p tid %u state %u\n",
+		WARN(1, "%s unexpected state ep %p tid %u state %u\n",
 			__func__, ep, ep->hwtid, ep->com.state);
-		WARN_ON(1);
 		abort = 0;
 	}
 	mutex_unlock(&ep->com.mutex);
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 49b09c697c7c..be2a60e142b0 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -719,16 +719,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 		goto done;
 
 	/*
-	 * we ignore most issues after reporting them, but have to specially
-	 * handle hardware-disabled chips.
-	 */
-	if (ret == 2) {
-		/* unique error, known to ipath_init_one */
-		ret = -EPERM;
-		goto done;
-	}
-
-	/*
 	 * We could bump this to allow for full rcvegrcnt + rcvtidcnt,
 	 * but then it no longer nicely fits power of two, and since
 	 * we now use routines that backend onto __get_free_pages, the
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index 80079e5a2e30..dbc99d41605c 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -268,15 +268,15 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
 	struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
 	unsigned long flags;
 
-	spin_lock_irqsave(&sriov->going_down_lock, flags);
 	spin_lock(&sriov->id_map_lock);
+	spin_lock_irqsave(&sriov->going_down_lock, flags);
 	/*make sure that there is no schedule inside the scheduled work.*/
 	if (!sriov->is_going_down) {
 		id->scheduled_delete = 1;
 		schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
 	}
-	spin_unlock(&sriov->id_map_lock);
 	spin_unlock_irqrestore(&sriov->going_down_lock, flags);
+	spin_unlock(&sriov->id_map_lock);
 }
 
 int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index c9eb6a6815ce..ae67df35dd4d 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -66,7 +66,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
 
 static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
 {
-	return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
+	return mlx4_buf_offset(&buf->buf, n * buf->entry_size);
 }
 
 static void *get_cqe(struct mlx4_ib_cq *cq, int n)
@@ -77,8 +77,9 @@ static void *get_cqe(struct mlx4_ib_cq *cq, int n)
 static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
 {
 	struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+	struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe);
 
-	return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+	return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
 		!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
 }
 
@@ -99,12 +100,13 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 {
 	int err;
 
-	err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe),
+	err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
 			     PAGE_SIZE * 2, &buf->buf);
 
 	if (err)
 		goto out;
 
+	buf->entry_size = dev->dev->caps.cqe_size;
 	err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
 			    &buf->mtt);
 	if (err)
@@ -120,8 +122,7 @@ err_mtt:
 	mlx4_mtt_cleanup(dev->dev, &buf->mtt);
 
 err_buf:
-	mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe),
-		      &buf->buf);
+	mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf);
 
 out:
 	return err;
@@ -129,7 +130,7 @@ out:
 
 static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
 {
-	mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf);
+	mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
 }
 
 static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
@@ -137,8 +138,9 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
 			       u64 buf_addr, int cqe)
 {
 	int err;
+	int cqe_size = dev->dev->caps.cqe_size;
 
-	*umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
+	*umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
 			    IB_ACCESS_LOCAL_WRITE, 1);
 	if (IS_ERR(*umem))
 		return PTR_ERR(*umem);
@@ -331,16 +333,23 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
 {
 	struct mlx4_cqe *cqe, *new_cqe;
 	int i;
+	int cqe_size = cq->buf.entry_size;
+	int cqe_inc = cqe_size == 64 ? 1 : 0;
 
 	i = cq->mcq.cons_index;
 	cqe = get_cqe(cq, i & cq->ibcq.cqe);
+	cqe += cqe_inc;
+
 	while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
 		new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
 					   (i + 1) & cq->resize_buf->cqe);
-		memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+		memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size);
+		new_cqe += cqe_inc;
+
 		new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
 			(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
 		cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
+		cqe += cqe_inc;
 	}
 	++cq->mcq.cons_index;
 }
@@ -438,6 +447,7 @@ err_buf:
 
 out:
 	mutex_unlock(&cq->resize_mutex);
+
 	return err;
 }
 
@@ -586,6 +596,9 @@ repoll:
 	if (!cqe)
 		return -EAGAIN;
 
+	if (cq->buf.entry_size == 64)
+		cqe++;
+
 	++cq->mcq.cons_index;
 
 	/*
@@ -807,6 +820,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
 	int nfreed = 0;
 	struct mlx4_cqe *cqe, *dest;
 	u8 owner_bit;
+	int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0;
 
 	/*
 	 * First we need to find the current producer index, so we
@@ -825,12 +839,16 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
 	 */
 	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
 		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
+		cqe += cqe_inc;
+
 		if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
 			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
 				mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
 			++nfreed;
 		} else if (nfreed) {
 			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
+			dest += cqe_inc;
+
 			owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
 			memcpy(dest, cqe, sizeof *cqe);
 			dest->owner_sr_opcode = owner_bit |
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 718ec6b2bad2..e7d81c0d1ac5 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -563,15 +563,24 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
 {
 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
 	struct mlx4_ib_ucontext *context;
+	struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
 	struct mlx4_ib_alloc_ucontext_resp resp;
 	int err;
 
 	if (!dev->ib_active)
 		return ERR_PTR(-EAGAIN);
 
-	resp.qp_tab_size = dev->dev->caps.num_qps;
-	resp.bf_reg_size = dev->dev->caps.bf_reg_size;
-	resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
+		resp_v3.qp_tab_size = dev->dev->caps.num_qps;
+		resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size;
+		resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+	} else {
+		resp.dev_caps = dev->dev->caps.userspace_caps;
+		resp.qp_tab_size = dev->dev->caps.num_qps;
+		resp.bf_reg_size = dev->dev->caps.bf_reg_size;
+		resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+		resp.cqe_size = dev->dev->caps.cqe_size;
+	}
 
 	context = kmalloc(sizeof *context, GFP_KERNEL);
 	if (!context)
@@ -586,7 +595,11 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
 	INIT_LIST_HEAD(&context->db_page_list);
 	mutex_init(&context->db_page_mutex);
 
-	err = ib_copy_to_udata(udata, &resp, sizeof resp);
+	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
+		err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
+	else
+		err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+
 	if (err) {
 		mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
 		kfree(context);
@@ -1342,7 +1355,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
 	ibdev->ib_dev.dma_device = &dev->pdev->dev;
 
-	ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+	if (dev->caps.userspace_caps)
+		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+	else
+		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
+
 	ibdev->ib_dev.uverbs_cmd_mask =
 		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
 		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index e04cbc9a54a5..dcd845bc30f0 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -90,6 +90,7 @@ struct mlx4_ib_xrcd {
 struct mlx4_ib_cq_buf {
 	struct mlx4_buf		buf;
 	struct mlx4_mtt		mtt;
+	int			entry_size;
 };
 
 struct mlx4_ib_cq_resize {
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h
index 13beedeeef9f..07e6769ef43b 100644
--- a/drivers/infiniband/hw/mlx4/user.h
+++ b/drivers/infiniband/hw/mlx4/user.h
@@ -40,7 +40,9 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define MLX4_IB_UVERBS_ABI_VERSION	3
+
+#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION	3
+#define MLX4_IB_UVERBS_ABI_VERSION		4
 
 /*
  * Make sure that all structs defined in this file remain laid out so
@@ -50,10 +52,18 @@
  * instead.
  */
 
+struct mlx4_ib_alloc_ucontext_resp_v3 {
+	__u32	qp_tab_size;
+	__u16	bf_reg_size;
+	__u16	bf_regs_per_page;
+};
+
 struct mlx4_ib_alloc_ucontext_resp {
+	__u32	dev_caps;
 	__u32	qp_tab_size;
 	__u16	bf_reg_size;
 	__u16	bf_regs_per_page;
+	__u32	cqe_size;
 };
 
 struct mlx4_ib_alloc_pd_resp {
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 5cac29e6bc1c..33cc58941a3e 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -532,6 +532,7 @@ void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *);
 int nes_destroy_cqp(struct nes_device *);
 int nes_nic_cm_xmit(struct sk_buff *, struct net_device *);
 void nes_recheck_link_status(struct work_struct *work);
+void nes_terminate_timeout(unsigned long context);
 
 /* nes_nic.c */
 struct net_device *nes_netdev_init(struct nes_device *, void __iomem *);
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index cfaacaf6bf5f..22ea67eea5dc 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -629,11 +629,9 @@ static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_a
 
 	case SEND_RDMA_READ_ZERO:
 	default:
-		if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO) {
-			printk(KERN_ERR "%s[%u]: Unsupported RDMA0 len operation=%u\n",
-				__func__, __LINE__, cm_node->send_rdma0_op);
-			WARN_ON(1);
-		}
+		if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO)
+			WARN(1, "Unsupported RDMA0 len operation=%u\n",
+			     cm_node->send_rdma0_op);
 		nes_debug(NES_DBG_CM, "Sending first rdma operation.\n");
 		wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
 			cpu_to_le32(NES_IWARP_SQ_OP_RDMAR);
@@ -671,7 +669,6 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
 	struct nes_cm_core *cm_core = cm_node->cm_core;
 	struct nes_timer_entry *new_send;
 	int ret = 0;
-	u32 was_timer_set;
 
 	new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
 	if (!new_send)
@@ -723,12 +720,8 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
 		}
 	}
 
-	was_timer_set = timer_pending(&cm_core->tcp_timer);
-
-	if (!was_timer_set) {
-		cm_core->tcp_timer.expires = new_send->timetosend;
-		add_timer(&cm_core->tcp_timer);
-	}
+	if (!timer_pending(&cm_core->tcp_timer))
+		mod_timer(&cm_core->tcp_timer, new_send->timetosend);
 
 	return ret;
 }
@@ -946,10 +939,8 @@ static void nes_cm_timer_tick(unsigned long pass)
 	}
 
 	if (settimer) {
-		if (!timer_pending(&cm_core->tcp_timer)) {
-			cm_core->tcp_timer.expires = nexttimeout;
-			add_timer(&cm_core->tcp_timer);
-		}
+		if (!timer_pending(&cm_core->tcp_timer))
+			mod_timer(&cm_core->tcp_timer, nexttimeout);
 	}
 }
 
@@ -1314,8 +1305,6 @@ static int mini_cm_del_listen(struct nes_cm_core *cm_core,
 static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
 				      struct nes_cm_node *cm_node)
 {
-	u32 was_timer_set;
-
 	cm_node->accelerated = 1;
 
 	if (cm_node->accept_pend) {
@@ -1325,11 +1314,8 @@ static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
 		BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0);
 	}
 
-	was_timer_set = timer_pending(&cm_core->tcp_timer);
-	if (!was_timer_set) {
-		cm_core->tcp_timer.expires = jiffies + NES_SHORT_TIME;
-		add_timer(&cm_core->tcp_timer);
-	}
+	if (!timer_pending(&cm_core->tcp_timer))
+		mod_timer(&cm_core->tcp_timer, (jiffies + NES_SHORT_TIME));
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index fe7965ee4096..67647e264611 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -75,7 +75,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
 static void process_critical_error(struct nes_device *nesdev);
 static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
 static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
-static void nes_terminate_timeout(unsigned long context);
 static void nes_terminate_start_timer(struct nes_qp *nesqp);
 
 #ifdef CONFIG_INFINIBAND_NES_DEBUG
@@ -3520,7 +3519,7 @@ static void nes_terminate_received(struct nes_device *nesdev,
 }
 
 /* Timeout routine in case terminate fails to complete */
-static void nes_terminate_timeout(unsigned long context)
+void nes_terminate_timeout(unsigned long context)
 {
 	struct nes_qp *nesqp = (struct nes_qp *)(unsigned long)context;
 
@@ -3530,11 +3529,7 @@ static void nes_terminate_timeout(unsigned long context)
 /* Set a timer in case hw cannot complete the terminate sequence */
 static void nes_terminate_start_timer(struct nes_qp *nesqp)
 {
-	init_timer(&nesqp->terminate_timer);
-	nesqp->terminate_timer.function = nes_terminate_timeout;
-	nesqp->terminate_timer.expires = jiffies + HZ;
-	nesqp->terminate_timer.data = (unsigned long)nesqp;
-	add_timer(&nesqp->terminate_timer);
+	mod_timer(&nesqp->terminate_timer, (jiffies + HZ));
 }
 
 /**
diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c
index 3ba7be369452..416645259b0f 100644
--- a/drivers/infiniband/hw/nes/nes_mgt.c
+++ b/drivers/infiniband/hw/nes/nes_mgt.c
@@ -210,6 +210,9 @@ static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp
 	}
 
 	while (1) {
+		if (skb_queue_empty(&nesqp->pau_list))
+			goto out;
+
 		seq = nes_get_seq(skb, ack, wnd, fin_rcvd, rst_rcvd);
 		if (seq == nextseq) {
 			if (skb->len || processacks)
@@ -218,14 +221,13 @@ static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp
 				goto out;
 		}
 
-		if (skb->next == (struct sk_buff *)&nesqp->pau_list)
-			goto out;
-
 		old_skb = skb;
 		skb = skb->next;
 		skb_unlink(old_skb, &nesqp->pau_list);
 		nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE);
 		nes_rem_ref_cm_node(nesqp->cm_node);
+		if (skb == (struct sk_buff *)&nesqp->pau_list)
+			goto out;
 	}
 	return skb;
 
@@ -245,7 +247,6 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
 	struct nes_rskb_cb *cb;
 	struct pau_fpdu_info *fpdu_info = NULL;
 	struct pau_fpdu_frag frags[MAX_FPDU_FRAGS];
-	unsigned long flags;
 	u32 fpdu_len = 0;
 	u32 tmp_len;
 	int frag_cnt = 0;
@@ -260,12 +261,10 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
 
 	*pau_fpdu_info = NULL;
 
-	spin_lock_irqsave(&nesqp->pau_lock, flags);
 	skb = nes_get_next_skb(nesdev, nesqp, NULL, nesqp->pau_rcv_nxt, &ack, &wnd, &fin_rcvd, &rst_rcvd);
-	if (!skb) {
-		spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+	if (!skb)
 		goto out;
-	}
+
 	cb = (struct nes_rskb_cb *)&skb->cb[0];
 	if (skb->len) {
 		fpdu_len = be16_to_cpu(*(__be16 *) skb->data) + MPA_FRAMING;
@@ -290,10 +289,9 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
 
 		skb = nes_get_next_skb(nesdev, nesqp, skb,
 				       nesqp->pau_rcv_nxt + frag_tot, &ack, &wnd, &fin_rcvd, &rst_rcvd);
-		if (!skb) {
-			spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+		if (!skb)
 			goto out;
-		} else if (rst_rcvd) {
+		if (rst_rcvd) {
 			/* rst received in the middle of fpdu */
 			for (; i >= 0; i--) {
 				skb_unlink(frags[i].skb, &nesqp->pau_list);
@@ -320,8 +318,6 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
 		frag_cnt = 1;
 	}
 
-	spin_unlock_irqrestore(&nesqp->pau_lock, flags);
-
 	/* Found one */
 	fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC);
 	if (fpdu_info == NULL) {
@@ -383,9 +379,8 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
 
 		if (frags[i].skb->len == 0) {
 			/* Pull skb off the list - it will be freed in the callback */
-			spin_lock_irqsave(&nesqp->pau_lock, flags);
-			skb_unlink(frags[i].skb, &nesqp->pau_list);
-			spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+			if (!skb_queue_empty(&nesqp->pau_list))
+				skb_unlink(frags[i].skb, &nesqp->pau_list);
 		} else {
 			/* Last skb still has data so update the seq */
 			iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
@@ -414,14 +409,18 @@ static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
 	struct pau_fpdu_info *fpdu_info;
 	struct nes_hw_cqp_wqe *cqp_wqe;
 	struct nes_cqp_request *cqp_request;
+	unsigned long flags;
 	u64 u64tmp;
 	u32 u32tmp;
 	int rc;
 
 	while (1) {
+		spin_lock_irqsave(&nesqp->pau_lock, flags);
 		rc = get_fpdu_info(nesdev, nesqp, &fpdu_info);
-		if (fpdu_info == NULL)
+		if (rc || (fpdu_info == NULL)) {
+			spin_unlock_irqrestore(&nesqp->pau_lock, flags);
 			return rc;
+		}
 
 		cqp_request = fpdu_info->cqp_request;
 		cqp_wqe = &cqp_request->cqp_wqe;
@@ -447,7 +446,7 @@ static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
 		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX,
 				    lower_32_bits(u64tmp));
 		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_HIGH_IDX,
-				    upper_32_bits(u64tmp >> 32));
+				    upper_32_bits(u64tmp));
 
 		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
 				    lower_32_bits(fpdu_info->frags[0].physaddr));
@@ -475,6 +474,7 @@ static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
 
 		atomic_set(&cqp_request->refcount, 1);
 		nes_post_cqp_request(nesdev, cqp_request);
+		spin_unlock_irqrestore(&nesqp->pau_lock, flags);
 	}
 
 	return 0;
@@ -649,11 +649,9 @@ static void nes_chg_qh_handler(struct nes_device *nesdev, struct nes_cqp_request
 	nesqp = qh_chg->nesqp;
 
 	/* Should we handle the bad completion */
-	if (cqp_request->major_code) {
-		printk(KERN_ERR PFX "Invalid cqp_request major_code=0x%x\n",
+	if (cqp_request->major_code)
+		WARN(1, PFX "Invalid cqp_request major_code=0x%x\n",
 		       cqp_request->major_code);
-		WARN_ON(1);
-	}
 
 	switch (nesqp->pau_state) {
 	case PAU_DEL_QH:
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 0564be757d82..9542e1644a5c 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -944,12 +944,13 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
 					  addr,
 					  perfect_filter_register_address+(mc_index * 8),
 					  mc_nic_index);
-			macaddr_high = ((u16) addr[0]) << 8;
-			macaddr_high += (u16) addr[1];
-			macaddr_low = ((u32) addr[2]) << 24;
-			macaddr_low += ((u32) addr[3]) << 16;
-			macaddr_low += ((u32) addr[4]) << 8;
-			macaddr_low += (u32) addr[5];
+			macaddr_high = ((u8) addr[0]) << 8;
+			macaddr_high += (u8) addr[1];
+			macaddr_low = ((u8) addr[2]) << 24;
+			macaddr_low += ((u8) addr[3]) << 16;
+			macaddr_low += ((u8) addr[4]) << 8;
+			macaddr_low += (u8) addr[5];
+
 			nes_write_indexed(nesdev,
 					perfect_filter_register_address+(mc_index * 8),
 					macaddr_low);
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index cd0ecb215cca..07e4fbad987a 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1404,6 +1404,9 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
 	}
 
 	nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
+	init_timer(&nesqp->terminate_timer);
+	nesqp->terminate_timer.function = nes_terminate_timeout;
+	nesqp->terminate_timer.data = (unsigned long)nesqp;
 
 	/* update the QP table */
 	nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
@@ -1413,7 +1416,6 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
 	return &nesqp->ibqp;
 }
 
-
 /**
  * nes_clean_cq
  */
@@ -2559,6 +2561,11 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		return ibmr;
 	case IWNES_MEMREG_TYPE_QP:
 	case IWNES_MEMREG_TYPE_CQ:
+		if (!region->length) {
+			nes_debug(NES_DBG_MR, "Unable to register zero length region for CQ\n");
+			ib_umem_release(region);
+			return ERR_PTR(-EINVAL);
+		}
 		nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL);
 		if (!nespbl) {
 			nes_debug(NES_DBG_MR, "Unable to allocate PBL\n");
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 922d845f76b0..d5088ce78290 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -222,27 +222,29 @@ static int srp_new_cm_id(struct srp_target_port *target)
 static int srp_create_target_ib(struct srp_target_port *target)
 {
 	struct ib_qp_init_attr *init_attr;
+	struct ib_cq *recv_cq, *send_cq;
+	struct ib_qp *qp;
 	int ret;
 
 	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
 	if (!init_attr)
 		return -ENOMEM;
 
-	target->recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
-				       srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0);
-	if (IS_ERR(target->recv_cq)) {
-		ret = PTR_ERR(target->recv_cq);
+	recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
+			       srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0);
+	if (IS_ERR(recv_cq)) {
+		ret = PTR_ERR(recv_cq);
 		goto err;
 	}
 
-	target->send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
-				       srp_send_completion, NULL, target, SRP_SQ_SIZE, 0);
-	if (IS_ERR(target->send_cq)) {
-		ret = PTR_ERR(target->send_cq);
+	send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
+			       srp_send_completion, NULL, target, SRP_SQ_SIZE, 0);
+	if (IS_ERR(send_cq)) {
+		ret = PTR_ERR(send_cq);
 		goto err_recv_cq;
 	}
 
-	ib_req_notify_cq(target->recv_cq, IB_CQ_NEXT_COMP);
+	ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
 
 	init_attr->event_handler = srp_qp_event;
 	init_attr->cap.max_send_wr = SRP_SQ_SIZE;
@@ -251,30 +253,41 @@ static int srp_create_target_ib(struct srp_target_port *target)
 	init_attr->cap.max_send_sge = 1;
 	init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
 	init_attr->qp_type = IB_QPT_RC;
-	init_attr->send_cq = target->send_cq;
-	init_attr->recv_cq = target->recv_cq;
+	init_attr->send_cq = send_cq;
+	init_attr->recv_cq = recv_cq;
 
-	target->qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr);
-	if (IS_ERR(target->qp)) {
-		ret = PTR_ERR(target->qp);
+	qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr);
+	if (IS_ERR(qp)) {
+		ret = PTR_ERR(qp);
 		goto err_send_cq;
 	}
 
-	ret = srp_init_qp(target, target->qp);
+	ret = srp_init_qp(target, qp);
 	if (ret)
 		goto err_qp;
 
+	if (target->qp)
+		ib_destroy_qp(target->qp);
+	if (target->recv_cq)
+		ib_destroy_cq(target->recv_cq);
+	if (target->send_cq)
+		ib_destroy_cq(target->send_cq);
+
+	target->qp = qp;
+	target->recv_cq = recv_cq;
+	target->send_cq = send_cq;
+
 	kfree(init_attr);
 	return 0;
 
 err_qp:
-	ib_destroy_qp(target->qp);
+	ib_destroy_qp(qp);
 
 err_send_cq:
-	ib_destroy_cq(target->send_cq);
+	ib_destroy_cq(send_cq);
 
 err_recv_cq:
-	ib_destroy_cq(target->recv_cq);
+	ib_destroy_cq(recv_cq);
 
 err:
 	kfree(init_attr);
@@ -289,6 +302,9 @@ static void srp_free_target_ib(struct srp_target_port *target)
 	ib_destroy_cq(target->send_cq);
 	ib_destroy_cq(target->recv_cq);
 
+	target->qp = NULL;
+	target->send_cq = target->recv_cq = NULL;
+
 	for (i = 0; i < SRP_RQ_SIZE; ++i)
 		srp_free_iu(target->srp_host, target->rx_ring[i]);
 	for (i = 0; i < SRP_SQ_SIZE; ++i)
@@ -428,34 +444,50 @@ static int srp_send_req(struct srp_target_port *target)
 	return status;
 }
 
-static void srp_disconnect_target(struct srp_target_port *target)
+static bool srp_queue_remove_work(struct srp_target_port *target)
 {
-	/* XXX should send SRP_I_LOGOUT request */
+	bool changed = false;
 
-	init_completion(&target->done);
-	if (ib_send_cm_dreq(target->cm_id, NULL, 0)) {
-		shost_printk(KERN_DEBUG, target->scsi_host,
-			     PFX "Sending CM DREQ failed\n");
-		return;
+	spin_lock_irq(&target->lock);
+	if (target->state != SRP_TARGET_REMOVED) {
+		target->state = SRP_TARGET_REMOVED;
+		changed = true;
 	}
-	wait_for_completion(&target->done);
+	spin_unlock_irq(&target->lock);
+
+	if (changed)
+		queue_work(system_long_wq, &target->remove_work);
+
+	return changed;
 }
 
-static bool srp_change_state(struct srp_target_port *target,
-			     enum srp_target_state old,
-			     enum srp_target_state new)
+static bool srp_change_conn_state(struct srp_target_port *target,
+				  bool connected)
 {
 	bool changed = false;
 
 	spin_lock_irq(&target->lock);
-	if (target->state == old) {
-		target->state = new;
+	if (target->connected != connected) {
+		target->connected = connected;
 		changed = true;
 	}
 	spin_unlock_irq(&target->lock);
+
 	return changed;
 }
 
+static void srp_disconnect_target(struct srp_target_port *target)
+{
+	if (srp_change_conn_state(target, false)) {
+		/* XXX should send SRP_I_LOGOUT request */
+
+		if (ib_send_cm_dreq(target->cm_id, NULL, 0)) {
+			shost_printk(KERN_DEBUG, target->scsi_host,
+				     PFX "Sending CM DREQ failed\n");
+		}
+	}
+}
+
 static void srp_free_req_data(struct srp_target_port *target)
 {
 	struct ib_device *ibdev = target->srp_host->srp_dev->dev;
@@ -489,32 +521,50 @@ static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
 		device_remove_file(&shost->shost_dev, *attr);
 }
 
-static void srp_remove_work(struct work_struct *work)
+static void srp_remove_target(struct srp_target_port *target)
 {
-	struct srp_target_port *target =
-		container_of(work, struct srp_target_port, work);
-
-	if (!srp_change_state(target, SRP_TARGET_DEAD, SRP_TARGET_REMOVED))
-		return;
-
-	spin_lock(&target->srp_host->target_lock);
-	list_del(&target->list);
-	spin_unlock(&target->srp_host->target_lock);
+	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
 
 	srp_del_scsi_host_attr(target->scsi_host);
 	srp_remove_host(target->scsi_host);
 	scsi_remove_host(target->scsi_host);
+	srp_disconnect_target(target);
 	ib_destroy_cm_id(target->cm_id);
 	srp_free_target_ib(target);
 	srp_free_req_data(target);
 	scsi_host_put(target->scsi_host);
 }
 
+static void srp_remove_work(struct work_struct *work)
+{
+	struct srp_target_port *target =
+		container_of(work, struct srp_target_port, remove_work);
+
+	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
+
+	spin_lock(&target->srp_host->target_lock);
+	list_del(&target->list);
+	spin_unlock(&target->srp_host->target_lock);
+
+	srp_remove_target(target);
+}
+
+static void srp_rport_delete(struct srp_rport *rport)
+{
+	struct srp_target_port *target = rport->lld_data;
+
+	srp_queue_remove_work(target);
+}
+
 static int srp_connect_target(struct srp_target_port *target)
 {
 	int retries = 3;
 	int ret;
 
+	WARN_ON_ONCE(target->connected);
+
+	target->qp_in_error = false;
+
 	ret = srp_lookup_path(target);
 	if (ret)
 		return ret;
@@ -534,6 +584,7 @@ static int srp_connect_target(struct srp_target_port *target)
 		 */
 		switch (target->status) {
 		case 0:
+			srp_change_conn_state(target, true);
 			return 0;
 
 		case SRP_PORT_REDIRECT:
@@ -646,13 +697,14 @@ static void srp_reset_req(struct srp_target_port *target, struct srp_request *re
 
 static int srp_reconnect_target(struct srp_target_port *target)
 {
-	struct ib_qp_attr qp_attr;
-	struct ib_wc wc;
+	struct Scsi_Host *shost = target->scsi_host;
 	int i, ret;
 
-	if (!srp_change_state(target, SRP_TARGET_LIVE, SRP_TARGET_CONNECTING))
+	if (target->state != SRP_TARGET_LIVE)
 		return -EAGAIN;
 
+	scsi_target_block(&shost->shost_gendev);
+
 	srp_disconnect_target(target);
 	/*
 	 * Now get a new local CM ID so that we avoid confusing the
@@ -660,21 +712,11 @@ static int srp_reconnect_target(struct srp_target_port *target)
 	 */
 	ret = srp_new_cm_id(target);
 	if (ret)
-		goto err;
+		goto unblock;
 
-	qp_attr.qp_state = IB_QPS_RESET;
-	ret = ib_modify_qp(target->qp, &qp_attr, IB_QP_STATE);
-	if (ret)
-		goto err;
-
-	ret = srp_init_qp(target, target->qp);
+	ret = srp_create_target_ib(target);
 	if (ret)
-		goto err;
-
-	while (ib_poll_cq(target->recv_cq, 1, &wc) > 0)
-		; /* nothing */
-	while (ib_poll_cq(target->send_cq, 1, &wc) > 0)
-		; /* nothing */
+		goto unblock;
 
 	for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
 		struct srp_request *req = &target->req_ring[i];
@@ -686,13 +728,16 @@ static int srp_reconnect_target(struct srp_target_port *target)
 	for (i = 0; i < SRP_SQ_SIZE; ++i)
 		list_add(&target->tx_ring[i]->list, &target->free_tx);
 
-	target->qp_in_error = 0;
 	ret = srp_connect_target(target);
+
+unblock:
+	scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING :
+			    SDEV_TRANSPORT_OFFLINE);
+
 	if (ret)
 		goto err;
 
-	if (!srp_change_state(target, SRP_TARGET_CONNECTING, SRP_TARGET_LIVE))
-		ret = -EAGAIN;
+	shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n");
 
 	return ret;
 
@@ -705,17 +750,8 @@ err:
 	 * However, we have to defer the real removal because we
	 * are in the context of the SCSI error handler now, which
 	 * will deadlock if we call scsi_remove_host().
-	 *
-	 * Schedule our work inside the lock to avoid a race with
-	 * the flush_scheduled_work() in srp_remove_one().
 	 */
-	spin_lock_irq(&target->lock);
-	if (target->state == SRP_TARGET_CONNECTING) {
-		target->state = SRP_TARGET_DEAD;
-		INIT_WORK(&target->work, srp_remove_work);
-		queue_work(ib_wq, &target->work);
-	}
-	spin_unlock_irq(&target->lock);
+	srp_queue_remove_work(target);
 
 	return ret;
 }
@@ -1262,6 +1298,19 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
 			     PFX "Recv failed with error code %d\n", res);
 }
 
+static void srp_handle_qp_err(enum ib_wc_status wc_status,
+			      enum ib_wc_opcode wc_opcode,
+			      struct srp_target_port *target)
+{
+	if (target->connected && !target->qp_in_error) {
+		shost_printk(KERN_ERR, target->scsi_host,
+			     PFX "failed %s status %d\n",
+			     wc_opcode & IB_WC_RECV ? "receive" : "send",
+			     wc_status);
+	}
+	target->qp_in_error = true;
+}
1313
1265static void srp_recv_completion(struct ib_cq *cq, void *target_ptr) 1314static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
1266{ 1315{
1267 struct srp_target_port *target = target_ptr; 1316 struct srp_target_port *target = target_ptr;
@@ -1269,15 +1318,11 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
1269 1318
1270 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 1319 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1271 while (ib_poll_cq(cq, 1, &wc) > 0) { 1320 while (ib_poll_cq(cq, 1, &wc) > 0) {
1272 if (wc.status) { 1321 if (likely(wc.status == IB_WC_SUCCESS)) {
1273 shost_printk(KERN_ERR, target->scsi_host, 1322 srp_handle_recv(target, &wc);
1274 PFX "failed receive status %d\n", 1323 } else {
1275 wc.status); 1324 srp_handle_qp_err(wc.status, wc.opcode, target);
1276 target->qp_in_error = 1;
1277 break;
1278 } 1325 }
1279
1280 srp_handle_recv(target, &wc);
1281 } 1326 }
1282} 1327}
1283 1328
@@ -1288,16 +1333,12 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
1288 struct srp_iu *iu; 1333 struct srp_iu *iu;
1289 1334
1290 while (ib_poll_cq(cq, 1, &wc) > 0) { 1335 while (ib_poll_cq(cq, 1, &wc) > 0) {
1291 if (wc.status) { 1336 if (likely(wc.status == IB_WC_SUCCESS)) {
1292 shost_printk(KERN_ERR, target->scsi_host, 1337 iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
1293 PFX "failed send status %d\n", 1338 list_add(&iu->list, &target->free_tx);
1294 wc.status); 1339 } else {
1295 target->qp_in_error = 1; 1340 srp_handle_qp_err(wc.status, wc.opcode, target);
1296 break;
1297 } 1341 }
1298
1299 iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
1300 list_add(&iu->list, &target->free_tx);
1301 } 1342 }
1302} 1343}
1303 1344
@@ -1311,16 +1352,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
1311 unsigned long flags; 1352 unsigned long flags;
1312 int len; 1353 int len;
1313 1354
1314 if (target->state == SRP_TARGET_CONNECTING)
1315 goto err;
1316
1317 if (target->state == SRP_TARGET_DEAD ||
1318 target->state == SRP_TARGET_REMOVED) {
1319 scmnd->result = DID_BAD_TARGET << 16;
1320 scmnd->scsi_done(scmnd);
1321 return 0;
1322 }
1323
1324 spin_lock_irqsave(&target->lock, flags); 1355 spin_lock_irqsave(&target->lock, flags);
1325 iu = __srp_get_tx_iu(target, SRP_IU_CMD); 1356 iu = __srp_get_tx_iu(target, SRP_IU_CMD);
1326 if (!iu) 1357 if (!iu)
@@ -1377,7 +1408,6 @@ err_iu:
1377err_unlock: 1408err_unlock:
1378 spin_unlock_irqrestore(&target->lock, flags); 1409 spin_unlock_irqrestore(&target->lock, flags);
1379 1410
1380err:
1381 return SCSI_MLQUEUE_HOST_BUSY; 1411 return SCSI_MLQUEUE_HOST_BUSY;
1382} 1412}
1383 1413
@@ -1419,6 +1449,33 @@ err:
1419 return -ENOMEM; 1449 return -ENOMEM;
1420} 1450}
1421 1451
1452static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
1453{
1454 uint64_t T_tr_ns, max_compl_time_ms;
1455 uint32_t rq_tmo_jiffies;
1456
1457 /*
1458 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
1459 * table 91), both the QP timeout and the retry count have to be set
1460 * for RC QP's during the RTR to RTS transition.
1461 */
1462 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
1463 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
1464
1465 /*
1466 * Set target->rq_tmo_jiffies to one second more than the largest time
1467 * it can take before an error completion is generated. See also
1468 * C9-140..142 in the IBTA spec for more information about how to
1469 * convert the QP Local ACK Timeout value to nanoseconds.
1470 */
1471 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
1472 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
1473 do_div(max_compl_time_ms, NSEC_PER_MSEC);
1474 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
1475
1476 return rq_tmo_jiffies;
1477}
1478
1422static void srp_cm_rep_handler(struct ib_cm_id *cm_id, 1479static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
1423 struct srp_login_rsp *lrsp, 1480 struct srp_login_rsp *lrsp,
1424 struct srp_target_port *target) 1481 struct srp_target_port *target)
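
As a rough illustration of the srp_compute_rq_tmo() arithmetic above (not part of the patch; a stand-alone user-space sketch with the qp_attr fields replaced by plain integers), plugging in a local ACK timeout of 19 and a retry count of 7 gives a worst-case completion time of about 60 s and therefore a request timeout of roughly 61 s; srp_slave_configure() further down then uses this value (but never less than 30 seconds) as the block-layer request timeout for disks.

	#include <stdio.h>
	#include <stdint.h>

	/* User-space mirror of the kernel computation: T_tr = 4.096 us * 2^timeout
	 * (IBTA C9-140..142), worst case = retry_cnt * 4 * T_tr, plus one second
	 * of slack.  The kernel converts the result to jiffies; here we stop at ms.
	 */
	static uint64_t rq_tmo_ms(unsigned int timeout, unsigned int retry_cnt)
	{
		uint64_t T_tr_ns = 4096ULL << timeout;
		uint64_t max_compl_time_ms = retry_cnt * 4 * T_tr_ns / 1000000;

		return max_compl_time_ms + 1000;
	}

	int main(void)
	{
		/* e.g. timeout = 19, retry_cnt = 7 -> prints "61129 ms" (~61 s) */
		printf("%llu ms\n", (unsigned long long)rq_tmo_ms(19, 7));
		return 0;
	}
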
@@ -1478,6 +1535,8 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
1478 if (ret) 1535 if (ret)
1479 goto error_free; 1536 goto error_free;
1480 1537
1538 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
1539
1481 ret = ib_modify_qp(target->qp, qp_attr, attr_mask); 1540 ret = ib_modify_qp(target->qp, qp_attr, attr_mask);
1482 if (ret) 1541 if (ret)
1483 goto error_free; 1542 goto error_free;
@@ -1599,6 +1658,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1599 case IB_CM_DREQ_RECEIVED: 1658 case IB_CM_DREQ_RECEIVED:
1600 shost_printk(KERN_WARNING, target->scsi_host, 1659 shost_printk(KERN_WARNING, target->scsi_host,
1601 PFX "DREQ received - connection closed\n"); 1660 PFX "DREQ received - connection closed\n");
1661 srp_change_conn_state(target, false);
1602 if (ib_send_cm_drep(cm_id, NULL, 0)) 1662 if (ib_send_cm_drep(cm_id, NULL, 0))
1603 shost_printk(KERN_ERR, target->scsi_host, 1663 shost_printk(KERN_ERR, target->scsi_host,
1604 PFX "Sending CM DREP failed\n"); 1664 PFX "Sending CM DREP failed\n");
@@ -1608,7 +1668,6 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1608 shost_printk(KERN_ERR, target->scsi_host, 1668 shost_printk(KERN_ERR, target->scsi_host,
1609 PFX "connection closed\n"); 1669 PFX "connection closed\n");
1610 1670
1611 comp = 1;
1612 target->status = 0; 1671 target->status = 0;
1613 break; 1672 break;
1614 1673
@@ -1636,10 +1695,6 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
1636 struct srp_iu *iu; 1695 struct srp_iu *iu;
1637 struct srp_tsk_mgmt *tsk_mgmt; 1696 struct srp_tsk_mgmt *tsk_mgmt;
1638 1697
1639 if (target->state == SRP_TARGET_DEAD ||
1640 target->state == SRP_TARGET_REMOVED)
1641 return -1;
1642
1643 init_completion(&target->tsk_mgmt_done); 1698 init_completion(&target->tsk_mgmt_done);
1644 1699
1645 spin_lock_irq(&target->lock); 1700 spin_lock_irq(&target->lock);
@@ -1729,6 +1784,21 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
1729 return ret; 1784 return ret;
1730} 1785}
1731 1786
1787static int srp_slave_configure(struct scsi_device *sdev)
1788{
1789 struct Scsi_Host *shost = sdev->host;
1790 struct srp_target_port *target = host_to_target(shost);
1791 struct request_queue *q = sdev->request_queue;
1792 unsigned long timeout;
1793
1794 if (sdev->type == TYPE_DISK) {
1795 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
1796 blk_queue_rq_timeout(q, timeout);
1797 }
1798
1799 return 0;
1800}
1801
1732static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, 1802static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
1733 char *buf) 1803 char *buf)
1734{ 1804{
@@ -1861,6 +1931,7 @@ static struct scsi_host_template srp_template = {
1861 .module = THIS_MODULE, 1931 .module = THIS_MODULE,
1862 .name = "InfiniBand SRP initiator", 1932 .name = "InfiniBand SRP initiator",
1863 .proc_name = DRV_NAME, 1933 .proc_name = DRV_NAME,
1934 .slave_configure = srp_slave_configure,
1864 .info = srp_target_info, 1935 .info = srp_target_info,
1865 .queuecommand = srp_queuecommand, 1936 .queuecommand = srp_queuecommand,
1866 .eh_abort_handler = srp_abort, 1937 .eh_abort_handler = srp_abort,
@@ -1894,11 +1965,14 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
1894 return PTR_ERR(rport); 1965 return PTR_ERR(rport);
1895 } 1966 }
1896 1967
1968 rport->lld_data = target;
1969
1897 spin_lock(&host->target_lock); 1970 spin_lock(&host->target_lock);
1898 list_add_tail(&target->list, &host->target_list); 1971 list_add_tail(&target->list, &host->target_list);
1899 spin_unlock(&host->target_lock); 1972 spin_unlock(&host->target_lock);
1900 1973
1901 target->state = SRP_TARGET_LIVE; 1974 target->state = SRP_TARGET_LIVE;
1975 target->connected = false;
1902 1976
1903 scsi_scan_target(&target->scsi_host->shost_gendev, 1977 scsi_scan_target(&target->scsi_host->shost_gendev,
1904 0, target->scsi_id, SCAN_WILD_CARD, 0); 1978 0, target->scsi_id, SCAN_WILD_CARD, 0);
@@ -2188,6 +2262,7 @@ static ssize_t srp_create_target(struct device *dev,
2188 sizeof (struct srp_indirect_buf) + 2262 sizeof (struct srp_indirect_buf) +
2189 target->cmd_sg_cnt * sizeof (struct srp_direct_buf); 2263 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
2190 2264
2265 INIT_WORK(&target->remove_work, srp_remove_work);
2191 spin_lock_init(&target->lock); 2266 spin_lock_init(&target->lock);
2192 INIT_LIST_HEAD(&target->free_tx); 2267 INIT_LIST_HEAD(&target->free_tx);
2193 INIT_LIST_HEAD(&target->free_reqs); 2268 INIT_LIST_HEAD(&target->free_reqs);
@@ -2232,7 +2307,6 @@ static ssize_t srp_create_target(struct device *dev,
2232 if (ret) 2307 if (ret)
2233 goto err_free_ib; 2308 goto err_free_ib;
2234 2309
2235 target->qp_in_error = 0;
2236 ret = srp_connect_target(target); 2310 ret = srp_connect_target(target);
2237 if (ret) { 2311 if (ret) {
2238 shost_printk(KERN_ERR, target->scsi_host, 2312 shost_printk(KERN_ERR, target->scsi_host,
@@ -2422,8 +2496,7 @@ static void srp_remove_one(struct ib_device *device)
2422{ 2496{
2423 struct srp_device *srp_dev; 2497 struct srp_device *srp_dev;
2424 struct srp_host *host, *tmp_host; 2498 struct srp_host *host, *tmp_host;
2425 LIST_HEAD(target_list); 2499 struct srp_target_port *target;
2426 struct srp_target_port *target, *tmp_target;
2427 2500
2428 srp_dev = ib_get_client_data(device, &srp_client); 2501 srp_dev = ib_get_client_data(device, &srp_client);
2429 2502
@@ -2436,35 +2509,17 @@ static void srp_remove_one(struct ib_device *device)
2436 wait_for_completion(&host->released); 2509 wait_for_completion(&host->released);
2437 2510
2438 /* 2511 /*
2439 * Mark all target ports as removed, so we stop queueing 2512 * Remove all target ports.
2440 * commands and don't try to reconnect.
2441 */ 2513 */
2442 spin_lock(&host->target_lock); 2514 spin_lock(&host->target_lock);
2443 list_for_each_entry(target, &host->target_list, list) { 2515 list_for_each_entry(target, &host->target_list, list)
2444 spin_lock_irq(&target->lock); 2516 srp_queue_remove_work(target);
2445 target->state = SRP_TARGET_REMOVED;
2446 spin_unlock_irq(&target->lock);
2447 }
2448 spin_unlock(&host->target_lock); 2517 spin_unlock(&host->target_lock);
2449 2518
2450 /* 2519 /*
2451 * Wait for any reconnection tasks that may have 2520 * Wait for target port removal tasks.
2452 * started before we marked our target ports as
2453 * removed, and any target port removal tasks.
2454 */ 2521 */
2455 flush_workqueue(ib_wq); 2522 flush_workqueue(system_long_wq);
2456
2457 list_for_each_entry_safe(target, tmp_target,
2458 &host->target_list, list) {
2459 srp_del_scsi_host_attr(target->scsi_host);
2460 srp_remove_host(target->scsi_host);
2461 scsi_remove_host(target->scsi_host);
2462 srp_disconnect_target(target);
2463 ib_destroy_cm_id(target->cm_id);
2464 srp_free_target_ib(target);
2465 srp_free_req_data(target);
2466 scsi_host_put(target->scsi_host);
2467 }
2468 2523
2469 kfree(host); 2524 kfree(host);
2470 } 2525 }
@@ -2478,6 +2533,7 @@ static void srp_remove_one(struct ib_device *device)
2478} 2533}
2479 2534
2480static struct srp_function_template ib_srp_transport_functions = { 2535static struct srp_function_template ib_srp_transport_functions = {
2536 .rport_delete = srp_rport_delete,
2481}; 2537};
2482 2538
2483static int __init srp_init_module(void) 2539static int __init srp_init_module(void)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 020caf0c3789..de2d0b3c0bfe 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -80,9 +80,7 @@ enum {
80 80
81enum srp_target_state { 81enum srp_target_state {
82 SRP_TARGET_LIVE, 82 SRP_TARGET_LIVE,
83 SRP_TARGET_CONNECTING, 83 SRP_TARGET_REMOVED,
84 SRP_TARGET_DEAD,
85 SRP_TARGET_REMOVED
86}; 84};
87 85
88enum srp_iu_type { 86enum srp_iu_type {
@@ -163,6 +161,9 @@ struct srp_target_port {
163 struct ib_sa_query *path_query; 161 struct ib_sa_query *path_query;
164 int path_query_id; 162 int path_query_id;
165 163
164 u32 rq_tmo_jiffies;
165 bool connected;
166
166 struct ib_cm_id *cm_id; 167 struct ib_cm_id *cm_id;
167 168
168 int max_ti_iu_len; 169 int max_ti_iu_len;
@@ -173,12 +174,12 @@ struct srp_target_port {
173 struct srp_iu *rx_ring[SRP_RQ_SIZE]; 174 struct srp_iu *rx_ring[SRP_RQ_SIZE];
174 struct srp_request req_ring[SRP_CMD_SQ_SIZE]; 175 struct srp_request req_ring[SRP_CMD_SQ_SIZE];
175 176
176 struct work_struct work; 177 struct work_struct remove_work;
177 178
178 struct list_head list; 179 struct list_head list;
179 struct completion done; 180 struct completion done;
180 int status; 181 int status;
181 int qp_in_error; 182 bool qp_in_error;
182 183
183 struct completion tsk_mgmt_done; 184 struct completion tsk_mgmt_done;
184 u8 tsk_mgmt_status; 185 u8 tsk_mgmt_status;
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 3d1899ff1076..fdc5f23d8e9f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1498,6 +1498,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
1498 u32 reply; 1498 u32 reply;
1499 u8 is_going_down = 0; 1499 u8 is_going_down = 0;
1500 int i; 1500 int i;
1501 unsigned long flags;
1501 1502
1502 slave_state[slave].comm_toggle ^= 1; 1503 slave_state[slave].comm_toggle ^= 1;
1503 reply = (u32) slave_state[slave].comm_toggle << 31; 1504 reply = (u32) slave_state[slave].comm_toggle << 31;
@@ -1576,12 +1577,12 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
1576 mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave); 1577 mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave);
1577 goto reset_slave; 1578 goto reset_slave;
1578 } 1579 }
1579 spin_lock(&priv->mfunc.master.slave_state_lock); 1580 spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
1580 if (!slave_state[slave].is_slave_going_down) 1581 if (!slave_state[slave].is_slave_going_down)
1581 slave_state[slave].last_cmd = cmd; 1582 slave_state[slave].last_cmd = cmd;
1582 else 1583 else
1583 is_going_down = 1; 1584 is_going_down = 1;
1584 spin_unlock(&priv->mfunc.master.slave_state_lock); 1585 spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
1585 if (is_going_down) { 1586 if (is_going_down) {
1586 mlx4_warn(dev, "Slave is going down aborting command(%d)" 1587 mlx4_warn(dev, "Slave is going down aborting command(%d)"
1587 " executing from slave:%d\n", 1588 " executing from slave:%d\n",
@@ -1597,10 +1598,10 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
1597reset_slave: 1598reset_slave:
1598 /* cleanup any slave resources */ 1599 /* cleanup any slave resources */
1599 mlx4_delete_all_resources_for_slave(dev, slave); 1600 mlx4_delete_all_resources_for_slave(dev, slave);
1600 spin_lock(&priv->mfunc.master.slave_state_lock); 1601 spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
1601 if (!slave_state[slave].is_slave_going_down) 1602 if (!slave_state[slave].is_slave_going_down)
1602 slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET; 1603 slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET;
1603 spin_unlock(&priv->mfunc.master.slave_state_lock); 1604 spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
1604 /*with slave in the middle of flr, no need to clean resources again.*/ 1605 /*with slave in the middle of flr, no need to clean resources again.*/
1605inform_slave_state: 1606inform_slave_state:
1606 memset(&slave_state[slave].event_eq, 0, 1607 memset(&slave_state[slave].event_eq, 0,
@@ -1755,7 +1756,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
1755 spin_lock_init(&s_state->lock); 1756 spin_lock_init(&s_state->lock);
1756 } 1757 }
1757 1758
1758 memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe)); 1759 memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size);
1759 priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD; 1760 priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD;
1760 INIT_WORK(&priv->mfunc.master.comm_work, 1761 INIT_WORK(&priv->mfunc.master.comm_work,
1761 mlx4_master_comm_channel); 1762 mlx4_master_comm_channel);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index aa9c2f6cf3c0..b8d0854a7ad1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -51,7 +51,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
51 int err; 51 int err;
52 52
53 cq->size = entries; 53 cq->size = entries;
54 cq->buf_size = cq->size * sizeof(struct mlx4_cqe); 54 cq->buf_size = cq->size * mdev->dev->caps.cqe_size;
55 55
56 cq->ring = ring; 56 cq->ring = ring;
57 cq->is_tx = mode; 57 cq->is_tx = mode;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 7d1287f81a31..75a3f467bb5b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1604,6 +1604,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
1604 goto out; 1604 goto out;
1605 } 1605 }
1606 priv->rx_ring_num = prof->rx_ring_num; 1606 priv->rx_ring_num = prof->rx_ring_num;
1607 priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0;
1607 priv->mac_index = -1; 1608 priv->mac_index = -1;
1608 priv->msg_enable = MLX4_EN_MSG_LEVEL; 1609 priv->msg_enable = MLX4_EN_MSG_LEVEL;
1609 spin_lock_init(&priv->stats_lock); 1610 spin_lock_init(&priv->stats_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index f76c9671f362..fed26d867f4e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -566,6 +566,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
566 struct ethhdr *ethh; 566 struct ethhdr *ethh;
567 dma_addr_t dma; 567 dma_addr_t dma;
568 u64 s_mac; 568 u64 s_mac;
569 int factor = priv->cqe_factor;
569 570
570 if (!priv->port_up) 571 if (!priv->port_up)
571 return 0; 572 return 0;
@@ -574,7 +575,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
574 * descriptor offset can be deduced from the CQE index instead of 575 * descriptor offset can be deduced from the CQE index instead of
575 * reading 'cqe->index' */ 576 * reading 'cqe->index' */
576 index = cq->mcq.cons_index & ring->size_mask; 577 index = cq->mcq.cons_index & ring->size_mask;
577 cqe = &cq->buf[index]; 578 cqe = &cq->buf[(index << factor) + factor];
578 579
579 /* Process all completed CQEs */ 580 /* Process all completed CQEs */
580 while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, 581 while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
@@ -709,7 +710,7 @@ next:
709 710
710 ++cq->mcq.cons_index; 711 ++cq->mcq.cons_index;
711 index = (cq->mcq.cons_index) & ring->size_mask; 712 index = (cq->mcq.cons_index) & ring->size_mask;
712 cqe = &cq->buf[index]; 713 cqe = &cq->buf[(index << factor) + factor];
713 if (++polled == budget) 714 if (++polled == budget)
714 goto out; 715 goto out;
715 } 716 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 1f571d009155..2b799f4f1c37 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -315,12 +315,13 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
315 struct mlx4_cqe *buf = cq->buf; 315 struct mlx4_cqe *buf = cq->buf;
316 u32 packets = 0; 316 u32 packets = 0;
317 u32 bytes = 0; 317 u32 bytes = 0;
318 int factor = priv->cqe_factor;
318 319
319 if (!priv->port_up) 320 if (!priv->port_up)
320 return; 321 return;
321 322
322 index = cons_index & size_mask; 323 index = cons_index & size_mask;
323 cqe = &buf[index]; 324 cqe = &buf[(index << factor) + factor];
324 ring_index = ring->cons & size_mask; 325 ring_index = ring->cons & size_mask;
325 326
326 /* Process all completed CQEs */ 327 /* Process all completed CQEs */
@@ -349,7 +350,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
349 350
350 ++cons_index; 351 ++cons_index;
351 index = cons_index & size_mask; 352 index = cons_index & size_mask;
352 cqe = &buf[index]; 353 cqe = &buf[(index << factor) + factor];
353 } 354 }
354 355
355 356
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index c48cf6f6529c..251ae2f93116 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -101,15 +101,21 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not)
101 mb(); 101 mb();
102} 102}
103 103
104static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry) 104static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor)
105{ 105{
106 unsigned long off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE; 106 /* (entry & (eq->nent - 1)) gives us a cyclic array */
107 return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE; 107 unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor);
108 /* CX3 is capable of extending the EQE from 32 to 64 bytes.
109 * When this feature is enabled, the first (in the lower addresses)
110 * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes
111 * contain the legacy EQE information.
112 */
113 return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;
108} 114}
109 115
110static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq) 116static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor)
111{ 117{
112 struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index); 118 struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor);
113 return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe; 119 return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
114} 120}
115 121
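
The stride logic introduced here for 64-byte entries appears twice in this series: get_eqe() above adds MLX4_EQ_ENTRY_SIZE to the byte offset when eqe_factor is set, and the mlx4_en completion handlers index CQEs as &buf[(index << factor) + factor]. Both amount to the same thing: with the 64-byte format the entries are twice as far apart and the legacy 32-byte payload sits in the upper half of each entry. Below is a small stand-alone sketch of that offset arithmetic; the ENTRY_SIZE constant and entry_offset() helper are illustrative, not from the driver, and the page_list page-crossing handled by the real code is ignored.

	#include <stdio.h>

	#define ENTRY_SIZE 32	/* legacy 32-byte EQE/CQE */

	/* Byte offset of the usable part of entry "index": factor 0 packs the
	 * entries 32 bytes apart; factor 1 spaces them 64 bytes apart and skips
	 * the reserved first half of each entry.
	 */
	static unsigned long entry_offset(unsigned int index, unsigned int factor)
	{
		return (unsigned long)index * (ENTRY_SIZE << factor) +
		       (factor ? ENTRY_SIZE : 0);
	}

	int main(void)
	{
		printf("index 3, 32B entries: %lu\n", entry_offset(3, 0)); /* 96 */
		printf("index 3, 64B entries: %lu\n", entry_offset(3, 1)); /* 224 */
		return 0;
	}

Indexing an array of 32-byte CQE structs as buf[(3 << 1) + 1] lands on the same 224-byte offset, which is why the en_rx/en_tx hunks above only needed the factor tweak rather than a new CQE layout.
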
@@ -177,7 +183,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
177 return; 183 return;
178 } 184 }
179 185
180 memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1); 186 memcpy(s_eqe, eqe, dev->caps.eqe_size - 1);
181 s_eqe->slave_id = slave; 187 s_eqe->slave_id = slave;
182 /* ensure all information is written before setting the ownersip bit */ 188 /* ensure all information is written before setting the ownersip bit */
183 wmb(); 189 wmb();
@@ -401,6 +407,7 @@ void mlx4_master_handle_slave_flr(struct work_struct *work)
401 struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; 407 struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state;
402 int i; 408 int i;
403 int err; 409 int err;
410 unsigned long flags;
404 411
405 mlx4_dbg(dev, "mlx4_handle_slave_flr\n"); 412 mlx4_dbg(dev, "mlx4_handle_slave_flr\n");
406 413
@@ -412,10 +419,10 @@ void mlx4_master_handle_slave_flr(struct work_struct *work)
412 419
413 mlx4_delete_all_resources_for_slave(dev, i); 420 mlx4_delete_all_resources_for_slave(dev, i);
414 /*return the slave to running mode*/ 421 /*return the slave to running mode*/
415 spin_lock(&priv->mfunc.master.slave_state_lock); 422 spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
416 slave_state[i].last_cmd = MLX4_COMM_CMD_RESET; 423 slave_state[i].last_cmd = MLX4_COMM_CMD_RESET;
417 slave_state[i].is_slave_going_down = 0; 424 slave_state[i].is_slave_going_down = 0;
418 spin_unlock(&priv->mfunc.master.slave_state_lock); 425 spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
419 /*notify the FW:*/ 426 /*notify the FW:*/
420 err = mlx4_cmd(dev, 0, i, 0, MLX4_CMD_INFORM_FLR_DONE, 427 err = mlx4_cmd(dev, 0, i, 0, MLX4_CMD_INFORM_FLR_DONE,
421 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); 428 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
@@ -440,8 +447,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
440 u8 update_slave_state; 447 u8 update_slave_state;
441 int i; 448 int i;
442 enum slave_port_gen_event gen_event; 449 enum slave_port_gen_event gen_event;
450 unsigned long flags;
443 451
444 while ((eqe = next_eqe_sw(eq))) { 452 while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) {
445 /* 453 /*
446 * Make sure we read EQ entry contents after we've 454 * Make sure we read EQ entry contents after we've
447 * checked the ownership bit. 455 * checked the ownership bit.
@@ -647,13 +655,13 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
647 } else 655 } else
648 update_slave_state = 1; 656 update_slave_state = 1;
649 657
650 spin_lock(&priv->mfunc.master.slave_state_lock); 658 spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
651 if (update_slave_state) { 659 if (update_slave_state) {
652 priv->mfunc.master.slave_state[flr_slave].active = false; 660 priv->mfunc.master.slave_state[flr_slave].active = false;
653 priv->mfunc.master.slave_state[flr_slave].last_cmd = MLX4_COMM_CMD_FLR; 661 priv->mfunc.master.slave_state[flr_slave].last_cmd = MLX4_COMM_CMD_FLR;
654 priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1; 662 priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1;
655 } 663 }
656 spin_unlock(&priv->mfunc.master.slave_state_lock); 664 spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
657 queue_work(priv->mfunc.master.comm_wq, 665 queue_work(priv->mfunc.master.comm_wq,
658 &priv->mfunc.master.slave_flr_event_work); 666 &priv->mfunc.master.slave_flr_event_work);
659 break; 667 break;
@@ -864,7 +872,8 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
864 872
865 eq->dev = dev; 873 eq->dev = dev;
866 eq->nent = roundup_pow_of_two(max(nent, 2)); 874 eq->nent = roundup_pow_of_two(max(nent, 2));
867 npages = PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE; 875 /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
876 npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE;
868 877
869 eq->page_list = kmalloc(npages * sizeof *eq->page_list, 878 eq->page_list = kmalloc(npages * sizeof *eq->page_list,
870 GFP_KERNEL); 879 GFP_KERNEL);
@@ -966,8 +975,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
966 struct mlx4_priv *priv = mlx4_priv(dev); 975 struct mlx4_priv *priv = mlx4_priv(dev);
967 struct mlx4_cmd_mailbox *mailbox; 976 struct mlx4_cmd_mailbox *mailbox;
968 int err; 977 int err;
969 int npages = PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE;
970 int i; 978 int i;
979 /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
980 int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE;
971 981
972 mailbox = mlx4_alloc_cmd_mailbox(dev); 982 mailbox = mlx4_alloc_cmd_mailbox(dev);
973 if (IS_ERR(mailbox)) 983 if (IS_ERR(mailbox))
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 4f30b99324cf..9a9de51ecc91 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -110,6 +110,8 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags)
110 [42] = "Multicast VEP steering support", 110 [42] = "Multicast VEP steering support",
111 [48] = "Counters support", 111 [48] = "Counters support",
112 [59] = "Port management change event support", 112 [59] = "Port management change event support",
113 [61] = "64 byte EQE support",
114 [62] = "64 byte CQE support",
113 }; 115 };
114 int i; 116 int i;
115 117
@@ -235,7 +237,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
235 field = dev->caps.num_ports; 237 field = dev->caps.num_ports;
236 MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); 238 MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
237 239
238 size = 0; /* no PF behaviour is set for now */ 240 size = dev->caps.function_caps; /* set PF behaviours */
239 MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET); 241 MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
240 242
241 field = 0; /* protected FMR support not available as yet */ 243 field = 0; /* protected FMR support not available as yet */
@@ -1237,6 +1239,24 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
1237 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) 1239 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)
1238 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4); 1240 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4);
1239 1241
1242 /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
1243 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) {
1244 *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29);
1245 dev->caps.eqe_size = 64;
1246 dev->caps.eqe_factor = 1;
1247 } else {
1248 dev->caps.eqe_size = 32;
1249 dev->caps.eqe_factor = 0;
1250 }
1251
1252 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) {
1253 *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
1254 dev->caps.cqe_size = 64;
1255 dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
1256 } else {
1257 dev->caps.cqe_size = 32;
1258 }
1259
1240 /* QPC/EEC/CQC/EQC/RDMARC attributes */ 1260 /* QPC/EEC/CQC/EQC/RDMARC attributes */
1241 1261
1242 MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET); 1262 MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET);
@@ -1319,6 +1339,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
1319 struct mlx4_cmd_mailbox *mailbox; 1339 struct mlx4_cmd_mailbox *mailbox;
1320 __be32 *outbox; 1340 __be32 *outbox;
1321 int err; 1341 int err;
1342 u8 byte_field;
1322 1343
1323#define QUERY_HCA_GLOBAL_CAPS_OFFSET 0x04 1344#define QUERY_HCA_GLOBAL_CAPS_OFFSET 0x04
1324 1345
@@ -1370,6 +1391,13 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
1370 INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); 1391 INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
1371 } 1392 }
1372 1393
1394 /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
1395 MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS);
1396 if (byte_field & 0x20) /* 64-bytes eqe enabled */
1397 param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
1398 if (byte_field & 0x40) /* 64-bytes cqe enabled */
1399 param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
1400
1373 /* TPT attributes */ 1401 /* TPT attributes */
1374 1402
1375 MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET); 1403 MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 85abe9c11a22..2c2e7ade2a34 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -172,6 +172,7 @@ struct mlx4_init_hca_param {
172 u8 log_uar_sz; 172 u8 log_uar_sz;
173 u8 uar_page_sz; /* log pg sz in 4k chunks */ 173 u8 uar_page_sz; /* log pg sz in 4k chunks */
174 u8 fs_hash_enable_bits; 174 u8 fs_hash_enable_bits;
175 u64 dev_cap_enabled;
175}; 176};
176 177
177struct mlx4_init_ib_param { 178struct mlx4_init_ib_param {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 200cc0ec8052..b2acbe7706a3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -95,8 +95,14 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
95 " Not in use with device managed" 95 " Not in use with device managed"
96 " flow steering"); 96 " flow steering");
97 97
98static bool enable_64b_cqe_eqe;
99module_param(enable_64b_cqe_eqe, bool, 0444);
100MODULE_PARM_DESC(enable_64b_cqe_eqe,
101		 "Enable 64 byte CQEs/EQEs when the FW supports this");

102
98#define HCA_GLOBAL_CAP_MASK 0 103#define HCA_GLOBAL_CAP_MASK 0
99#define PF_CONTEXT_BEHAVIOUR_MASK 0 104
105#define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE
100 106
101static char mlx4_version[] = 107static char mlx4_version[] =
102 DRV_NAME ": Mellanox ConnectX core driver v" 108 DRV_NAME ": Mellanox ConnectX core driver v"
@@ -386,6 +392,21 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
386 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; 392 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
387 393
388 dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0; 394 dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
395
396 if (!enable_64b_cqe_eqe) {
397 if (dev_cap->flags &
398 (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
399 mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
400 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
401 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
402 }
403 }
404
405 if ((dev_cap->flags &
406 (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
407 mlx4_is_master(dev))
408 dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
409
389 return 0; 410 return 0;
390} 411}
391/*The function checks if there are live vf, return the num of them*/ 412/*The function checks if there are live vf, return the num of them*/
@@ -599,6 +620,21 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
599 goto err_mem; 620 goto err_mem;
600 } 621 }
601 622
623 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
624 dev->caps.eqe_size = 64;
625 dev->caps.eqe_factor = 1;
626 } else {
627 dev->caps.eqe_size = 32;
628 dev->caps.eqe_factor = 0;
629 }
630
631 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
632 dev->caps.cqe_size = 64;
633 dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
634 } else {
635 dev->caps.cqe_size = 32;
636 }
637
602 return 0; 638 return 0;
603 639
604err_mem: 640err_mem:
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 334ec483480b..8d54412ada63 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -473,6 +473,7 @@ struct mlx4_en_priv {
473 int mac_index; 473 int mac_index;
474 unsigned max_mtu; 474 unsigned max_mtu;
475 int base_qpn; 475 int base_qpn;
476 int cqe_factor;
476 477
477 struct mlx4_en_rss_map rss_map; 478 struct mlx4_en_rss_map rss_map;
478 __be32 ctrl_flags; 479 __be32 ctrl_flags;
diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c
index 21a045e0559f..f379c7f3034c 100644
--- a/drivers/scsi/scsi_transport_srp.c
+++ b/drivers/scsi/scsi_transport_srp.c
@@ -38,7 +38,7 @@ struct srp_host_attrs {
38#define to_srp_host_attrs(host) ((struct srp_host_attrs *)(host)->shost_data) 38#define to_srp_host_attrs(host) ((struct srp_host_attrs *)(host)->shost_data)
39 39
40#define SRP_HOST_ATTRS 0 40#define SRP_HOST_ATTRS 0
41#define SRP_RPORT_ATTRS 2 41#define SRP_RPORT_ATTRS 3
42 42
43struct srp_internal { 43struct srp_internal {
44 struct scsi_transport_template t; 44 struct scsi_transport_template t;
@@ -47,7 +47,6 @@ struct srp_internal {
47 struct device_attribute *host_attrs[SRP_HOST_ATTRS + 1]; 47 struct device_attribute *host_attrs[SRP_HOST_ATTRS + 1];
48 48
49 struct device_attribute *rport_attrs[SRP_RPORT_ATTRS + 1]; 49 struct device_attribute *rport_attrs[SRP_RPORT_ATTRS + 1];
50 struct device_attribute private_rport_attrs[SRP_RPORT_ATTRS];
51 struct transport_container rport_attr_cont; 50 struct transport_container rport_attr_cont;
52}; 51};
53 52
@@ -72,24 +71,6 @@ static DECLARE_TRANSPORT_CLASS(srp_host_class, "srp_host", srp_host_setup,
72static DECLARE_TRANSPORT_CLASS(srp_rport_class, "srp_remote_ports", 71static DECLARE_TRANSPORT_CLASS(srp_rport_class, "srp_remote_ports",
73 NULL, NULL, NULL); 72 NULL, NULL, NULL);
74 73
75#define SETUP_TEMPLATE(attrb, field, perm, test, ro_test, ro_perm) \
76 i->private_##attrb[count] = dev_attr_##field; \
77 i->private_##attrb[count].attr.mode = perm; \
78 if (ro_test) { \
79 i->private_##attrb[count].attr.mode = ro_perm; \
80 i->private_##attrb[count].store = NULL; \
81 } \
82 i->attrb[count] = &i->private_##attrb[count]; \
83 if (test) \
84 count++
85
86#define SETUP_RPORT_ATTRIBUTE_RD(field) \
87 SETUP_TEMPLATE(rport_attrs, field, S_IRUGO, 1, 0, 0)
88
89#define SETUP_RPORT_ATTRIBUTE_RW(field) \
90 SETUP_TEMPLATE(rport_attrs, field, S_IRUGO | S_IWUSR, \
91 1, 1, S_IRUGO)
92
93#define SRP_PID(p) \ 74#define SRP_PID(p) \
94 (p)->port_id[0], (p)->port_id[1], (p)->port_id[2], (p)->port_id[3], \ 75 (p)->port_id[0], (p)->port_id[1], (p)->port_id[2], (p)->port_id[3], \
95 (p)->port_id[4], (p)->port_id[5], (p)->port_id[6], (p)->port_id[7], \ 76 (p)->port_id[4], (p)->port_id[5], (p)->port_id[6], (p)->port_id[7], \
@@ -135,6 +116,24 @@ show_srp_rport_roles(struct device *dev, struct device_attribute *attr,
135 116
136static DEVICE_ATTR(roles, S_IRUGO, show_srp_rport_roles, NULL); 117static DEVICE_ATTR(roles, S_IRUGO, show_srp_rport_roles, NULL);
137 118
119static ssize_t store_srp_rport_delete(struct device *dev,
120 struct device_attribute *attr,
121 const char *buf, size_t count)
122{
123 struct srp_rport *rport = transport_class_to_srp_rport(dev);
124 struct Scsi_Host *shost = dev_to_shost(dev);
125 struct srp_internal *i = to_srp_internal(shost->transportt);
126
127 if (i->f->rport_delete) {
128 i->f->rport_delete(rport);
129 return count;
130 } else {
131 return -ENOSYS;
132 }
133}
134
135static DEVICE_ATTR(delete, S_IWUSR, NULL, store_srp_rport_delete);
136
138static void srp_rport_release(struct device *dev) 137static void srp_rport_release(struct device *dev)
139{ 138{
140 struct srp_rport *rport = dev_to_rport(dev); 139 struct srp_rport *rport = dev_to_rport(dev);
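
The new "delete" attribute gives user space a way to drop an rport (and, through the rport_delete callback that ib_srp wires to srp_queue_remove_work()) without unloading the module. A minimal sketch of driving it from user space follows; the exact sysfs path is an assumption based on the srp_remote_ports class registered above and the usual port-<host_no>:<rport_id> device naming, so adjust it to whatever ls /sys/class/srp_remote_ports/ shows on the local system.

	#include <stdio.h>

	int main(void)
	{
		/* Assumed path; the rport name depends on the SCSI host number
		 * and rport id on the local system.
		 */
		const char *path = "/sys/class/srp_remote_ports/port-2:1/delete";
		FILE *f = fopen(path, "w");

		if (!f) {
			perror(path);
			return 1;
		}
		/* Any write triggers store_srp_rport_delete() above. */
		fputs("1\n", f);
		return fclose(f) ? 1 : 0;
	}
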
@@ -324,12 +323,16 @@ srp_attach_transport(struct srp_function_template *ft)
324 i->rport_attr_cont.ac.attrs = &i->rport_attrs[0]; 323 i->rport_attr_cont.ac.attrs = &i->rport_attrs[0];
325 i->rport_attr_cont.ac.class = &srp_rport_class.class; 324 i->rport_attr_cont.ac.class = &srp_rport_class.class;
326 i->rport_attr_cont.ac.match = srp_rport_match; 325 i->rport_attr_cont.ac.match = srp_rport_match;
327 transport_container_register(&i->rport_attr_cont);
328 326
329 count = 0; 327 count = 0;
330 SETUP_RPORT_ATTRIBUTE_RD(port_id); 328 i->rport_attrs[count++] = &dev_attr_port_id;
331 SETUP_RPORT_ATTRIBUTE_RD(roles); 329 i->rport_attrs[count++] = &dev_attr_roles;
332 i->rport_attrs[count] = NULL; 330 if (ft->rport_delete)
331 i->rport_attrs[count++] = &dev_attr_delete;
332 i->rport_attrs[count++] = NULL;
333 BUG_ON(count > ARRAY_SIZE(i->rport_attrs));
334
335 transport_container_register(&i->rport_attr_cont);
333 336
334 i->f = ft; 337 i->f = ft;
335 338
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 6d1acb04cd17..21821da2abfd 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -142,6 +142,8 @@ enum {
142 MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48, 142 MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48,
143 MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55, 143 MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55,
144 MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59, 144 MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59,
145 MLX4_DEV_CAP_FLAG_64B_EQE = 1LL << 61,
146 MLX4_DEV_CAP_FLAG_64B_CQE = 1LL << 62
145}; 147};
146 148
147enum { 149enum {
@@ -151,6 +153,20 @@ enum {
151 MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3 153 MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3
152}; 154};
153 155
156enum {
157 MLX4_DEV_CAP_64B_EQE_ENABLED = 1LL << 0,
158 MLX4_DEV_CAP_64B_CQE_ENABLED = 1LL << 1
159};
160
161enum {
162 MLX4_USER_DEV_CAP_64B_CQE = 1L << 0
163};
164
165enum {
166 MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0
167};
168
169
154#define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) 170#define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90)
155 171
156enum { 172enum {
@@ -419,6 +435,11 @@ struct mlx4_caps {
419 u32 max_counters; 435 u32 max_counters;
420 u8 port_ib_mtu[MLX4_MAX_PORTS + 1]; 436 u8 port_ib_mtu[MLX4_MAX_PORTS + 1];
421 u16 sqp_demux; 437 u16 sqp_demux;
438 u32 eqe_size;
439 u32 cqe_size;
440 u8 eqe_factor;
441 u32 userspace_caps; /* userspace must be aware of these */
442 u32 function_caps; /* VFs must be aware of these */
422}; 443};
423 444
424struct mlx4_buf_list { 445struct mlx4_buf_list {
diff --git a/include/rdma/Kbuild b/include/rdma/Kbuild
index ea56f76c0c22..e69de29bb2d1 100644
--- a/include/rdma/Kbuild
+++ b/include/rdma/Kbuild
@@ -1,6 +0,0 @@
1header-y += ib_user_cm.h
2header-y += ib_user_mad.h
3header-y += ib_user_sa.h
4header-y += ib_user_verbs.h
5header-y += rdma_netlink.h
6header-y += rdma_user_cm.h
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index bd3d8b24b420..e38de79eeb48 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -1,41 +1,9 @@
1#ifndef _RDMA_NETLINK_H 1#ifndef _RDMA_NETLINK_H
2#define _RDMA_NETLINK_H 2#define _RDMA_NETLINK_H
3 3
4#include <linux/types.h>
5
6enum {
7 RDMA_NL_RDMA_CM = 1
8};
9
10#define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10)
11#define RDMA_NL_GET_OP(type) (type & ((1 << 10) - 1))
12#define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op)
13
14enum {
15 RDMA_NL_RDMA_CM_ID_STATS = 0,
16 RDMA_NL_RDMA_CM_NUM_OPS
17};
18
19enum {
20 RDMA_NL_RDMA_CM_ATTR_SRC_ADDR = 1,
21 RDMA_NL_RDMA_CM_ATTR_DST_ADDR,
22 RDMA_NL_RDMA_CM_NUM_ATTR,
23};
24
25struct rdma_cm_id_stats {
26 __u32 qp_num;
27 __u32 bound_dev_if;
28 __u32 port_space;
29 __s32 pid;
30 __u8 cm_state;
31 __u8 node_type;
32 __u8 port_num;
33 __u8 qp_type;
34};
35
36#ifdef __KERNEL__
37 4
38#include <linux/netlink.h> 5#include <linux/netlink.h>
6#include <uapi/rdma/rdma_netlink.h>
39 7
40struct ibnl_client_cbs { 8struct ibnl_client_cbs {
41 int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb); 9 int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb);
@@ -88,6 +56,4 @@ void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
88int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, 56int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
89 int len, void *data, int type); 57 int len, void *data, int type);
90 58
91#endif /* __KERNEL__ */
92
93#endif /* _RDMA_NETLINK_H */ 59#endif /* _RDMA_NETLINK_H */
diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h
index 9c60ca1c08c5..ff0f04ac91aa 100644
--- a/include/scsi/scsi_transport_srp.h
+++ b/include/scsi/scsi_transport_srp.h
@@ -14,13 +14,21 @@ struct srp_rport_identifiers {
14}; 14};
15 15
16struct srp_rport { 16struct srp_rport {
17 /* for initiator and target drivers */
18
17 struct device dev; 19 struct device dev;
18 20
19 u8 port_id[16]; 21 u8 port_id[16];
20 u8 roles; 22 u8 roles;
23
24 /* for initiator drivers */
25
26 void *lld_data; /* LLD private data */
21}; 27};
22 28
23struct srp_function_template { 29struct srp_function_template {
30 /* for initiator drivers */
31 void (*rport_delete)(struct srp_rport *rport);
24 /* for target drivers */ 32 /* for target drivers */
25 int (* tsk_mgmt_response)(struct Scsi_Host *, u64, u64, int); 33 int (* tsk_mgmt_response)(struct Scsi_Host *, u64, u64, int);
26 int (* it_nexus_response)(struct Scsi_Host *, u64, int); 34 int (* it_nexus_response)(struct Scsi_Host *, u64, int);
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
index aafaa5aa54d4..687ae332200f 100644
--- a/include/uapi/rdma/Kbuild
+++ b/include/uapi/rdma/Kbuild
@@ -1 +1,7 @@
1# UAPI Header export list 1# UAPI Header export list
2header-y += ib_user_cm.h
3header-y += ib_user_mad.h
4header-y += ib_user_sa.h
5header-y += ib_user_verbs.h
6header-y += rdma_netlink.h
7header-y += rdma_user_cm.h
diff --git a/include/rdma/ib_user_cm.h b/include/uapi/rdma/ib_user_cm.h
index f79014aa28f9..f79014aa28f9 100644
--- a/include/rdma/ib_user_cm.h
+++ b/include/uapi/rdma/ib_user_cm.h
diff --git a/include/rdma/ib_user_mad.h b/include/uapi/rdma/ib_user_mad.h
index d6fce1cbdb90..d6fce1cbdb90 100644
--- a/include/rdma/ib_user_mad.h
+++ b/include/uapi/rdma/ib_user_mad.h
diff --git a/include/rdma/ib_user_sa.h b/include/uapi/rdma/ib_user_sa.h
index cfc7c9ba781e..cfc7c9ba781e 100644
--- a/include/rdma/ib_user_sa.h
+++ b/include/uapi/rdma/ib_user_sa.h
diff --git a/include/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 81aba3a73aa3..81aba3a73aa3 100644
--- a/include/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
new file mode 100644
index 000000000000..8297285b6288
--- /dev/null
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -0,0 +1,37 @@
1#ifndef _UAPI_RDMA_NETLINK_H
2#define _UAPI_RDMA_NETLINK_H
3
4#include <linux/types.h>
5
6enum {
7 RDMA_NL_RDMA_CM = 1
8};
9
10#define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10)
11#define RDMA_NL_GET_OP(type) (type & ((1 << 10) - 1))
12#define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op)
13
14enum {
15 RDMA_NL_RDMA_CM_ID_STATS = 0,
16 RDMA_NL_RDMA_CM_NUM_OPS
17};
18
19enum {
20 RDMA_NL_RDMA_CM_ATTR_SRC_ADDR = 1,
21 RDMA_NL_RDMA_CM_ATTR_DST_ADDR,
22 RDMA_NL_RDMA_CM_NUM_ATTR,
23};
24
25struct rdma_cm_id_stats {
26 __u32 qp_num;
27 __u32 bound_dev_if;
28 __u32 port_space;
29 __s32 pid;
30 __u8 cm_state;
31 __u8 node_type;
32 __u8 port_num;
33 __u8 qp_type;
34};
35
36
37#endif /* _UAPI_RDMA_NETLINK_H */
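
With the header now exported under include/uapi, user-space tools can use the same packing macros when talking to the RDMA netlink interface. The small stand-alone example below shows the bit layout: the client index occupies bits 10-15 of the netlink message type and the per-client opcode the low 10 bits. The macros are copied from the header above; the rest is illustrative.

	#include <stdio.h>

	#define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10)
	#define RDMA_NL_GET_OP(type)     (type & ((1 << 10) - 1))
	#define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op)

	int main(void)
	{
		int type = RDMA_NL_GET_TYPE(1 /* RDMA_NL_RDMA_CM */,
					    0 /* RDMA_NL_RDMA_CM_ID_STATS */);

		/* Prints: type=1024 client=1 op=0 */
		printf("type=%d client=%d op=%d\n", type,
		       RDMA_NL_GET_CLIENT(type), RDMA_NL_GET_OP(type));
		return 0;
	}
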
diff --git a/include/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h
index 1ee9239ff8c2..1ee9239ff8c2 100644
--- a/include/rdma/rdma_user_cm.h
+++ b/include/uapi/rdma/rdma_user_cm.h