author     Doug Ledford <dledford@redhat.com>  2016-02-03 11:10:58 -0500
committer  Doug Ledford <dledford@redhat.com>  2016-02-03 11:10:58 -0500
commit     b85d9905a7ca128f24e3a4e60ff2a1b0cd58ae7c (patch)
tree       1c499fa47a01d4a44425c9929f2cc0fd5baf2ea0
parent     e581d111dad3781266ae1abe1d2848e69406deb5 (diff)
staging/rdma: remove deprecated ipath driver
This driver was moved to staging for eventual deletion. Time to complete
that task.

Signed-off-by: Doug Ledford <dledford@redhat.com>
-rw-r--r--  MAINTAINERS | 6
-rw-r--r--  drivers/staging/rdma/Kconfig | 2
-rw-r--r--  drivers/staging/rdma/Makefile | 1
-rw-r--r--  drivers/staging/rdma/ipath/Kconfig | 16
-rw-r--r--  drivers/staging/rdma/ipath/Makefile | 37
-rw-r--r--  drivers/staging/rdma/ipath/TODO | 5
-rw-r--r--  drivers/staging/rdma/ipath/ipath_common.h | 851
-rw-r--r--  drivers/staging/rdma/ipath/ipath_cq.c | 483
-rw-r--r--  drivers/staging/rdma/ipath/ipath_debug.h | 99
-rw-r--r--  drivers/staging/rdma/ipath/ipath_diag.c | 551
-rw-r--r--  drivers/staging/rdma/ipath/ipath_dma.c | 179
-rw-r--r--  drivers/staging/rdma/ipath/ipath_driver.c | 2784
-rw-r--r--  drivers/staging/rdma/ipath/ipath_eeprom.c | 1183
-rw-r--r--  drivers/staging/rdma/ipath/ipath_file_ops.c | 2619
-rw-r--r--  drivers/staging/rdma/ipath/ipath_fs.c | 415
-rw-r--r--  drivers/staging/rdma/ipath/ipath_iba6110.c | 1939
-rw-r--r--  drivers/staging/rdma/ipath/ipath_init_chip.c | 1062
-rw-r--r--  drivers/staging/rdma/ipath/ipath_intr.c | 1271
-rw-r--r--  drivers/staging/rdma/ipath/ipath_kernel.h | 1374
-rw-r--r--  drivers/staging/rdma/ipath/ipath_keys.c | 270
-rw-r--r--  drivers/staging/rdma/ipath/ipath_mad.c | 1521
-rw-r--r--  drivers/staging/rdma/ipath/ipath_mmap.c | 174
-rw-r--r--  drivers/staging/rdma/ipath/ipath_mr.c | 370
-rw-r--r--  drivers/staging/rdma/ipath/ipath_qp.c | 1079
-rw-r--r--  drivers/staging/rdma/ipath/ipath_rc.c | 1969
-rw-r--r--  drivers/staging/rdma/ipath/ipath_registers.h | 512
-rw-r--r--  drivers/staging/rdma/ipath/ipath_ruc.c | 733
-rw-r--r--  drivers/staging/rdma/ipath/ipath_sdma.c | 818
-rw-r--r--  drivers/staging/rdma/ipath/ipath_srq.c | 380
-rw-r--r--  drivers/staging/rdma/ipath/ipath_stats.c | 347
-rw-r--r--  drivers/staging/rdma/ipath/ipath_sysfs.c | 1237
-rw-r--r--  drivers/staging/rdma/ipath/ipath_uc.c | 547
-rw-r--r--  drivers/staging/rdma/ipath/ipath_ud.c | 579
-rw-r--r--  drivers/staging/rdma/ipath/ipath_user_pages.c | 228
-rw-r--r--  drivers/staging/rdma/ipath/ipath_user_sdma.c | 874
-rw-r--r--  drivers/staging/rdma/ipath/ipath_user_sdma.h | 52
-rw-r--r--  drivers/staging/rdma/ipath/ipath_verbs.c | 2376
-rw-r--r--  drivers/staging/rdma/ipath/ipath_verbs.h | 941
-rw-r--r--  drivers/staging/rdma/ipath/ipath_verbs_mcast.c | 363
-rw-r--r--  drivers/staging/rdma/ipath/ipath_wc_ppc64.c | 49
-rw-r--r--  drivers/staging/rdma/ipath/ipath_wc_x86_64.c | 144
41 files changed, 0 insertions, 30440 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index d1aeb1d5add5..5c889cd754c3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5795,12 +5795,6 @@ M: Juanjo Ciarlante <jjciarla@raiz.uncu.edu.ar>
 S: Maintained
 F: net/ipv4/netfilter/ipt_MASQUERADE.c
 
-IPATH DRIVER
-M: Mike Marciniszyn <infinipath@intel.com>
-L: linux-rdma@vger.kernel.org
-S: Maintained
-F: drivers/staging/rdma/ipath/
-
 IPMI SUBSYSTEM
 M: Corey Minyard <minyard@acm.org>
 L: openipmi-developer@lists.sourceforge.net (moderated for non-subscribers)
diff --git a/drivers/staging/rdma/Kconfig b/drivers/staging/rdma/Kconfig
index 8a0be6961d7a..f1f3ecadf0fb 100644
--- a/drivers/staging/rdma/Kconfig
+++ b/drivers/staging/rdma/Kconfig
@@ -24,6 +24,4 @@ if STAGING_RDMA
 
 source "drivers/staging/rdma/hfi1/Kconfig"
 
-source "drivers/staging/rdma/ipath/Kconfig"
-
 endif
diff --git a/drivers/staging/rdma/Makefile b/drivers/staging/rdma/Makefile
index 08e1919c819e..8c7fc1de48a7 100644
--- a/drivers/staging/rdma/Makefile
+++ b/drivers/staging/rdma/Makefile
@@ -1,3 +1,2 @@
 # Entries for RDMA_STAGING tree
 obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
-obj-$(CONFIG_INFINIBAND_IPATH) += ipath/
diff --git a/drivers/staging/rdma/ipath/Kconfig b/drivers/staging/rdma/ipath/Kconfig
deleted file mode 100644
index 041ce0634968..000000000000
--- a/drivers/staging/rdma/ipath/Kconfig
+++ /dev/null
@@ -1,16 +0,0 @@
1config INFINIBAND_IPATH
2 tristate "QLogic HTX HCA support"
3 depends on 64BIT && NET && HT_IRQ
4 ---help---
5 This is a driver for the deprecated QLogic Hyper-Transport
6 IB host channel adapter (model QHT7140),
7 including InfiniBand verbs support. This driver allows these
8 devices to be used with both kernel upper level protocols such
9 as IP-over-InfiniBand as well as with userspace applications
10 (in conjunction with InfiniBand userspace access).
11 For QLogic PCIe QLE based cards, use the QIB driver instead.
12
13 If you have this hardware you will need to boot with PAT disabled
14 on your x86-64 systems, use the nopat kernel parameter.
15
16 Note that this driver will soon be removed entirely from the kernel.
diff --git a/drivers/staging/rdma/ipath/Makefile b/drivers/staging/rdma/ipath/Makefile
deleted file mode 100644
index 4496f2820c92..000000000000
--- a/drivers/staging/rdma/ipath/Makefile
+++ /dev/null
@@ -1,37 +0,0 @@
1ccflags-y := -DIPATH_IDSTR='"QLogic kernel.org driver"' \
2 -DIPATH_KERN_TYPE=0
3
4obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
5
6ib_ipath-y := \
7 ipath_cq.o \
8 ipath_diag.o \
9 ipath_dma.o \
10 ipath_driver.o \
11 ipath_eeprom.o \
12 ipath_file_ops.o \
13 ipath_fs.o \
14 ipath_init_chip.o \
15 ipath_intr.o \
16 ipath_keys.o \
17 ipath_mad.o \
18 ipath_mmap.o \
19 ipath_mr.o \
20 ipath_qp.o \
21 ipath_rc.o \
22 ipath_ruc.o \
23 ipath_sdma.o \
24 ipath_srq.o \
25 ipath_stats.o \
26 ipath_sysfs.o \
27 ipath_uc.o \
28 ipath_ud.o \
29 ipath_user_pages.o \
30 ipath_user_sdma.o \
31 ipath_verbs_mcast.o \
32 ipath_verbs.o
33
34ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o
35
36ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
37ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/staging/rdma/ipath/TODO b/drivers/staging/rdma/ipath/TODO
deleted file mode 100644
index cb00158d64c8..000000000000
--- a/drivers/staging/rdma/ipath/TODO
+++ /dev/null
@@ -1,5 +0,0 @@
1The ipath driver has been moved to staging in preparation for its removal in a
2few releases. The driver will be deleted during the 4.6 merge window.
3
4Contact Dennis Dalessandro <dennis.dalessandro@intel.com> and
5Cc: linux-rdma@vger.kernel.org
diff --git a/drivers/staging/rdma/ipath/ipath_common.h b/drivers/staging/rdma/ipath/ipath_common.h
deleted file mode 100644
index 28cfe97cf1e9..000000000000
--- a/drivers/staging/rdma/ipath/ipath_common.h
+++ /dev/null
@@ -1,851 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef _IPATH_COMMON_H
35#define _IPATH_COMMON_H
36
37/*
38 * This file contains defines, structures, etc. that are used
39 * to communicate between kernel and user code.
40 */
41
42
43/* This is the IEEE-assigned OUI for QLogic Inc. InfiniPath */
44#define IPATH_SRC_OUI_1 0x00
45#define IPATH_SRC_OUI_2 0x11
46#define IPATH_SRC_OUI_3 0x75
47
48/* version of protocol header (known to chip also). In the long run,
49 * we should be able to generate and accept a range of version numbers;
50 * for now we only accept one, and it's compiled in.
51 */
52#define IPS_PROTO_VERSION 2
53
54/*
55 * These are compile time constants that you may want to enable or disable
56 * if you are trying to debug problems with code or performance.
57 * IPATH_VERBOSE_TRACING define as 1 if you want additional tracing in
58 * fastpath code
59 * IPATH_TRACE_REGWRITES define as 1 if you want register writes to be
60 * traced in fastpath code
61 * _IPATH_TRACING define as 0 if you want to remove all tracing in a
62 * compilation unit
63 * _IPATH_DEBUGGING define as 0 if you want to remove debug prints
64 */
65
66/*
67 * The value in the BTH QP field that InfiniPath uses to differentiate
68 * an infinipath protocol IB packet vs standard IB transport
69 */
70#define IPATH_KD_QP 0x656b79
71
72/*
73 * valid states passed to ipath_set_linkstate() user call
74 */
75#define IPATH_IB_LINKDOWN 0
76#define IPATH_IB_LINKARM 1
77#define IPATH_IB_LINKACTIVE 2
78#define IPATH_IB_LINKDOWN_ONLY 3
79#define IPATH_IB_LINKDOWN_SLEEP 4
80#define IPATH_IB_LINKDOWN_DISABLE 5
81#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */
82#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */
83#define IPATH_IB_LINK_NO_HRTBT 8 /* disable Heartbeat, e.g. for loopback */
84#define IPATH_IB_LINK_HRTBT 9 /* enable heartbeat, normal, non-loopback */
85
86/*
87 * These 3 values (SDR and DDR may be ORed for auto-speed
88 * negotiation) are used for the 3rd argument to path_f_set_ib_cfg
89 * with cmd IPATH_IB_CFG_SPD_ENB, by direct calls or via sysfs. They
90 * are also the possible values for ipath_link_speed_enabled and active
91 * The values were chosen to match values used within the IB spec.
92 */
93#define IPATH_IB_SDR 1
94#define IPATH_IB_DDR 2
95
96/*
97 * stats maintained by the driver. For now, at least, this is global
98 * to all minor devices.
99 */
100struct infinipath_stats {
101 /* number of interrupts taken */
102 __u64 sps_ints;
103 /* number of interrupts for errors */
104 __u64 sps_errints;
105 /* number of errors from chip (not incl. packet errors or CRC) */
106 __u64 sps_errs;
107 /* number of packet errors from chip other than CRC */
108 __u64 sps_pkterrs;
109 /* number of packets with CRC errors (ICRC and VCRC) */
110 __u64 sps_crcerrs;
111 /* number of hardware errors reported (parity, etc.) */
112 __u64 sps_hwerrs;
113 /* number of times IB link changed state unexpectedly */
114 __u64 sps_iblink;
115 __u64 sps_unused; /* was fastrcvint, no longer implemented */
116 /* number of kernel (port0) packets received */
117 __u64 sps_port0pkts;
118 /* number of "ethernet" packets sent by driver */
119 __u64 sps_ether_spkts;
120 /* number of "ethernet" packets received by driver */
121 __u64 sps_ether_rpkts;
122 /* number of SMA packets sent by driver. Obsolete. */
123 __u64 sps_sma_spkts;
124 /* number of SMA packets received by driver. Obsolete. */
125 __u64 sps_sma_rpkts;
126 /* number of times all ports rcvhdrq was full and packet dropped */
127 __u64 sps_hdrqfull;
128 /* number of times all ports egrtid was full and packet dropped */
129 __u64 sps_etidfull;
130 /*
131 * number of times we tried to send from driver, but no pio buffers
132 * avail
133 */
134 __u64 sps_nopiobufs;
135 /* number of ports currently open */
136 __u64 sps_ports;
137 /* list of pkeys (other than default) accepted (0 means not set) */
138 __u16 sps_pkeys[4];
139 __u16 sps_unused16[4]; /* available; maintaining compatible layout */
140 /* number of user ports per chip (not IB ports) */
141 __u32 sps_nports;
142 /* not our interrupt, or already handled */
143 __u32 sps_nullintr;
144 /* max number of packets handled per receive call */
145 __u32 sps_maxpkts_call;
146 /* avg number of packets handled per receive call */
147 __u32 sps_avgpkts_call;
148 /* total number of pages locked */
149 __u64 sps_pagelocks;
150 /* total number of pages unlocked */
151 __u64 sps_pageunlocks;
152 /*
153 * Number of packets dropped in kernel other than errors (ether
154 * packets if ipath not configured, etc.)
155 */
156 __u64 sps_krdrops;
157 __u64 sps_txeparity; /* PIO buffer parity error, recovered */
158 /* pad for future growth */
159 __u64 __sps_pad[45];
160};
161
162/*
163 * These are the status bits readable (in ascii form, 64bit value)
164 * from the "status" sysfs file.
165 */
166#define IPATH_STATUS_INITTED 0x1 /* basic initialization done */
167#define IPATH_STATUS_DISABLED 0x2 /* hardware disabled */
168/* Device has been disabled via admin request */
169#define IPATH_STATUS_ADMIN_DISABLED 0x4
170/* Chip has been found and initted */
171#define IPATH_STATUS_CHIP_PRESENT 0x20
172/* IB link is at ACTIVE, usable for data traffic */
173#define IPATH_STATUS_IB_READY 0x40
174/* link is configured, LID, MTU, etc. have been set */
175#define IPATH_STATUS_IB_CONF 0x80
176/* no link established, probably no cable */
177#define IPATH_STATUS_IB_NOCABLE 0x100
178/* A Fatal hardware error has occurred. */
179#define IPATH_STATUS_HWERROR 0x200
180
181/*
182 * The list of usermode accessible registers. Also see Reg_* later in file.
183 */
184typedef enum _ipath_ureg {
185 /* (RO) DMA RcvHdr to be used next. */
186 ur_rcvhdrtail = 0,
187 /* (RW) RcvHdr entry to be processed next by host. */
188 ur_rcvhdrhead = 1,
189 /* (RO) Index of next Eager index to use. */
190 ur_rcvegrindextail = 2,
191 /* (RW) Eager TID to be processed next */
192 ur_rcvegrindexhead = 3,
193 /* For internal use only; max register number. */
194 _IPATH_UregMax
195} ipath_ureg;
196
197/* bit values for spi_runtime_flags */
198#define IPATH_RUNTIME_HT 0x1
199#define IPATH_RUNTIME_PCIE 0x2
200#define IPATH_RUNTIME_FORCE_WC_ORDER 0x4
201#define IPATH_RUNTIME_RCVHDR_COPY 0x8
202#define IPATH_RUNTIME_MASTER 0x10
203#define IPATH_RUNTIME_NODMA_RTAIL 0x80
204#define IPATH_RUNTIME_SDMA 0x200
205#define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400
206#define IPATH_RUNTIME_PIO_REGSWAPPED 0x800
207
208/*
209 * This structure is returned by ipath_userinit() immediately after
210 * open to get implementation-specific info, and info specific to this
211 * instance.
212 *
213 * This struct must have explicit pad fields where type sizes
214 * may result in different alignments between 32 and 64 bit
215 * programs, since the 64 bit kernel requires the user code
216 * to have matching offsets
217 */
218struct ipath_base_info {
219 /* version of hardware, for feature checking. */
220 __u32 spi_hw_version;
221 /* version of software, for feature checking. */
222 __u32 spi_sw_version;
223 /* InfiniPath port assigned, goes into sent packets */
224 __u16 spi_port;
225 __u16 spi_subport;
226 /*
227 * IB MTU, packets IB data must be less than this.
228 * The MTU is in bytes, and will be a multiple of 4 bytes.
229 */
230 __u32 spi_mtu;
231 /*
232 * Size of a PIO buffer. Any given packet's total size must be less
233 * than this (in words). Included is the starting control word, so
234 * if 513 is returned, then total pkt size is 512 words or less.
235 */
236 __u32 spi_piosize;
237 /* size of the TID cache in infinipath, in entries */
238 __u32 spi_tidcnt;
239 /* size of the TID Eager list in infinipath, in entries */
240 __u32 spi_tidegrcnt;
241 /* size of a single receive header queue entry in words. */
242 __u32 spi_rcvhdrent_size;
243 /*
244 * Count of receive header queue entries allocated.
245 * This may be less than the spu_rcvhdrcnt passed in!.
246 */
247 __u32 spi_rcvhdr_cnt;
248
249 /* per-chip and other runtime features bitmap (IPATH_RUNTIME_*) */
250 __u32 spi_runtime_flags;
251
252 /* address where receive buffer queue is mapped into */
253 __u64 spi_rcvhdr_base;
254
255 /* user program. */
256
257 /* base address of eager TID receive buffers. */
258 __u64 spi_rcv_egrbufs;
259
260 /* Allocated by initialization code, not by protocol. */
261
262 /*
263 * Size of each TID buffer in host memory, starting at
264 * spi_rcv_egrbufs. The buffers are virtually contiguous.
265 */
266 __u32 spi_rcv_egrbufsize;
267 /*
268 * The special QP (queue pair) value that identifies an infinipath
269 * protocol packet from standard IB packets. More, probably much
270 * more, to be added.
271 */
272 __u32 spi_qpair;
273
274 /*
275 * User register base for init code, not to be used directly by
276 * protocol or applications.
277 */
278 __u64 __spi_uregbase;
279 /*
280 * Maximum buffer size in bytes that can be used in a single TID
281 * entry (assuming the buffer is aligned to this boundary). This is
282 * the minimum of what the hardware and software support. Guaranteed
283 * to be a power of 2.
284 */
285 __u32 spi_tid_maxsize;
286 /*
287 * alignment of each pio send buffer (byte count
288 * to add to spi_piobufbase to get to second buffer)
289 */
290 __u32 spi_pioalign;
291 /*
292 * The index of the first pio buffer available to this process;
293 * needed to do lookup in spi_pioavailaddr; not added to
294 * spi_piobufbase.
295 */
296 __u32 spi_pioindex;
297 /* number of buffers mapped for this process */
298 __u32 spi_piocnt;
299
300 /*
301 * Base address of writeonly pio buffers for this process.
302 * Each buffer has spi_piosize words, and is aligned on spi_pioalign
303 * boundaries. spi_piocnt buffers are mapped from this address
304 */
305 __u64 spi_piobufbase;
306
307 /*
308 * Base address of readonly memory copy of the pioavail registers.
309 * There are 2 bits for each buffer.
310 */
311 __u64 spi_pioavailaddr;
312
313 /*
314 * Address where driver updates a copy of the interface and driver
315 * status (IPATH_STATUS_*) as a 64 bit value. It's followed by a
316 * string indicating hardware error, if there was one.
317 */
318 __u64 spi_status;
319
320 /* number of chip ports available to user processes */
321 __u32 spi_nports;
322 /* unit number of chip we are using */
323 __u32 spi_unit;
324 /* num bufs in each contiguous set */
325 __u32 spi_rcv_egrperchunk;
326 /* size in bytes of each contiguous set */
327 __u32 spi_rcv_egrchunksize;
328 /* total size of mmap to cover full rcvegrbuffers */
329 __u32 spi_rcv_egrbuftotlen;
330 __u32 spi_filler_for_align;
331 /* address of readonly memory copy of the rcvhdrq tail register. */
332 __u64 spi_rcvhdr_tailaddr;
333
334 /* shared memory pages for subports if port is shared */
335 __u64 spi_subport_uregbase;
336 __u64 spi_subport_rcvegrbuf;
337 __u64 spi_subport_rcvhdr_base;
338
339 /* shared memory page for hardware port if it is shared */
340 __u64 spi_port_uregbase;
341 __u64 spi_port_rcvegrbuf;
342 __u64 spi_port_rcvhdr_base;
343 __u64 spi_port_rcvhdr_tailaddr;
344
345} __attribute__ ((aligned(8)));
346
347
348/*
349 * This version number is given to the driver by the user code during
350 * initialization in the spu_userversion field of ipath_user_info, so
351 * the driver can check for compatibility with user code.
352 *
353 * The major version changes when data structures
354 * change in an incompatible way. The driver must be the same or higher
355 * for initialization to succeed. In some cases, a higher version
356 * driver will not interoperate with older software, and initialization
357 * will return an error.
358 */
359#define IPATH_USER_SWMAJOR 1
360
361/*
362 * Minor version differences are always compatible
363 * within a major version; however, if user software is newer
364 * than driver software, some new features and/or structure fields
365 * may not be implemented; the user code must deal with this if it
366 * cares, or it must abort after initialization reports the difference.
367 */
368#define IPATH_USER_SWMINOR 6
369
370#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
371
372#define IPATH_KERN_TYPE 0
373
374/*
375 * Similarly, this is the kernel version going back to the user. It's
376 * slightly different, in that we want to tell if the driver was built as
377 * part of a QLogic release, or from the driver from openfabrics.org,
378 * kernel.org, or a standard distribution, for support reasons.
379 * The high bit is 0 for non-QLogic and 1 for QLogic-built/supplied.
380 *
381 * It's returned by the driver to the user code during initialization in the
382 * spi_sw_version field of ipath_base_info, so the user code can in turn
383 * check for compatibility with the kernel.
384*/
385#define IPATH_KERN_SWVERSION ((IPATH_KERN_TYPE<<31) | IPATH_USER_SWVERSION)
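/*
 * Illustrative sketch, not part of the original header: one way user code
 * could apply the version scheme described above is to compare the major
 * number the driver reports in spi_sw_version against the
 * IPATH_USER_SWMAJOR it was built with, ignoring the build-type flag in
 * bit 31. The helper name is hypothetical.
 */
static inline int ipath_example_sw_compatible(__u32 spi_sw_version)
{
	/* bits 30:16 carry the major version; bit 31 is the build-type bit */
	return ((spi_sw_version >> 16) & 0x7fff) == IPATH_USER_SWMAJOR;
}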
386
387/*
388 * This structure is passed to ipath_userinit() to tell the driver where
389 * user code buffers are, sizes, etc. The offsets and sizes of the
390 * fields must remain unchanged, for binary compatibility. It can
391 * be extended, if userversion is changed so user code can tell, if needed
392 */
393struct ipath_user_info {
394 /*
395 * version of user software, to detect compatibility issues.
396 * Should be set to IPATH_USER_SWVERSION.
397 */
398 __u32 spu_userversion;
399
400 /* desired number of receive header queue entries */
401 __u32 spu_rcvhdrcnt;
402
403 /* size of struct base_info to write to */
404 __u32 spu_base_info_size;
405
406 /*
407 * number of words in KD protocol header
408 * This tells InfiniPath how many words to copy to rcvhdrq. If 0,
409 * kernel uses a default. Once set, attempts to set any other value
410 * are an error (EAGAIN) until driver is reloaded.
411 */
412 __u32 spu_rcvhdrsize;
413
414 /*
415 * If two or more processes wish to share a port, each process
416 * must set the spu_subport_cnt and spu_subport_id to the same
417 * values. The only restriction on the spu_subport_id is that
418 * it be unique for a given node.
419 */
420 __u16 spu_subport_cnt;
421 __u16 spu_subport_id;
422
423 __u32 spu_unused; /* kept for compatible layout */
424
425 /*
426 * address of struct base_info to write to
427 */
428 __u64 spu_base_info;
429
430} __attribute__ ((aligned(8)));
431
432/* User commands. */
433
434#define IPATH_CMD_MIN 16
435
436#define __IPATH_CMD_USER_INIT 16 /* old set up userspace (for old user code) */
437#define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */
438#define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */
439#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */
440#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */
441#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */
442#define __IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes (for old user code) */
443#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */
444#define IPATH_CMD_USER_INIT 24 /* set up userspace */
445#define IPATH_CMD_UNUSED_1 25
446#define IPATH_CMD_UNUSED_2 26
447#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */
448#define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */
449#define IPATH_CMD_ARMLAUNCH_CTRL 29 /* armlaunch detection control */
450/* 30 is unused */
451#define IPATH_CMD_SDMA_INFLIGHT 31 /* sdma inflight counter request */
452#define IPATH_CMD_SDMA_COMPLETE 32 /* sdma completion counter request */
453
454/*
455 * Poll types
456 */
457#define IPATH_POLL_TYPE_URGENT 0x01
458#define IPATH_POLL_TYPE_OVERFLOW 0x02
459
460struct ipath_port_info {
461 __u32 num_active; /* number of active units */
462 __u32 unit; /* unit (chip) assigned to caller */
463 __u16 port; /* port on unit assigned to caller */
464 __u16 subport; /* subport on unit assigned to caller */
465 __u16 num_ports; /* number of ports available on unit */
466 __u16 num_subports; /* number of subports opened on port */
467};
468
469struct ipath_tid_info {
470 __u32 tidcnt;
471 /* make structure same size in 32 and 64 bit */
472 __u32 tid__unused;
473 /* virtual address of first page in transfer */
474 __u64 tidvaddr;
475 /* pointer (same size 32/64 bit) to __u16 tid array */
476 __u64 tidlist;
477
478 /*
479 * pointer (same size 32/64 bit) to bitmap of TIDs used
480 * for this call; checked for being large enough at open
481 */
482 __u64 tidmap;
483};
484
485struct ipath_cmd {
486 __u32 type; /* command type */
487 union {
488 struct ipath_tid_info tid_info;
489 struct ipath_user_info user_info;
490
491 /*
492 * address in userspace where we should put the sdma
493 * inflight counter
494 */
495 __u64 sdma_inflight;
496 /*
497 * address in userspace where we should put the sdma
498 * completion counter
499 */
500 __u64 sdma_complete;
501 /* address in userspace of struct ipath_port_info to
502 write result to */
503 __u64 port_info;
504 /* enable/disable receipt of packets */
505 __u32 recv_ctrl;
506 /* enable/disable armlaunch errors (non-zero to enable) */
507 __u32 armlaunch_ctrl;
508 /* partition key to set */
509 __u16 part_key;
510 /* user address of __u32 bitmask of active slaves */
511 __u64 slave_mask_addr;
512 /* type of polling we want */
513 __u16 poll_type;
514 } cmd;
515};
516
517struct ipath_iovec {
518 /* Pointer to data, but same size 32 and 64 bit */
519 __u64 iov_base;
520
521 /*
522 * Length of data; don't need 64 bits, but want
523 * ipath_sendpkt to remain same size as before 32 bit changes, so...
524 */
525 __u64 iov_len;
526};
527
528/*
529 * Describes a single packet for send. Each packet can have one or more
530 * buffers, but the total length (exclusive of IB headers) must be less
531 * than the MTU, and if using the PIO method, entire packet length,
532 * including IB headers, must be less than the ipath_piosize value (words).
533 * Use of this necessitates including sys/uio.h
534 */
535struct __ipath_sendpkt {
536 __u32 sps_flags; /* flags for packet (TBD) */
537 __u32 sps_cnt; /* number of entries to use in sps_iov */
538 /* array of iov's describing packet. TEMPORARY */
539 struct ipath_iovec sps_iov[4];
540};
541
542/*
543 * diagnostics can send a packet by "writing" one of the following
544 * two structs to diag data special file
545 * The first is the legacy version for backward compatibility
546 */
547struct ipath_diag_pkt {
548 __u32 unit;
549 __u64 data;
550 __u32 len;
551};
552
553/* The second diag_pkt struct is the expanded version that allows
554 * more control over the packet, specifically, by allowing a custom
555 * pbc (+ static rate) qword, so that special modes and deliberate
556 * changes to CRCs can be used. The elements were also re-ordered
557 * for better alignment and to avoid padding issues.
558 */
559struct ipath_diag_xpkt {
560 __u64 data;
561 __u64 pbc_wd;
562 __u32 unit;
563 __u32 len;
564};
565
566/*
567 * Data layout in I2C flash (for GUID, etc.)
568 * All fields are little-endian binary unless otherwise stated
569 */
570#define IPATH_FLASH_VERSION 2
571struct ipath_flash {
572 /* flash layout version (IPATH_FLASH_VERSION) */
573 __u8 if_fversion;
574 /* checksum protecting if_length bytes */
575 __u8 if_csum;
576 /*
577 * valid length (in use, protected by if_csum, including
578 * if_fversion and if_csum themselves)
579 */
580 __u8 if_length;
581 /* the GUID, in network order */
582 __u8 if_guid[8];
583 /* number of GUIDs to use, starting from if_guid */
584 __u8 if_numguid;
585 /* the (last 10 characters of) board serial number, in ASCII */
586 char if_serial[12];
587 /* board mfg date (YYYYMMDD ASCII) */
588 char if_mfgdate[8];
589 /* last board rework/test date (YYYYMMDD ASCII) */
590 char if_testdate[8];
591 /* logging of error counts, TBD */
592 __u8 if_errcntp[4];
593 /* powered on hours, updated at driver unload */
594 __u8 if_powerhour[2];
595 /* ASCII free-form comment field */
596 char if_comment[32];
597 /* Backwards compatible prefix for longer QLogic Serial Numbers */
598 char if_sprefix[4];
599 /* 82 bytes used, min flash size is 128 bytes */
600 __u8 if_future[46];
601};
602
603/*
604 * These are the counters implemented in the chip, and are listed in order.
605 * The InterCaps naming is taken straight from the chip spec.
606 */
607struct infinipath_counters {
608 __u64 LBIntCnt;
609 __u64 LBFlowStallCnt;
610 __u64 TxSDmaDescCnt; /* was Reserved1 */
611 __u64 TxUnsupVLErrCnt;
612 __u64 TxDataPktCnt;
613 __u64 TxFlowPktCnt;
614 __u64 TxDwordCnt;
615 __u64 TxLenErrCnt;
616 __u64 TxMaxMinLenErrCnt;
617 __u64 TxUnderrunCnt;
618 __u64 TxFlowStallCnt;
619 __u64 TxDroppedPktCnt;
620 __u64 RxDroppedPktCnt;
621 __u64 RxDataPktCnt;
622 __u64 RxFlowPktCnt;
623 __u64 RxDwordCnt;
624 __u64 RxLenErrCnt;
625 __u64 RxMaxMinLenErrCnt;
626 __u64 RxICRCErrCnt;
627 __u64 RxVCRCErrCnt;
628 __u64 RxFlowCtrlErrCnt;
629 __u64 RxBadFormatCnt;
630 __u64 RxLinkProblemCnt;
631 __u64 RxEBPCnt;
632 __u64 RxLPCRCErrCnt;
633 __u64 RxBufOvflCnt;
634 __u64 RxTIDFullErrCnt;
635 __u64 RxTIDValidErrCnt;
636 __u64 RxPKeyMismatchCnt;
637 __u64 RxP0HdrEgrOvflCnt;
638 __u64 RxP1HdrEgrOvflCnt;
639 __u64 RxP2HdrEgrOvflCnt;
640 __u64 RxP3HdrEgrOvflCnt;
641 __u64 RxP4HdrEgrOvflCnt;
642 __u64 RxP5HdrEgrOvflCnt;
643 __u64 RxP6HdrEgrOvflCnt;
644 __u64 RxP7HdrEgrOvflCnt;
645 __u64 RxP8HdrEgrOvflCnt;
646 __u64 RxP9HdrEgrOvflCnt; /* was Reserved6 */
647 __u64 RxP10HdrEgrOvflCnt; /* was Reserved7 */
648 __u64 RxP11HdrEgrOvflCnt; /* new for IBA7220 */
649 __u64 RxP12HdrEgrOvflCnt; /* new for IBA7220 */
650 __u64 RxP13HdrEgrOvflCnt; /* new for IBA7220 */
651 __u64 RxP14HdrEgrOvflCnt; /* new for IBA7220 */
652 __u64 RxP15HdrEgrOvflCnt; /* new for IBA7220 */
653 __u64 RxP16HdrEgrOvflCnt; /* new for IBA7220 */
654 __u64 IBStatusChangeCnt;
655 __u64 IBLinkErrRecoveryCnt;
656 __u64 IBLinkDownedCnt;
657 __u64 IBSymbolErrCnt;
658 /* The following are new for IBA7220 */
659 __u64 RxVL15DroppedPktCnt;
660 __u64 RxOtherLocalPhyErrCnt;
661 __u64 PcieRetryBufDiagQwordCnt;
662 __u64 ExcessBufferOvflCnt;
663 __u64 LocalLinkIntegrityErrCnt;
664 __u64 RxVlErrCnt;
665 __u64 RxDlidFltrCnt;
666};
667
668/*
669 * The next set of defines are for packet headers, and chip register
670 * and memory bits that are visible to and/or used by user-mode software
671 * The other bits that are used only by the driver or diags are in
672 * ipath_registers.h
673 */
674
675/* RcvHdrFlags bits */
676#define INFINIPATH_RHF_LENGTH_MASK 0x7FF
677#define INFINIPATH_RHF_LENGTH_SHIFT 0
678#define INFINIPATH_RHF_RCVTYPE_MASK 0x7
679#define INFINIPATH_RHF_RCVTYPE_SHIFT 11
680#define INFINIPATH_RHF_EGRINDEX_MASK 0xFFF
681#define INFINIPATH_RHF_EGRINDEX_SHIFT 16
682#define INFINIPATH_RHF_SEQ_MASK 0xF
683#define INFINIPATH_RHF_SEQ_SHIFT 0
684#define INFINIPATH_RHF_HDRQ_OFFSET_MASK 0x7FF
685#define INFINIPATH_RHF_HDRQ_OFFSET_SHIFT 4
686#define INFINIPATH_RHF_H_ICRCERR 0x80000000
687#define INFINIPATH_RHF_H_VCRCERR 0x40000000
688#define INFINIPATH_RHF_H_PARITYERR 0x20000000
689#define INFINIPATH_RHF_H_LENERR 0x10000000
690#define INFINIPATH_RHF_H_MTUERR 0x08000000
691#define INFINIPATH_RHF_H_IHDRERR 0x04000000
692#define INFINIPATH_RHF_H_TIDERR 0x02000000
693#define INFINIPATH_RHF_H_MKERR 0x01000000
694#define INFINIPATH_RHF_H_IBERR 0x00800000
695#define INFINIPATH_RHF_H_ERR_MASK 0xFF800000
696#define INFINIPATH_RHF_L_USE_EGR 0x80000000
697#define INFINIPATH_RHF_L_SWA 0x00008000
698#define INFINIPATH_RHF_L_SWB 0x00004000
699
700/* infinipath header fields */
701#define INFINIPATH_I_VERS_MASK 0xF
702#define INFINIPATH_I_VERS_SHIFT 28
703#define INFINIPATH_I_PORT_MASK 0xF
704#define INFINIPATH_I_PORT_SHIFT 24
705#define INFINIPATH_I_TID_MASK 0x7FF
706#define INFINIPATH_I_TID_SHIFT 13
707#define INFINIPATH_I_OFFSET_MASK 0x1FFF
708#define INFINIPATH_I_OFFSET_SHIFT 0
709
710/* K_PktFlags bits */
711#define INFINIPATH_KPF_INTR 0x1
712#define INFINIPATH_KPF_SUBPORT_MASK 0x3
713#define INFINIPATH_KPF_SUBPORT_SHIFT 1
714
715#define INFINIPATH_MAX_SUBPORT 4
716
717/* SendPIO per-buffer control */
718#define INFINIPATH_SP_TEST 0x40
719#define INFINIPATH_SP_TESTEBP 0x20
720#define INFINIPATH_SP_TRIGGER_SHIFT 15
721
722/* SendPIOAvail bits */
723#define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1
724#define INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 0
725
726/* infinipath header format */
727struct ipath_header {
728 /*
729 * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
730 * 14 bits before ECO change ~28 Dec 03. After that, Vers 4,
731 * Port 4, TID 11, offset 13.
732 */
733 __le32 ver_port_tid_offset;
734 __le16 chksum;
735 __le16 pkt_flags;
736};
737
738/* infinipath user message header format.
739 * This structure contains the first 4 fields common to all protocols
740 * that employ infinipath.
741 */
742struct ipath_message_header {
743 __be16 lrh[4];
744 __be32 bth[3];
745 /* fields below this point are in host byte order */
746 struct ipath_header iph;
747 __u8 sub_opcode;
748};
749
750/* infinipath ethernet header format */
751struct ether_header {
752 __be16 lrh[4];
753 __be32 bth[3];
754 struct ipath_header iph;
755 __u8 sub_opcode;
756 __u8 cmd;
757 __be16 lid;
758 __u16 mac[3];
759 __u8 frag_num;
760 __u8 seq_num;
761 __le32 len;
762 /* MUST be of word size due to PIO write requirements */
763 __le32 csum;
764 __le16 csum_offset;
765 __le16 flags;
766 __u16 first_2_bytes;
767 __u8 unused[2]; /* currently unused */
768};
769
770
771/* IB - LRH header consts */
772#define IPATH_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
773#define IPATH_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
774
775/* misc. */
776#define SIZE_OF_CRC 1
777
778#define IPATH_DEFAULT_P_KEY 0xFFFF
779#define IPATH_PERMISSIVE_LID 0xFFFF
780#define IPATH_AETH_CREDIT_SHIFT 24
781#define IPATH_AETH_CREDIT_MASK 0x1F
782#define IPATH_AETH_CREDIT_INVAL 0x1F
783#define IPATH_PSN_MASK 0xFFFFFF
784#define IPATH_MSN_MASK 0xFFFFFF
785#define IPATH_QPN_MASK 0xFFFFFF
786#define IPATH_MULTICAST_LID_BASE 0xC000
787#define IPATH_EAGER_TID_ID INFINIPATH_I_TID_MASK
788#define IPATH_MULTICAST_QPN 0xFFFFFF
789
790/* Receive Header Queue: receive type (from infinipath) */
791#define RCVHQ_RCV_TYPE_EXPECTED 0
792#define RCVHQ_RCV_TYPE_EAGER 1
793#define RCVHQ_RCV_TYPE_NON_KD 2
794#define RCVHQ_RCV_TYPE_ERROR 3
795
796
797/* sub OpCodes - ith4x */
798#define IPATH_ITH4X_OPCODE_ENCAP 0x81
799#define IPATH_ITH4X_OPCODE_LID_ARP 0x82
800
801#define IPATH_HEADER_QUEUE_WORDS 9
802
803/* functions for extracting fields from rcvhdrq entries for the driver.
804 */
805static inline __u32 ipath_hdrget_err_flags(const __le32 * rbuf)
806{
807 return __le32_to_cpu(rbuf[1]) & INFINIPATH_RHF_H_ERR_MASK;
808}
809
810static inline __u32 ipath_hdrget_rcv_type(const __le32 * rbuf)
811{
812 return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_RCVTYPE_SHIFT)
813 & INFINIPATH_RHF_RCVTYPE_MASK;
814}
815
816static inline __u32 ipath_hdrget_length_in_bytes(const __le32 * rbuf)
817{
818 return ((__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_LENGTH_SHIFT)
819 & INFINIPATH_RHF_LENGTH_MASK) << 2;
820}
821
822static inline __u32 ipath_hdrget_index(const __le32 * rbuf)
823{
824 return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_EGRINDEX_SHIFT)
825 & INFINIPATH_RHF_EGRINDEX_MASK;
826}
827
828static inline __u32 ipath_hdrget_seq(const __le32 *rbuf)
829{
830 return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_SEQ_SHIFT)
831 & INFINIPATH_RHF_SEQ_MASK;
832}
833
834static inline __u32 ipath_hdrget_offset(const __le32 *rbuf)
835{
836 return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_HDRQ_OFFSET_SHIFT)
837 & INFINIPATH_RHF_HDRQ_OFFSET_MASK;
838}
839
840static inline __u32 ipath_hdrget_use_egr_buf(const __le32 *rbuf)
841{
842 return __le32_to_cpu(rbuf[0]) & INFINIPATH_RHF_L_USE_EGR;
843}
844
845static inline __u32 ipath_hdrget_ipath_ver(__le32 hdrword)
846{
847 return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT)
848 & INFINIPATH_I_VERS_MASK;
849}
850
851#endif /* _IPATH_COMMON_H */
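
For context on the accessor helpers above: each rcvhdrq entry begins with two
receive-header-flag (RHF) words that those inline functions decode. The
following is a hypothetical sketch of how a consumer of this header might
combine them; the helper name and the pr_debug() reporting are illustrative
and not taken from the driver.

#include <linux/types.h>
#include <linux/printk.h>
#include "ipath_common.h"

/* Hypothetical consumer of the RHF accessors defined in ipath_common.h. */
static void example_inspect_rhf(const __le32 *rhf)
{
	__u32 len  = ipath_hdrget_length_in_bytes(rhf);
	__u32 type = ipath_hdrget_rcv_type(rhf);

	if (ipath_hdrget_err_flags(rhf) & INFINIPATH_RHF_H_ICRCERR)
		pr_debug("ICRC error in %u-byte packet\n", len);
	else if (type == RCVHQ_RCV_TYPE_EAGER)
		pr_debug("eager packet, %u bytes, egr index %u\n",
			 len, ipath_hdrget_index(rhf));
}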
diff --git a/drivers/staging/rdma/ipath/ipath_cq.c b/drivers/staging/rdma/ipath/ipath_cq.c
deleted file mode 100644
index e9dd9112e718..000000000000
--- a/drivers/staging/rdma/ipath/ipath_cq.c
+++ /dev/null
@@ -1,483 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/err.h>
35#include <linux/slab.h>
36#include <linux/vmalloc.h>
37
38#include "ipath_verbs.h"
39
40/**
41 * ipath_cq_enter - add a new entry to the completion queue
42 * @cq: completion queue
43 * @entry: work completion entry to add
44 * @solicited: true if @entry is a solicited entry
45 *
46 * This may be called with qp->s_lock held.
47 */
48void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
49{
50 struct ipath_cq_wc *wc;
51 unsigned long flags;
52 u32 head;
53 u32 next;
54
55 spin_lock_irqsave(&cq->lock, flags);
56
57 /*
58 * Note that the head pointer might be writable by user processes.
59 * Take care to verify it is a sane value.
60 */
61 wc = cq->queue;
62 head = wc->head;
63 if (head >= (unsigned) cq->ibcq.cqe) {
64 head = cq->ibcq.cqe;
65 next = 0;
66 } else
67 next = head + 1;
68 if (unlikely(next == wc->tail)) {
69 spin_unlock_irqrestore(&cq->lock, flags);
70 if (cq->ibcq.event_handler) {
71 struct ib_event ev;
72
73 ev.device = cq->ibcq.device;
74 ev.element.cq = &cq->ibcq;
75 ev.event = IB_EVENT_CQ_ERR;
76 cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
77 }
78 return;
79 }
80 if (cq->ip) {
81 wc->uqueue[head].wr_id = entry->wr_id;
82 wc->uqueue[head].status = entry->status;
83 wc->uqueue[head].opcode = entry->opcode;
84 wc->uqueue[head].vendor_err = entry->vendor_err;
85 wc->uqueue[head].byte_len = entry->byte_len;
86 wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data;
87 wc->uqueue[head].qp_num = entry->qp->qp_num;
88 wc->uqueue[head].src_qp = entry->src_qp;
89 wc->uqueue[head].wc_flags = entry->wc_flags;
90 wc->uqueue[head].pkey_index = entry->pkey_index;
91 wc->uqueue[head].slid = entry->slid;
92 wc->uqueue[head].sl = entry->sl;
93 wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
94 wc->uqueue[head].port_num = entry->port_num;
95 /* Make sure entry is written before the head index. */
96 smp_wmb();
97 } else
98 wc->kqueue[head] = *entry;
99 wc->head = next;
100
101 if (cq->notify == IB_CQ_NEXT_COMP ||
102 (cq->notify == IB_CQ_SOLICITED && solicited)) {
103 cq->notify = IB_CQ_NONE;
104 cq->triggered++;
105 /*
106 * This will cause send_complete() to be called in
107 * another thread.
108 */
109 tasklet_hi_schedule(&cq->comptask);
110 }
111
112 spin_unlock_irqrestore(&cq->lock, flags);
113
114 if (entry->status != IB_WC_SUCCESS)
115 to_idev(cq->ibcq.device)->n_wqe_errs++;
116}
117
118/**
119 * ipath_poll_cq - poll for work completion entries
120 * @ibcq: the completion queue to poll
121 * @num_entries: the maximum number of entries to return
122 * @entry: pointer to array where work completions are placed
123 *
124 * Returns the number of completion entries polled.
125 *
126 * This may be called from interrupt context. Also called by ib_poll_cq()
127 * in the generic verbs code.
128 */
129int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
130{
131 struct ipath_cq *cq = to_icq(ibcq);
132 struct ipath_cq_wc *wc;
133 unsigned long flags;
134 int npolled;
135 u32 tail;
136
137 /* The kernel can only poll a kernel completion queue */
138 if (cq->ip) {
139 npolled = -EINVAL;
140 goto bail;
141 }
142
143 spin_lock_irqsave(&cq->lock, flags);
144
145 wc = cq->queue;
146 tail = wc->tail;
147 if (tail > (u32) cq->ibcq.cqe)
148 tail = (u32) cq->ibcq.cqe;
149 for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
150 if (tail == wc->head)
151 break;
152 /* The kernel doesn't need a RMB since it has the lock. */
153 *entry = wc->kqueue[tail];
154 if (tail >= cq->ibcq.cqe)
155 tail = 0;
156 else
157 tail++;
158 }
159 wc->tail = tail;
160
161 spin_unlock_irqrestore(&cq->lock, flags);
162
163bail:
164 return npolled;
165}
166
167static void send_complete(unsigned long data)
168{
169 struct ipath_cq *cq = (struct ipath_cq *)data;
170
171 /*
172 * The completion handler will most likely rearm the notification
173 * and poll for all pending entries. If a new completion entry
174 * is added while we are in this routine, tasklet_hi_schedule()
175 * won't call us again until we return so we check triggered to
176 * see if we need to call the handler again.
177 */
178 for (;;) {
179 u8 triggered = cq->triggered;
180
181 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
182
183 if (cq->triggered == triggered)
184 return;
185 }
186}
187
188/**
189 * ipath_create_cq - create a completion queue
190 * @ibdev: the device this completion queue is attached to
191 * @attr: creation attributes
192 * @context: unused by the InfiniPath driver
193 * @udata: unused by the InfiniPath driver
194 *
195 * Returns a pointer to the completion queue or negative errno values
196 * for failure.
197 *
198 * Called by ib_create_cq() in the generic verbs code.
199 */
200struct ib_cq *ipath_create_cq(struct ib_device *ibdev,
201 const struct ib_cq_init_attr *attr,
202 struct ib_ucontext *context,
203 struct ib_udata *udata)
204{
205 int entries = attr->cqe;
206 struct ipath_ibdev *dev = to_idev(ibdev);
207 struct ipath_cq *cq;
208 struct ipath_cq_wc *wc;
209 struct ib_cq *ret;
210 u32 sz;
211
212 if (attr->flags)
213 return ERR_PTR(-EINVAL);
214
215 if (entries < 1 || entries > ib_ipath_max_cqes) {
216 ret = ERR_PTR(-EINVAL);
217 goto done;
218 }
219
220 /* Allocate the completion queue structure. */
221 cq = kmalloc(sizeof(*cq), GFP_KERNEL);
222 if (!cq) {
223 ret = ERR_PTR(-ENOMEM);
224 goto done;
225 }
226
227 /*
228 * Allocate the completion queue entries and head/tail pointers.
229 * This is allocated separately so that it can be resized and
230 * also mapped into user space.
231 * We need to use vmalloc() in order to support mmap and large
232 * numbers of entries.
233 */
234 sz = sizeof(*wc);
235 if (udata && udata->outlen >= sizeof(__u64))
236 sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
237 else
238 sz += sizeof(struct ib_wc) * (entries + 1);
239 wc = vmalloc_user(sz);
240 if (!wc) {
241 ret = ERR_PTR(-ENOMEM);
242 goto bail_cq;
243 }
244
245 /*
246 * Return the address of the WC as the offset to mmap.
247 * See ipath_mmap() for details.
248 */
249 if (udata && udata->outlen >= sizeof(__u64)) {
250 int err;
251
252 cq->ip = ipath_create_mmap_info(dev, sz, context, wc);
253 if (!cq->ip) {
254 ret = ERR_PTR(-ENOMEM);
255 goto bail_wc;
256 }
257
258 err = ib_copy_to_udata(udata, &cq->ip->offset,
259 sizeof(cq->ip->offset));
260 if (err) {
261 ret = ERR_PTR(err);
262 goto bail_ip;
263 }
264 } else
265 cq->ip = NULL;
266
267 spin_lock(&dev->n_cqs_lock);
268 if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
269 spin_unlock(&dev->n_cqs_lock);
270 ret = ERR_PTR(-ENOMEM);
271 goto bail_ip;
272 }
273
274 dev->n_cqs_allocated++;
275 spin_unlock(&dev->n_cqs_lock);
276
277 if (cq->ip) {
278 spin_lock_irq(&dev->pending_lock);
279 list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
280 spin_unlock_irq(&dev->pending_lock);
281 }
282
283 /*
284 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
285 * The number of entries should be >= the number requested or return
286 * an error.
287 */
288 cq->ibcq.cqe = entries;
289 cq->notify = IB_CQ_NONE;
290 cq->triggered = 0;
291 spin_lock_init(&cq->lock);
292 tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
293 wc->head = 0;
294 wc->tail = 0;
295 cq->queue = wc;
296
297 ret = &cq->ibcq;
298
299 goto done;
300
301bail_ip:
302 kfree(cq->ip);
303bail_wc:
304 vfree(wc);
305bail_cq:
306 kfree(cq);
307done:
308 return ret;
309}
310
311/**
312 * ipath_destroy_cq - destroy a completion queue
313 * @ibcq: the completion queue to destroy.
314 *
315 * Returns 0 for success.
316 *
317 * Called by ib_destroy_cq() in the generic verbs code.
318 */
319int ipath_destroy_cq(struct ib_cq *ibcq)
320{
321 struct ipath_ibdev *dev = to_idev(ibcq->device);
322 struct ipath_cq *cq = to_icq(ibcq);
323
324 tasklet_kill(&cq->comptask);
325 spin_lock(&dev->n_cqs_lock);
326 dev->n_cqs_allocated--;
327 spin_unlock(&dev->n_cqs_lock);
328 if (cq->ip)
329 kref_put(&cq->ip->ref, ipath_release_mmap_info);
330 else
331 vfree(cq->queue);
332 kfree(cq);
333
334 return 0;
335}
336
337/**
338 * ipath_req_notify_cq - change the notification type for a completion queue
339 * @ibcq: the completion queue
340 * @notify_flags: the type of notification to request
341 *
342 * Returns 0 for success.
343 *
344 * This may be called from interrupt context. Also called by
345 * ib_req_notify_cq() in the generic verbs code.
346 */
347int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
348{
349 struct ipath_cq *cq = to_icq(ibcq);
350 unsigned long flags;
351 int ret = 0;
352
353 spin_lock_irqsave(&cq->lock, flags);
354 /*
355 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
356 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
357 */
358 if (cq->notify != IB_CQ_NEXT_COMP)
359 cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
360
361 if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
362 cq->queue->head != cq->queue->tail)
363 ret = 1;
364
365 spin_unlock_irqrestore(&cq->lock, flags);
366
367 return ret;
368}
369
370/**
371 * ipath_resize_cq - change the size of the CQ
372 * @ibcq: the completion queue
373 *
374 * Returns 0 for success.
375 */
376int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
377{
378 struct ipath_cq *cq = to_icq(ibcq);
379 struct ipath_cq_wc *old_wc;
380 struct ipath_cq_wc *wc;
381 u32 head, tail, n;
382 int ret;
383 u32 sz;
384
385 if (cqe < 1 || cqe > ib_ipath_max_cqes) {
386 ret = -EINVAL;
387 goto bail;
388 }
389
390 /*
391 * Need to use vmalloc() if we want to support large #s of entries.
392 */
393 sz = sizeof(*wc);
394 if (udata && udata->outlen >= sizeof(__u64))
395 sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
396 else
397 sz += sizeof(struct ib_wc) * (cqe + 1);
398 wc = vmalloc_user(sz);
399 if (!wc) {
400 ret = -ENOMEM;
401 goto bail;
402 }
403
404 /* Check that we can write the offset to mmap. */
405 if (udata && udata->outlen >= sizeof(__u64)) {
406 __u64 offset = 0;
407
408 ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
409 if (ret)
410 goto bail_free;
411 }
412
413 spin_lock_irq(&cq->lock);
414 /*
415 * Make sure head and tail are sane since they
416 * might be user writable.
417 */
418 old_wc = cq->queue;
419 head = old_wc->head;
420 if (head > (u32) cq->ibcq.cqe)
421 head = (u32) cq->ibcq.cqe;
422 tail = old_wc->tail;
423 if (tail > (u32) cq->ibcq.cqe)
424 tail = (u32) cq->ibcq.cqe;
425 if (head < tail)
426 n = cq->ibcq.cqe + 1 + head - tail;
427 else
428 n = head - tail;
429 if (unlikely((u32)cqe < n)) {
430 ret = -EINVAL;
431 goto bail_unlock;
432 }
433 for (n = 0; tail != head; n++) {
434 if (cq->ip)
435 wc->uqueue[n] = old_wc->uqueue[tail];
436 else
437 wc->kqueue[n] = old_wc->kqueue[tail];
438 if (tail == (u32) cq->ibcq.cqe)
439 tail = 0;
440 else
441 tail++;
442 }
443 cq->ibcq.cqe = cqe;
444 wc->head = n;
445 wc->tail = 0;
446 cq->queue = wc;
447 spin_unlock_irq(&cq->lock);
448
449 vfree(old_wc);
450
451 if (cq->ip) {
452 struct ipath_ibdev *dev = to_idev(ibcq->device);
453 struct ipath_mmap_info *ip = cq->ip;
454
455 ipath_update_mmap_info(dev, ip, sz, wc);
456
457 /*
458 * Return the offset to mmap.
459 * See ipath_mmap() for details.
460 */
461 if (udata && udata->outlen >= sizeof(__u64)) {
462 ret = ib_copy_to_udata(udata, &ip->offset,
463 sizeof(ip->offset));
464 if (ret)
465 goto bail;
466 }
467
468 spin_lock_irq(&dev->pending_lock);
469 if (list_empty(&ip->pending_mmaps))
470 list_add(&ip->pending_mmaps, &dev->pending_mmaps);
471 spin_unlock_irq(&dev->pending_lock);
472 }
473
474 ret = 0;
475 goto bail;
476
477bail_unlock:
478 spin_unlock_irq(&cq->lock);
479bail_free:
480 vfree(wc);
481bail:
482 return ret;
483}
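
A side note on the ring arithmetic used above: the completion queue is a
circular buffer of cq->ibcq.cqe + 1 slots, where head == tail means empty.
The occupancy computation in ipath_resize_cq() can be summarized by the
small, illustrative helper below (not part of the driver).

#include <linux/types.h>

/* Entries currently queued in a ring of (cqe + 1) slots, as computed in
 * ipath_resize_cq() above; head == tail means the ring is empty. */
static inline u32 example_cq_occupancy(u32 head, u32 tail, u32 cqe)
{
	return (head >= tail) ? head - tail : cqe + 1 + head - tail;
}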
diff --git a/drivers/staging/rdma/ipath/ipath_debug.h b/drivers/staging/rdma/ipath/ipath_debug.h
deleted file mode 100644
index 65926cd35759..000000000000
--- a/drivers/staging/rdma/ipath/ipath_debug.h
+++ /dev/null
@@ -1,99 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef _IPATH_DEBUG_H
35#define _IPATH_DEBUG_H
36
37#ifndef _IPATH_DEBUGGING /* debugging enabled or not */
38#define _IPATH_DEBUGGING 1
39#endif
40
41#if _IPATH_DEBUGGING
42
43/*
44 * Mask values for debugging. The scheme allows us to compile out any
45 * of the debug tracing stuff, and if compiled in, to enable or disable
46 * dynamically. This can be set at modprobe time also:
47 * modprobe infinipath.ko infinipath_debug=7
48 */
49
50#define __IPATH_INFO 0x1 /* generic low verbosity stuff */
51#define __IPATH_DBG 0x2 /* generic debug */
52#define __IPATH_TRSAMPLE 0x8 /* generate trace buffer sample entries */
53/* leave some low verbosity spots open */
54#define __IPATH_VERBDBG 0x40 /* very verbose debug */
55#define __IPATH_PKTDBG 0x80 /* print packet data */
56/* print process startup (init)/exit messages */
57#define __IPATH_PROCDBG 0x100
58/* print mmap/fault stuff, not using VDBG any more */
59#define __IPATH_MMDBG 0x200
60#define __IPATH_ERRPKTDBG 0x400
61#define __IPATH_USER_SEND 0x1000 /* use user mode send */
62#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */
63#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */
64#define __IPATH_IPATHDBG 0x10000 /* Ethernet (IPATH) gen debug */
65#define __IPATH_IPATHWARN 0x20000 /* Ethernet (IPATH) warnings */
66#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */
67#define __IPATH_IPATHPD 0x80000 /* Ethernet (IPATH) packet dump */
68#define __IPATH_IPATHTABLE 0x100000 /* Ethernet (IPATH) table dump */
69#define __IPATH_LINKVERBDBG 0x200000 /* very verbose linkchange debug */
70
71#else /* _IPATH_DEBUGGING */
72
73/*
74 * define all of these even with debugging off, for the few places that do
75 * if(infinipath_debug & _IPATH_xyzzy), but in a way that will make the
76 * compiler eliminate the code
77 */
78
79#define __IPATH_INFO 0x0 /* generic low verbosity stuff */
80#define __IPATH_DBG 0x0 /* generic debug */
81#define __IPATH_TRSAMPLE 0x0 /* generate trace buffer sample entries */
82#define __IPATH_VERBDBG 0x0 /* very verbose debug */
83#define __IPATH_PKTDBG 0x0 /* print packet data */
84#define __IPATH_PROCDBG 0x0 /* process startup (init)/exit messages */
85/* print mmap/fault stuff, not using VDBG any more */
86#define __IPATH_MMDBG 0x0
87#define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */
88#define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) table dump on */
89#define __IPATH_IPATHWARN 0x0 /* Ethernet (IPATH) warnings on */
90#define __IPATH_IPATHERR 0x0 /* Ethernet (IPATH) errors on */
91#define __IPATH_IPATHPD 0x0 /* Ethernet (IPATH) packet dump on */
92#define __IPATH_IPATHTABLE 0x0 /* Ethernet (IPATH) packet dump on */
93#define __IPATH_LINKVERBDBG 0x0 /* very verbose linkchange debug */
94
95#endif /* _IPATH_DEBUGGING */
96
97#define __IPATH_VERBOSEDBG __IPATH_VERBDBG
98
99#endif /* _IPATH_DEBUG_H */
diff --git a/drivers/staging/rdma/ipath/ipath_diag.c b/drivers/staging/rdma/ipath/ipath_diag.c
deleted file mode 100644
index 45802e97332e..000000000000
--- a/drivers/staging/rdma/ipath/ipath_diag.c
+++ /dev/null
@@ -1,551 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34/*
35 * This file contains support for diagnostic functions. It is accessed by
36 * opening the ipath_diag device, normally minor number 129. Diagnostic use
37 * of the InfiniPath chip may render the chip or board unusable until the
38 * driver is unloaded, or in some cases, until the system is rebooted.
39 *
40 * Accesses to the chip through this interface are not similar to going
41 * through the /sys/bus/pci resource mmap interface.
42 */
43
44#include <linux/io.h>
45#include <linux/pci.h>
46#include <linux/vmalloc.h>
47#include <linux/fs.h>
48#include <linux/export.h>
49#include <asm/uaccess.h>
50
51#include "ipath_kernel.h"
52#include "ipath_common.h"
53
54int ipath_diag_inuse;
55static int diag_set_link;
56
57static int ipath_diag_open(struct inode *in, struct file *fp);
58static int ipath_diag_release(struct inode *in, struct file *fp);
59static ssize_t ipath_diag_read(struct file *fp, char __user *data,
60 size_t count, loff_t *off);
61static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
62 size_t count, loff_t *off);
63
64static const struct file_operations diag_file_ops = {
65 .owner = THIS_MODULE,
66 .write = ipath_diag_write,
67 .read = ipath_diag_read,
68 .open = ipath_diag_open,
69 .release = ipath_diag_release,
70 .llseek = default_llseek,
71};
72
73static ssize_t ipath_diagpkt_write(struct file *fp,
74 const char __user *data,
75 size_t count, loff_t *off);
76
77static const struct file_operations diagpkt_file_ops = {
78 .owner = THIS_MODULE,
79 .write = ipath_diagpkt_write,
80 .llseek = noop_llseek,
81};
82
83static atomic_t diagpkt_count = ATOMIC_INIT(0);
84static struct cdev *diagpkt_cdev;
85static struct device *diagpkt_dev;
86
87int ipath_diag_add(struct ipath_devdata *dd)
88{
89 char name[16];
90 int ret = 0;
91
92 if (atomic_inc_return(&diagpkt_count) == 1) {
93 ret = ipath_cdev_init(IPATH_DIAGPKT_MINOR,
94 "ipath_diagpkt", &diagpkt_file_ops,
95 &diagpkt_cdev, &diagpkt_dev);
96
97 if (ret) {
98 ipath_dev_err(dd, "Couldn't create ipath_diagpkt "
99 "device: %d", ret);
100 goto done;
101 }
102 }
103
104 snprintf(name, sizeof(name), "ipath_diag%d", dd->ipath_unit);
105
106 ret = ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name,
107 &diag_file_ops, &dd->diag_cdev,
108 &dd->diag_dev);
109 if (ret)
110 ipath_dev_err(dd, "Couldn't create %s device: %d",
111 name, ret);
112
113done:
114 return ret;
115}
116
117void ipath_diag_remove(struct ipath_devdata *dd)
118{
119 if (atomic_dec_and_test(&diagpkt_count))
120 ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_dev);
121
122 ipath_cdev_cleanup(&dd->diag_cdev, &dd->diag_dev);
123}
124
125/**
126 * ipath_read_umem64 - read a 64-bit quantity from the chip into user space
127 * @dd: the infinipath device
128 * @uaddr: the location to store the data in user memory
129 * @caddr: the source chip address (full pointer, not offset)
130 * @count: number of bytes to copy (multiple of 32 bits)
131 *
132 * This function also localizes all chip memory accesses.
133 * The copy should be written such that we read full cacheline packets
134 * from the chip. This is usually used for a single qword
135 *
136 * NOTE: This assumes the chip address is 64-bit aligned.
137 */
138static int ipath_read_umem64(struct ipath_devdata *dd, void __user *uaddr,
139 const void __iomem *caddr, size_t count)
140{
141 const u64 __iomem *reg_addr = caddr;
142 const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
143 int ret;
144
145 /* not very efficient, but it works for now */
146 if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
147 ret = -EINVAL;
148 goto bail;
149 }
150 while (reg_addr < reg_end) {
151 u64 data = readq(reg_addr);
152 if (copy_to_user(uaddr, &data, sizeof(u64))) {
153 ret = -EFAULT;
154 goto bail;
155 }
156 reg_addr++;
157 uaddr += sizeof(u64);
158 }
159 ret = 0;
160bail:
161 return ret;
162}
163
164/**
165 * ipath_write_umem64 - write a 64-bit quantity to the chip from user space
166 * @dd: the infinipath device
167 * @caddr: the destination chip address (full pointer, not offset)
168 * @uaddr: the source of the data in user memory
169 * @count: the number of bytes to copy (multiple of 32 bits)
170 *
171 * This is usually used for a single qword
172 * NOTE: This assumes the chip address is 64-bit aligned.
173 */
174
175static int ipath_write_umem64(struct ipath_devdata *dd, void __iomem *caddr,
176 const void __user *uaddr, size_t count)
177{
178 u64 __iomem *reg_addr = caddr;
179 const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
180 int ret;
181
182 /* not very efficient, but it works for now */
183 if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
184 ret = -EINVAL;
185 goto bail;
186 }
187 while (reg_addr < reg_end) {
188 u64 data;
189 if (copy_from_user(&data, uaddr, sizeof(data))) {
190 ret = -EFAULT;
191 goto bail;
192 }
193 writeq(data, reg_addr);
194
195 reg_addr++;
196 uaddr += sizeof(u64);
197 }
198 ret = 0;
199bail:
200 return ret;
201}
202
203/**
204 * ipath_read_umem32 - read a 32-bit quantity from the chip into user space
205 * @dd: the infinipath device
206 * @uaddr: the location to store the data in user memory
207 * @caddr: the source chip address (full pointer, not offset)
208 * @count: number of bytes to copy
209 *
210 * read 32 bit values, not 64 bit; for memories that only
211 * support 32 bit reads; usually a single dword.
212 */
213static int ipath_read_umem32(struct ipath_devdata *dd, void __user *uaddr,
214 const void __iomem *caddr, size_t count)
215{
216 const u32 __iomem *reg_addr = caddr;
217 const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
218 int ret;
219
220 if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
221 reg_end > (u32 __iomem *) dd->ipath_kregend) {
222 ret = -EINVAL;
223 goto bail;
224 }
225 /* not very efficient, but it works for now */
226 while (reg_addr < reg_end) {
227 u32 data = readl(reg_addr);
228 if (copy_to_user(uaddr, &data, sizeof(data))) {
229 ret = -EFAULT;
230 goto bail;
231 }
232
233 reg_addr++;
234 uaddr += sizeof(u32);
235
236 }
237 ret = 0;
238bail:
239 return ret;
240}
241
242/**
243 * ipath_write_umem32 - write a 32-bit quantity to the chip from user space
244 * @dd: the infinipath device
245 * @caddr: the destination chip address (full pointer, not offset)
246 * @uaddr: the source of the data in user memory
247 * @count: number of bytes to copy
248 *
249 * write 32 bit values, not 64 bit; for memories that only
250 * support 32 bit write; usually a single dword.
251 */
252
253static int ipath_write_umem32(struct ipath_devdata *dd, void __iomem *caddr,
254 const void __user *uaddr, size_t count)
255{
256 u32 __iomem *reg_addr = caddr;
257 const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
258 int ret;
259
260 if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
261 reg_end > (u32 __iomem *) dd->ipath_kregend) {
262 ret = -EINVAL;
263 goto bail;
264 }
265 while (reg_addr < reg_end) {
266 u32 data;
267 if (copy_from_user(&data, uaddr, sizeof(data))) {
268 ret = -EFAULT;
269 goto bail;
270 }
271 writel(data, reg_addr);
272
273 reg_addr++;
274 uaddr += sizeof(u32);
275 }
276 ret = 0;
277bail:
278 return ret;
279}
280
281static int ipath_diag_open(struct inode *in, struct file *fp)
282{
283 int unit = iminor(in) - IPATH_DIAG_MINOR_BASE;
284 struct ipath_devdata *dd;
285 int ret;
286
287 mutex_lock(&ipath_mutex);
288
289 if (ipath_diag_inuse) {
290 ret = -EBUSY;
291 goto bail;
292 }
293
294 dd = ipath_lookup(unit);
295
296 if (dd == NULL || !(dd->ipath_flags & IPATH_PRESENT) ||
297 !dd->ipath_kregbase) {
298 ret = -ENODEV;
299 goto bail;
300 }
301
302 fp->private_data = dd;
303 ipath_diag_inuse = -2;
304 diag_set_link = 0;
305 ret = 0;
306
307 /* Only expose a way to reset the device if we
308 make it into diag mode. */
309 ipath_expose_reset(&dd->pcidev->dev);
310
311bail:
312 mutex_unlock(&ipath_mutex);
313
314 return ret;
315}
316
317/**
318 * ipath_diagpkt_write - write an IB packet
319 * @fp: the diag data device file pointer
320 * @data: ipath_diag_pkt structure saying where to get the packet
321 * @count: size of data to write
322 * @off: unused by this code
323 */
324static ssize_t ipath_diagpkt_write(struct file *fp,
325 const char __user *data,
326 size_t count, loff_t *off)
327{
328 u32 __iomem *piobuf;
329 u32 plen, pbufn, maxlen_reserve;
330 struct ipath_diag_pkt odp;
331 struct ipath_diag_xpkt dp;
332 u32 *tmpbuf = NULL;
333 struct ipath_devdata *dd;
334 ssize_t ret = 0;
335 u64 val;
336 u32 l_state, lt_state; /* LinkState, LinkTrainingState */
337
338
339 if (count == sizeof(dp)) {
340 if (copy_from_user(&dp, data, sizeof(dp))) {
341 ret = -EFAULT;
342 goto bail;
343 }
344 } else if (count == sizeof(odp)) {
345 if (copy_from_user(&odp, data, sizeof(odp))) {
346 ret = -EFAULT;
347 goto bail;
348 }
349 dp.len = odp.len;
350 dp.unit = odp.unit;
351 dp.data = odp.data;
352 dp.pbc_wd = 0;
353 } else {
354 ret = -EINVAL;
355 goto bail;
356 }
357
358 /* send count must be an exact number of dwords */
359 if (dp.len & 3) {
360 ret = -EINVAL;
361 goto bail;
362 }
363
364 plen = dp.len >> 2;
365
366 dd = ipath_lookup(dp.unit);
367 if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
368 !dd->ipath_kregbase) {
369 ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n",
370 dp.unit);
371 ret = -ENODEV;
372 goto bail;
373 }
374
375 if (ipath_diag_inuse && !diag_set_link &&
376 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
377 diag_set_link = 1;
378 ipath_cdbg(VERBOSE, "Trying to set to set link active for "
379 "diag pkt\n");
380 ipath_set_linkstate(dd, IPATH_IB_LINKARM);
381 ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
382 }
383
384 if (!(dd->ipath_flags & IPATH_INITTED)) {
385 /* no hardware, freeze, etc. */
386 ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit);
387 ret = -ENODEV;
388 goto bail;
389 }
390 /*
391 * Want to skip check for l_state if using custom PBC,
392 * because we might be trying to force an SM packet out.
393 * first-cut, skip _all_ state checking in that case.
394 */
395 val = ipath_ib_state(dd, dd->ipath_lastibcstat);
396 lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
397 l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
398 if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP ||
399 (val != dd->ib_init && val != dd->ib_arm &&
400 val != dd->ib_active))) {
401 ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
402 dd->ipath_unit, (unsigned long long) val);
403 ret = -EINVAL;
404 goto bail;
405 }
406
407 /*
408 * need total length before first word written, plus 2 Dwords. One Dword
409 * is for padding so we get the full user data when not aligned on
410 * a word boundary. The other Dword is to make sure we have room for the
411 * ICRC which gets tacked on later.
412 */
413 maxlen_reserve = 2 * sizeof(u32);
414 if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) {
415 ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
416 dp.len, dd->ipath_ibmaxlen);
417 ret = -EINVAL;
418 goto bail;
419 }
420
421 plen = sizeof(u32) + dp.len;
422
423 tmpbuf = vmalloc(plen);
424 if (!tmpbuf) {
425 dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
426 "failing\n");
427 ret = -ENOMEM;
428 goto bail;
429 }
430
431 if (copy_from_user(tmpbuf,
432 (const void __user *) (unsigned long) dp.data,
433 dp.len)) {
434 ret = -EFAULT;
435 goto bail;
436 }
437
438 plen >>= 2; /* in dwords */
439
440 piobuf = ipath_getpiobuf(dd, plen, &pbufn);
441 if (!piobuf) {
442 ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n",
443 dd->ipath_unit);
444 ret = -EBUSY;
445 goto bail;
446 }
447 /* disarm it just to be extra sure */
448 ipath_disarm_piobufs(dd, pbufn, 1);
449
450 if (ipath_debug & __IPATH_PKTDBG)
451 ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
452 dd->ipath_unit, plen - 1, pbufn);
453
454 if (dp.pbc_wd == 0)
455 dp.pbc_wd = plen;
456 writeq(dp.pbc_wd, piobuf);
457 /*
458	 * Copy all but the trigger word, then flush, so it's written
459 * to chip before trigger word, then write trigger word, then
460 * flush again, so packet is sent.
461 */
462 if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
463 ipath_flush_wc();
464 __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1);
465 ipath_flush_wc();
466 __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
467 } else
468 __iowrite32_copy(piobuf + 2, tmpbuf, plen);
469
470 ipath_flush_wc();
471
472 ret = sizeof(dp);
473
474bail:
475 vfree(tmpbuf);
476 return ret;
477}
478
479static int ipath_diag_release(struct inode *in, struct file *fp)
480{
481 mutex_lock(&ipath_mutex);
482 ipath_diag_inuse = 0;
483 fp->private_data = NULL;
484 mutex_unlock(&ipath_mutex);
485 return 0;
486}
487
488static ssize_t ipath_diag_read(struct file *fp, char __user *data,
489 size_t count, loff_t *off)
490{
491 struct ipath_devdata *dd = fp->private_data;
492 void __iomem *kreg_base;
493 ssize_t ret;
494
495 kreg_base = dd->ipath_kregbase;
496
497 if (count == 0)
498 ret = 0;
499 else if ((count % 4) || (*off % 4))
500 /* address or length is not 32-bit aligned, hence invalid */
501 ret = -EINVAL;
502 else if (ipath_diag_inuse < 1 && (*off || count != 8))
503 ret = -EINVAL; /* prevent cat /dev/ipath_diag* */
504 else if ((count % 8) || (*off % 8))
505 /* address or length not 64-bit aligned; do 32-bit reads */
506 ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
507 else
508 ret = ipath_read_umem64(dd, data, kreg_base + *off, count);
509
510 if (ret >= 0) {
511 *off += count;
512 ret = count;
513 if (ipath_diag_inuse == -2)
514 ipath_diag_inuse++;
515 }
516
517 return ret;
518}
519
520static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
521 size_t count, loff_t *off)
522{
523 struct ipath_devdata *dd = fp->private_data;
524 void __iomem *kreg_base;
525 ssize_t ret;
526
527 kreg_base = dd->ipath_kregbase;
528
529 if (count == 0)
530 ret = 0;
531 else if ((count % 4) || (*off % 4))
532 /* address or length is not 32-bit aligned, hence invalid */
533 ret = -EINVAL;
534 else if ((ipath_diag_inuse == -1 && (*off || count != 8)) ||
535 ipath_diag_inuse == -2) /* read qw off 0, write qw off 0 */
536 ret = -EINVAL; /* before any other write allowed */
537 else if ((count % 8) || (*off % 8))
538 /* address or length not 64-bit aligned; do 32-bit writes */
539 ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
540 else
541 ret = ipath_write_umem64(dd, kreg_base + *off, data, count);
542
543 if (ret >= 0) {
544 *off += count;
545 ret = count;
546 if (ipath_diag_inuse == -1)
547 ipath_diag_inuse = 1; /* all read/write OK now */
548 }
549
550 return ret;
551}
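
For illustration only (not part of the driver or of this patch): a minimal user-space sketch of the access handshake enforced by ipath_diag_open()/read()/write() above. The device node name is assumed from the "ipath_diag%d" cdev created in ipath_diag_add(); the 8-byte read followed by the 8-byte write at offset 0 mirror the ipath_diag_inuse transitions in the code, after which 4-byte-aligned accesses are served by the 32-bit path and 8-byte-aligned ones by the 64-bit path.

/* Hypothetical user-space sketch of the ipath_diag handshake above. */
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

int main(void)
{
	uint64_t qw = 0;
	uint32_t dw = 0;
	int fd = open("/dev/ipath_diag0", O_RDWR);	/* node name assumed */

	if (fd < 0)
		return 1;
	/* while ipath_diag_inuse < 1, only an 8-byte read at offset 0 passes */
	if (pread(fd, &qw, sizeof(qw), 0) != sizeof(qw))
		goto out;
	/* an 8-byte write at offset 0 then unlocks general aligned access */
	if (pwrite(fd, &qw, sizeof(qw), 0) != sizeof(qw))
		goto out;
	/* 4-byte aligned but not 8-byte aligned: served by ipath_read_umem32 */
	pread(fd, &dw, sizeof(dw), 0x24);
out:
	close(fd);
	return 0;
}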
diff --git a/drivers/staging/rdma/ipath/ipath_dma.c b/drivers/staging/rdma/ipath/ipath_dma.c
deleted file mode 100644
index 123a8c053539..000000000000
--- a/drivers/staging/rdma/ipath/ipath_dma.c
+++ /dev/null
@@ -1,179 +0,0 @@
1/*
2 * Copyright (c) 2006 QLogic, Corporation. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/scatterlist.h>
34#include <linux/gfp.h>
35#include <rdma/ib_verbs.h>
36
37#include "ipath_verbs.h"
38
39#define BAD_DMA_ADDRESS ((u64) 0)
40
41/*
42 * The following functions implement driver specific replacements
43 * for the ib_dma_*() functions.
44 *
45 * These functions return kernel virtual addresses instead of
46 * device bus addresses since the driver uses the CPU to copy
47 * data instead of using hardware DMA.
48 */
49
50static int ipath_mapping_error(struct ib_device *dev, u64 dma_addr)
51{
52 return dma_addr == BAD_DMA_ADDRESS;
53}
54
55static u64 ipath_dma_map_single(struct ib_device *dev,
56 void *cpu_addr, size_t size,
57 enum dma_data_direction direction)
58{
59 BUG_ON(!valid_dma_direction(direction));
60 return (u64) cpu_addr;
61}
62
63static void ipath_dma_unmap_single(struct ib_device *dev,
64 u64 addr, size_t size,
65 enum dma_data_direction direction)
66{
67 BUG_ON(!valid_dma_direction(direction));
68}
69
70static u64 ipath_dma_map_page(struct ib_device *dev,
71 struct page *page,
72 unsigned long offset,
73 size_t size,
74 enum dma_data_direction direction)
75{
76 u64 addr;
77
78 BUG_ON(!valid_dma_direction(direction));
79
80 if (offset + size > PAGE_SIZE) {
81 addr = BAD_DMA_ADDRESS;
82 goto done;
83 }
84
85 addr = (u64) page_address(page);
86 if (addr)
87 addr += offset;
88 /* TODO: handle highmem pages */
89
90done:
91 return addr;
92}
93
94static void ipath_dma_unmap_page(struct ib_device *dev,
95 u64 addr, size_t size,
96 enum dma_data_direction direction)
97{
98 BUG_ON(!valid_dma_direction(direction));
99}
100
101static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
102 int nents, enum dma_data_direction direction)
103{
104 struct scatterlist *sg;
105 u64 addr;
106 int i;
107 int ret = nents;
108
109 BUG_ON(!valid_dma_direction(direction));
110
111 for_each_sg(sgl, sg, nents, i) {
112 addr = (u64) page_address(sg_page(sg));
113 /* TODO: handle highmem pages */
114 if (!addr) {
115 ret = 0;
116 break;
117 }
118 sg->dma_address = addr + sg->offset;
119#ifdef CONFIG_NEED_SG_DMA_LENGTH
120 sg->dma_length = sg->length;
121#endif
122 }
123 return ret;
124}
125
126static void ipath_unmap_sg(struct ib_device *dev,
127 struct scatterlist *sg, int nents,
128 enum dma_data_direction direction)
129{
130 BUG_ON(!valid_dma_direction(direction));
131}
132
133static void ipath_sync_single_for_cpu(struct ib_device *dev,
134 u64 addr,
135 size_t size,
136 enum dma_data_direction dir)
137{
138}
139
140static void ipath_sync_single_for_device(struct ib_device *dev,
141 u64 addr,
142 size_t size,
143 enum dma_data_direction dir)
144{
145}
146
147static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size,
148 u64 *dma_handle, gfp_t flag)
149{
150 struct page *p;
151 void *addr = NULL;
152
153 p = alloc_pages(flag, get_order(size));
154 if (p)
155 addr = page_address(p);
156 if (dma_handle)
157 *dma_handle = (u64) addr;
158 return addr;
159}
160
161static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
162 void *cpu_addr, u64 dma_handle)
163{
164 free_pages((unsigned long) cpu_addr, get_order(size));
165}
166
167struct ib_dma_mapping_ops ipath_dma_mapping_ops = {
168 .mapping_error = ipath_mapping_error,
169 .map_single = ipath_dma_map_single,
170 .unmap_single = ipath_dma_unmap_single,
171 .map_page = ipath_dma_map_page,
172 .unmap_page = ipath_dma_unmap_page,
173 .map_sg = ipath_map_sg,
174 .unmap_sg = ipath_unmap_sg,
175 .sync_single_for_cpu = ipath_sync_single_for_cpu,
176 .sync_single_for_device = ipath_sync_single_for_device,
177 .alloc_coherent = ipath_dma_alloc_coherent,
178 .free_coherent = ipath_dma_free_coherent
179};
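
As a hedged aside (an illustrative fragment, not code from the tree): because the ops above copy data with the CPU, the handle produced by ipath_dma_map_single() is simply the buffer's kernel virtual address, and ipath_mapping_error() only trips for address 0. A hypothetical caller inside this file could use the handle directly:

/* Hypothetical in-file caller: the "DMA address" returned by the ops
 * above is directly usable as a CPU pointer.
 */
static int ipath_dma_selftest(struct ib_device *ibdev, void *kbuf, size_t len)
{
	u64 handle = ipath_dma_map_single(ibdev, kbuf, len, DMA_TO_DEVICE);

	if (ipath_mapping_error(ibdev, handle))	/* true only for address 0 */
		return -ENOMEM;
	memset((void *) (unsigned long) handle, 0, len);	/* plain CPU access */
	ipath_dma_unmap_single(ibdev, handle, len, DMA_TO_DEVICE);
	return 0;
}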
diff --git a/drivers/staging/rdma/ipath/ipath_driver.c b/drivers/staging/rdma/ipath/ipath_driver.c
deleted file mode 100644
index 2ab22f98e3ba..000000000000
--- a/drivers/staging/rdma/ipath/ipath_driver.c
+++ /dev/null
@@ -1,2784 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35
36#include <linux/spinlock.h>
37#include <linux/idr.h>
38#include <linux/pci.h>
39#include <linux/io.h>
40#include <linux/delay.h>
41#include <linux/netdevice.h>
42#include <linux/vmalloc.h>
43#include <linux/bitmap.h>
44#include <linux/slab.h>
45#include <linux/module.h>
46#ifdef CONFIG_X86_64
47#include <asm/pat.h>
48#endif
49
50#include "ipath_kernel.h"
51#include "ipath_verbs.h"
52
53static void ipath_update_pio_bufs(struct ipath_devdata *);
54
55const char *ipath_get_unit_name(int unit)
56{
57 static char iname[16];
58 snprintf(iname, sizeof iname, "infinipath%u", unit);
59 return iname;
60}
61
62#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
63#define PFX IPATH_DRV_NAME ": "
64
65/*
66 * The size has to be longer than this string, so we can append
67 * board/chip information to it in the init code.
68 */
69const char ib_ipath_version[] = IPATH_IDSTR "\n";
70
71static struct idr unit_table;
72DEFINE_SPINLOCK(ipath_devs_lock);
73LIST_HEAD(ipath_dev_list);
74
75wait_queue_head_t ipath_state_wait;
76
77unsigned ipath_debug = __IPATH_INFO;
78
79module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
80MODULE_PARM_DESC(debug, "mask for debug prints");
81EXPORT_SYMBOL_GPL(ipath_debug);
82
83unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
84module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
85MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");
86
87static unsigned ipath_hol_timeout_ms = 13000;
88module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
89MODULE_PARM_DESC(hol_timeout_ms,
90 "duration of user app suspension after link failure");
91
92unsigned ipath_linkrecovery = 1;
93module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO);
94MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue");
95
96MODULE_LICENSE("GPL");
97MODULE_AUTHOR("QLogic <support@qlogic.com>");
98MODULE_DESCRIPTION("QLogic InfiniPath driver");
99
100/*
101 * Table to translate the LINKTRAININGSTATE portion of
102 * IBCStatus to a human-readable form.
103 */
104const char *ipath_ibcstatus_str[] = {
105 "Disabled",
106 "LinkUp",
107 "PollActive",
108 "PollQuiet",
109 "SleepDelay",
110 "SleepQuiet",
111 "LState6", /* unused */
112 "LState7", /* unused */
113 "CfgDebounce",
114 "CfgRcvfCfg",
115 "CfgWaitRmt",
116 "CfgIdle",
117 "RecovRetrain",
118 "CfgTxRevLane", /* unused before IBA7220 */
119 "RecovWaitRmt",
120 "RecovIdle",
121 /* below were added for IBA7220 */
122 "CfgEnhanced",
123 "CfgTest",
124 "CfgWaitRmtTest",
125 "CfgWaitCfgEnhanced",
126 "SendTS_T",
127 "SendTstIdles",
128 "RcvTS_T",
129 "SendTst_TS1s",
130 "LTState18", "LTState19", "LTState1A", "LTState1B",
131 "LTState1C", "LTState1D", "LTState1E", "LTState1F"
132};
133
134static void ipath_remove_one(struct pci_dev *);
135static int ipath_init_one(struct pci_dev *, const struct pci_device_id *);
136
137/* Only needed for registration, nothing else needs this info */
138#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
139#define PCI_DEVICE_ID_INFINIPATH_HT 0xd
140
141/* Number of seconds before our card status check... */
142#define STATUS_TIMEOUT 60
143
144static const struct pci_device_id ipath_pci_tbl[] = {
145 { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
146 { 0, }
147};
148
149MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);
150
151static struct pci_driver ipath_driver = {
152 .name = IPATH_DRV_NAME,
153 .probe = ipath_init_one,
154 .remove = ipath_remove_one,
155 .id_table = ipath_pci_tbl,
156 .driver = {
157 .groups = ipath_driver_attr_groups,
158 },
159};
160
161static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
162 u32 *bar0, u32 *bar1)
163{
164 int ret;
165
166 ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);
167 if (ret)
168 ipath_dev_err(dd, "failed to read bar0 before enable: "
169 "error %d\n", -ret);
170
171 ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);
172 if (ret)
173 ipath_dev_err(dd, "failed to read bar1 before enable: "
174 "error %d\n", -ret);
175
176 ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);
177}
178
179static void ipath_free_devdata(struct pci_dev *pdev,
180 struct ipath_devdata *dd)
181{
182 unsigned long flags;
183
184 pci_set_drvdata(pdev, NULL);
185
186 if (dd->ipath_unit != -1) {
187 spin_lock_irqsave(&ipath_devs_lock, flags);
188 idr_remove(&unit_table, dd->ipath_unit);
189 list_del(&dd->ipath_list);
190 spin_unlock_irqrestore(&ipath_devs_lock, flags);
191 }
192 vfree(dd);
193}
194
195static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
196{
197 unsigned long flags;
198 struct ipath_devdata *dd;
199 int ret;
200
201 dd = vzalloc(sizeof(*dd));
202 if (!dd) {
203 dd = ERR_PTR(-ENOMEM);
204 goto bail;
205 }
206 dd->ipath_unit = -1;
207
208 idr_preload(GFP_KERNEL);
209 spin_lock_irqsave(&ipath_devs_lock, flags);
210
211 ret = idr_alloc(&unit_table, dd, 0, 0, GFP_NOWAIT);
212 if (ret < 0) {
213 printk(KERN_ERR IPATH_DRV_NAME
214 ": Could not allocate unit ID: error %d\n", -ret);
215 ipath_free_devdata(pdev, dd);
216 dd = ERR_PTR(ret);
217 goto bail_unlock;
218 }
219 dd->ipath_unit = ret;
220
221 dd->pcidev = pdev;
222 pci_set_drvdata(pdev, dd);
223
224 list_add(&dd->ipath_list, &ipath_dev_list);
225
226bail_unlock:
227 spin_unlock_irqrestore(&ipath_devs_lock, flags);
228 idr_preload_end();
229bail:
230 return dd;
231}
232
233static inline struct ipath_devdata *__ipath_lookup(int unit)
234{
235 return idr_find(&unit_table, unit);
236}
237
238struct ipath_devdata *ipath_lookup(int unit)
239{
240 struct ipath_devdata *dd;
241 unsigned long flags;
242
243 spin_lock_irqsave(&ipath_devs_lock, flags);
244 dd = __ipath_lookup(unit);
245 spin_unlock_irqrestore(&ipath_devs_lock, flags);
246
247 return dd;
248}
249
250int ipath_count_units(int *npresentp, int *nupp, int *maxportsp)
251{
252 int nunits, npresent, nup;
253 struct ipath_devdata *dd;
254 unsigned long flags;
255 int maxports;
256
257 nunits = npresent = nup = maxports = 0;
258
259 spin_lock_irqsave(&ipath_devs_lock, flags);
260
261 list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
262 nunits++;
263 if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)
264 npresent++;
265 if (dd->ipath_lid &&
266 !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN
267 | IPATH_LINKUNK)))
268 nup++;
269 if (dd->ipath_cfgports > maxports)
270 maxports = dd->ipath_cfgports;
271 }
272
273 spin_unlock_irqrestore(&ipath_devs_lock, flags);
274
275 if (npresentp)
276 *npresentp = npresent;
277 if (nupp)
278 *nupp = nup;
279 if (maxportsp)
280 *maxportsp = maxports;
281
282 return nunits;
283}
284
285/*
286 * These next two routines are placeholders in case we don't have per-arch
287 * code for controlling write combining. If explicit control of write
288 * combining is not available, performance will probably be awful.
289 */
290
291int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd)
292{
293 return -EOPNOTSUPP;
294}
295
296void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
297{
298}
299
300/*
301 * Perform a PIO buffer bandwidth write test, to verify proper system
302 * configuration. Even when all the setup calls work, occasionally
303 * BIOS or other issues can prevent write combining from working, or
304 * can cause other bandwidth problems to the chip.
305 *
306 * This test simply writes the same buffer over and over again, and
307 * measures close to the peak bandwidth to the chip (not testing
308 * data bandwidth to the wire). On chips that use an address-based
309 * trigger to send packets to the wire, this is easy. On chips that
310 * use a count to trigger, we want to make sure that the packet doesn't
311 * go out on the wire, or trigger flow control checks.
312 */
313static void ipath_verify_pioperf(struct ipath_devdata *dd)
314{
315 u32 pbnum, cnt, lcnt;
316 u32 __iomem *piobuf;
317 u32 *addr;
318 u64 msecs, emsecs;
319
320 piobuf = ipath_getpiobuf(dd, 0, &pbnum);
321 if (!piobuf) {
322 dev_info(&dd->pcidev->dev,
323 "No PIObufs for checking perf, skipping\n");
324 return;
325 }
326
327 /*
328 * Enough to give us a reasonable test, less than piobuf size, and
329 * likely multiple of store buffer length.
330 */
331 cnt = 1024;
332
333 addr = vmalloc(cnt);
334 if (!addr) {
335 dev_info(&dd->pcidev->dev,
336 "Couldn't get memory for checking PIO perf,"
337 " skipping\n");
338 goto done;
339 }
340
341 preempt_disable(); /* we want reasonably accurate elapsed time */
342 msecs = 1 + jiffies_to_msecs(jiffies);
343 for (lcnt = 0; lcnt < 10000U; lcnt++) {
344 /* wait until we cross msec boundary */
345 if (jiffies_to_msecs(jiffies) >= msecs)
346 break;
347 udelay(1);
348 }
349
350 ipath_disable_armlaunch(dd);
351
352 /*
353 * length 0, no dwords actually sent, and mark as VL15
354 * on chips where that may matter (due to IB flowcontrol)
355 */
356 if ((dd->ipath_flags & IPATH_HAS_PBC_CNT))
357 writeq(1UL << 63, piobuf);
358 else
359 writeq(0, piobuf);
360 ipath_flush_wc();
361
362 /*
363 * this is only roughly accurate, since even with preempt we
364 * still take interrupts that could take a while. Running for
365 * >= 5 msec seems to get us "close enough" to accurate values
366 */
367 msecs = jiffies_to_msecs(jiffies);
368 for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
369 __iowrite32_copy(piobuf + 64, addr, cnt >> 2);
370 emsecs = jiffies_to_msecs(jiffies) - msecs;
371 }
372
373 /* 1 GiB/sec, slightly over IB SDR line rate */
374 if (lcnt < (emsecs * 1024U))
375 ipath_dev_err(dd,
376 "Performance problem: bandwidth to PIO buffers is "
377 "only %u MiB/sec\n",
378 lcnt / (u32) emsecs);
379 else
380 ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
381 lcnt / (u32) emsecs);
382
383 preempt_enable();
384
385 vfree(addr);
386
387done:
388 /* disarm piobuf, so it's available again */
389 ipath_disarm_piobufs(dd, pbnum, 1);
390 ipath_enable_armlaunch(dd);
391}
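
/* Hedged restatement (assumes only the cnt = 1024 used above, nothing else):
 * each loop iteration copies cnt = 1024 bytes, so lcnt iterations over
 * emsecs milliseconds is roughly lcnt KiB per emsecs ms, i.e. about
 * lcnt / emsecs MiB/sec; lcnt < emsecs * 1024 therefore means the observed
 * PIO bandwidth is under ~1 GiB/sec, matching the threshold comment above.
 */
static inline u32 pio_bw_mibps(u32 lcnt, u64 emsecs)
{
	return emsecs ? lcnt / (u32) emsecs : 0;
}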
392
393static void cleanup_device(struct ipath_devdata *dd);
394
395static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
396{
397 int ret, len, j;
398 struct ipath_devdata *dd;
399 unsigned long long addr;
400 u32 bar0 = 0, bar1 = 0;
401
402#ifdef CONFIG_X86_64
403 if (pat_enabled()) {
404 pr_warn("ipath needs PAT disabled, boot with nopat kernel parameter\n");
405 ret = -ENODEV;
406 goto bail;
407 }
408#endif
409
410 dd = ipath_alloc_devdata(pdev);
411 if (IS_ERR(dd)) {
412 ret = PTR_ERR(dd);
413 printk(KERN_ERR IPATH_DRV_NAME
414 ": Could not allocate devdata: error %d\n", -ret);
415 goto bail;
416 }
417
418 ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
419
420 ret = pci_enable_device(pdev);
421 if (ret) {
422 /* This can happen iff:
423 *
424 * We did a chip reset, and then failed to reprogram the
425 * BAR, or the chip reset due to an internal error. We then
426 * unloaded the driver and reloaded it.
427 *
428 * Both reset cases set the BAR back to initial state. For
429 * the latter case, the AER sticky error bit at offset 0x718
430 * should be set, but the Linux kernel doesn't yet know
431 * about that, it appears. If the original BAR was retained
432 * in the kernel data structures, this may be OK.
433 */
434 ipath_dev_err(dd, "enable unit %d failed: error %d\n",
435 dd->ipath_unit, -ret);
436 goto bail_devdata;
437 }
438 addr = pci_resource_start(pdev, 0);
439 len = pci_resource_len(pdev, 0);
440 ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x "
441 "driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
442 ent->device, ent->driver_data);
443
444 read_bars(dd, pdev, &bar0, &bar1);
445
446 if (!bar1 && !(bar0 & ~0xf)) {
447 if (addr) {
448 dev_info(&pdev->dev, "BAR is 0 (probable RESET), "
449 "rewriting as %llx\n", addr);
450 ret = pci_write_config_dword(
451 pdev, PCI_BASE_ADDRESS_0, addr);
452 if (ret) {
453 ipath_dev_err(dd, "rewrite of BAR0 "
454 "failed: err %d\n", -ret);
455 goto bail_disable;
456 }
457 ret = pci_write_config_dword(
458 pdev, PCI_BASE_ADDRESS_1, addr >> 32);
459 if (ret) {
460 ipath_dev_err(dd, "rewrite of BAR1 "
461 "failed: err %d\n", -ret);
462 goto bail_disable;
463 }
464 } else {
465 ipath_dev_err(dd, "BAR is 0 (probable RESET), "
466 "not usable until reboot\n");
467 ret = -ENODEV;
468 goto bail_disable;
469 }
470 }
471
472 ret = pci_request_regions(pdev, IPATH_DRV_NAME);
473 if (ret) {
474 dev_info(&pdev->dev, "pci_request_regions unit %u fails: "
475 "err %d\n", dd->ipath_unit, -ret);
476 goto bail_disable;
477 }
478
479 ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
480 if (ret) {
481 /*
482		 * if the 64 bit setup fails, try 32 bit. Some systems
483		 * do not set up 64 bit maps when 2GB or less memory is
484		 * installed.
485 */
486 ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
487 if (ret) {
488 dev_info(&pdev->dev,
489 "Unable to set DMA mask for unit %u: %d\n",
490 dd->ipath_unit, ret);
491 goto bail_regions;
492 } else {
493 ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
494 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
495 if (ret)
496 dev_info(&pdev->dev,
497 "Unable to set DMA consistent mask "
498 "for unit %u: %d\n",
499 dd->ipath_unit, ret);
500
501 }
502 } else {
503 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
504 if (ret)
505 dev_info(&pdev->dev,
506 "Unable to set DMA consistent mask "
507 "for unit %u: %d\n",
508 dd->ipath_unit, ret);
509 }
510
511 pci_set_master(pdev);
512
513 /*
514 * Save BARs to rewrite after device reset. Save all 64 bits of
515 * BAR, just in case.
516 */
517 dd->ipath_pcibar0 = addr;
518 dd->ipath_pcibar1 = addr >> 32;
519 dd->ipath_deviceid = ent->device; /* save for later use */
520 dd->ipath_vendorid = ent->vendor;
521
522 /* setup the chip-specific functions, as early as possible. */
523 switch (ent->device) {
524 case PCI_DEVICE_ID_INFINIPATH_HT:
525 ipath_init_iba6110_funcs(dd);
526 break;
527
528 default:
529 ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
530 "failing\n", ent->device);
531 return -ENODEV;
532 }
533
534 for (j = 0; j < 6; j++) {
535 if (!pdev->resource[j].start)
536 continue;
537 ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n",
538 j, &pdev->resource[j],
539 (unsigned long long)pci_resource_len(pdev, j));
540 }
541
542 if (!addr) {
543 ipath_dev_err(dd, "No valid address in BAR 0!\n");
544 ret = -ENODEV;
545 goto bail_regions;
546 }
547
548 dd->ipath_pcirev = pdev->revision;
549
550#if defined(__powerpc__)
551 /* There isn't a generic way to specify writethrough mappings */
552 dd->ipath_kregbase = __ioremap(addr, len,
553 (_PAGE_NO_CACHE|_PAGE_WRITETHRU));
554#else
555 /* XXX: split this properly to enable on PAT */
556 dd->ipath_kregbase = ioremap_nocache(addr, len);
557#endif
558
559 if (!dd->ipath_kregbase) {
560 ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
561 addr);
562 ret = -ENOMEM;
563 goto bail_iounmap;
564 }
565 dd->ipath_kregend = (u64 __iomem *)
566 ((void __iomem *)dd->ipath_kregbase + len);
567 dd->ipath_physaddr = addr; /* used for io_remap, etc. */
568 /* for user mmap */
569 ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
570 addr, dd->ipath_kregbase);
571
572 if (dd->ipath_f_bus(dd, pdev))
573 ipath_dev_err(dd, "Failed to setup config space; "
574 "continuing anyway\n");
575
576 /*
577 * set up our interrupt handler; IRQF_SHARED probably not needed,
578 * since MSI interrupts shouldn't be shared but won't hurt for now.
579 * check 0 irq after we return from chip-specific bus setup, since
580 * that can affect this due to setup
581 */
582 if (!dd->ipath_irq)
583 ipath_dev_err(dd, "irq is 0, BIOS error? Interrupts won't "
584 "work\n");
585 else {
586 ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
587 IPATH_DRV_NAME, dd);
588 if (ret) {
589 ipath_dev_err(dd, "Couldn't setup irq handler, "
590 "irq=%d: %d\n", dd->ipath_irq, ret);
591 goto bail_iounmap;
592 }
593 }
594
595 ret = ipath_init_chip(dd, 0); /* do the chip-specific init */
596 if (ret)
597 goto bail_irqsetup;
598
599 ret = ipath_enable_wc(dd);
600
601 if (ret)
602 ret = 0;
603
604 ipath_verify_pioperf(dd);
605
606 ipath_device_create_group(&pdev->dev, dd);
607 ipathfs_add_device(dd);
608 ipath_user_add(dd);
609 ipath_diag_add(dd);
610 ipath_register_ib_device(dd);
611
612 goto bail;
613
614bail_irqsetup:
615 cleanup_device(dd);
616
617 if (dd->ipath_irq)
618 dd->ipath_f_free_irq(dd);
619
620 if (dd->ipath_f_cleanup)
621 dd->ipath_f_cleanup(dd);
622
623bail_iounmap:
624 iounmap((volatile void __iomem *) dd->ipath_kregbase);
625
626bail_regions:
627 pci_release_regions(pdev);
628
629bail_disable:
630 pci_disable_device(pdev);
631
632bail_devdata:
633 ipath_free_devdata(pdev, dd);
634
635bail:
636 return ret;
637}
638
639static void cleanup_device(struct ipath_devdata *dd)
640{
641 int port;
642 struct ipath_portdata **tmp;
643 unsigned long flags;
644
645 if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
646 /* can't do anything more with chip; needs re-init */
647 *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
648 if (dd->ipath_kregbase) {
649 /*
650			 * if we haven't already cleaned up, clear these now
651			 * to ensure any register reads/writes "fail" until
652			 * re-init
653 */
654 dd->ipath_kregbase = NULL;
655 dd->ipath_uregbase = 0;
656 dd->ipath_sregbase = 0;
657 dd->ipath_cregbase = 0;
658 dd->ipath_kregsize = 0;
659 }
660 ipath_disable_wc(dd);
661 }
662
663 if (dd->ipath_spectriggerhit)
664 dev_info(&dd->pcidev->dev, "%lu special trigger hits\n",
665 dd->ipath_spectriggerhit);
666
667 if (dd->ipath_pioavailregs_dma) {
668 dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
669 (void *) dd->ipath_pioavailregs_dma,
670 dd->ipath_pioavailregs_phys);
671 dd->ipath_pioavailregs_dma = NULL;
672 }
673 if (dd->ipath_dummy_hdrq) {
674 dma_free_coherent(&dd->pcidev->dev,
675 dd->ipath_pd[0]->port_rcvhdrq_size,
676 dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
677 dd->ipath_dummy_hdrq = NULL;
678 }
679
680 if (dd->ipath_pageshadow) {
681 struct page **tmpp = dd->ipath_pageshadow;
682 dma_addr_t *tmpd = dd->ipath_physshadow;
683 int i, cnt = 0;
684
685 ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
686 "locked\n");
687 for (port = 0; port < dd->ipath_cfgports; port++) {
688 int port_tidbase = port * dd->ipath_rcvtidcnt;
689 int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
690 for (i = port_tidbase; i < maxtid; i++) {
691 if (!tmpp[i])
692 continue;
693 pci_unmap_page(dd->pcidev, tmpd[i],
694 PAGE_SIZE, PCI_DMA_FROMDEVICE);
695 ipath_release_user_pages(&tmpp[i], 1);
696 tmpp[i] = NULL;
697 cnt++;
698 }
699 }
700 if (cnt) {
701 ipath_stats.sps_pageunlocks += cnt;
702 ipath_cdbg(VERBOSE, "There were still %u expTID "
703 "entries locked\n", cnt);
704 }
705 if (ipath_stats.sps_pagelocks ||
706 ipath_stats.sps_pageunlocks)
707 ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
708 "unlocked via ipath_m{un}lock\n",
709 (unsigned long long)
710 ipath_stats.sps_pagelocks,
711 (unsigned long long)
712 ipath_stats.sps_pageunlocks);
713
714 ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
715 dd->ipath_pageshadow);
716 tmpp = dd->ipath_pageshadow;
717 dd->ipath_pageshadow = NULL;
718 vfree(tmpp);
719
720 dd->ipath_egrtidbase = NULL;
721 }
722
723 /*
724 * free any resources still in use (usually just kernel ports)
725	 * at unload; we do this for portcnt, because that's what we allocate.
726 * We acquire lock to be really paranoid that ipath_pd isn't being
727 * accessed from some interrupt-related code (that should not happen,
728 * but best to be sure).
729 */
730 spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
731 tmp = dd->ipath_pd;
732 dd->ipath_pd = NULL;
733 spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
734 for (port = 0; port < dd->ipath_portcnt; port++) {
735 struct ipath_portdata *pd = tmp[port];
736 tmp[port] = NULL; /* debugging paranoia */
737 ipath_free_pddata(dd, pd);
738 }
739 kfree(tmp);
740}
741
742static void ipath_remove_one(struct pci_dev *pdev)
743{
744 struct ipath_devdata *dd = pci_get_drvdata(pdev);
745
746 ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
747
748 /*
749 * disable the IB link early, to be sure no new packets arrive, which
750 * complicates the shutdown process
751 */
752 ipath_shutdown_device(dd);
753
754 flush_workqueue(ib_wq);
755
756 if (dd->verbs_dev)
757 ipath_unregister_ib_device(dd->verbs_dev);
758
759 ipath_diag_remove(dd);
760 ipath_user_remove(dd);
761 ipathfs_remove_device(dd);
762 ipath_device_remove_group(&pdev->dev, dd);
763
764 ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
765 "unit %u\n", dd, (u32) dd->ipath_unit);
766
767 cleanup_device(dd);
768
769 /*
770	 * turn off rcv, send, and interrupts for all ports; should all
771	 * drivers also hard reset the chip here?
772 * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
773 * for all versions of the driver, if they were allocated
774 */
775 if (dd->ipath_irq) {
776 ipath_cdbg(VERBOSE, "unit %u free irq %d\n",
777 dd->ipath_unit, dd->ipath_irq);
778 dd->ipath_f_free_irq(dd);
779 } else
780 ipath_dbg("irq is 0, not doing free_irq "
781 "for unit %u\n", dd->ipath_unit);
782 /*
783 * we check for NULL here, because it's outside
784 * the kregbase check, and we need to call it
785 * after the free_irq. Thus it's possible that
786 * the function pointers were never initialized.
787 */
788 if (dd->ipath_f_cleanup)
789 /* clean up chip-specific stuff */
790 dd->ipath_f_cleanup(dd);
791
792 ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
793 iounmap((volatile void __iomem *) dd->ipath_kregbase);
794 pci_release_regions(pdev);
795 ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
796 pci_disable_device(pdev);
797
798 ipath_free_devdata(pdev, dd);
799}
800
801/* general driver use */
802DEFINE_MUTEX(ipath_mutex);
803
804static DEFINE_SPINLOCK(ipath_pioavail_lock);
805
806/**
807 * ipath_disarm_piobufs - cancel a range of PIO buffers
808 * @dd: the infinipath device
809 * @first: the first PIO buffer to cancel
810 * @cnt: the number of PIO buffers to cancel
811 *
812 * cancel a range of PIO buffers, used when they might be armed, but
813 * not triggered. Used at init to ensure buffer state, and also user
814 * process close, in case it died while writing to a PIO buffer.
815 * Also after errors.
816 */
817void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
818 unsigned cnt)
819{
820 unsigned i, last = first + cnt;
821 unsigned long flags;
822
823 ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
824 for (i = first; i < last; i++) {
825 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
826 /*
827 * The disarm-related bits are write-only, so it
828 * is ok to OR them in with our copy of sendctrl
829 * while we hold the lock.
830 */
831 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
832 dd->ipath_sendctrl | INFINIPATH_S_DISARM |
833 (i << INFINIPATH_S_DISARMPIOBUF_SHIFT));
834 /* can't disarm bufs back-to-back per iba7220 spec */
835 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
836 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
837 }
838 /* on some older chips, update may not happen after cancel */
839 ipath_force_pio_avail_update(dd);
840}
841
842/**
843 * ipath_wait_linkstate - wait for an IB link state change to occur
844 * @dd: the infinipath device
845 * @state: the state to wait for
846 * @msecs: the number of milliseconds to wait
847 *
848 * wait up to msecs milliseconds for an IB link state change to occur.
849 * For now, take the easy polling route. Currently used only by
850 * ipath_set_linkstate. Returns 0 if the state is reached, otherwise
851 * -ETIMEDOUT. The state argument can have multiple state bits set, for
852 * any of several transitions.
853 */
854int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
855{
856 dd->ipath_state_wanted = state;
857 wait_event_interruptible_timeout(ipath_state_wait,
858 (dd->ipath_flags & state),
859 msecs_to_jiffies(msecs));
860 dd->ipath_state_wanted = 0;
861
862 if (!(dd->ipath_flags & state)) {
863 u64 val;
864 ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
865 " ms\n",
866 /* test INIT ahead of DOWN, both can be set */
867 (state & IPATH_LINKINIT) ? "INIT" :
868 ((state & IPATH_LINKDOWN) ? "DOWN" :
869 ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),
870 msecs);
871 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
872 ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",
873 (unsigned long long) ipath_read_kreg64(
874 dd, dd->ipath_kregs->kr_ibcctrl),
875 (unsigned long long) val,
876 ipath_ibcstatus_str[val & dd->ibcs_lts_mask]);
877 }
878 return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
879}
880
881static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err,
882 char *buf, size_t blen)
883{
884 static const struct {
885 ipath_err_t err;
886 const char *msg;
887 } errs[] = {
888 { INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" },
889 { INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" },
890 { INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" },
891 { INFINIPATH_E_SDMABASE, "SDmaBase" },
892 { INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" },
893 { INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" },
894 { INFINIPATH_E_SDMADWEN, "SDmaDwEn" },
895 { INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" },
896 { INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" },
897 { INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" },
898 { INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" },
899 { INFINIPATH_E_SDMADISABLED, "SDmaDisabled" },
900 };
901 int i;
902 int expected;
903 size_t bidx = 0;
904
905 for (i = 0; i < ARRAY_SIZE(errs); i++) {
906 expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 :
907 test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
908 if ((err & errs[i].err) && !expected)
909 bidx += snprintf(buf + bidx, blen - bidx,
910 "%s ", errs[i].msg);
911 }
912}
913
914/*
915 * Decode the error status into strings, deciding whether to always
916 * print it or not depending on "normal packet errors" vs everything
917 * else. Return 1 if "real" errors, otherwise 0 if only packet
918 * errors, so caller can decide what to print with the string.
919 */
920int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
921 ipath_err_t err)
922{
923 int iserr = 1;
924 *buf = '\0';
925 if (err & INFINIPATH_E_PKTERRS) {
926 if (!(err & ~INFINIPATH_E_PKTERRS))
927 iserr = 0; // if only packet errors.
928 if (ipath_debug & __IPATH_ERRPKTDBG) {
929 if (err & INFINIPATH_E_REBP)
930 strlcat(buf, "EBP ", blen);
931 if (err & INFINIPATH_E_RVCRC)
932 strlcat(buf, "VCRC ", blen);
933 if (err & INFINIPATH_E_RICRC) {
934 strlcat(buf, "CRC ", blen);
935 // clear for check below, so only once
936 err &= INFINIPATH_E_RICRC;
937 }
938 if (err & INFINIPATH_E_RSHORTPKTLEN)
939 strlcat(buf, "rshortpktlen ", blen);
940 if (err & INFINIPATH_E_SDROPPEDDATAPKT)
941 strlcat(buf, "sdroppeddatapkt ", blen);
942 if (err & INFINIPATH_E_SPKTLEN)
943 strlcat(buf, "spktlen ", blen);
944 }
945 if ((err & INFINIPATH_E_RICRC) &&
946 !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
947 strlcat(buf, "CRC ", blen);
948 if (!iserr)
949 goto done;
950 }
951 if (err & INFINIPATH_E_RHDRLEN)
952 strlcat(buf, "rhdrlen ", blen);
953 if (err & INFINIPATH_E_RBADTID)
954 strlcat(buf, "rbadtid ", blen);
955 if (err & INFINIPATH_E_RBADVERSION)
956 strlcat(buf, "rbadversion ", blen);
957 if (err & INFINIPATH_E_RHDR)
958 strlcat(buf, "rhdr ", blen);
959 if (err & INFINIPATH_E_SENDSPECIALTRIGGER)
960 strlcat(buf, "sendspecialtrigger ", blen);
961 if (err & INFINIPATH_E_RLONGPKTLEN)
962 strlcat(buf, "rlongpktlen ", blen);
963 if (err & INFINIPATH_E_RMAXPKTLEN)
964 strlcat(buf, "rmaxpktlen ", blen);
965 if (err & INFINIPATH_E_RMINPKTLEN)
966 strlcat(buf, "rminpktlen ", blen);
967 if (err & INFINIPATH_E_SMINPKTLEN)
968 strlcat(buf, "sminpktlen ", blen);
969 if (err & INFINIPATH_E_RFORMATERR)
970 strlcat(buf, "rformaterr ", blen);
971 if (err & INFINIPATH_E_RUNSUPVL)
972 strlcat(buf, "runsupvl ", blen);
973 if (err & INFINIPATH_E_RUNEXPCHAR)
974 strlcat(buf, "runexpchar ", blen);
975 if (err & INFINIPATH_E_RIBFLOW)
976 strlcat(buf, "ribflow ", blen);
977 if (err & INFINIPATH_E_SUNDERRUN)
978 strlcat(buf, "sunderrun ", blen);
979 if (err & INFINIPATH_E_SPIOARMLAUNCH)
980 strlcat(buf, "spioarmlaunch ", blen);
981 if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
982 strlcat(buf, "sunexperrpktnum ", blen);
983 if (err & INFINIPATH_E_SDROPPEDSMPPKT)
984 strlcat(buf, "sdroppedsmppkt ", blen);
985 if (err & INFINIPATH_E_SMAXPKTLEN)
986 strlcat(buf, "smaxpktlen ", blen);
987 if (err & INFINIPATH_E_SUNSUPVL)
988 strlcat(buf, "sunsupVL ", blen);
989 if (err & INFINIPATH_E_INVALIDADDR)
990 strlcat(buf, "invalidaddr ", blen);
991 if (err & INFINIPATH_E_RRCVEGRFULL)
992 strlcat(buf, "rcvegrfull ", blen);
993 if (err & INFINIPATH_E_RRCVHDRFULL)
994 strlcat(buf, "rcvhdrfull ", blen);
995 if (err & INFINIPATH_E_IBSTATUSCHANGED)
996 strlcat(buf, "ibcstatuschg ", blen);
997 if (err & INFINIPATH_E_RIBLOSTLINK)
998 strlcat(buf, "riblostlink ", blen);
999 if (err & INFINIPATH_E_HARDWARE)
1000 strlcat(buf, "hardware ", blen);
1001 if (err & INFINIPATH_E_RESET)
1002 strlcat(buf, "reset ", blen);
1003 if (err & INFINIPATH_E_SDMAERRS)
1004 decode_sdma_errs(dd, err, buf, blen);
1005 if (err & INFINIPATH_E_INVALIDEEPCMD)
1006 strlcat(buf, "invalideepromcmd ", blen);
1007done:
1008 return iserr;
1009}
1010
1011/**
1012 * get_rhf_errstring - decode RHF errors
1013 * @err: the err number
1014 * @msg: the output buffer
1015 * @len: the length of the output buffer
1016 *
1017 * only used one place now, may want more later
1018 */
1019static void get_rhf_errstring(u32 err, char *msg, size_t len)
1020{
1021	/* start with an empty string, so we don't need to check what's first */
1022 *msg = '\0';
1023
1024 if (err & INFINIPATH_RHF_H_ICRCERR)
1025 strlcat(msg, "icrcerr ", len);
1026 if (err & INFINIPATH_RHF_H_VCRCERR)
1027 strlcat(msg, "vcrcerr ", len);
1028 if (err & INFINIPATH_RHF_H_PARITYERR)
1029 strlcat(msg, "parityerr ", len);
1030 if (err & INFINIPATH_RHF_H_LENERR)
1031 strlcat(msg, "lenerr ", len);
1032 if (err & INFINIPATH_RHF_H_MTUERR)
1033 strlcat(msg, "mtuerr ", len);
1034 if (err & INFINIPATH_RHF_H_IHDRERR)
1035 /* infinipath hdr checksum error */
1036 strlcat(msg, "ipathhdrerr ", len);
1037 if (err & INFINIPATH_RHF_H_TIDERR)
1038 strlcat(msg, "tiderr ", len);
1039 if (err & INFINIPATH_RHF_H_MKERR)
1040 /* bad port, offset, etc. */
1041 strlcat(msg, "invalid ipathhdr ", len);
1042 if (err & INFINIPATH_RHF_H_IBERR)
1043 strlcat(msg, "iberr ", len);
1044 if (err & INFINIPATH_RHF_L_SWA)
1045 strlcat(msg, "swA ", len);
1046 if (err & INFINIPATH_RHF_L_SWB)
1047 strlcat(msg, "swB ", len);
1048}
1049
1050/**
1051 * ipath_get_egrbuf - get an eager buffer
1052 * @dd: the infinipath device
1053 * @bufnum: the eager buffer to get
1054 *
1055 * must only be called if ipath_pd[port] is known to be allocated
1056 */
1057static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum)
1058{
1059 return dd->ipath_port0_skbinfo ?
1060 (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
1061}
1062
1063/**
1064 * ipath_alloc_skb - allocate an skb and buffer with possible constraints
1065 * @dd: the infinipath device
1066 * @gfp_mask: the sk_buff allocation (GFP) mask
1067 */
1068struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
1069 gfp_t gfp_mask)
1070{
1071 struct sk_buff *skb;
1072 u32 len;
1073
1074 /*
1075	 * The only fully supported way to handle this is to allocate lots of
1076	 * extra space, align as needed, and then do skb_reserve(). That wastes
1077 * a lot of memory... I'll have to hack this into infinipath_copy
1078 * also.
1079 */
1080
1081 /*
1082 * We need 2 extra bytes for ipath_ether data sent in the
1083 * key header. In order to keep everything dword aligned,
1084 * we'll reserve 4 bytes.
1085 */
1086 len = dd->ipath_ibmaxlen + 4;
1087
1088 if (dd->ipath_flags & IPATH_4BYTE_TID) {
1089 /* We need a 2KB multiple alignment, and there is no way
1090 * to do it except to allocate extra and then skb_reserve
1091 * enough to bring it up to the right alignment.
1092 */
1093 len += 2047;
1094 }
1095
1096 skb = __dev_alloc_skb(len, gfp_mask);
1097 if (!skb) {
1098 ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
1099 len);
1100 goto bail;
1101 }
1102
1103 skb_reserve(skb, 4);
1104
1105 if (dd->ipath_flags & IPATH_4BYTE_TID) {
1106 u32 una = (unsigned long)skb->data & 2047;
1107 if (una)
1108 skb_reserve(skb, 2048 - una);
1109 }
1110
1111bail:
1112 return skb;
1113}
1114
1115static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
1116 u32 eflags,
1117 u32 l,
1118 u32 etail,
1119 __le32 *rhf_addr,
1120 struct ipath_message_header *hdr)
1121{
1122 char emsg[128];
1123
1124 get_rhf_errstring(eflags, emsg, sizeof emsg);
1125 ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
1126 "tlen=%x opcode=%x egridx=%x: %s\n",
1127 eflags, l,
1128 ipath_hdrget_rcv_type(rhf_addr),
1129 ipath_hdrget_length_in_bytes(rhf_addr),
1130 be32_to_cpu(hdr->bth[0]) >> 24,
1131 etail, emsg);
1132
1133 /* Count local link integrity errors. */
1134 if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) {
1135 u8 n = (dd->ipath_ibcctrl >>
1136 INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
1137 INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
1138
1139 if (++dd->ipath_lli_counter > n) {
1140 dd->ipath_lli_counter = 0;
1141 dd->ipath_lli_errors++;
1142 }
1143 }
1144}
1145
1146/*
1147 * ipath_kreceive - receive a packet
1148 * @pd: the infinipath port
1149 *
1150 * called from interrupt handler for errors or receive interrupt
1151 */
1152void ipath_kreceive(struct ipath_portdata *pd)
1153{
1154 struct ipath_devdata *dd = pd->port_dd;
1155 __le32 *rhf_addr;
1156 void *ebuf;
1157 const u32 rsize = dd->ipath_rcvhdrentsize; /* words */
1158 const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */
1159 u32 etail = -1, l, hdrqtail;
1160 struct ipath_message_header *hdr;
1161 u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0;
1162 static u64 totcalls; /* stats, may eventually remove */
1163 int last;
1164
1165 l = pd->port_head;
1166 rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset;
1167 if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
1168 u32 seq = ipath_hdrget_seq(rhf_addr);
1169
1170 if (seq != pd->port_seq_cnt)
1171 goto bail;
1172 hdrqtail = 0;
1173 } else {
1174 hdrqtail = ipath_get_rcvhdrtail(pd);
1175 if (l == hdrqtail)
1176 goto bail;
1177 smp_rmb();
1178 }
1179
1180reloop:
1181 for (last = 0, i = 1; !last; i += !last) {
1182 hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
1183 eflags = ipath_hdrget_err_flags(rhf_addr);
1184 etype = ipath_hdrget_rcv_type(rhf_addr);
1185 /* total length */
1186 tlen = ipath_hdrget_length_in_bytes(rhf_addr);
1187 ebuf = NULL;
1188 if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ?
1189 ipath_hdrget_use_egr_buf(rhf_addr) :
1190 (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
1191 /*
1192 * It turns out that the chip uses an eager buffer
1193 * for all non-expected packets, whether it "needs"
1194 * one or not. So always get the index, but don't
1195 * set ebuf (so we try to copy data) unless the
1196 * length requires it.
1197 */
1198 etail = ipath_hdrget_index(rhf_addr);
1199 updegr = 1;
1200 if (tlen > sizeof(*hdr) ||
1201 etype == RCVHQ_RCV_TYPE_NON_KD)
1202 ebuf = ipath_get_egrbuf(dd, etail);
1203 }
1204
1205 /*
1206 * both tiderr and ipathhdrerr are set for all plain IB
1207 * packets; only ipathhdrerr should be set.
1208 */
1209
1210 if (etype != RCVHQ_RCV_TYPE_NON_KD &&
1211 etype != RCVHQ_RCV_TYPE_ERROR &&
1212 ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) !=
1213 IPS_PROTO_VERSION)
1214 ipath_cdbg(PKT, "Bad InfiniPath protocol version "
1215 "%x\n", etype);
1216
1217 if (unlikely(eflags))
1218 ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr);
1219 else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
1220 ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen);
1221 if (dd->ipath_lli_counter)
1222 dd->ipath_lli_counter--;
1223 } else if (etype == RCVHQ_RCV_TYPE_EAGER) {
1224 u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24;
1225 u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff;
1226 ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
1227 "qp=%x), len %x; ignored\n",
1228 etype, opcode, qp, tlen);
1229 } else if (etype == RCVHQ_RCV_TYPE_EXPECTED) {
1230 ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
1231 be32_to_cpu(hdr->bth[0]) >> 24);
1232 } else {
1233 /*
1234 * error packet, type of error unknown.
1235 * Probably type 3, but we don't know, so don't
1236 * even try to print the opcode, etc.
1237 * Usually caused by a "bad packet", that has no
1238 * BTH, when the LRH says it should.
1239 */
1240 ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf"
1241 " %x, len %x hdrq+%x rhf: %Lx\n",
1242 etail, tlen, l, (unsigned long long)
1243 le64_to_cpu(*(__le64 *) rhf_addr));
1244 if (ipath_debug & __IPATH_ERRPKTDBG) {
1245 u32 j, *d, dw = rsize-2;
1246 if (rsize > (tlen>>2))
1247 dw = tlen>>2;
1248 d = (u32 *)hdr;
1249 printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n",
1250 dw);
1251 for (j = 0; j < dw; j++)
1252 printk(KERN_DEBUG "%8x%s", d[j],
1253 (j%8) == 7 ? "\n" : " ");
1254 printk(KERN_DEBUG ".\n");
1255 }
1256 }
1257 l += rsize;
1258 if (l >= maxcnt)
1259 l = 0;
1260 rhf_addr = (__le32 *) pd->port_rcvhdrq +
1261 l + dd->ipath_rhf_offset;
1262 if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
1263 u32 seq = ipath_hdrget_seq(rhf_addr);
1264
1265 if (++pd->port_seq_cnt > 13)
1266 pd->port_seq_cnt = 1;
1267 if (seq != pd->port_seq_cnt)
1268 last = 1;
1269 } else if (l == hdrqtail) {
1270 last = 1;
1271 }
1272 /*
1273	 * Update the head regs on the last packet, and every 16 packets.
1274	 * This reduces bus traffic while still trying to prevent
1275	 * rcvhdrq overflows when the queue is nearly full.
1276 */
1277 if (last || !(i & 0xf)) {
1278 u64 lval = l;
1279
1280 /* request IBA6120 and 7220 interrupt only on last */
1281 if (last)
1282 lval |= dd->ipath_rhdrhead_intr_off;
1283 ipath_write_ureg(dd, ur_rcvhdrhead, lval,
1284 pd->port_port);
1285 if (updegr) {
1286 ipath_write_ureg(dd, ur_rcvegrindexhead,
1287 etail, pd->port_port);
1288 updegr = 0;
1289 }
1290 }
1291 }
1292
1293 if (!dd->ipath_rhdrhead_intr_off && !reloop &&
1294 !(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
1295 /* IBA6110 workaround; we can have a race clearing chip
1296 * interrupt with another interrupt about to be delivered,
1297 * and can clear it before it is delivered on the GPIO
1298 * workaround. By doing the extra check here for the
1299 * in-memory tail register updating while we were doing
1300 * earlier packets, we "almost" guarantee we have covered
1301 * that case.
1302 */
1303 u32 hqtail = ipath_get_rcvhdrtail(pd);
1304 if (hqtail != hdrqtail) {
1305 hdrqtail = hqtail;
1306 reloop = 1; /* loop 1 extra time at most */
1307 goto reloop;
1308 }
1309 }
1310
1311 pkttot += i;
1312
1313 pd->port_head = l;
1314
1315 if (pkttot > ipath_stats.sps_maxpkts_call)
1316 ipath_stats.sps_maxpkts_call = pkttot;
1317 ipath_stats.sps_port0pkts += pkttot;
1318 ipath_stats.sps_avgpkts_call =
1319 ipath_stats.sps_port0pkts / ++totcalls;
1320
1321bail:;
1322}
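The "update on the last packet and every 16 packets" batching in ipath_kreceive() above is what keeps the rcvhdrhead register writes off the hot path. Below is a minimal user-space sketch of the same pattern; write_head_reg() and drain_queue() are hypothetical stand-ins for the ureg write and the packet loop, not driver functions.

#include <stdio.h>

static void write_head_reg(unsigned head, int request_intr)
{
	/* stands in for the ipath_write_ureg(dd, ur_rcvhdrhead, ...) call */
	printf("head reg <- %u (intr=%d)\n", head, request_intr);
}

static void drain_queue(unsigned head, unsigned tail, unsigned qsize)
{
	unsigned i;
	int last = 0;

	for (i = 1; !last; i++) {
		/* ... process the packet at 'head' here ... */
		head = (head + 1) % qsize;
		if (head == tail)
			last = 1;
		/* write the chip register only on the last packet or
		 * every 16th, to reduce bus traffic */
		if (last || !(i & 0xf))
			write_head_reg(head, last);
	}
}

int main(void)
{
	drain_queue(0, 40, 64);	/* 40 packets pending in a 64-entry queue */
	return 0;
}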
1323
1324/**
1325 * ipath_update_pio_bufs - update shadow copy of the PIO availability map
1326 * @dd: the infinipath device
1327 *
1328 * called whenever our local copy indicates we have run out of send buffers
1329 * NOTE: This can be called from interrupt context by some code
1330 * and from non-interrupt context by ipath_getpiobuf().
1331 */
1332
1333static void ipath_update_pio_bufs(struct ipath_devdata *dd)
1334{
1335 unsigned long flags;
1336 int i;
1337 const unsigned piobregs = (unsigned)dd->ipath_pioavregs;
1338
1339 /* If the generation (check) bits have changed, then we update the
1340 * busy bit for the corresponding PIO buffer. This algorithm will
1341 * modify positions to the value they already have in some cases
1342 * (i.e., no change), but it's faster than changing only the bits
1343 * that have changed.
1344 *
1345	 * We would like to do this atomically, to avoid spinlocks in the
1346	 * critical send path, but that's not really possible, given the
1347	 * type of changes, and that this routine could be called on
1348	 * multiple CPUs simultaneously, so we lock in this routine only,
1349	 * to avoid conflicting updates; all we change is the shadow, and
1350	 * it's a single 64 bit memory location, so by definition the update
1351	 * is atomic in terms of what other CPUs can see in testing the
1352 * bits. The spin_lock overhead isn't too bad, since it only
1353 * happens when all buffers are in use, so only cpu overhead, not
1354 * latency or bandwidth is affected.
1355 */
1356 if (!dd->ipath_pioavailregs_dma) {
1357 ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
1358 return;
1359 }
1360 if (ipath_debug & __IPATH_VERBDBG) {
1361 /* only if packet debug and verbose */
1362 volatile __le64 *dma = dd->ipath_pioavailregs_dma;
1363 unsigned long *shadow = dd->ipath_pioavailshadow;
1364
1365 ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "
1366 "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "
1367 "s3=%lx\n",
1368 (unsigned long long) le64_to_cpu(dma[0]),
1369 shadow[0],
1370 (unsigned long long) le64_to_cpu(dma[1]),
1371 shadow[1],
1372 (unsigned long long) le64_to_cpu(dma[2]),
1373 shadow[2],
1374 (unsigned long long) le64_to_cpu(dma[3]),
1375 shadow[3]);
1376 if (piobregs > 4)
1377 ipath_cdbg(
1378 PKT, "2nd group, dma4=%llx shad4=%lx, "
1379 "d5=%llx s5=%lx, d6=%llx s6=%lx, "
1380 "d7=%llx s7=%lx\n",
1381 (unsigned long long) le64_to_cpu(dma[4]),
1382 shadow[4],
1383 (unsigned long long) le64_to_cpu(dma[5]),
1384 shadow[5],
1385 (unsigned long long) le64_to_cpu(dma[6]),
1386 shadow[6],
1387 (unsigned long long) le64_to_cpu(dma[7]),
1388 shadow[7]);
1389 }
1390 spin_lock_irqsave(&ipath_pioavail_lock, flags);
1391 for (i = 0; i < piobregs; i++) {
1392 u64 pchbusy, pchg, piov, pnew;
1393 /*
1394 * Chip Errata: bug 6641; even and odd qwords>3 are swapped
1395 */
1396 if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
1397 piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);
1398 else
1399 piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
1400 pchg = dd->ipath_pioavailkernel[i] &
1401 ~(dd->ipath_pioavailshadow[i] ^ piov);
1402 pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
1403 if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
1404 pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;
1405 pnew |= piov & pchbusy;
1406 dd->ipath_pioavailshadow[i] = pnew;
1407 }
1408 }
1409 spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1410}
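The only subtle part of indexing the DMA'd pioavail array above is the IBA6110 erratum that swaps even and odd qwords beyond index 3 (the "i ^ 1" dance). A tiny standalone sketch of that index mapping; pioavail_dma_index() is a hypothetical helper name, not something the driver defines.

#include <stdio.h>

/* map a shadow index to the DMA'd-array index, honoring the erratum
 * where even/odd qwords above 3 are swapped */
static unsigned pioavail_dma_index(unsigned i, int swap_bug)
{
	return (swap_bug && i > 3) ? (i ^ 1) : i;
}

int main(void)
{
	unsigned i;

	for (i = 0; i < 8; i++)
		printf("%u -> %u\n", i, pioavail_dma_index(i, 1));
	/* prints 0..3 unchanged, then 4->5, 5->4, 6->7, 7->6 */
	return 0;
}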
1411
1412/*
1413 * used to force update of pioavailshadow if we can't get a pio buffer.
1414 * Needed primarily due to exiting freeze mode after recovering
1415 * from errors. Done lazily, because it's safer (known to not
1416 * be writing pio buffers).
1417 */
1418static void ipath_reset_availshadow(struct ipath_devdata *dd)
1419{
1420 int i, im;
1421 unsigned long flags;
1422
1423 spin_lock_irqsave(&ipath_pioavail_lock, flags);
1424 for (i = 0; i < dd->ipath_pioavregs; i++) {
1425 u64 val, oldval;
1426 /* deal with 6110 chip bug on high register #s */
1427 im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
1428 i ^ 1 : i;
1429 val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
1430 /*
1431 * busy out the buffers not in the kernel avail list,
1432 * without changing the generation bits.
1433 */
1434 oldval = dd->ipath_pioavailshadow[i];
1435 dd->ipath_pioavailshadow[i] = val |
1436 ((~dd->ipath_pioavailkernel[i] <<
1437 INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
1438 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
1439 if (oldval != dd->ipath_pioavailshadow[i])
1440 ipath_dbg("shadow[%d] was %Lx, now %lx\n",
1441 i, (unsigned long long) oldval,
1442 dd->ipath_pioavailshadow[i]);
1443 }
1444 spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1445}
1446
1447/**
1448 * ipath_setrcvhdrsize - set the receive header size
1449 * @dd: the infinipath device
1450 * @rhdrsize: the receive header size
1451 *
1452 * called from user init code, and also layered driver init
1453 */
1454int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
1455{
1456 int ret = 0;
1457
1458 if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
1459 if (dd->ipath_rcvhdrsize != rhdrsize) {
1460 dev_info(&dd->pcidev->dev,
1461 "Error: can't set protocol header "
1462 "size %u, already %u\n",
1463 rhdrsize, dd->ipath_rcvhdrsize);
1464 ret = -EAGAIN;
1465 } else
1466 ipath_cdbg(VERBOSE, "Reuse same protocol header "
1467 "size %u\n", dd->ipath_rcvhdrsize);
1468 } else if (rhdrsize > (dd->ipath_rcvhdrentsize -
1469 (sizeof(u64) / sizeof(u32)))) {
1470 ipath_dbg("Error: can't set protocol header size %u "
1471 "(> max %u)\n", rhdrsize,
1472 dd->ipath_rcvhdrentsize -
1473 (u32) (sizeof(u64) / sizeof(u32)));
1474 ret = -EOVERFLOW;
1475 } else {
1476 dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
1477 dd->ipath_rcvhdrsize = rhdrsize;
1478 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
1479 dd->ipath_rcvhdrsize);
1480 ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",
1481 dd->ipath_rcvhdrsize);
1482 }
1483 return ret;
1484}
1485
1486/*
1487 * debugging code and stats updates if no pio buffers available.
1488 */
1489static noinline void no_pio_bufs(struct ipath_devdata *dd)
1490{
1491 unsigned long *shadow = dd->ipath_pioavailshadow;
1492 __le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma;
1493
1494 dd->ipath_upd_pio_shadow = 1;
1495
1496 /*
1497 * not atomic, but if we lose a stat count in a while, that's OK
1498 */
1499 ipath_stats.sps_nopiobufs++;
1500 if (!(++dd->ipath_consec_nopiobuf % 100000)) {
1501 ipath_force_pio_avail_update(dd); /* at start */
1502 ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
1503 "%llx %llx %llx %llx\n"
1504 "ipath shadow: %lx %lx %lx %lx\n",
1505 dd->ipath_consec_nopiobuf,
1506 (unsigned long)get_cycles(),
1507 (unsigned long long) le64_to_cpu(dma[0]),
1508 (unsigned long long) le64_to_cpu(dma[1]),
1509 (unsigned long long) le64_to_cpu(dma[2]),
1510 (unsigned long long) le64_to_cpu(dma[3]),
1511 shadow[0], shadow[1], shadow[2], shadow[3]);
1512 /*
1513 * 4 buffers per byte, 4 registers above, cover rest
1514 * below
1515 */
1516 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
1517 (sizeof(shadow[0]) * 4 * 4))
1518 ipath_dbg("2nd group: dmacopy: "
1519 "%llx %llx %llx %llx\n"
1520 "ipath shadow: %lx %lx %lx %lx\n",
1521 (unsigned long long)le64_to_cpu(dma[4]),
1522 (unsigned long long)le64_to_cpu(dma[5]),
1523 (unsigned long long)le64_to_cpu(dma[6]),
1524 (unsigned long long)le64_to_cpu(dma[7]),
1525 shadow[4], shadow[5], shadow[6], shadow[7]);
1526
1527 /* at end, so update likely happened */
1528 ipath_reset_availshadow(dd);
1529 }
1530}
1531
1532/*
1533 * common code for normal driver pio buffer allocation, and reserved
1534 * allocation.
1535 *
1536 * does the appropriate marking as busy, etc.
1537 * returns a pointer to the buffer (and sets *pbufnum) on success, NULL if none.
1538 */
1539static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd,
1540 u32 *pbufnum, u32 first, u32 last, u32 firsti)
1541{
1542 int i, j, updated = 0;
1543 unsigned piobcnt;
1544 unsigned long flags;
1545 unsigned long *shadow = dd->ipath_pioavailshadow;
1546 u32 __iomem *buf;
1547
1548 piobcnt = last - first;
1549 if (dd->ipath_upd_pio_shadow) {
1550 /*
1551 * Minor optimization. If we had no buffers on last call,
1552 * start out by doing the update; continue and do scan even
1553 * if no buffers were updated, to be paranoid
1554 */
1555 ipath_update_pio_bufs(dd);
1556 updated++;
1557 i = first;
1558 } else
1559 i = firsti;
1560rescan:
1561 /*
1562 * while test_and_set_bit() is atomic, we do that and then the
1563 * change_bit(), and the pair is not. See if this is the cause
1564 * of the remaining armlaunch errors.
1565 */
1566 spin_lock_irqsave(&ipath_pioavail_lock, flags);
1567 for (j = 0; j < piobcnt; j++, i++) {
1568 if (i >= last)
1569 i = first;
1570 if (__test_and_set_bit((2 * i) + 1, shadow))
1571 continue;
1572 /* flip generation bit */
1573 __change_bit(2 * i, shadow);
1574 break;
1575 }
1576 spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1577
1578 if (j == piobcnt) {
1579 if (!updated) {
1580 /*
1581 * first time through; shadow exhausted, but may be
1582 * buffers available, try an update and then rescan.
1583 */
1584 ipath_update_pio_bufs(dd);
1585 updated++;
1586 i = first;
1587 goto rescan;
1588 } else if (updated == 1 && piobcnt <=
1589 ((dd->ipath_sendctrl
1590 >> INFINIPATH_S_UPDTHRESH_SHIFT) &
1591 INFINIPATH_S_UPDTHRESH_MASK)) {
1592 /*
1593 * for chips supporting and using the update
1594 * threshold we need to force an update of the
1595 * in-memory copy if the count is less than the
1596	 * threshold, then check one more time.
1597 */
1598 ipath_force_pio_avail_update(dd);
1599 ipath_update_pio_bufs(dd);
1600 updated++;
1601 i = first;
1602 goto rescan;
1603 }
1604
1605 no_pio_bufs(dd);
1606 buf = NULL;
1607 } else {
1608 if (i < dd->ipath_piobcnt2k)
1609 buf = (u32 __iomem *) (dd->ipath_pio2kbase +
1610 i * dd->ipath_palign);
1611 else
1612 buf = (u32 __iomem *)
1613 (dd->ipath_pio4kbase +
1614 (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
1615 if (pbufnum)
1616 *pbufnum = i;
1617 }
1618
1619 return buf;
1620}
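Each send buffer occupies two adjacent bits in the shadow used above: the odd bit marks the buffer busy and the even bit is the generation/check bit that is flipped on every allocation. A standalone sketch of that layout follows; BUSY_BIT, GEN_BIT and try_alloc() are hypothetical names, and the real driver does this with __test_and_set_bit()/__change_bit() under ipath_pioavail_lock.

#include <stdio.h>

#define BUSY_BIT(i)	(2u * (i) + 1)	/* odd bit: buffer in use */
#define GEN_BIT(i)	(2u * (i))	/* even bit: generation/check */

/* claim buffer i in a (single-word) shadow; 1 on success, 0 if busy */
static int try_alloc(unsigned long *shadow, unsigned i)
{
	unsigned long busy = 1ul << BUSY_BIT(i);

	if (*shadow & busy)
		return 0;
	*shadow |= busy;		/* mark busy */
	*shadow ^= 1ul << GEN_BIT(i);	/* flip the generation bit */
	return 1;
}

int main(void)
{
	unsigned long shadow = 0;

	printf("buf 2: %s, shadow=%#lx\n",
	       try_alloc(&shadow, 2) ? "got it" : "busy", shadow);
	printf("buf 2 again: %s, shadow=%#lx\n",
	       try_alloc(&shadow, 2) ? "got it" : "busy", shadow);
	return 0;
}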
1621
1622/**
1623 * ipath_getpiobuf - find an available pio buffer
1624 * @dd: the infinipath device
1625 * @plen: the size of the PIO buffer needed in 32-bit words
1626 * @pbufnum: the buffer number is placed here
1627 */
1628u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum)
1629{
1630 u32 __iomem *buf;
1631 u32 pnum, nbufs;
1632 u32 first, lasti;
1633
1634 if (plen + 1 >= IPATH_SMALLBUF_DWORDS) {
1635 first = dd->ipath_piobcnt2k;
1636 lasti = dd->ipath_lastpioindexl;
1637 } else {
1638 first = 0;
1639 lasti = dd->ipath_lastpioindex;
1640 }
1641 nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
1642 buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti);
1643
1644 if (buf) {
1645 /*
1646 * Set next starting place. It's just an optimization,
1647 * it doesn't matter who wins on this, so no locking
1648 */
1649 if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
1650 dd->ipath_lastpioindexl = pnum + 1;
1651 else
1652 dd->ipath_lastpioindex = pnum + 1;
1653 if (dd->ipath_upd_pio_shadow)
1654 dd->ipath_upd_pio_shadow = 0;
1655 if (dd->ipath_consec_nopiobuf)
1656 dd->ipath_consec_nopiobuf = 0;
1657 ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
1658 pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf);
1659 if (pbufnum)
1660 *pbufnum = pnum;
1661
1662 }
1663 return buf;
1664}
1665
1666/**
1667 * ipath_chg_pioavailkernel - change which send buffers are available for kernel
1668 * @dd: the infinipath device
1669 * @start: the starting send buffer number
1670 * @len: the number of send buffers
1671 * @avail: true if the buffers are available for kernel use, false otherwise
1672 */
1673void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
1674 unsigned len, int avail)
1675{
1676 unsigned long flags;
1677 unsigned end, cnt = 0;
1678
1679 /* There are two bits per send buffer (busy and generation) */
1680 start *= 2;
1681 end = start + len * 2;
1682
1683 spin_lock_irqsave(&ipath_pioavail_lock, flags);
1684 /* Set or clear the busy bit in the shadow. */
1685 while (start < end) {
1686 if (avail) {
1687 unsigned long dma;
1688 int i, im;
1689 /*
1690 * the BUSY bit will never be set, because we disarm
1691 * the user buffers before we hand them back to the
1692 * kernel. We do have to make sure the generation
1693 * bit is set correctly in shadow, since it could
1694 * have changed many times while allocated to user.
1695 * We can't use the bitmap functions on the full
1696 * dma array because it is always little-endian, so
1697 * we have to flip to host-order first.
1698 * BITS_PER_LONG is slightly wrong, since it's
1699 * always 64 bits per register in chip...
1700 * We only work on 64 bit kernels, so that's OK.
1701 */
1702 /* deal with 6110 chip bug on high register #s */
1703 i = start / BITS_PER_LONG;
1704 im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
1705 i ^ 1 : i;
1706 __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
1707 + start, dd->ipath_pioavailshadow);
1708 dma = (unsigned long) le64_to_cpu(
1709 dd->ipath_pioavailregs_dma[im]);
1710 if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
1711 + start) % BITS_PER_LONG, &dma))
1712 __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
1713 + start, dd->ipath_pioavailshadow);
1714 else
1715 __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
1716 + start, dd->ipath_pioavailshadow);
1717 __set_bit(start, dd->ipath_pioavailkernel);
1718 } else {
1719 __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
1720 dd->ipath_pioavailshadow);
1721 __clear_bit(start, dd->ipath_pioavailkernel);
1722 }
1723 start += 2;
1724 }
1725
1726 if (dd->ipath_pioupd_thresh) {
1727 end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
1728 cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
1729 }
1730 spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1731
1732 /*
1733 * When moving buffers from kernel to user, if number assigned to
1734 * the user is less than the pio update threshold, and threshold
1735 * is supported (cnt was computed > 0), drop the update threshold
1736 * so we update at least once per allocated number of buffers.
1737 * In any case, if the kernel buffers are less than the threshold,
1738 * drop the threshold. We don't bother increasing it, having once
1739 * decreased it, since it would typically just cycle back and forth.
1740 * If we don't decrease below buffers in use, we can wait a long
1741 * time for an update, until some other context uses PIO buffers.
1742 */
1743 if (!avail && len < cnt)
1744 cnt = len;
1745 if (cnt < dd->ipath_pioupd_thresh) {
1746 dd->ipath_pioupd_thresh = cnt;
1747 ipath_dbg("Decreased pio update threshold to %u\n",
1748 dd->ipath_pioupd_thresh);
1749 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1750 dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
1751 << INFINIPATH_S_UPDTHRESH_SHIFT);
1752 dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
1753 << INFINIPATH_S_UPDTHRESH_SHIFT;
1754 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1755 dd->ipath_sendctrl);
1756 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1757 }
1758}
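When the update threshold is lowered above, the new value is folded into the UPDTHRESH field of the sendctrl image with a clear-then-OR read-modify-write. A sketch of just that field packing; the shift and mask values here are made up for illustration, the real ones being the INFINIPATH_S_UPDTHRESH_* constants.

#include <stdio.h>

#define UPDTHRESH_SHIFT	24	/* illustrative value only */
#define UPDTHRESH_MASK	0x1fULL	/* illustrative value only */

static unsigned long long set_updthresh(unsigned long long sendctrl,
					unsigned thresh)
{
	sendctrl &= ~(UPDTHRESH_MASK << UPDTHRESH_SHIFT);
	sendctrl |= ((unsigned long long)thresh & UPDTHRESH_MASK)
			<< UPDTHRESH_SHIFT;
	return sendctrl;
}

int main(void)
{
	unsigned long long sendctrl = 0x1f000003ULL;	/* old thresh = 0x1f */

	printf("sendctrl = %#llx\n", set_updthresh(sendctrl, 4));
	/* prints 0x4000003: field replaced, other bits untouched */
	return 0;
}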
1759
1760/**
1761 * ipath_create_rcvhdrq - create a receive header queue
1762 * @dd: the infinipath device
1763 * @pd: the port data
1764 *
1765 * this must be contiguous memory (from an i/o perspective), and must be
1766 * DMA'able (which means for some systems, it will go through an IOMMU,
1767 * or be forced into a low address range).
1768 */
1769int ipath_create_rcvhdrq(struct ipath_devdata *dd,
1770 struct ipath_portdata *pd)
1771{
1772 int ret = 0;
1773
1774 if (!pd->port_rcvhdrq) {
1775 dma_addr_t phys_hdrqtail;
1776 gfp_t gfp_flags = GFP_USER | __GFP_COMP;
1777 int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
1778 sizeof(u32), PAGE_SIZE);
1779
1780 pd->port_rcvhdrq = dma_alloc_coherent(
1781 &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
1782 gfp_flags);
1783
1784 if (!pd->port_rcvhdrq) {
1785 ipath_dev_err(dd, "attempt to allocate %d bytes "
1786 "for port %u rcvhdrq failed\n",
1787 amt, pd->port_port);
1788 ret = -ENOMEM;
1789 goto bail;
1790 }
1791
1792 if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
1793 pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
1794 &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
1795 GFP_KERNEL);
1796 if (!pd->port_rcvhdrtail_kvaddr) {
1797 ipath_dev_err(dd, "attempt to allocate 1 page "
1798 "for port %u rcvhdrqtailaddr "
1799 "failed\n", pd->port_port);
1800 ret = -ENOMEM;
1801 dma_free_coherent(&dd->pcidev->dev, amt,
1802 pd->port_rcvhdrq,
1803 pd->port_rcvhdrq_phys);
1804 pd->port_rcvhdrq = NULL;
1805 goto bail;
1806 }
1807 pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
1808 ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx "
1809 "physical\n", pd->port_port,
1810 (unsigned long long) phys_hdrqtail);
1811 }
1812
1813 pd->port_rcvhdrq_size = amt;
1814
1815 ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "
1816 "for port %u rcvhdr Q\n",
1817 amt >> PAGE_SHIFT, pd->port_rcvhdrq,
1818 (unsigned long) pd->port_rcvhdrq_phys,
1819 (unsigned long) pd->port_rcvhdrq_size,
1820 pd->port_port);
1821 } else {
1822 ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
1823 "hdrtailaddr@%p %llx physical\n",
1824 pd->port_port, pd->port_rcvhdrq,
1825 (unsigned long long) pd->port_rcvhdrq_phys,
1826 pd->port_rcvhdrtail_kvaddr, (unsigned long long)
1827 pd->port_rcvhdrqtailaddr_phys);
1828 }
1829 /* clear for security and sanity on each use */
1830 memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
1831 if (pd->port_rcvhdrtail_kvaddr)
1832 memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
1833
1834 /*
1835 * tell chip each time we init it, even if we are re-using previous
1836 * memory (we zero the register at process close)
1837 */
1838 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
1839 pd->port_port, pd->port_rcvhdrqtailaddr_phys);
1840 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
1841 pd->port_port, pd->port_rcvhdrq_phys);
1842
1843bail:
1844 return ret;
1845}
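The rcvhdrq allocation size above is simply entries x entry size (in 32-bit words) x 4 bytes, rounded up to a whole page. A quick worked example with made-up geometry (the real counts come from the chip init code); MY_ALIGN mirrors the kernel's ALIGN macro.

#include <stdio.h>

#define MY_PAGE_SIZE	4096u
#define MY_ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned rcvhdrcnt = 128;	/* entries (example value) */
	unsigned rcvhdrentsize = 18;	/* 32-bit words per entry (example) */
	unsigned bytes = rcvhdrcnt * rcvhdrentsize * 4u;
	unsigned amt = MY_ALIGN(bytes, MY_PAGE_SIZE);

	printf("raw %u bytes -> %u bytes (%u pages)\n",
	       bytes, amt, amt / MY_PAGE_SIZE);
	/* 9216 raw bytes become a 12288-byte (3 page) allocation */
	return 0;
}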
1846
1847
1848/*
1849 * Flush all sends that might be in the ready to send state, as well as any
1850 * that are in the process of being sent. Used whenever we need to be
1851 * sure the send side is idle. Cleans up all buffer state by canceling
1852 * all pio buffers, and issuing an abort, which cleans up anything in the
1853 * launch fifo. The cancel is superfluous on some chip versions, but
1854 * it's safer to always do it.
1855 * PIOAvail bits are updated by the chip as if normal send had happened.
1856 */
1857void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
1858{
1859 unsigned long flags;
1860
1861 if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) {
1862 ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n");
1863 goto bail;
1864 }
1865 /*
1866 * If we have SDMA, and it's not disabled, we have to kick off the
1867 * abort state machine, provided we aren't already aborting.
1868 * If we are in the process of aborting SDMA (!DISABLED, but ABORTING),
1869 * we skip the rest of this routine. It is already "in progress"
1870 */
1871 if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
1872 int skip_cancel;
1873 unsigned long *statp = &dd->ipath_sdma_status;
1874
1875 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
1876 skip_cancel =
1877 test_and_set_bit(IPATH_SDMA_ABORTING, statp)
1878 && !test_bit(IPATH_SDMA_DISABLED, statp);
1879 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
1880 if (skip_cancel)
1881 goto bail;
1882 }
1883
1884 ipath_dbg("Cancelling all in-progress send buffers\n");
1885
1886 /* skip armlaunch errs for a while */
1887 dd->ipath_lastcancel = jiffies + HZ / 2;
1888
1889 /*
1890 * The abort bit is auto-clearing. We also don't want pioavail
1891 * update happening during this, and we don't want any other
1892 * sends going out, so turn those off for the duration. We read
1893 * the scratch register to be sure that cancels and the abort
1894	 * have taken effect in the chip. Otherwise the two writes are the
1895	 * same as in ipath_force_pio_avail_update()
1896 */
1897 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1898 dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD
1899 | INFINIPATH_S_PIOENABLE);
1900 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1901 dd->ipath_sendctrl | INFINIPATH_S_ABORT);
1902 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1903 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1904
1905 /* disarm all send buffers */
1906 ipath_disarm_piobufs(dd, 0,
1907 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
1908
1909 if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
1910 set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
1911
1912 if (restore_sendctrl) {
1913 /* else done by caller later if needed */
1914 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1915 dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD |
1916 INFINIPATH_S_PIOENABLE;
1917 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1918 dd->ipath_sendctrl);
1919 /* and again, be sure all have hit the chip */
1920 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1921 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1922 }
1923
1924 if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) &&
1925 !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) &&
1926 test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) {
1927 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
1928 /* only wait so long for intr */
1929 dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
1930 dd->ipath_sdma_reset_wait = 200;
1931 if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
1932 tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
1933 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
1934 }
1935bail:;
1936}
1937
1938/*
1939 * Force an update of in-memory copy of the pioavail registers, when
1940 * needed for any of a variety of reasons. We read the scratch register
1941 * to make it highly likely that the update will have happened by the
1942 * time we return. If already off (as in cancel_sends above), this
1943 * routine is a nop, on the assumption that the caller will "do the
1944 * right thing".
1945 */
1946void ipath_force_pio_avail_update(struct ipath_devdata *dd)
1947{
1948 unsigned long flags;
1949
1950 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1951 if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) {
1952 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1953 dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
1954 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1955 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1956 dd->ipath_sendctrl);
1957 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1958 }
1959 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1960}
1961
1962static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd,
1963 int linitcmd)
1964{
1965 u64 mod_wd;
1966 static const char *what[4] = {
1967 [0] = "NOP",
1968 [INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN",
1969 [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
1970 [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
1971 };
1972
1973 if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) {
1974 /*
1975 * If we are told to disable, note that so link-recovery
1976 * code does not attempt to bring us back up.
1977 */
1978 preempt_disable();
1979 dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
1980 preempt_enable();
1981 } else if (linitcmd) {
1982 /*
1983 * Any other linkinitcmd will lead to LINKDOWN and then
1984 * to INIT (if all is well), so clear flag to let
1985 * link-recovery code attempt to bring us back up.
1986 */
1987 preempt_disable();
1988 dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
1989 preempt_enable();
1990 }
1991
1992 mod_wd = (linkcmd << dd->ibcc_lc_shift) |
1993 (linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT);
1994 ipath_cdbg(VERBOSE,
1995 "Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n",
1996 dd->ipath_unit, what[linkcmd], linitcmd,
1997 ipath_ibcstatus_str[ipath_ib_linktrstate(dd,
1998 ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);
1999
2000 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
2001 dd->ipath_ibcctrl | mod_wd);
2002 /* read from chip so write is flushed */
2003 (void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
2004}
2005
2006int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
2007{
2008 u32 lstate;
2009 int ret;
2010
2011 switch (newstate) {
2012 case IPATH_IB_LINKDOWN_ONLY:
2013 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0);
2014 /* don't wait */
2015 ret = 0;
2016 goto bail;
2017
2018 case IPATH_IB_LINKDOWN:
2019 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
2020 INFINIPATH_IBCC_LINKINITCMD_POLL);
2021 /* don't wait */
2022 ret = 0;
2023 goto bail;
2024
2025 case IPATH_IB_LINKDOWN_SLEEP:
2026 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
2027 INFINIPATH_IBCC_LINKINITCMD_SLEEP);
2028 /* don't wait */
2029 ret = 0;
2030 goto bail;
2031
2032 case IPATH_IB_LINKDOWN_DISABLE:
2033 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
2034 INFINIPATH_IBCC_LINKINITCMD_DISABLE);
2035 /* don't wait */
2036 ret = 0;
2037 goto bail;
2038
2039 case IPATH_IB_LINKARM:
2040 if (dd->ipath_flags & IPATH_LINKARMED) {
2041 ret = 0;
2042 goto bail;
2043 }
2044 if (!(dd->ipath_flags &
2045 (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
2046 ret = -EINVAL;
2047 goto bail;
2048 }
2049 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0);
2050
2051 /*
2052 * Since the port can transition to ACTIVE by receiving
2053 * a non VL 15 packet, wait for either state.
2054 */
2055 lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
2056 break;
2057
2058 case IPATH_IB_LINKACTIVE:
2059 if (dd->ipath_flags & IPATH_LINKACTIVE) {
2060 ret = 0;
2061 goto bail;
2062 }
2063 if (!(dd->ipath_flags & IPATH_LINKARMED)) {
2064 ret = -EINVAL;
2065 goto bail;
2066 }
2067 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0);
2068 lstate = IPATH_LINKACTIVE;
2069 break;
2070
2071 case IPATH_IB_LINK_LOOPBACK:
2072 dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
2073 dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
2074 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
2075 dd->ipath_ibcctrl);
2076
2077 /* turn heartbeat off, as it causes loopback to fail */
2078 dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
2079 IPATH_IB_HRTBT_OFF);
2080 /* don't wait */
2081 ret = 0;
2082 goto bail;
2083
2084 case IPATH_IB_LINK_EXTERNAL:
2085 dev_info(&dd->pcidev->dev,
2086 "Disabling IB local loopback (normal)\n");
2087 dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
2088 IPATH_IB_HRTBT_ON);
2089 dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
2090 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
2091 dd->ipath_ibcctrl);
2092 /* don't wait */
2093 ret = 0;
2094 goto bail;
2095
2096 /*
2097 * Heartbeat can be explicitly enabled by the user via
2098 * "hrtbt_enable" "file", and if disabled, trying to enable here
2099 * will have no effect. Implicit changes (heartbeat off when
2100 * loopback on, and vice versa) are included to ease testing.
2101 */
2102 case IPATH_IB_LINK_HRTBT:
2103 ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
2104 IPATH_IB_HRTBT_ON);
2105 goto bail;
2106
2107 case IPATH_IB_LINK_NO_HRTBT:
2108 ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
2109 IPATH_IB_HRTBT_OFF);
2110 goto bail;
2111
2112 default:
2113 ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
2114 ret = -EINVAL;
2115 goto bail;
2116 }
2117 ret = ipath_wait_linkstate(dd, lstate, 2000);
2118
2119bail:
2120 return ret;
2121}
2122
2123/**
2124 * ipath_set_mtu - set the MTU
2125 * @dd: the infinipath device
2126 * @arg: the new MTU
2127 *
2128 * we can handle "any" incoming size; the issue here is whether we
2129 * need to restrict our outgoing size. For now, we don't do any
2130 * sanity checking on this, and we don't deal with what happens to
2131 * programs that are already running when the size changes.
2132 * NOTE: changing the MTU will usually cause the IBC to go back to
2133 * link INIT state...
2134 */
2135int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
2136{
2137 u32 piosize;
2138 int changed = 0;
2139 int ret;
2140
2141 /*
2142 * mtu is IB data payload max. It's the largest power of 2 less
2143 * than piosize (or even larger, since it only really controls the
2144 * largest we can receive; we can send the max of the mtu and
2145 * piosize). We check that it's one of the valid IB sizes.
2146 */
2147 if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
2148 (arg != 4096 || !ipath_mtu4096)) {
2149 ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
2150 ret = -EINVAL;
2151 goto bail;
2152 }
2153 if (dd->ipath_ibmtu == arg) {
2154 ret = 0; /* same as current */
2155 goto bail;
2156 }
2157
2158 piosize = dd->ipath_ibmaxlen;
2159 dd->ipath_ibmtu = arg;
2160
2161 if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
2162 /* Only if it's not the initial value (or reset to it) */
2163 if (piosize != dd->ipath_init_ibmaxlen) {
2164 if (arg > piosize && arg <= dd->ipath_init_ibmaxlen)
2165 piosize = dd->ipath_init_ibmaxlen;
2166 dd->ipath_ibmaxlen = piosize;
2167 changed = 1;
2168 }
2169 } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
2170 piosize = arg + IPATH_PIO_MAXIBHDR;
2171 ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
2172 "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
2173 arg);
2174 dd->ipath_ibmaxlen = piosize;
2175 changed = 1;
2176 }
2177
2178 if (changed) {
2179 u64 ibc = dd->ipath_ibcctrl, ibdw;
2180 /*
2181 * update our housekeeping variables, and set IBC max
2182 * size, same as init code; max IBC is max we allow in
2183 * buffer, less the qword pbc, plus 1 for ICRC, in dwords
2184 */
2185 dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32);
2186 ibdw = (dd->ipath_ibmaxlen >> 2) + 1;
2187 ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
2188 dd->ibcc_mpl_shift);
2189 ibc |= ibdw << dd->ibcc_mpl_shift;
2190 dd->ipath_ibcctrl = ibc;
2191 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
2192 dd->ipath_ibcctrl);
2193 dd->ipath_f_tidtemplate(dd);
2194 }
2195
2196 ret = 0;
2197
2198bail:
2199 return ret;
2200}
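The MTU check above only accepts the IB-defined payload sizes, with 4096 additionally gated on the ipath_mtu4096 module parameter. A hedged stand-in for that predicate; valid_ib_mtu() is a hypothetical name, not a driver symbol.

#include <stdbool.h>
#include <stdio.h>

static bool valid_ib_mtu(unsigned mtu, bool allow_4096)
{
	switch (mtu) {
	case 256:
	case 512:
	case 1024:
	case 2048:
		return true;
	case 4096:
		return allow_4096;	/* only if 4 KB MTU support is enabled */
	default:
		return false;
	}
}

int main(void)
{
	printf("%d %d %d\n", valid_ib_mtu(2048, false),
	       valid_ib_mtu(4096, false), valid_ib_mtu(1500, true));
	/* prints 1 0 0 */
	return 0;
}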
2201
2202int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc)
2203{
2204 dd->ipath_lid = lid;
2205 dd->ipath_lmc = lmc;
2206
2207 dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid |
2208 (~((1U << lmc) - 1)) << 16);
2209
2210 dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid);
2211
2212 return 0;
2213}
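The value handed to IPATH_IB_CFG_LIDLMC above packs the LID into the low 16 bits and, in the high bits, a mask whose zeroed low positions are the LID bits the LMC lets vary. A small sketch showing the resulting word; pack_lidlmc() is a hypothetical name for illustration only.

#include <stdio.h>

static unsigned pack_lidlmc(unsigned lid, unsigned lmc)
{
	/* low half: LID; upper bits: mask of LID bits that must match */
	return lid | (~((1u << lmc) - 1)) << 16;
}

int main(void)
{
	printf("lid 0x10, lmc 0 -> %#x\n", pack_lidlmc(0x10, 0));
	printf("lid 0x10, lmc 2 -> %#x\n", pack_lidlmc(0x10, 2));
	/* lmc 2 leaves the two low LID bits out of the match */
	return 0;
}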
2214
2215
2216/**
2217 * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
2218 * @dd: the infinipath device
2219 * @regno: the register number to write
2220 * @port: the port containing the register
2221 * @value: the value to write
2222 *
2223 * Registers that are replicated per port (i.e. indexed by port number)
2224 * use this routine.
2225 */
2226void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
2227 unsigned port, u64 value)
2228{
2229 u16 where;
2230
2231 if (port < dd->ipath_portcnt &&
2232 (regno == dd->ipath_kregs->kr_rcvhdraddr ||
2233 regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
2234 where = regno + port;
2235 else
2236 where = -1;
2237
2238 ipath_write_kreg(dd, where, value);
2239}
2240
2241/*
2242 * The following routines deal with the "obviously simple" task of overriding
2243 * the state of the LEDs, which normally indicate link physical and logical
2244 * status. The complications arise from the different hardware mappings, the
2245 * board-dependent routine being called from interrupts, and the requirement
2246 * to _flash_ them.
2247 */
2248#define LED_OVER_FREQ_SHIFT 8
2249#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
2250/* Below is "non-zero" to force override, but both actual LEDs are off */
2251#define LED_OVER_BOTH_OFF (8)
2252
2253static void ipath_run_led_override(unsigned long opaque)
2254{
2255 struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
2256 int timeoff;
2257 int pidx;
2258 u64 lstate, ltstate, val;
2259
2260 if (!(dd->ipath_flags & IPATH_INITTED))
2261 return;
2262
2263 pidx = dd->ipath_led_override_phase++ & 1;
2264 dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
2265 timeoff = dd->ipath_led_override_timeoff;
2266
2267 /*
2268	 * The code below potentially restores the LED values per current status;
2269	 * it could also set up the traffic-blink register, but that is left
2270	 * to the per-chip functions.
2271 */
2272 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
2273 ltstate = ipath_ib_linktrstate(dd, val);
2274 lstate = ipath_ib_linkstate(dd, val);
2275
2276 dd->ipath_f_setextled(dd, lstate, ltstate);
2277 mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
2278}
2279
2280void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
2281{
2282 int timeoff, freq;
2283
2284 if (!(dd->ipath_flags & IPATH_INITTED))
2285 return;
2286
2287 /* First check if we are blinking. If not, use 1HZ polling */
2288 timeoff = HZ;
2289 freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
2290
2291 if (freq) {
2292 /* For blink, set each phase from one nybble of val */
2293 dd->ipath_led_override_vals[0] = val & 0xF;
2294 dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
2295 timeoff = (HZ << 4)/freq;
2296 } else {
2297 /* Non-blink set both phases the same. */
2298 dd->ipath_led_override_vals[0] = val & 0xF;
2299 dd->ipath_led_override_vals[1] = val & 0xF;
2300 }
2301 dd->ipath_led_override_timeoff = timeoff;
2302
2303 /*
2304 * If the timer has not already been started, do so. Use a "quick"
2305 * timeout so the function will be called soon, to look at our request.
2306 */
2307 if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
2308 /* Need to start timer */
2309 setup_timer(&dd->ipath_led_override_timer,
2310 ipath_run_led_override, (unsigned long)dd);
2311
2312 dd->ipath_led_override_timer.expires = jiffies + 1;
2313 add_timer(&dd->ipath_led_override_timer);
2314 } else
2315 atomic_dec(&dd->ipath_led_override_timer_active);
2316}
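The override word parsed above encodes two 4-bit LED patterns (one per blink phase) plus an 8-bit frequency code, with the per-phase timer period computed as (HZ << 4) / freq. A decode-only sketch of that encoding; decode_led_override() is a hypothetical helper and EXAMPLE_HZ merely stands in for the kernel's HZ.

#include <stdio.h>

#define EXAMPLE_HZ	250	/* stand-in for the kernel's HZ */

static void decode_led_override(unsigned val)
{
	unsigned freq = (val >> 8) & 0xff;
	unsigned phase0 = val & 0xf;
	/* non-blink (freq == 0): both phases are the same pattern */
	unsigned phase1 = freq ? (val >> 4) & 0xf : phase0;
	unsigned timeoff = freq ? (EXAMPLE_HZ << 4) / freq : EXAMPLE_HZ;

	printf("phase0=%#x phase1=%#x period/phase=%u ticks\n",
	       phase0, phase1, timeoff);
}

int main(void)
{
	decode_led_override(0x20f);	/* alternate 0xf and 0x0, freq code 2 */
	decode_led_override(0x5);	/* steady 0x5, 1 Hz polling */
	return 0;
}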
2317
2318/**
2319 * ipath_shutdown_device - shut down a device
2320 * @dd: the infinipath device
2321 *
2322 * This is called to make the device quiet when we are about to
2323 * unload the driver, and also when the device is administratively
2324 * disabled. It does not free any data structures.
2325 * Everything it does has to be setup again by ipath_init_chip(dd,1)
2326 */
2327void ipath_shutdown_device(struct ipath_devdata *dd)
2328{
2329 unsigned long flags;
2330
2331 ipath_dbg("Shutting down the device\n");
2332
2333 ipath_hol_up(dd); /* make sure user processes aren't suspended */
2334
2335 dd->ipath_flags |= IPATH_LINKUNK;
2336 dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
2337 IPATH_LINKINIT | IPATH_LINKARMED |
2338 IPATH_LINKACTIVE);
2339 *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
2340 IPATH_STATUS_IB_READY);
2341
2342 /* mask interrupts, but not errors */
2343 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
2344
2345 dd->ipath_rcvctrl = 0;
2346 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
2347 dd->ipath_rcvctrl);
2348
2349 if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
2350 teardown_sdma(dd);
2351
2352 /*
2353 * gracefully stop all sends allowing any in progress to trickle out
2354 * first.
2355 */
2356 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
2357 dd->ipath_sendctrl = 0;
2358 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
2359 /* flush it */
2360 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
2361 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
2362
2363 /*
2364	 * Wait long enough for anything that's going to trickle out to have
2365	 * actually done so.
2366 */
2367 udelay(5);
2368
2369 dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */
2370
2371 ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE);
2372 ipath_cancel_sends(dd, 0);
2373
2374 /*
2375	 * We are shutting down, so tell the components that care. We don't
2376	 * do this on just a link state change; much like ethernet, a cable
2377	 * unplug, etc. doesn't change driver state.
2378 */
2379 signal_ib_event(dd, IB_EVENT_PORT_ERR);
2380
2381 /* disable IBC */
2382 dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
2383 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
2384 dd->ipath_control | INFINIPATH_C_FREEZEMODE);
2385
2386 /*
2387	 * Clear SerdesEnable and turn the LEDs off; do this here because
2388	 * we are unloading and can't count on interrupts to move things
2389	 * along. Turn the LEDs off explicitly for the same reason.
2390 */
2391 dd->ipath_f_quiet_serdes(dd);
2392
2393 /* stop all the timers that might still be running */
2394 del_timer_sync(&dd->ipath_hol_timer);
2395 if (dd->ipath_stats_timer_active) {
2396 del_timer_sync(&dd->ipath_stats_timer);
2397 dd->ipath_stats_timer_active = 0;
2398 }
2399 if (dd->ipath_intrchk_timer.data) {
2400 del_timer_sync(&dd->ipath_intrchk_timer);
2401 dd->ipath_intrchk_timer.data = 0;
2402 }
2403 if (atomic_read(&dd->ipath_led_override_timer_active)) {
2404 del_timer_sync(&dd->ipath_led_override_timer);
2405 atomic_set(&dd->ipath_led_override_timer_active, 0);
2406 }
2407
2408 /*
2409 * clear all interrupts and errors, so that the next time the driver
2410 * is loaded or device is enabled, we know that whatever is set
2411 * happened while we were unloaded
2412 */
2413 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
2414 ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
2415 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
2416 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
2417
2418 ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
2419 ipath_update_eeprom_log(dd);
2420}
2421
2422/**
2423 * ipath_free_pddata - free a port's allocated data
2424 * @dd: the infinipath device
2425 * @pd: the portdata structure
2426 *
2427 * Free up any allocated data for a port.
2428 * This should not touch anything that would affect a simultaneous
2429 * re-allocation of port data, because it is called after ipath_mutex
2430 * is released (and can be called from reinit as well).
2431 * It should never change any chip state, or global driver state.
2432 * (The only exception to global state is freeing the port0 port0_skbs.)
2433 */
2434void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
2435{
2436 if (!pd)
2437 return;
2438
2439 if (pd->port_rcvhdrq) {
2440 ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
2441 "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
2442 (unsigned long) pd->port_rcvhdrq_size);
2443 dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
2444 pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
2445 pd->port_rcvhdrq = NULL;
2446 if (pd->port_rcvhdrtail_kvaddr) {
2447 dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
2448 pd->port_rcvhdrtail_kvaddr,
2449 pd->port_rcvhdrqtailaddr_phys);
2450 pd->port_rcvhdrtail_kvaddr = NULL;
2451 }
2452 }
2453 if (pd->port_port && pd->port_rcvegrbuf) {
2454 unsigned e;
2455
2456 for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
2457 void *base = pd->port_rcvegrbuf[e];
2458 size_t size = pd->port_rcvegrbuf_size;
2459
2460 ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
2461 "chunk %u/%u\n", base,
2462 (unsigned long) size,
2463 e, pd->port_rcvegrbuf_chunks);
2464 dma_free_coherent(&dd->pcidev->dev, size,
2465 base, pd->port_rcvegrbuf_phys[e]);
2466 }
2467 kfree(pd->port_rcvegrbuf);
2468 pd->port_rcvegrbuf = NULL;
2469 kfree(pd->port_rcvegrbuf_phys);
2470 pd->port_rcvegrbuf_phys = NULL;
2471 pd->port_rcvegrbuf_chunks = 0;
2472 } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
2473 unsigned e;
2474 struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
2475
2476 dd->ipath_port0_skbinfo = NULL;
2477 ipath_cdbg(VERBOSE, "free closed port %d "
2478 "ipath_port0_skbinfo @ %p\n", pd->port_port,
2479 skbinfo);
2480 for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++)
2481 if (skbinfo[e].skb) {
2482 pci_unmap_single(dd->pcidev, skbinfo[e].phys,
2483 dd->ipath_ibmaxlen,
2484 PCI_DMA_FROMDEVICE);
2485 dev_kfree_skb(skbinfo[e].skb);
2486 }
2487 vfree(skbinfo);
2488 }
2489 kfree(pd->port_tid_pg_list);
2490 vfree(pd->subport_uregbase);
2491 vfree(pd->subport_rcvegrbuf);
2492 vfree(pd->subport_rcvhdr_base);
2493 kfree(pd);
2494}
2495
2496static int __init infinipath_init(void)
2497{
2498 int ret;
2499
2500 if (ipath_debug & __IPATH_DBG)
2501 printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
2502
2503 /*
2504 * These must be called before the driver is registered with
2505 * the PCI subsystem.
2506 */
2507 idr_init(&unit_table);
2508
2509 ret = pci_register_driver(&ipath_driver);
2510 if (ret < 0) {
2511 printk(KERN_ERR IPATH_DRV_NAME
2512 ": Unable to register driver: error %d\n", -ret);
2513 goto bail_unit;
2514 }
2515
2516 ret = ipath_init_ipathfs();
2517 if (ret < 0) {
2518 printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
2519 "ipathfs: error %d\n", -ret);
2520 goto bail_pci;
2521 }
2522
2523 goto bail;
2524
2525bail_pci:
2526 pci_unregister_driver(&ipath_driver);
2527
2528bail_unit:
2529 idr_destroy(&unit_table);
2530
2531bail:
2532 return ret;
2533}
2534
2535static void __exit infinipath_cleanup(void)
2536{
2537 ipath_exit_ipathfs();
2538
2539 ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
2540 pci_unregister_driver(&ipath_driver);
2541
2542 idr_destroy(&unit_table);
2543}
2544
2545/**
2546 * ipath_reset_device - reset the chip if possible
2547 * @unit: the device to reset
2548 *
2549 * Whether or not reset is successful, we attempt to re-initialize the chip
2550 * (that is, much like a driver unload/reload). We clear the INITTED flag
2551 * so that the various entry points will fail until we reinitialize. For
2552 * now, we only allow this if no user ports are open that use chip resources
2553 */
2554int ipath_reset_device(int unit)
2555{
2556 int ret, i;
2557 struct ipath_devdata *dd = ipath_lookup(unit);
2558 unsigned long flags;
2559
2560 if (!dd) {
2561 ret = -ENODEV;
2562 goto bail;
2563 }
2564
2565 if (atomic_read(&dd->ipath_led_override_timer_active)) {
2566 /* Need to stop LED timer, _then_ shut off LEDs */
2567 del_timer_sync(&dd->ipath_led_override_timer);
2568 atomic_set(&dd->ipath_led_override_timer_active, 0);
2569 }
2570
2571 /* Shut off LEDs after we are sure timer is not running */
2572 dd->ipath_led_override = LED_OVER_BOTH_OFF;
2573 dd->ipath_f_setextled(dd, 0, 0);
2574
2575 dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
2576
2577 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
2578 dev_info(&dd->pcidev->dev, "Invalid unit number %u or "
2579 "not initialized or not present\n", unit);
2580 ret = -ENXIO;
2581 goto bail;
2582 }
2583
2584 spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
2585 if (dd->ipath_pd)
2586 for (i = 1; i < dd->ipath_cfgports; i++) {
2587 if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
2588 continue;
2589 spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
2590 ipath_dbg("unit %u port %d is in use "
2591 "(PID %u cmd %s), can't reset\n",
2592 unit, i,
2593 pid_nr(dd->ipath_pd[i]->port_pid),
2594 dd->ipath_pd[i]->port_comm);
2595 ret = -EBUSY;
2596 goto bail;
2597 }
2598 spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
2599
2600 if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
2601 teardown_sdma(dd);
2602
2603 dd->ipath_flags &= ~IPATH_INITTED;
2604 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
2605 ret = dd->ipath_f_reset(dd);
2606 if (ret == 1) {
2607 ipath_dbg("Reinitializing unit %u after reset attempt\n",
2608 unit);
2609 ret = ipath_init_chip(dd, 1);
2610 } else
2611 ret = -EAGAIN;
2612 if (ret)
2613 ipath_dev_err(dd, "Reinitialize unit %u after "
2614 "reset failed with %d\n", unit, ret);
2615 else
2616 dev_info(&dd->pcidev->dev, "Reinitialized unit %u after "
2617 "resetting\n", unit);
2618
2619bail:
2620 return ret;
2621}
2622
2623/*
2624 * send a signal to all the processes that have the driver open
2625 * through the normal interfaces (i.e., everything other than diags
2626 * interface). Returns number of signalled processes.
2627 */
2628static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
2629{
2630 int i, sub, any = 0;
2631 struct pid *pid;
2632 unsigned long flags;
2633
2634 if (!dd->ipath_pd)
2635 return 0;
2636
2637 spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
2638 for (i = 1; i < dd->ipath_cfgports; i++) {
2639 if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
2640 continue;
2641 pid = dd->ipath_pd[i]->port_pid;
2642 if (!pid)
2643 continue;
2644
2645 dev_info(&dd->pcidev->dev, "context %d in use "
2646 "(PID %u), sending signal %d\n",
2647 i, pid_nr(pid), sig);
2648 kill_pid(pid, sig, 1);
2649 any++;
2650 for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
2651 pid = dd->ipath_pd[i]->port_subpid[sub];
2652 if (!pid)
2653 continue;
2654 dev_info(&dd->pcidev->dev, "sub-context "
2655 "%d:%d in use (PID %u), sending "
2656 "signal %d\n", i, sub, pid_nr(pid), sig);
2657 kill_pid(pid, sig, 1);
2658 any++;
2659 }
2660 }
2661 spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
2662 return any;
2663}
2664
2665static void ipath_hol_signal_down(struct ipath_devdata *dd)
2666{
2667 if (ipath_signal_procs(dd, SIGSTOP))
2668 ipath_dbg("Stopped some processes\n");
2669 ipath_cancel_sends(dd, 1);
2670}
2671
2672
2673static void ipath_hol_signal_up(struct ipath_devdata *dd)
2674{
2675 if (ipath_signal_procs(dd, SIGCONT))
2676 ipath_dbg("Continued some processes\n");
2677}
2678
2679/*
2680 * The link is down: stop any user processes and flush pending sends
2681 * to prevent HoL blocking, then start the HoL timer, which
2682 * periodically continues and then stops the processes, so they can
2683 * detect link down if they want, and do something about it.
2684 * Timer may already be running, so use mod_timer, not add_timer.
2685 */
2686void ipath_hol_down(struct ipath_devdata *dd)
2687{
2688 dd->ipath_hol_state = IPATH_HOL_DOWN;
2689 ipath_hol_signal_down(dd);
2690 dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
2691 dd->ipath_hol_timer.expires = jiffies +
2692 msecs_to_jiffies(ipath_hol_timeout_ms);
2693 mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
2694}
2695
2696/*
2697 * link is up, continue any user processes, and ensure timer
2698 * is a nop, if running. Let timer keep running, if set; it
2699 * will nop when it sees the link is up
2700 */
2701void ipath_hol_up(struct ipath_devdata *dd)
2702{
2703 ipath_hol_signal_up(dd);
2704 dd->ipath_hol_state = IPATH_HOL_UP;
2705}
2706
2707/*
2708 * toggle the running/not running state of user processes
2709 * to prevent HoL blocking on chip resources, but still allow
2710 * user processes to do link down special case handling.
2711 * Should only be called via the timer
2712 */
2713void ipath_hol_event(unsigned long opaque)
2714{
2715 struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
2716
2717 if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP
2718 && dd->ipath_hol_state != IPATH_HOL_UP) {
2719 dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
2720 ipath_dbg("Stopping processes\n");
2721 ipath_hol_signal_down(dd);
2722 } else { /* may do "extra" if also in ipath_hol_up() */
2723 dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;
2724 ipath_dbg("Continuing processes\n");
2725 ipath_hol_signal_up(dd);
2726 }
2727 if (dd->ipath_hol_state == IPATH_HOL_UP)
2728 ipath_dbg("link's up, don't resched timer\n");
2729 else {
2730 dd->ipath_hol_timer.expires = jiffies +
2731 msecs_to_jiffies(ipath_hol_timeout_ms);
2732 mod_timer(&dd->ipath_hol_timer,
2733 dd->ipath_hol_timer.expires);
2734 }
2735}
2736
2737int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
2738{
2739 u64 val;
2740
2741 if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK)
2742 return -1;
2743 if (dd->ipath_rx_pol_inv != new_pol_inv) {
2744 dd->ipath_rx_pol_inv = new_pol_inv;
2745 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
2746 val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
2747 INFINIPATH_XGXS_RX_POL_SHIFT);
2748 val |= ((u64)dd->ipath_rx_pol_inv) <<
2749 INFINIPATH_XGXS_RX_POL_SHIFT;
2750 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
2751 }
2752 return 0;
2753}
2754
2755/*
2756 * Disable and enable the armlaunch error. Used for PIO bandwidth testing on
2757 * the 7220, which is count-based, rather than trigger-based. Safe for the
2758 * driver check, since it's at init. Not completely safe when used for
2759 * user-mode checking, since some error checking can be lost, but not
2760 * particularly risky, and only has problematic side-effects in the face of
2761 * very buggy user code. There is no reference counting, but that's also
2762 * fine, given the intended use.
2763 */
2764void ipath_enable_armlaunch(struct ipath_devdata *dd)
2765{
2766 dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH;
2767 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
2768 INFINIPATH_E_SPIOARMLAUNCH);
2769 dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH;
2770 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
2771 dd->ipath_errormask);
2772}
2773
2774void ipath_disable_armlaunch(struct ipath_devdata *dd)
2775{
2776 /* so don't re-enable if already set */
2777 dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH;
2778 dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH;
2779 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
2780 dd->ipath_errormask);
2781}
2782
2783module_init(infinipath_init);
2784module_exit(infinipath_cleanup);
diff --git a/drivers/staging/rdma/ipath/ipath_eeprom.c b/drivers/staging/rdma/ipath/ipath_eeprom.c
deleted file mode 100644
index ef84107c7ce0..000000000000
--- a/drivers/staging/rdma/ipath/ipath_eeprom.c
+++ /dev/null
@@ -1,1183 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/delay.h>
35#include <linux/pci.h>
36#include <linux/vmalloc.h>
37
38#include "ipath_kernel.h"
39
40/*
41 * InfiniPath I2C driver for a serial eeprom. This is not a generic
42 * I2C interface. For a start, the device we're using (Atmel AT24C11)
43 * doesn't work like a regular I2C device. It looks like one
44 * electrically, but not logically. Normal I2C devices have a single
45 * 7-bit or 10-bit I2C address that they respond to. Valid 7-bit
46 * addresses range from 0x03 to 0x77. Addresses 0x00 to 0x02 and 0x78
47 * to 0x7F are special reserved addresses (e.g. 0x00 is the "general
48 * call" address.) The Atmel device, on the other hand, responds to ALL
49 * 7-bit addresses. It's designed to be the only device on a given I2C
50 * bus. A 7-bit address corresponds to the memory address within the
51 * Atmel device itself.
52 *
53 * Also, the timing requirements mean more than simple software
54 * bitbanging, with readbacks from chip to ensure timing (simple udelay
55 * is not enough).
56 *
57 * This all means that accessing the device is specialized enough
58 * that using the standard kernel I2C bitbanging interface would be
59 * impossible. For example, the core I2C eeprom driver expects to find
60 * a device at one or more of a limited set of addresses only. It doesn't
61 * allow writing to an eeprom. It also doesn't provide any means of
62 * accessing eeprom contents from within the kernel, only via sysfs.
63 */
64
65/* Added functionality for IBA7220-based cards */
66#define IPATH_EEPROM_DEV_V1 0xA0
67#define IPATH_EEPROM_DEV_V2 0xA2
68#define IPATH_TEMP_DEV 0x98
69#define IPATH_BAD_DEV (IPATH_EEPROM_DEV_V2+2)
70#define IPATH_NO_DEV (0xFF)
71
72/*
73 * The number of I2C chains is proliferating. Table below brings
74 * some order to the madness. The basic principle is that the
75 * table is scanned from the top, and a "probe" is made to the
76 * device probe_dev. If that succeeds, the chain is considered
77 * to be of that type, and dd->i2c_chain_type is set to the index+1
78 * of the entry.
79 * The +1 is so static initialization can mean "unknown, do probe."
80 */
81static struct i2c_chain_desc {
82 u8 probe_dev; /* If seen at probe, chain is this type */
83 u8 eeprom_dev; /* Dev addr (if any) for EEPROM */
84 u8 temp_dev; /* Dev Addr (if any) for Temp-sense */
85} i2c_chains[] = {
86 { IPATH_BAD_DEV, IPATH_NO_DEV, IPATH_NO_DEV }, /* pre-iba7220 bds */
87 { IPATH_EEPROM_DEV_V1, IPATH_EEPROM_DEV_V1, IPATH_TEMP_DEV}, /* V1 */
88 { IPATH_EEPROM_DEV_V2, IPATH_EEPROM_DEV_V2, IPATH_TEMP_DEV}, /* V2 */
89 { IPATH_NO_DEV }
90};
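A standalone sketch of how a table like i2c_chains[] is intended to be walked: probe each candidate's probe_dev in order and record index + 1 as the chain type, so that 0 can still mean "unknown, probe later". The probe callback, classify_chain(), and struct name below are illustrative stand-ins (0xA4 is the "bad dev" probe, 0xA0/0xA2 the V1/V2 EEPROMs, 0x98 the temp sensor, 0xFF the terminator).

#include <stdio.h>

struct chain_desc {
	unsigned char probe_dev, eeprom_dev, temp_dev;
};

/* pretend only the V2 EEPROM device answers its probe */
static int probe_dev_present(unsigned char dev)
{
	return dev == 0xA2;
}

static int classify_chain(const struct chain_desc *chains)
{
	int i;

	for (i = 0; chains[i].probe_dev != 0xFF; i++)
		if (probe_dev_present(chains[i].probe_dev))
			return i + 1;	/* +1 so 0 means "unknown" */
	return 0;
}

int main(void)
{
	static const struct chain_desc chains[] = {
		{ 0xA4, 0xFF, 0xFF },	/* pre-IBA7220 boards */
		{ 0xA0, 0xA0, 0x98 },	/* V1 */
		{ 0xA2, 0xA2, 0x98 },	/* V2 */
		{ 0xFF, 0x00, 0x00 },	/* terminator */
	};

	printf("chain type = %d\n", classify_chain(chains));	/* prints 3 */
	return 0;
}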
91
92enum i2c_type {
93 i2c_line_scl = 0,
94 i2c_line_sda
95};
96
97enum i2c_state {
98 i2c_line_low = 0,
99 i2c_line_high
100};
101
102#define READ_CMD 1
103#define WRITE_CMD 0
104
105/**
106 * i2c_gpio_set - set a GPIO line
107 * @dd: the infinipath device
108 * @line: the line to set
109 * @new_line_state: the state to set
110 *
111 * Returns 0 if the line was set to the new state successfully, non-zero
112 * on error.
113 */
114static int i2c_gpio_set(struct ipath_devdata *dd,
115 enum i2c_type line,
116 enum i2c_state new_line_state)
117{
118 u64 out_mask, dir_mask, *gpioval;
119 unsigned long flags = 0;
120
121 gpioval = &dd->ipath_gpio_out;
122
123 if (line == i2c_line_scl) {
124 dir_mask = dd->ipath_gpio_scl;
125 out_mask = (1UL << dd->ipath_gpio_scl_num);
126 } else {
127 dir_mask = dd->ipath_gpio_sda;
128 out_mask = (1UL << dd->ipath_gpio_sda_num);
129 }
130
131 spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
132 if (new_line_state == i2c_line_high) {
133 /* tri-state the output rather than force high */
134 dd->ipath_extctrl &= ~dir_mask;
135 } else {
136 /* config line to be an output */
137 dd->ipath_extctrl |= dir_mask;
138 }
139 ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
140
141 /* set output as well (no real verify) */
142 if (new_line_state == i2c_line_high)
143 *gpioval |= out_mask;
144 else
145 *gpioval &= ~out_mask;
146
147 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
148 spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
149
150 return 0;
151}
152
153/**
154 * i2c_gpio_get - get a GPIO line state
155 * @dd: the infinipath device
156 * @line: the line to get
157 * @curr_statep: where to put the line state
158 *
159 * Returns 0 if the line state was read successfully, non-zero
160 * on error. curr_statep is not written on error.
161 */
162static int i2c_gpio_get(struct ipath_devdata *dd,
163 enum i2c_type line,
164 enum i2c_state *curr_statep)
165{
166 u64 read_val, mask;
167 int ret;
168 unsigned long flags = 0;
169
170 /* check args */
171 if (curr_statep == NULL) {
172 ret = 1;
173 goto bail;
174 }
175
176 /* config line to be an input */
177 if (line == i2c_line_scl)
178 mask = dd->ipath_gpio_scl;
179 else
180 mask = dd->ipath_gpio_sda;
181
182 spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
183 dd->ipath_extctrl &= ~mask;
184 ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
185 /*
186 * Below is very unlikely to reflect true input state if Output
187 * Enable actually changed.
188 */
189 read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
190 spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
191
192 if (read_val & mask)
193 *curr_statep = i2c_line_high;
194 else
195 *curr_statep = i2c_line_low;
196
197 ret = 0;
198
199bail:
200 return ret;
201}
202
203/**
204 * i2c_wait_for_writes - wait for a write
205 * @dd: the infinipath device
206 *
207 * We use this instead of udelay directly, so we can make sure
208 * that previous register writes have been flushed all the way
209 * to the chip. Since we are delaying anyway, the cost doesn't
210 * hurt, and makes the bit twiddling more regular
211 */
212static void i2c_wait_for_writes(struct ipath_devdata *dd)
213{
214 (void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
215 rmb();
216}
217
218static void scl_out(struct ipath_devdata *dd, u8 bit)
219{
220 udelay(1);
221 i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low);
222
223 i2c_wait_for_writes(dd);
224}
225
226static void sda_out(struct ipath_devdata *dd, u8 bit)
227{
228 i2c_gpio_set(dd, i2c_line_sda, bit ? i2c_line_high : i2c_line_low);
229
230 i2c_wait_for_writes(dd);
231}
232
233static u8 sda_in(struct ipath_devdata *dd, int wait)
234{
235 enum i2c_state bit;
236
237 if (i2c_gpio_get(dd, i2c_line_sda, &bit))
238 ipath_dbg("get bit failed!\n");
239
240 if (wait)
241 i2c_wait_for_writes(dd);
242
243 return bit == i2c_line_high ? 1U : 0;
244}
245
246/**
247 * i2c_ackrcv - see if ack following write is true
248 * @dd: the infinipath device
249 */
250static int i2c_ackrcv(struct ipath_devdata *dd)
251{
252 u8 ack_received;
253
254 /* AT ENTRY SCL = LOW */
255 /* change direction, ignore data */
256 ack_received = sda_in(dd, 1);
257 scl_out(dd, i2c_line_high);
258 ack_received = sda_in(dd, 1) == 0;
259 scl_out(dd, i2c_line_low);
260 return ack_received;
261}
262
263/**
264 * rd_byte - read a byte, leaving ACK, STOP, etc up to caller
265 * @dd: the infinipath device
266 *
267 * Returns byte shifted out of device
268 */
269static int rd_byte(struct ipath_devdata *dd)
270{
271 int bit_cntr, data;
272
273 data = 0;
274
275 for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) {
276 data <<= 1;
277 scl_out(dd, i2c_line_high);
278 data |= sda_in(dd, 0);
279 scl_out(dd, i2c_line_low);
280 }
281 return data;
282}
283
284/**
285 * wr_byte - write a byte, one bit at a time
286 * @dd: the infinipath device
287 * @data: the byte to write
288 *
289 * Returns 0 if we got the following ack, otherwise 1
290 */
291static int wr_byte(struct ipath_devdata *dd, u8 data)
292{
293 int bit_cntr;
294 u8 bit;
295
296 for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) {
297 bit = (data >> bit_cntr) & 1;
298 sda_out(dd, bit);
299 scl_out(dd, i2c_line_high);
300 scl_out(dd, i2c_line_low);
301 }
302 return (!i2c_ackrcv(dd)) ? 1 : 0;
303}
304
305static void send_ack(struct ipath_devdata *dd)
306{
307 sda_out(dd, i2c_line_low);
308 scl_out(dd, i2c_line_high);
309 scl_out(dd, i2c_line_low);
310 sda_out(dd, i2c_line_high);
311}
312
313/**
314 * i2c_startcmd - transmit the start condition, followed by address/cmd
315 * @dd: the infinipath device
316 * @offset_dir: direction byte
317 *
318 * (both clock/data high, clock high, data low while clock is high)
319 */
320static int i2c_startcmd(struct ipath_devdata *dd, u8 offset_dir)
321{
322 int res;
323
324 /* issue start sequence */
325 sda_out(dd, i2c_line_high);
326 scl_out(dd, i2c_line_high);
327 sda_out(dd, i2c_line_low);
328 scl_out(dd, i2c_line_low);
329
330 /* issue length and direction byte */
331 res = wr_byte(dd, offset_dir);
332
333 if (res)
334 ipath_cdbg(VERBOSE, "No ack to complete start\n");
335
336 return res;
337}
338
339/**
340 * stop_cmd - transmit the stop condition
341 * @dd: the infinipath device
342 *
343 * (both clock/data low, clock high, data high while clock is high)
344 */
345static void stop_cmd(struct ipath_devdata *dd)
346{
347 scl_out(dd, i2c_line_low);
348 sda_out(dd, i2c_line_low);
349 scl_out(dd, i2c_line_high);
350 sda_out(dd, i2c_line_high);
351 udelay(2);
352}
353
354/**
355 * eeprom_reset - reset I2C communication
356 * @dd: the infinipath device
357 */
358
359static int eeprom_reset(struct ipath_devdata *dd)
360{
361 int clock_cycles_left = 9;
362 u64 *gpioval = &dd->ipath_gpio_out;
363 int ret;
364 unsigned long flags;
365
366 spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
367 /* Make sure shadows are consistent */
368 dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
369 *gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out);
370 spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
371
372 ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg "
373 "is %llx\n", (unsigned long long) *gpioval);
374
375 /*
376 * This is to get the i2c into a known state, by first going low,
377 * then tristate sda (and then tristate scl as first thing
378 * in loop)
379 */
380 scl_out(dd, i2c_line_low);
381 sda_out(dd, i2c_line_high);
382
383 /* Clock up to 9 cycles looking for SDA hi, then issue START and STOP */
384 while (clock_cycles_left--) {
385 scl_out(dd, i2c_line_high);
386
387 /* SDA seen high, issue START by dropping it while SCL high */
388 if (sda_in(dd, 0)) {
389 sda_out(dd, i2c_line_low);
390 scl_out(dd, i2c_line_low);
391 /* ATMEL spec says must be followed by STOP. */
392 scl_out(dd, i2c_line_high);
393 sda_out(dd, i2c_line_high);
394 ret = 0;
395 goto bail;
396 }
397
398 scl_out(dd, i2c_line_low);
399 }
400
401 ret = 1;
402
403bail:
404 return ret;
405}
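
eeprom_reset() is essentially the standard I2C bus-recovery procedure: clock SCL up to nine times with SDA released until the slave lets go of the data line, then issue a START followed by a STOP. A standalone sketch of that control flow with the GPIO work reduced to stubs (illustrative only, not driver code):

#include <stdio.h>

static int sda_release_after = 3;	/* pretend SDA goes high on the 4th clock */

static void scl(int v)	   { printf("SCL %d\n", v); }
static void sda(int v)	   { printf("SDA %d\n", v); }
static int  sda_read(void) { return sda_release_after-- <= 0; }

static int bus_recover(void)
{
	int clocks = 9;

	scl(0);
	sda(1);				/* release SDA (tri-state) */
	while (clocks--) {
		scl(1);
		if (sda_read()) {	/* slave released the bus */
			sda(0);		/* START: SDA falls while SCL is high */
			scl(0);
			scl(1);
			sda(1);		/* STOP: SDA rises while SCL is high */
			return 0;
		}
		scl(0);
	}
	return 1;			/* bus still stuck after 9 clocks */
}

int main(void)
{
	return bus_recover();
}
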
406
407/*
408 * Probe for I2C device at specified address. Returns 0 for "success"
409 * to match rest of this file.
410 * Leave bus in "reasonable" state for further commands.
411 */
412static int i2c_probe(struct ipath_devdata *dd, int devaddr)
413{
414 int ret;
415
416 ret = eeprom_reset(dd);
417 if (ret) {
418 ipath_dev_err(dd, "Failed reset probing device 0x%02X\n",
419 devaddr);
420 return ret;
421 }
422 /*
423 * Reset no longer leaves bus in start condition, so normal
424 * i2c_startcmd() will do.
425 */
426 ret = i2c_startcmd(dd, devaddr | READ_CMD);
427 if (ret)
428 ipath_cdbg(VERBOSE, "Failed startcmd for device 0x%02X\n",
429 devaddr);
430 else {
431 /*
432 * Device did respond. Complete a single-byte read, because some
433 * devices apparently cannot handle STOP immediately after they
434 * ACK the start-cmd.
435 */
436 int data;
437 data = rd_byte(dd);
438 stop_cmd(dd);
439 ipath_cdbg(VERBOSE, "Response from device 0x%02X\n", devaddr);
440 }
441 return ret;
442}
443
444/*
445 * Returns the "i2c type". This is a pointer to a struct that describes
446 * the I2C chain on this board. To minimize impact on struct ipath_devdata,
447 * the (small integer) index into the table is actually memoized, rather
448 * than the pointer.
449 * Memoization is because the type is determined on the first call per chip.
450 * An alternative would be to move type determination to early
451 * init code.
452 */
453static struct i2c_chain_desc *ipath_i2c_type(struct ipath_devdata *dd)
454{
455 int idx;
456
457 /* Get memoized index, from previous successful probes */
458 idx = dd->ipath_i2c_chain_type - 1;
459 if (idx >= 0 && idx < (ARRAY_SIZE(i2c_chains) - 1))
460 goto done;
461
462 idx = 0;
463 while (i2c_chains[idx].probe_dev != IPATH_NO_DEV) {
464 /* if probe succeeds, this is type */
465 if (!i2c_probe(dd, i2c_chains[idx].probe_dev))
466 break;
467 ++idx;
468 }
469
470 /*
471 * Old EEPROM (first entry) may require a reset after probe,
472 * rather than being able to "start" after "stop"
473 */
474 if (idx == 0)
475 eeprom_reset(dd);
476
477 if (i2c_chains[idx].probe_dev == IPATH_NO_DEV)
478 idx = -1;
479 else
480 dd->ipath_i2c_chain_type = idx + 1;
481done:
482 return (idx >= 0) ? i2c_chains + idx : NULL;
483}
484
485static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
486 u8 eeprom_offset, void *buffer, int len)
487{
488 int ret;
489 struct i2c_chain_desc *icd;
490 u8 *bp = buffer;
491
492 ret = 1;
493 icd = ipath_i2c_type(dd);
494 if (!icd)
495 goto bail;
496
497 if (icd->eeprom_dev == IPATH_NO_DEV) {
498 /* legacy not-really-I2C */
499 ipath_cdbg(VERBOSE, "Start command only address\n");
500 eeprom_offset = (eeprom_offset << 1) | READ_CMD;
501 ret = i2c_startcmd(dd, eeprom_offset);
502 } else {
503 /* Actual I2C */
504 ipath_cdbg(VERBOSE, "Start command uses devaddr\n");
505 if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
506 ipath_dbg("Failed EEPROM startcmd\n");
507 stop_cmd(dd);
508 ret = 1;
509 goto bail;
510 }
511 ret = wr_byte(dd, eeprom_offset);
512 stop_cmd(dd);
513 if (ret) {
514 ipath_dev_err(dd, "Failed to write EEPROM address\n");
515 ret = 1;
516 goto bail;
517 }
518 ret = i2c_startcmd(dd, icd->eeprom_dev | READ_CMD);
519 }
520 if (ret) {
521 ipath_dbg("Failed startcmd for dev %02X\n", icd->eeprom_dev);
522 stop_cmd(dd);
523 ret = 1;
524 goto bail;
525 }
526
527 /*
528 * eeprom keeps clocking data out as long as we ack, automatically
529 * incrementing the address.
530 */
531 while (len-- > 0) {
532 /* get and store data */
533 *bp++ = rd_byte(dd);
534 /* send ack if not the last byte */
535 if (len)
536 send_ack(dd);
537 }
538
539 stop_cmd(dd);
540
541 ret = 0;
542
543bail:
544 return ret;
545}
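
The read loop above relies on the EEPROM auto-incrementing its address for as long as the master ACKs; the final byte is deliberately not ACKed before the STOP, which ends the burst. A standalone sketch of that master-side loop with the bit-level transfer stubbed out (the EEPROM contents here are made up):

#include <stdio.h>

static unsigned char fake_eeprom[] = "GUID1234";	/* made-up contents */
static int addr;					/* auto-incrementing address */

static unsigned char rd_byte_stub(void) { return fake_eeprom[addr++]; }
static void send_ack_stub(void)         { /* master pulls SDA low for one clock */ }
static void stop_stub(void)             { addr = 0; }

static void read_burst(unsigned char *buf, int len)
{
	while (len-- > 0) {
		*buf++ = rd_byte_stub();
		if (len)		/* ACK every byte except the last */
			send_ack_stub();
	}
	stop_stub();			/* last byte NACKed, then STOP */
}

int main(void)
{
	unsigned char out[8];

	read_burst(out, (int)sizeof(out));
	printf("%.8s\n", (char *)out);
	return 0;
}
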
546
547static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
548 const void *buffer, int len)
549{
550 int sub_len;
551 const u8 *bp = buffer;
552 int max_wait_time, i;
553 int ret;
554 struct i2c_chain_desc *icd;
555
556 ret = 1;
557 icd = ipath_i2c_type(dd);
558 if (!icd)
559 goto bail;
560
561 while (len > 0) {
562 if (icd->eeprom_dev == IPATH_NO_DEV) {
563 if (i2c_startcmd(dd,
564 (eeprom_offset << 1) | WRITE_CMD)) {
565 ipath_dbg("Failed to start cmd offset %u\n",
566 eeprom_offset);
567 goto failed_write;
568 }
569 } else {
570 /* Real I2C */
571 if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
572 ipath_dbg("Failed EEPROM startcmd\n");
573 goto failed_write;
574 }
575 ret = wr_byte(dd, eeprom_offset);
576 if (ret) {
577 ipath_dev_err(dd, "Failed to write EEPROM "
578 "address\n");
579 goto failed_write;
580 }
581 }
582
583 sub_len = min(len, 4);
584 eeprom_offset += sub_len;
585 len -= sub_len;
586
587 for (i = 0; i < sub_len; i++) {
588 if (wr_byte(dd, *bp++)) {
589 ipath_dbg("no ack after byte %u/%u (%u "
590 "total remain)\n", i, sub_len,
591 len + sub_len - i);
592 goto failed_write;
593 }
594 }
595
596 stop_cmd(dd);
597
598 /*
599 * wait for write complete by waiting for a successful
600 * read (the chip replies with a zero after the write
601		 * cmd completes, and before it writes to the eeprom).
602 * The startcmd for the read will fail the ack until
603 * the writes have completed. We do this inline to avoid
604 * the debug prints that are in the real read routine
605 * if the startcmd fails.
606 * We also use the proper device address, so it doesn't matter
607		 * whether we have a real eeprom_dev; legacy accepts any address.
608 */
609 max_wait_time = 100;
610 while (i2c_startcmd(dd, icd->eeprom_dev | READ_CMD)) {
611 stop_cmd(dd);
612 if (!--max_wait_time) {
613 ipath_dbg("Did not get successful read to "
614 "complete write\n");
615 goto failed_write;
616 }
617 }
618 /* now read (and ignore) the resulting byte */
619 rd_byte(dd);
620 stop_cmd(dd);
621 }
622
623 ret = 0;
624 goto bail;
625
626failed_write:
627 stop_cmd(dd);
628 ret = 1;
629
630bail:
631 return ret;
632}
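
The wait loop after each burst is the classic EEPROM ACK-polling technique: keep issuing a start until the device finally ACKs, which it only does once its internal write cycle has finished. A standalone sketch of the pattern with the device stubbed out (the 100-try bound mirrors max_wait_time above):

#include <stdio.h>

static int busy_polls = 5;		/* fake device: busy for 5 poll attempts */

/* Stub start command: non-zero (no ACK) while the write cycle is in progress. */
static int startcmd_stub(void) { return busy_polls-- > 0; }
static void stopcmd_stub(void) { }

static int ack_poll(int max_tries)
{
	while (startcmd_stub()) {
		stopcmd_stub();
		if (!--max_tries)
			return -1;	/* device never came back */
	}
	return 0;
}

int main(void)
{
	printf("ack poll %s\n", ack_poll(100) ? "timed out" : "succeeded");
	return 0;
}
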
633
634/**
635 * ipath_eeprom_read - receives bytes from the eeprom via I2C
636 * @dd: the infinipath device
637 * @eeprom_offset: address to read from
638 * @buffer: where to store result
639 * @len: number of bytes to receive
640 */
641int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
642 void *buff, int len)
643{
644 int ret;
645
646 ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
647 if (!ret) {
648 ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
649 mutex_unlock(&dd->ipath_eep_lock);
650 }
651
652 return ret;
653}
654
655/**
656 * ipath_eeprom_write - writes data to the eeprom via I2C
657 * @dd: the infinipath device
658 * @eeprom_offset: where to place data
659 * @buffer: data to write
660 * @len: number of bytes to write
661 */
662int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
663 const void *buff, int len)
664{
665 int ret;
666
667 ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
668 if (!ret) {
669 ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
670 mutex_unlock(&dd->ipath_eep_lock);
671 }
672
673 return ret;
674}
675
676static u8 flash_csum(struct ipath_flash *ifp, int adjust)
677{
678 u8 *ip = (u8 *) ifp;
679 u8 csum = 0, len;
680
681 /*
682 * Limit length checksummed to max length of actual data.
683 * Checksum of erased eeprom will still be bad, but we avoid
684 * reading past the end of the buffer we were passed.
685 */
686 len = ifp->if_length;
687 if (len > sizeof(struct ipath_flash))
688 len = sizeof(struct ipath_flash);
689 while (len--)
690 csum += *ip++;
691 csum -= ifp->if_csum;
692 csum = ~csum;
693 if (adjust)
694 ifp->if_csum = csum;
695
696 return csum;
697}
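
In other words, the stored checksum is the bitwise complement of the sum of every byte covered by if_length except the checksum byte itself, so recomputing over a valid image reproduces the stored value. A standalone sketch with a toy structure (the field layout is made up and is not the real struct ipath_flash):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_flash {
	uint8_t if_length;		/* bytes covered by the checksum */
	uint8_t if_csum;		/* stored checksum */
	uint8_t payload[14];
};

static uint8_t toy_csum(const struct toy_flash *f)
{
	const uint8_t *p = (const uint8_t *)f;
	uint8_t sum = 0, len = f->if_length;

	if (len > sizeof(*f))		/* never read past the buffer */
		len = sizeof(*f);
	while (len--)
		sum += *p++;
	sum -= f->if_csum;		/* exclude the checksum byte itself */
	return (uint8_t)~sum;
}

int main(void)
{
	struct toy_flash f;

	memset(&f, 0, sizeof(f));
	f.if_length = sizeof(f);
	memcpy(f.payload, "serial-no-123", 13);
	f.if_csum = toy_csum(&f);	/* the "adjust" pass */
	printf("stored 0x%02X, recomputed 0x%02X\n", f.if_csum, toy_csum(&f));
	return 0;
}
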
698
699/**
700 * ipath_get_guid - get the GUID from the i2c device
701 * @dd: the infinipath device
702 *
703 * We have the capability to use the ipath_nguid field, and get
704 * the guid from the first chip's flash, to use for all of them.
705 */
706void ipath_get_eeprom_info(struct ipath_devdata *dd)
707{
708 void *buf;
709 struct ipath_flash *ifp;
710 __be64 guid;
711 int len, eep_stat;
712 u8 csum, *bguid;
713 int t = dd->ipath_unit;
714 struct ipath_devdata *dd0 = ipath_lookup(0);
715
716 if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) {
717 u8 oguid;
718 dd->ipath_guid = dd0->ipath_guid;
719 bguid = (u8 *) & dd->ipath_guid;
720
721 oguid = bguid[7];
722 bguid[7] += t;
723 if (oguid > bguid[7]) {
724 if (bguid[6] == 0xff) {
725 if (bguid[5] == 0xff) {
726 ipath_dev_err(
727 dd,
728 "Can't set %s GUID from "
729 "base, wraps to OUI!\n",
730 ipath_get_unit_name(t));
731 dd->ipath_guid = 0;
732 goto bail;
733 }
734 bguid[5]++;
735 }
736 bguid[6]++;
737 }
738 dd->ipath_nguid = 1;
739
740 ipath_dbg("nguid %u, so adding %u to device 0 guid, "
741 "for %llx\n",
742 dd0->ipath_nguid, t,
743 (unsigned long long) be64_to_cpu(dd->ipath_guid));
744 goto bail;
745 }
746
747 /*
748 * read full flash, not just currently used part, since it may have
749 * been written with a newer definition
750	 */
751 len = sizeof(struct ipath_flash);
752 buf = vmalloc(len);
753 if (!buf) {
754 ipath_dev_err(dd, "Couldn't allocate memory to read %u "
755 "bytes from eeprom for GUID\n", len);
756 goto bail;
757 }
758
759 mutex_lock(&dd->ipath_eep_lock);
760 eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
761 mutex_unlock(&dd->ipath_eep_lock);
762
763 if (eep_stat) {
764 ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
765 goto done;
766 }
767 ifp = (struct ipath_flash *)buf;
768
769 csum = flash_csum(ifp, 0);
770 if (csum != ifp->if_csum) {
771 dev_info(&dd->pcidev->dev, "Bad I2C flash checksum: "
772 "0x%x, not 0x%x\n", csum, ifp->if_csum);
773 goto done;
774 }
775 if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) ||
776 *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) {
777 ipath_dev_err(dd, "Invalid GUID %llx from flash; "
778 "ignoring\n",
779 *(unsigned long long *) ifp->if_guid);
780 /* don't allow GUID if all 0 or all 1's */
781 goto done;
782 }
783
784 /* complain, but allow it */
785 if (*(u64 *) ifp->if_guid == 0x100007511000000ULL)
786 dev_info(&dd->pcidev->dev, "Warning, GUID %llx is "
787 "default, probably not correct!\n",
788 *(unsigned long long *) ifp->if_guid);
789
790 bguid = ifp->if_guid;
791 if (!bguid[0] && !bguid[1] && !bguid[2]) {
792 /* original incorrect GUID format in flash; fix in
793 * core copy, by shifting up 2 octets; don't need to
794 * change top octet, since both it and shifted are
795 * 0.. */
796 bguid[1] = bguid[3];
797 bguid[2] = bguid[4];
798 bguid[3] = bguid[4] = 0;
799 guid = *(__be64 *) ifp->if_guid;
800 ipath_cdbg(VERBOSE, "Old GUID format in flash, top 3 zero, "
801 "shifting 2 octets\n");
802 } else
803 guid = *(__be64 *) ifp->if_guid;
804 dd->ipath_guid = guid;
805 dd->ipath_nguid = ifp->if_numguid;
806 /*
807 * Things are slightly complicated by the desire to transparently
808 * support both the Pathscale 10-digit serial number and the QLogic
809 * 13-character version.
810 */
811 if ((ifp->if_fversion > 1) && ifp->if_sprefix[0]
812 && ((u8 *)ifp->if_sprefix)[0] != 0xFF) {
813 /* This board has a Serial-prefix, which is stored
814 * elsewhere for backward-compatibility.
815 */
816 char *snp = dd->ipath_serial;
817 memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix);
818 snp[sizeof ifp->if_sprefix] = '\0';
819 len = strlen(snp);
820 snp += len;
821 len = (sizeof dd->ipath_serial) - len;
822 if (len > sizeof ifp->if_serial) {
823 len = sizeof ifp->if_serial;
824 }
825 memcpy(snp, ifp->if_serial, len);
826 } else
827 memcpy(dd->ipath_serial, ifp->if_serial,
828 sizeof ifp->if_serial);
829 if (!strstr(ifp->if_comment, "Tested successfully"))
830 ipath_dev_err(dd, "Board SN %s did not pass functional "
831 "test: %s\n", dd->ipath_serial,
832 ifp->if_comment);
833
834 ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
835 (unsigned long long) be64_to_cpu(dd->ipath_guid));
836
837 memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
838 /*
839 * Power-on (actually "active") hours are kept as little-endian value
840 * in EEPROM, but as seconds in a (possibly as small as 24-bit)
841 * atomic_t while running.
842 */
843 atomic_set(&dd->ipath_active_time, 0);
844 dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);
845
846done:
847 vfree(buf);
848
849bail:;
850}
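
For units other than unit 0, the GUID is derived from unit 0's GUID by adding the unit number to the low octet and propagating any carry into the next two octets, refusing the result if the carry would reach the OUI. A standalone sketch of that carry propagation on an 8-byte EUI-64 (the example GUID bytes are made up):

#include <stdint.h>
#include <stdio.h>

/* Add 'unit' to the low octet of an 8-byte GUID, carrying at most into
 * bytes 6 and 5; bytes 0..2 (the OUI) must never change. Returns 0 on
 * success, -1 if the carry would spill into the OUI. */
static int derive_guid(uint8_t g[8], unsigned int unit)
{
	uint8_t old = g[7];

	g[7] = (uint8_t)(g[7] + unit);
	if (old > g[7]) {		/* wrapped: carry out of byte 7 */
		if (g[6] == 0xff) {
			if (g[5] == 0xff)
				return -1;	/* would wrap into the OUI */
			g[5]++;
		}
		g[6]++;
	}
	return 0;
}

int main(void)
{
	uint8_t guid[8] = { 0x00, 0x22, 0x33, 0x44, 0x55, 0x66, 0xff, 0xfe };

	if (!derive_guid(guid, 3))
		printf("derived low octets %02x:%02x:%02x\n",
		       guid[5], guid[6], guid[7]);
	return 0;
}
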
851
852/**
853 * ipath_update_eeprom_log - copy active-time and error counters to eeprom
854 * @dd: the infinipath device
855 *
856 * Although the time is kept as seconds in the ipath_devdata struct, it is
857 * rounded to hours for re-write, as we have only 16 bits in EEPROM.
858 * First-cut code reads whole (expected) struct ipath_flash, modifies,
859 * re-writes. Future direction: read/write only what we need, assuming
860 * that the EEPROM had to have been "good enough" for driver init, and
861 * if not, we aren't making it worse.
862 *
863 */
864
865int ipath_update_eeprom_log(struct ipath_devdata *dd)
866{
867 void *buf;
868 struct ipath_flash *ifp;
869 int len, hi_water;
870 uint32_t new_time, new_hrs;
871 u8 csum;
872 int ret, idx;
873 unsigned long flags;
874
875 /* first, check if we actually need to do anything. */
876 ret = 0;
877 for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
878 if (dd->ipath_eep_st_new_errs[idx]) {
879 ret = 1;
880 break;
881 }
882 }
883 new_time = atomic_read(&dd->ipath_active_time);
884
885 if (ret == 0 && new_time < 3600)
886 return 0;
887
888 /*
889 * The quick-check above determined that there is something worthy
890 * of logging, so get current contents and do a more detailed idea.
891 * read full flash, not just currently used part, since it may have
892 * been written with a newer definition
893 */
894 len = sizeof(struct ipath_flash);
895 buf = vmalloc(len);
896 ret = 1;
897 if (!buf) {
898 ipath_dev_err(dd, "Couldn't allocate memory to read %u "
899 "bytes from eeprom for logging\n", len);
900 goto bail;
901 }
902
903 /* Grab semaphore and read current EEPROM. If we get an
904 * error, let go, but if not, keep it until we finish write.
905 */
906 ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
907 if (ret) {
908 ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
909 goto free_bail;
910 }
911 ret = ipath_eeprom_internal_read(dd, 0, buf, len);
912 if (ret) {
913 mutex_unlock(&dd->ipath_eep_lock);
914 ipath_dev_err(dd, "Unable read EEPROM for logging\n");
915 goto free_bail;
916 }
917 ifp = (struct ipath_flash *)buf;
918
919 csum = flash_csum(ifp, 0);
920 if (csum != ifp->if_csum) {
921 mutex_unlock(&dd->ipath_eep_lock);
922 ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
923 csum, ifp->if_csum);
924 ret = 1;
925 goto free_bail;
926 }
927 hi_water = 0;
928 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
929 for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
930 int new_val = dd->ipath_eep_st_new_errs[idx];
931 if (new_val) {
932 /*
933 * If we have seen any errors, add to EEPROM values
934 * We need to saturate at 0xFF (255) and we also
935 * would need to adjust the checksum if we were
936 * trying to minimize EEPROM traffic
937 * Note that we add to actual current count in EEPROM,
938 * in case it was altered while we were running.
939 */
940 new_val += ifp->if_errcntp[idx];
941 if (new_val > 0xFF)
942 new_val = 0xFF;
943 if (ifp->if_errcntp[idx] != new_val) {
944 ifp->if_errcntp[idx] = new_val;
945 hi_water = offsetof(struct ipath_flash,
946 if_errcntp) + idx;
947 }
948 /*
949 * update our shadow (used to minimize EEPROM
950 * traffic), to match what we are about to write.
951 */
952 dd->ipath_eep_st_errs[idx] = new_val;
953 dd->ipath_eep_st_new_errs[idx] = 0;
954 }
955 }
956 /*
957 * now update active-time. We would like to round to the nearest hour
958	 * but unless atomic_t is guaranteed to be a proper signed int we cannot,
959 * because we need to account for what we "transfer" to EEPROM and
960 * if we log an hour at 31 minutes, then we would need to set
961 * active_time to -29 to accurately count the _next_ hour.
962 */
963 if (new_time >= 3600) {
964 new_hrs = new_time / 3600;
965 atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
966 new_hrs += dd->ipath_eep_hrs;
967 if (new_hrs > 0xFFFF)
968 new_hrs = 0xFFFF;
969 dd->ipath_eep_hrs = new_hrs;
970 if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
971 ifp->if_powerhour[0] = new_hrs & 0xFF;
972 hi_water = offsetof(struct ipath_flash, if_powerhour);
973 }
974 if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
975 ifp->if_powerhour[1] = new_hrs >> 8;
976 hi_water = offsetof(struct ipath_flash, if_powerhour)
977 + 1;
978 }
979 }
980 /*
981 * There is a tiny possibility that we could somehow fail to write
982 * the EEPROM after updating our shadows, but problems from holding
983 * the spinlock too long are a much bigger issue.
984 */
985 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
986 if (hi_water) {
987		/* we made some change to the data, update cksum and write */
988 csum = flash_csum(ifp, 1);
989 ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
990 }
991 mutex_unlock(&dd->ipath_eep_lock);
992 if (ret)
993 ipath_dev_err(dd, "Failed updating EEPROM\n");
994
995free_bail:
996 vfree(buf);
997bail:
998 return ret;
999
1000}
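
Two details of the update are worth illustrating: new error counts are merged into the flash copy with saturation at 0xFF, and hi_water records the highest byte offset actually modified so only that prefix of the structure needs rewriting. A standalone sketch with a toy structure (not the real struct ipath_flash layout):

#include <stddef.h>
#include <stdio.h>

#define LOG_CNT 4

struct toy_log {
	unsigned char errcnt[LOG_CNT];
	unsigned char powerhour[2];
};

/* Merge fresh error counts into the flash copy, saturating at 0xFF, and
 * return the highest byte offset that was actually modified (hi_water). */
static size_t merge_errs(struct toy_log *flash, const unsigned int *fresh)
{
	size_t hi_water = 0;
	int i;

	for (i = 0; i < LOG_CNT; i++) {
		unsigned int v = flash->errcnt[i] + fresh[i];

		if (v > 0xFF)
			v = 0xFF;	/* counters saturate, never wrap */
		if (flash->errcnt[i] != v) {
			flash->errcnt[i] = (unsigned char)v;
			hi_water = offsetof(struct toy_log, errcnt) + i;
		}
	}
	return hi_water;
}

int main(void)
{
	struct toy_log flash = { { 10, 250, 0, 0 }, { 0, 0 } };
	unsigned int fresh[LOG_CNT] = { 0, 20, 5, 0 };

	printf("hi_water byte offset: %zu\n", merge_errs(&flash, fresh));
	printf("counters now: %u %u %u %u\n", flash.errcnt[0],
	       flash.errcnt[1], flash.errcnt[2], flash.errcnt[3]);
	return 0;
}
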
1001
1002/**
1003 * ipath_inc_eeprom_err - increment one of the four error counters
1004 * that are logged to EEPROM.
1005 * @dd: the infinipath device
1006 * @eidx: 0..3, the counter to increment
1007 * @incr: how much to add
1008 *
1009 * Each counter is 8-bits, and saturates at 255 (0xFF). They
1010 * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
1011 * is called, but it can only be called in a context that allows sleep.
1012 * This function can be called even at interrupt level.
1013 */
1014
1015void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
1016{
1017 uint new_val;
1018 unsigned long flags;
1019
1020 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
1021 new_val = dd->ipath_eep_st_new_errs[eidx] + incr;
1022 if (new_val > 255)
1023 new_val = 255;
1024 dd->ipath_eep_st_new_errs[eidx] = new_val;
1025 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
1026 return;
1027}
1028
1029static int ipath_tempsense_internal_read(struct ipath_devdata *dd, u8 regnum)
1030{
1031 int ret;
1032 struct i2c_chain_desc *icd;
1033
1034 ret = -ENOENT;
1035
1036 icd = ipath_i2c_type(dd);
1037 if (!icd)
1038 goto bail;
1039
1040 if (icd->temp_dev == IPATH_NO_DEV) {
1041 /* tempsense only exists on new, real-I2C boards */
1042 ret = -ENXIO;
1043 goto bail;
1044 }
1045
1046 if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
1047 ipath_dbg("Failed tempsense startcmd\n");
1048 stop_cmd(dd);
1049 ret = -ENXIO;
1050 goto bail;
1051 }
1052 ret = wr_byte(dd, regnum);
1053 stop_cmd(dd);
1054 if (ret) {
1055 ipath_dev_err(dd, "Failed tempsense WR command %02X\n",
1056 regnum);
1057 ret = -ENXIO;
1058 goto bail;
1059 }
1060 if (i2c_startcmd(dd, icd->temp_dev | READ_CMD)) {
1061 ipath_dbg("Failed tempsense RD startcmd\n");
1062 stop_cmd(dd);
1063 ret = -ENXIO;
1064 goto bail;
1065 }
1066 /*
1067 * We can only clock out one byte per command, sensibly
1068 */
1069 ret = rd_byte(dd);
1070 stop_cmd(dd);
1071
1072bail:
1073 return ret;
1074}
1075
1076#define VALID_TS_RD_REG_MASK 0xBF
1077
1078/**
1079 * ipath_tempsense_read - read register of temp sensor via I2C
1080 * @dd: the infinipath device
1081 * @regnum: register to read from
1082 *
1083 * returns reg contents (0..255) or < 0 for error
1084 */
1085int ipath_tempsense_read(struct ipath_devdata *dd, u8 regnum)
1086{
1087 int ret;
1088
1089 if (regnum > 7)
1090 return -EINVAL;
1091
1092 /* return a bogus value for (the one) register we do not have */
1093 if (!((1 << regnum) & VALID_TS_RD_REG_MASK))
1094 return 0;
1095
1096 ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
1097 if (!ret) {
1098 ret = ipath_tempsense_internal_read(dd, regnum);
1099 mutex_unlock(&dd->ipath_eep_lock);
1100 }
1101
1102 /*
1103 * There are three possibilities here:
1104 * ret is actual value (0..255)
1105 * ret is -ENXIO or -EINVAL from code in this file
1106 * ret is -EINTR from mutex_lock_interruptible.
1107 */
1108 return ret;
1109}
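
VALID_TS_RD_REG_MASK is a per-register validity bitmap: bit N set means register N exists, and 0xBF leaves exactly register 6 out, which is why that one register gets the bogus 0 above. A standalone sketch of the same check:

#include <stdio.h>

#define TS_RD_REG_MASK 0xBF	/* bits 0-5 and 7 set: register 6 is absent */

static int ts_reg_readable(unsigned int regnum)
{
	return regnum <= 7 && ((1u << regnum) & TS_RD_REG_MASK);
}

int main(void)
{
	unsigned int r;

	for (r = 0; r < 8; r++)
		printf("reg %u: %s\n", r, ts_reg_readable(r) ? "ok" : "absent");
	return 0;
}
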
1110
1111static int ipath_tempsense_internal_write(struct ipath_devdata *dd,
1112 u8 regnum, u8 data)
1113{
1114 int ret = -ENOENT;
1115 struct i2c_chain_desc *icd;
1116
1117 icd = ipath_i2c_type(dd);
1118 if (!icd)
1119 goto bail;
1120
1121 if (icd->temp_dev == IPATH_NO_DEV) {
1122 /* tempsense only exists on new, real-I2C boards */
1123 ret = -ENXIO;
1124 goto bail;
1125 }
1126 if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
1127 ipath_dbg("Failed tempsense startcmd\n");
1128 stop_cmd(dd);
1129 ret = -ENXIO;
1130 goto bail;
1131 }
1132 ret = wr_byte(dd, regnum);
1133 if (ret) {
1134 stop_cmd(dd);
1135 ipath_dev_err(dd, "Failed to write tempsense command %02X\n",
1136 regnum);
1137 ret = -ENXIO;
1138 goto bail;
1139 }
1140 ret = wr_byte(dd, data);
1141 stop_cmd(dd);
1142 ret = i2c_startcmd(dd, icd->temp_dev | READ_CMD);
1143 if (ret) {
1144 ipath_dev_err(dd, "Failed tempsense data wrt to %02X\n",
1145 regnum);
1146 ret = -ENXIO;
1147 }
1148
1149bail:
1150 return ret;
1151}
1152
1153#define VALID_TS_WR_REG_MASK ((1 << 9) | (1 << 0xB) | (1 << 0xD))
1154
1155/**
1156 * ipath_tempsense_write - write register of temp sensor via I2C
1157 * @dd: the infinipath device
1158 * @regnum: register to write
1159 * @data: data to write
1160 *
1161 * returns 0 for success or < 0 for error
1162 */
1163int ipath_tempsense_write(struct ipath_devdata *dd, u8 regnum, u8 data)
1164{
1165 int ret;
1166
1167 if (regnum > 15 || !((1 << regnum) & VALID_TS_WR_REG_MASK))
1168 return -EINVAL;
1169
1170 ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
1171 if (!ret) {
1172 ret = ipath_tempsense_internal_write(dd, regnum, data);
1173 mutex_unlock(&dd->ipath_eep_lock);
1174 }
1175
1176 /*
1177 * There are three possibilities here:
1178 * ret is 0 for success
1179 * ret is -ENXIO or -EINVAL from code in this file
1180 * ret is -EINTR from mutex_lock_interruptible.
1181 */
1182 return ret;
1183}
diff --git a/drivers/staging/rdma/ipath/ipath_file_ops.c b/drivers/staging/rdma/ipath/ipath_file_ops.c
deleted file mode 100644
index 6187b848b3ca..000000000000
--- a/drivers/staging/rdma/ipath/ipath_file_ops.c
+++ /dev/null
@@ -1,2619 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/pci.h>
35#include <linux/poll.h>
36#include <linux/cdev.h>
37#include <linux/swap.h>
38#include <linux/export.h>
39#include <linux/vmalloc.h>
40#include <linux/slab.h>
41#include <linux/highmem.h>
42#include <linux/io.h>
43#include <linux/jiffies.h>
44#include <linux/cpu.h>
45#include <linux/uio.h>
46#include <asm/pgtable.h>
47
48#include "ipath_kernel.h"
49#include "ipath_common.h"
50#include "ipath_user_sdma.h"
51
52static int ipath_open(struct inode *, struct file *);
53static int ipath_close(struct inode *, struct file *);
54static ssize_t ipath_write(struct file *, const char __user *, size_t,
55 loff_t *);
56static ssize_t ipath_write_iter(struct kiocb *, struct iov_iter *from);
57static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
58static int ipath_mmap(struct file *, struct vm_area_struct *);
59
60/*
61 * This is genuinely odd - write() and writev() here have
62 * completely unrelated semantics; an unfortunate userland
63 * ABI that we are stuck supporting.
64 */
65static const struct file_operations ipath_file_ops = {
66 .owner = THIS_MODULE,
67 .write = ipath_write,
68 .write_iter = ipath_write_iter,
69 .open = ipath_open,
70 .release = ipath_close,
71 .poll = ipath_poll,
72 .mmap = ipath_mmap,
73 .llseek = noop_llseek,
74};
75
76/*
77 * Convert kernel virtual addresses to physical addresses so they don't
78 * potentially conflict with the chip addresses used as mmap offsets.
79 * It doesn't really matter what mmap offset we use as long as we can
80 * interpret it correctly.
81 */
82static u64 cvt_kvaddr(void *p)
83{
84 struct page *page;
85 u64 paddr = 0;
86
87 page = vmalloc_to_page(p);
88 if (page)
89 paddr = page_to_pfn(page) << PAGE_SHIFT;
90
91 return paddr;
92}
93
94static int ipath_get_base_info(struct file *fp,
95 void __user *ubase, size_t ubase_size)
96{
97 struct ipath_portdata *pd = port_fp(fp);
98 int ret = 0;
99 struct ipath_base_info *kinfo = NULL;
100 struct ipath_devdata *dd = pd->port_dd;
101 unsigned subport_cnt;
102 int shared, master;
103 size_t sz;
104
105 subport_cnt = pd->port_subport_cnt;
106 if (!subport_cnt) {
107 shared = 0;
108 master = 0;
109 subport_cnt = 1;
110 } else {
111 shared = 1;
112 master = !subport_fp(fp);
113 }
114
115 sz = sizeof(*kinfo);
116 /* If port sharing is not requested, allow the old size structure */
117 if (!shared)
118 sz -= 7 * sizeof(u64);
119 if (ubase_size < sz) {
120 ipath_cdbg(PROC,
121 "Base size %zu, need %zu (version mismatch?)\n",
122 ubase_size, sz);
123 ret = -EINVAL;
124 goto bail;
125 }
126
127 kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL);
128 if (kinfo == NULL) {
129 ret = -ENOMEM;
130 goto bail;
131 }
132
133 ret = dd->ipath_f_get_base_info(pd, kinfo);
134 if (ret < 0)
135 goto bail;
136
137 kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt;
138 kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize;
139 kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt;
140 kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize;
141 /*
142 * have to mmap whole thing
143 */
144 kinfo->spi_rcv_egrbuftotlen =
145 pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
146 kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
147 kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
148 pd->port_rcvegrbuf_chunks;
149 kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
150 if (master)
151 kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
152 /*
153	 * for this use, this may be ipath_cfgports summed over all chips
154	 * that are configured and present
155 */
156 kinfo->spi_nports = dd->ipath_cfgports;
157 /* unit (chip/board) our port is on */
158 kinfo->spi_unit = dd->ipath_unit;
159 /* for now, only a single page */
160 kinfo->spi_tid_maxsize = PAGE_SIZE;
161
162 /*
163 * Doing this per port, and based on the skip value, etc. This has
164 * to be the actual buffer size, since the protocol code treats it
165 * as an array.
166 *
167 * These have to be set to user addresses in the user code via mmap.
168 * These values are used on return to user code for the mmap target
169 * addresses only. For 32 bit, same 44 bit address problem, so use
170 * the physical address, not virtual. Before 2.6.11, using the
171 * page_address() macro worked, but in 2.6.11, even that returns the
172 * full 64 bit address (upper bits all 1's). So far, using the
173 * physical addresses (or chip offsets, for chip mapping) works, but
174 * no doubt some future kernel release will change that, and we'll be
175 * on to yet another method of dealing with this.
176 */
177 kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
178 kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
179 kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
180 kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
181 kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
182 (void *) dd->ipath_statusp -
183 (void *) dd->ipath_pioavailregs_dma;
184 if (!shared) {
185 kinfo->spi_piocnt = pd->port_piocnt;
186 kinfo->spi_piobufbase = (u64) pd->port_piobufs;
187 kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
188 dd->ipath_ureg_align * pd->port_port;
189 } else if (master) {
190 kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
191 (pd->port_piocnt % subport_cnt);
192 /* Master's PIO buffers are after all the slave's */
193 kinfo->spi_piobufbase = (u64) pd->port_piobufs +
194 dd->ipath_palign *
195 (pd->port_piocnt - kinfo->spi_piocnt);
196 } else {
197 unsigned slave = subport_fp(fp) - 1;
198
199 kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
200 kinfo->spi_piobufbase = (u64) pd->port_piobufs +
201 dd->ipath_palign * kinfo->spi_piocnt * slave;
202 }
203
204 if (shared) {
205 kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
206 dd->ipath_ureg_align * pd->port_port;
207 kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
208 kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
209 kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
210
211 kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
212 PAGE_SIZE * subport_fp(fp));
213
214 kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
215 pd->port_rcvhdrq_size * subport_fp(fp));
216 kinfo->spi_rcvhdr_tailaddr = 0;
217 kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
218 pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
219 subport_fp(fp));
220
221 kinfo->spi_subport_uregbase =
222 cvt_kvaddr(pd->subport_uregbase);
223 kinfo->spi_subport_rcvegrbuf =
224 cvt_kvaddr(pd->subport_rcvegrbuf);
225 kinfo->spi_subport_rcvhdr_base =
226 cvt_kvaddr(pd->subport_rcvhdr_base);
227 ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
228 kinfo->spi_port, kinfo->spi_runtime_flags,
229 (unsigned long long) kinfo->spi_subport_uregbase,
230 (unsigned long long) kinfo->spi_subport_rcvegrbuf,
231 (unsigned long long) kinfo->spi_subport_rcvhdr_base);
232 }
233
234 /*
235 * All user buffers are 2KB buffers. If we ever support
236 * giving 4KB buffers to user processes, this will need some
237 * work.
238 */
239 kinfo->spi_pioindex = (kinfo->spi_piobufbase -
240 (dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign;
241 kinfo->spi_pioalign = dd->ipath_palign;
242
243 kinfo->spi_qpair = IPATH_KD_QP;
244 /*
245 * user mode PIO buffers are always 2KB, even when 4KB can
246 * be received, and sent via the kernel; this is ibmaxlen
247 * for 2K MTU.
248 */
249 kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32);
250 kinfo->spi_mtu = dd->ipath_ibmaxlen; /* maxlen, not ibmtu */
251 kinfo->spi_port = pd->port_port;
252 kinfo->spi_subport = subport_fp(fp);
253 kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
254 kinfo->spi_hw_version = dd->ipath_revision;
255
256 if (master) {
257 kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
258 }
259
260 sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
261 if (copy_to_user(ubase, kinfo, sz))
262 ret = -EFAULT;
263
264bail:
265 kfree(kinfo);
266 return ret;
267}
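
The PIO buffer split for shared ports works out as follows: each slave gets piocnt/subport_cnt buffers, laid out first, and the master gets the same share plus the remainder, placed after all the slaves. A standalone worked example of that arithmetic (the 64-buffer count, 3 subports and 2048-byte alignment are made-up inputs):

#include <stdio.h>

int main(void)
{
	unsigned int piocnt = 64, subports = 3, align = 2048;
	unsigned int slave_cnt = piocnt / subports;
	unsigned int master_cnt = slave_cnt + piocnt % subports;
	unsigned int s;

	for (s = 1; s < subports; s++)
		printf("slave %u: %u buffers at byte offset %u\n",
		       s, slave_cnt, align * slave_cnt * (s - 1));
	printf("master: %u buffers at byte offset %u\n",
	       master_cnt, align * (piocnt - master_cnt));
	return 0;
}
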
268
269/**
270 * ipath_tid_update - update a port TID
271 * @pd: the port
272 * @fp: the ipath device file
273 * @ti: the TID information
274 *
275 * The new implementation as of Oct 2004 is that the driver assigns
276 * the tid and returns it to the caller. To make it easier to
277 * catch bugs, and to reduce search time, we keep a cursor for
278 * each port, walking the shadow tid array to find one that's not
279 * in use.
280 *
281 * For now, if we can't allocate the full list, we fail, although
282 * in the long run, we'll allocate as many as we can, and the
283 * caller will deal with that by trying the remaining pages later.
284 * That means that when we fail, we have to mark the tids as not in
285 * use again, in our shadow copy.
286 *
287 * It's up to the caller to free the tids when they are done.
288 * We'll unlock the pages as they free them.
289 *
290 * Also, right now we are locking one page at a time, but since
291 * the intended use of this routine is for a single group of
292 * virtually contiguous pages, that should change to improve
293 * performance.
294 */
295static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
296 const struct ipath_tid_info *ti)
297{
298 int ret = 0, ntids;
299 u32 tid, porttid, cnt, i, tidcnt, tidoff;
300 u16 *tidlist;
301 struct ipath_devdata *dd = pd->port_dd;
302 u64 physaddr;
303 unsigned long vaddr;
304 u64 __iomem *tidbase;
305 unsigned long tidmap[8];
306 struct page **pagep = NULL;
307 unsigned subport = subport_fp(fp);
308
309 if (!dd->ipath_pageshadow) {
310 ret = -ENOMEM;
311 goto done;
312 }
313
314 cnt = ti->tidcnt;
315 if (!cnt) {
316 ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n",
317 (unsigned long long) ti->tidlist);
318 /*
319		 * Should we treat this as success? Likely a bug.
320 */
321 ret = -EFAULT;
322 goto done;
323 }
324 porttid = pd->port_port * dd->ipath_rcvtidcnt;
325 if (!pd->port_subport_cnt) {
326 tidcnt = dd->ipath_rcvtidcnt;
327 tid = pd->port_tidcursor;
328 tidoff = 0;
329 } else if (!subport) {
330 tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
331 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
332 tidoff = dd->ipath_rcvtidcnt - tidcnt;
333 porttid += tidoff;
334 tid = tidcursor_fp(fp);
335 } else {
336 tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
337 tidoff = tidcnt * (subport - 1);
338 porttid += tidoff;
339 tid = tidcursor_fp(fp);
340 }
341 if (cnt > tidcnt) {
342 /* make sure it all fits in port_tid_pg_list */
343 dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
344 "TIDs, only trying max (%u)\n", cnt, tidcnt);
345 cnt = tidcnt;
346 }
347 pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
348 tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];
349
350 memset(tidmap, 0, sizeof(tidmap));
351 /* before decrement; chip actual # */
352 ntids = tidcnt;
353 tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
354 dd->ipath_rcvtidbase +
355 porttid * sizeof(*tidbase));
356
357 ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n",
358 pd->port_port, cnt, tid, tidbase);
359
360 /* virtual address of first page in transfer */
361 vaddr = ti->tidvaddr;
362 if (!access_ok(VERIFY_WRITE, (void __user *) vaddr,
363 cnt * PAGE_SIZE)) {
364 ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n",
365 (void *)vaddr, cnt);
366 ret = -EFAULT;
367 goto done;
368 }
369 ret = ipath_get_user_pages(vaddr, cnt, pagep);
370 if (ret) {
371 if (ret == -EBUSY) {
372 ipath_dbg("Failed to lock addr %p, %u pages "
373 "(already locked)\n",
374 (void *) vaddr, cnt);
375 /*
376 * for now, continue, and see what happens but with
377 * the new implementation, this should never happen,
378 * unless perhaps the user has mpin'ed the pages
379 * themselves (something we need to test)
380 */
381 ret = 0;
382 } else {
383 dev_info(&dd->pcidev->dev,
384 "Failed to lock addr %p, %u pages: "
385 "errno %d\n", (void *) vaddr, cnt, -ret);
386 goto done;
387 }
388 }
389 for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
390 for (; ntids--; tid++) {
391 if (tid == tidcnt)
392 tid = 0;
393 if (!dd->ipath_pageshadow[porttid + tid])
394 break;
395 }
396 if (ntids < 0) {
397 /*
398 * oops, wrapped all the way through their TIDs,
399 * and didn't have enough free; see comments at
400 * start of routine
401 */
402 ipath_dbg("Not enough free TIDs for %u pages "
403 "(index %d), failing\n", cnt, i);
404 i--; /* last tidlist[i] not filled in */
405 ret = -ENOMEM;
406 break;
407 }
408 tidlist[i] = tid + tidoff;
409 ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
410 "vaddr %lx\n", i, tid + tidoff, vaddr);
411 /* we "know" system pages and TID pages are same size */
412 dd->ipath_pageshadow[porttid + tid] = pagep[i];
413 dd->ipath_physshadow[porttid + tid] = ipath_map_page(
414 dd->pcidev, pagep[i], 0, PAGE_SIZE,
415 PCI_DMA_FROMDEVICE);
416 /*
417 * don't need atomic or it's overhead
418 */
419 __set_bit(tid, tidmap);
420 physaddr = dd->ipath_physshadow[porttid + tid];
421 ipath_stats.sps_pagelocks++;
422 ipath_cdbg(VERBOSE,
423 "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
424 tid, vaddr, (unsigned long long) physaddr,
425 pagep[i]);
426 dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED,
427 physaddr);
428 /*
429 * don't check this tid in ipath_portshadow, since we
430 * just filled it in; start with the next one.
431 */
432 tid++;
433 }
434
435 if (ret) {
436 u32 limit;
437 cleanup:
438 /* jump here if copy out of updated info failed... */
439 ipath_dbg("After failure (ret=%d), undo %d of %d entries\n",
440 -ret, i, cnt);
441 /* same code that's in ipath_free_tid() */
442 limit = sizeof(tidmap) * BITS_PER_BYTE;
443 if (limit > tidcnt)
444 /* just in case size changes in future */
445 limit = tidcnt;
446 tid = find_first_bit((const unsigned long *)tidmap, limit);
447 for (; tid < limit; tid++) {
448 if (!test_bit(tid, tidmap))
449 continue;
450 if (dd->ipath_pageshadow[porttid + tid]) {
451 ipath_cdbg(VERBOSE, "Freeing TID %u\n",
452 tid);
453 dd->ipath_f_put_tid(dd, &tidbase[tid],
454 RCVHQ_RCV_TYPE_EXPECTED,
455 dd->ipath_tidinvalid);
456 pci_unmap_page(dd->pcidev,
457 dd->ipath_physshadow[porttid + tid],
458 PAGE_SIZE, PCI_DMA_FROMDEVICE);
459 dd->ipath_pageshadow[porttid + tid] = NULL;
460 ipath_stats.sps_pageunlocks++;
461 }
462 }
463 ipath_release_user_pages(pagep, cnt);
464 } else {
465 /*
466 * Copy the updated array, with ipath_tid's filled in, back
467 * to user. Since we did the copy in already, this "should
468 * never fail" If it does, we have to clean up...
469 */
470 if (copy_to_user((void __user *)
471 (unsigned long) ti->tidlist,
472 tidlist, cnt * sizeof(*tidlist))) {
473 ret = -EFAULT;
474 goto cleanup;
475 }
476 if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
477 tidmap, sizeof tidmap)) {
478 ret = -EFAULT;
479 goto cleanup;
480 }
481 if (tid == tidcnt)
482 tid = 0;
483 if (!pd->port_subport_cnt)
484 pd->port_tidcursor = tid;
485 else
486 tidcursor_fp(fp) = tid;
487 }
488
489done:
490 if (ret)
491 ipath_dbg("Failed to map %u TID pages, failing with %d\n",
492 ti->tidcnt, -ret);
493 return ret;
494}
495
496/**
497 * ipath_tid_free - free a port TID
498 * @pd: the port
499 * @subport: the subport
500 * @ti: the TID info
501 *
502 * right now we are unlocking one page at a time, but since
503 * the intended use of this routine is for a single group of
504 * virtually contiguous pages, that should change to improve
505 * performance. We check that the TID is in range for this port
506 * but otherwise don't check validity; if the user has an error and
507 * frees the wrong tid, it's only their own data that can thereby
508 * be corrupted. We do check that the TID was in use, for sanity.
509 * We always use our idea of the saved address, not the address that
510 * they pass in to us.
511 */
512
513static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
514 const struct ipath_tid_info *ti)
515{
516 int ret = 0;
517 u32 tid, porttid, cnt, limit, tidcnt;
518 struct ipath_devdata *dd = pd->port_dd;
519 u64 __iomem *tidbase;
520 unsigned long tidmap[8];
521
522 if (!dd->ipath_pageshadow) {
523 ret = -ENOMEM;
524 goto done;
525 }
526
527 if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
528 sizeof tidmap)) {
529 ret = -EFAULT;
530 goto done;
531 }
532
533 porttid = pd->port_port * dd->ipath_rcvtidcnt;
534 if (!pd->port_subport_cnt)
535 tidcnt = dd->ipath_rcvtidcnt;
536 else if (!subport) {
537 tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
538 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
539 porttid += dd->ipath_rcvtidcnt - tidcnt;
540 } else {
541 tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
542 porttid += tidcnt * (subport - 1);
543 }
544 tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
545 dd->ipath_rcvtidbase +
546 porttid * sizeof(*tidbase));
547
548 limit = sizeof(tidmap) * BITS_PER_BYTE;
549 if (limit > tidcnt)
550 /* just in case size changes in future */
551 limit = tidcnt;
552 tid = find_first_bit(tidmap, limit);
553 ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) "
554 "set is %d, porttid %u\n", pd->port_port, ti->tidcnt,
555 limit, tid, porttid);
556 for (cnt = 0; tid < limit; tid++) {
557 /*
558 * small optimization; if we detect a run of 3 or so without
559 * any set, use find_first_bit again. That's mainly to
560 * accelerate the case where we wrapped, so we have some at
561 * the beginning, and some at the end, and a big gap
562 * in the middle.
563 */
564 if (!test_bit(tid, tidmap))
565 continue;
566 cnt++;
567 if (dd->ipath_pageshadow[porttid + tid]) {
568 struct page *p;
569 p = dd->ipath_pageshadow[porttid + tid];
570 dd->ipath_pageshadow[porttid + tid] = NULL;
571 ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
572 pid_nr(pd->port_pid), tid);
573 dd->ipath_f_put_tid(dd, &tidbase[tid],
574 RCVHQ_RCV_TYPE_EXPECTED,
575 dd->ipath_tidinvalid);
576 pci_unmap_page(dd->pcidev,
577 dd->ipath_physshadow[porttid + tid],
578 PAGE_SIZE, PCI_DMA_FROMDEVICE);
579 ipath_release_user_pages(&p, 1);
580 ipath_stats.sps_pageunlocks++;
581 } else
582 ipath_dbg("Unused tid %u, ignoring\n", tid);
583 }
584 if (cnt != ti->tidcnt)
585 ipath_dbg("passed in tidcnt %d, only %d bits set in map\n",
586 ti->tidcnt, cnt);
587done:
588 if (ret)
589 ipath_dbg("Failed to unmap %u TID pages, failing with %d\n",
590 ti->tidcnt, -ret);
591 return ret;
592}
593
594/**
595 * ipath_set_part_key - set a partition key
596 * @pd: the port
597 * @key: the key
598 *
599 * We can have up to 4 active at a time (other than the default, which is
600 * always allowed). This is somewhat tricky, since multiple ports may set
601 * the same key, so we reference count them, and clean up at exit. All 4
602 * partition keys are packed into a single infinipath register. It's an
603 * error for a process to set the same pkey multiple times. We provide no
604 * mechanism to de-allocate a pkey at this time, we may eventually need to
605 * do that. I've used the atomic operations, and no locking, and only make
606 * a single pass through what's available. This should be more than
607 * adequate for some time. I'll think about spinlocks or the like if and as
608 * it's necessary.
609 */
610static int ipath_set_part_key(struct ipath_portdata *pd, u16 key)
611{
612 struct ipath_devdata *dd = pd->port_dd;
613 int i, any = 0, pidx = -1;
614 u16 lkey = key & 0x7FFF;
615 int ret;
616
617 if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) {
618 /* nothing to do; this key always valid */
619 ret = 0;
620 goto bail;
621 }
622
623 ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys "
624 "%hx:%x %hx:%x %hx:%x %hx:%x\n",
625 pd->port_port, key, dd->ipath_pkeys[0],
626 atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1],
627 atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2],
628 atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3],
629 atomic_read(&dd->ipath_pkeyrefs[3]));
630
631 if (!lkey) {
632 ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n",
633 pd->port_port);
634 ret = -EINVAL;
635 goto bail;
636 }
637
638 /*
639 * Set the full membership bit, because it has to be
640 * set in the register or the packet, and it seems
641 * cleaner to set in the register than to force all
642 * callers to set it. (see bug 4331)
643 */
644 key |= 0x8000;
645
646 for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
647 if (!pd->port_pkeys[i] && pidx == -1)
648 pidx = i;
649 if (pd->port_pkeys[i] == key) {
650 ipath_cdbg(VERBOSE, "p%u tries to set same pkey "
651 "(%x) more than once\n",
652 pd->port_port, key);
653 ret = -EEXIST;
654 goto bail;
655 }
656 }
657 if (pidx == -1) {
658 ipath_dbg("All pkeys for port %u already in use, "
659 "can't set %x\n", pd->port_port, key);
660 ret = -EBUSY;
661 goto bail;
662 }
663 for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
664 if (!dd->ipath_pkeys[i]) {
665 any++;
666 continue;
667 }
668 if (dd->ipath_pkeys[i] == key) {
669 atomic_t *pkrefs = &dd->ipath_pkeyrefs[i];
670
671 if (atomic_inc_return(pkrefs) > 1) {
672 pd->port_pkeys[pidx] = key;
673 ipath_cdbg(VERBOSE, "p%u set key %x "
674 "matches #%d, count now %d\n",
675 pd->port_port, key, i,
676 atomic_read(pkrefs));
677 ret = 0;
678 goto bail;
679 } else {
680 /*
681 * lost race, decrement count, catch below
682 */
683 atomic_dec(pkrefs);
684 ipath_cdbg(VERBOSE, "Lost race, count was "
685 "0, after dec, it's %d\n",
686 atomic_read(pkrefs));
687 any++;
688 }
689 }
690 if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
691 /*
692 * It makes no sense to have both the limited and
693 * full membership PKEY set at the same time since
694 * the unlimited one will disable the limited one.
695 */
696 ret = -EEXIST;
697 goto bail;
698 }
699 }
700 if (!any) {
701 ipath_dbg("port %u, all pkeys already in use, "
702 "can't set %x\n", pd->port_port, key);
703 ret = -EBUSY;
704 goto bail;
705 }
706 for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
707 if (!dd->ipath_pkeys[i] &&
708 atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
709 u64 pkey;
710
711 /* for ipathstats, etc. */
712 ipath_stats.sps_pkeys[i] = lkey;
713 pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key;
714 pkey =
715 (u64) dd->ipath_pkeys[0] |
716 ((u64) dd->ipath_pkeys[1] << 16) |
717 ((u64) dd->ipath_pkeys[2] << 32) |
718 ((u64) dd->ipath_pkeys[3] << 48);
719 ipath_cdbg(PROC, "p%u set key %x in #%d, "
720 "portidx %d, new pkey reg %llx\n",
721 pd->port_port, key, i, pidx,
722 (unsigned long long) pkey);
723 ipath_write_kreg(
724 dd, dd->ipath_kregs->kr_partitionkey, pkey);
725
726 ret = 0;
727 goto bail;
728 }
729 }
730 ipath_dbg("port %u, all pkeys already in use 2nd pass, "
731 "can't set %x\n", pd->port_port, key);
732 ret = -EBUSY;
733
734bail:
735 return ret;
736}
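
The four partition keys are packed 16 bits apiece into the single kr_partitionkey register image, exactly as in the expression above. A standalone sketch of the packing (0xFFFF is the usual InfiniBand default key; 0x8001 is a made-up user key with the full-membership bit 0x8000 forced on):

#include <stdint.h>
#include <stdio.h>

/* Pack four 16-bit partition keys into one 64-bit register image. */
static uint64_t pack_pkeys(const uint16_t k[4])
{
	return (uint64_t)k[0] |
	       ((uint64_t)k[1] << 16) |
	       ((uint64_t)k[2] << 32) |
	       ((uint64_t)k[3] << 48);
}

int main(void)
{
	uint16_t keys[4] = { 0xFFFF, 0x8001, 0, 0 };

	printf("kr_partitionkey image: 0x%016llx\n",
	       (unsigned long long)pack_pkeys(keys));
	return 0;
}
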
737
738/**
739 * ipath_manage_rcvq - manage a port's receive queue
740 * @pd: the port
741 * @subport: the subport
742 * @start_stop: action to carry out
743 *
744 * start_stop == 0 disables receive on the port, for use in queue
745 * overflow conditions. start_stop==1 re-enables, to be used to
746 * re-init the software copy of the head register
747 */
748static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
749 int start_stop)
750{
751 struct ipath_devdata *dd = pd->port_dd;
752
753 ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
754 start_stop ? "en" : "dis", dd->ipath_unit,
755 pd->port_port, subport);
756 if (subport)
757 goto bail;
758 /* atomically clear receive enable port. */
759 if (start_stop) {
760 /*
761 * On enable, force in-memory copy of the tail register to
762 * 0, so that protocol code doesn't have to worry about
763 * whether or not the chip has yet updated the in-memory
764 * copy or not on return from the system call. The chip
765		 * always resets its tail register back to 0 on a
766 * transition from disabled to enabled. This could cause a
767 * problem if software was broken, and did the enable w/o
768 * the disable, but eventually the in-memory copy will be
769 * updated and correct itself, even in the face of software
770 * bugs.
771 */
772 if (pd->port_rcvhdrtail_kvaddr)
773 ipath_clear_rcvhdrtail(pd);
774 set_bit(dd->ipath_r_portenable_shift + pd->port_port,
775 &dd->ipath_rcvctrl);
776 } else
777 clear_bit(dd->ipath_r_portenable_shift + pd->port_port,
778 &dd->ipath_rcvctrl);
779 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
780 dd->ipath_rcvctrl);
781 /* now be sure chip saw it before we return */
782 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
783 if (start_stop) {
784 /*
785 * And try to be sure that tail reg update has happened too.
786 * This should in theory interlock with the RXE changes to
787 * the tail register. Don't assign it to the tail register
788 * in memory copy, since we could overwrite an update by the
789 * chip if we did.
790 */
791 ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
792 }
793 /* always; new head should be equal to new tail; see above */
794bail:
795 return 0;
796}
797
798static void ipath_clean_part_key(struct ipath_portdata *pd,
799 struct ipath_devdata *dd)
800{
801 int i, j, pchanged = 0;
802 u64 oldpkey;
803
804 /* for debugging only */
805 oldpkey = (u64) dd->ipath_pkeys[0] |
806 ((u64) dd->ipath_pkeys[1] << 16) |
807 ((u64) dd->ipath_pkeys[2] << 32) |
808 ((u64) dd->ipath_pkeys[3] << 48);
809
810 for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
811 if (!pd->port_pkeys[i])
812 continue;
813 ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i,
814 pd->port_pkeys[i]);
815 for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) {
816 /* check for match independent of the global bit */
817 if ((dd->ipath_pkeys[j] & 0x7fff) !=
818 (pd->port_pkeys[i] & 0x7fff))
819 continue;
820 if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) {
821 ipath_cdbg(VERBOSE, "p%u clear key "
822 "%x matches #%d\n",
823 pd->port_port,
824 pd->port_pkeys[i], j);
825 ipath_stats.sps_pkeys[j] =
826 dd->ipath_pkeys[j] = 0;
827 pchanged++;
828 } else {
829 ipath_cdbg(VERBOSE, "p%u key %x matches #%d, "
830 "but ref still %d\n", pd->port_port,
831 pd->port_pkeys[i], j,
832 atomic_read(&dd->ipath_pkeyrefs[j]));
833 break;
834 }
835 }
836 pd->port_pkeys[i] = 0;
837 }
838 if (pchanged) {
839 u64 pkey = (u64) dd->ipath_pkeys[0] |
840 ((u64) dd->ipath_pkeys[1] << 16) |
841 ((u64) dd->ipath_pkeys[2] << 32) |
842 ((u64) dd->ipath_pkeys[3] << 48);
843 ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, "
844 "new pkey reg %llx\n", pd->port_port,
845 (unsigned long long) oldpkey,
846 (unsigned long long) pkey);
847 ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
848 pkey);
849 }
850}
851
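For reference, the 64-bit value written to kr_partitionkey above is simply the four 16-bit pkeys packed low word first. A small stand-alone sketch; the example key values are assumptions for illustration:

#include <stdint.h>
#include <stdio.h>

static uint64_t pack_pkeys(const uint16_t pkeys[4])
{
	return (uint64_t)pkeys[0] |
	       ((uint64_t)pkeys[1] << 16) |
	       ((uint64_t)pkeys[2] << 32) |
	       ((uint64_t)pkeys[3] << 48);
}

int main(void)
{
	uint16_t pkeys[4] = { 0xffff, 0x8001, 0, 0 };	/* assumed example keys */

	printf("pkey register value: %#llx\n",
	       (unsigned long long)pack_pkeys(pkeys));
	return 0;
}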
852/*
853 * Initialize the port data with the receive buffer sizes
854 * so this can be done while the master port is locked.
855 * Otherwise, there is a race with a slave opening the port
856 * and seeing these fields uninitialized.
857 */
858static void init_user_egr_sizes(struct ipath_portdata *pd)
859{
860 struct ipath_devdata *dd = pd->port_dd;
861 unsigned egrperchunk, egrcnt, size;
862
863 /*
864 * to avoid wasting a lot of memory, we allocate 32KB chunks of
865 * physically contiguous memory, advance through it until used up
866 * and then allocate more. Of course, we need memory to store those
867 * extra pointers, now. Started out with 256KB, but under heavy
868 * memory pressure (creating large files and then copying them over
869 * NFS while doing lots of MPI jobs), we hit some allocation
870 * failures, even though we can sleep... (2.6.10) Still get
871 * failures at 64K. 32K is the lowest we can go without wasting
872 * additional memory.
873 */
874 size = 0x8000;
875 egrperchunk = size / dd->ipath_rcvegrbufsize;
876 egrcnt = dd->ipath_rcvegrcnt;
877 pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk;
878 pd->port_rcvegrbufs_perchunk = egrperchunk;
879 pd->port_rcvegrbuf_size = size;
880}
881
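The chunk computation above is plain ceiling division over a fixed 32 KB chunk. A stand-alone sketch with an assumed eager buffer size and count (the real values come from the chip configuration):

#include <stdio.h>

int main(void)
{
	unsigned size = 0x8000;		/* 32 KB chunk, as in the driver */
	unsigned egrbufsize = 2048;	/* assumed eager buffer size */
	unsigned egrcnt = 512;		/* assumed eager buffer count */
	unsigned perchunk = size / egrbufsize;
	unsigned chunks = (egrcnt + perchunk - 1) / perchunk;	/* round up */

	printf("%u buffers per chunk, %u chunks of %#x bytes\n",
	       perchunk, chunks, size);
	return 0;
}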
882/**
883 * ipath_create_user_egr - allocate eager TID buffers
884 * @pd: the port to allocate TID buffers for
885 *
886 * This routine is now quite different for user and kernel, because
887 * the kernel uses skb's for accelerated network performance.
888 * This is the user port version.
889 *
890 * Allocate the eager TID buffers and program them into infinipath.
891 * They are no longer completely contiguous; we make multiple allocation
892 * calls.
893 */
894static int ipath_create_user_egr(struct ipath_portdata *pd)
895{
896 struct ipath_devdata *dd = pd->port_dd;
897 unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
898 size_t size;
899 int ret;
900 gfp_t gfp_flags;
901
902 /*
903 * GFP_USER, but without GFP_FS, so buffer cache can be
904 * coalesced (we hope); otherwise, even at order 4,
905 * heavy filesystem activity makes these fail, and we can
906 * use compound pages.
907 */
908 gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
909
910 egrcnt = dd->ipath_rcvegrcnt;
911 /* TID number offset for this port */
912 egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt;
913 egrsize = dd->ipath_rcvegrbufsize;
914 ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
915 "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
916
917 chunk = pd->port_rcvegrbuf_chunks;
918 egrperchunk = pd->port_rcvegrbufs_perchunk;
919 size = pd->port_rcvegrbuf_size;
920 pd->port_rcvegrbuf = kmalloc_array(chunk, sizeof(pd->port_rcvegrbuf[0]),
921 GFP_KERNEL);
922 if (!pd->port_rcvegrbuf) {
923 ret = -ENOMEM;
924 goto bail;
925 }
926 pd->port_rcvegrbuf_phys =
927 kmalloc_array(chunk, sizeof(pd->port_rcvegrbuf_phys[0]),
928 GFP_KERNEL);
929 if (!pd->port_rcvegrbuf_phys) {
930 ret = -ENOMEM;
931 goto bail_rcvegrbuf;
932 }
933 for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
934
935 pd->port_rcvegrbuf[e] = dma_alloc_coherent(
936 &dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e],
937 gfp_flags);
938
939 if (!pd->port_rcvegrbuf[e]) {
940 ret = -ENOMEM;
941 goto bail_rcvegrbuf_phys;
942 }
943 }
944
945 pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0];
946
947 for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) {
948 dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk];
949 unsigned i;
950
951 for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
952 dd->ipath_f_put_tid(dd, e + egroff +
953 (u64 __iomem *)
954 ((char __iomem *)
955 dd->ipath_kregbase +
956 dd->ipath_rcvegrbase),
957 RCVHQ_RCV_TYPE_EAGER, pa);
958 pa += egrsize;
959 }
960 cond_resched(); /* don't hog the cpu */
961 }
962
963 ret = 0;
964 goto bail;
965
966bail_rcvegrbuf_phys:
967 for (e = 0; e < pd->port_rcvegrbuf_chunks &&
968 pd->port_rcvegrbuf[e]; e++) {
969 dma_free_coherent(&dd->pcidev->dev, size,
970 pd->port_rcvegrbuf[e],
971 pd->port_rcvegrbuf_phys[e]);
972
973 }
974 kfree(pd->port_rcvegrbuf_phys);
975 pd->port_rcvegrbuf_phys = NULL;
976bail_rcvegrbuf:
977 kfree(pd->port_rcvegrbuf);
978 pd->port_rcvegrbuf = NULL;
979bail:
980 return ret;
981}
982
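The error handling above is the usual goto-based unwind: each allocation gets a matching label, and a failure jumps to the label that releases everything allocated so far. A user-space stand-in of the same shape, with malloc in place of dma_alloc_coherent:

#include <stdlib.h>

static int alloc_chunks(void ***bufs, size_t nchunks, size_t chunk_size)
{
	size_t i;
	void **v = calloc(nchunks, sizeof(*v));

	if (!v)
		goto bail;
	for (i = 0; i < nchunks; i++) {
		v[i] = malloc(chunk_size);
		if (!v[i])
			goto bail_chunks;	/* free chunks 0..i-1 */
	}
	*bufs = v;
	return 0;

bail_chunks:
	while (i--)
		free(v[i]);
	free(v);
bail:
	return -1;
}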
983
984/* common code for the mappings on dma_alloc_coherent mem */
985static int ipath_mmap_mem(struct vm_area_struct *vma,
986 struct ipath_portdata *pd, unsigned len, int write_ok,
987 void *kvaddr, char *what)
988{
989 struct ipath_devdata *dd = pd->port_dd;
990 unsigned long pfn;
991 int ret;
992
993 if ((vma->vm_end - vma->vm_start) > len) {
994 dev_info(&dd->pcidev->dev,
995 "FAIL on %s: len %lx > %x\n", what,
996 vma->vm_end - vma->vm_start, len);
997 ret = -EFAULT;
998 goto bail;
999 }
1000
1001 if (!write_ok) {
1002 if (vma->vm_flags & VM_WRITE) {
1003 dev_info(&dd->pcidev->dev,
1004 "%s must be mapped readonly\n", what);
1005 ret = -EPERM;
1006 goto bail;
1007 }
1008
1009 /* don't allow them to later change with mprotect */
1010 vma->vm_flags &= ~VM_MAYWRITE;
1011 }
1012
1013 pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
1014 ret = remap_pfn_range(vma, vma->vm_start, pfn,
1015 len, vma->vm_page_prot);
1016 if (ret)
1017 dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
1018 "bytes r%c failed: %d\n", what, pd->port_port,
1019 pfn, len, write_ok?'w':'o', ret);
1020 else
1021 ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes "
1022 "r%c\n", what, pd->port_port, pfn, len,
1023 write_ok?'w':'o');
1024bail:
1025 return ret;
1026}
1027
1028static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
1029 u64 ureg)
1030{
1031 unsigned long phys;
1032 int ret;
1033
1034 /*
1035 * This is real hardware, so use io_remap. This is the mechanism
1036 * for the user process to update the head registers for their port
1037 * in the chip.
1038 */
1039 if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
1040 dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
1041 "%lx > PAGE\n", vma->vm_end - vma->vm_start);
1042 ret = -EFAULT;
1043 } else {
1044 phys = dd->ipath_physaddr + ureg;
1045 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1046
1047 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
1048 ret = io_remap_pfn_range(vma, vma->vm_start,
1049 phys >> PAGE_SHIFT,
1050 vma->vm_end - vma->vm_start,
1051 vma->vm_page_prot);
1052 }
1053 return ret;
1054}
1055
1056static int mmap_piobufs(struct vm_area_struct *vma,
1057 struct ipath_devdata *dd,
1058 struct ipath_portdata *pd,
1059 unsigned piobufs, unsigned piocnt)
1060{
1061 unsigned long phys;
1062 int ret;
1063
1064 /*
1065 * When we map the PIO buffers in the chip, we want to map them as
1066 * writeonly, no read possible. This prevents access to previous
1067 * process data, and catches users who might try to read the i/o
1068 * space due to a bug.
1069 */
1070 if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
1071 dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
1072 "reqlen %lx > PAGE\n",
1073 vma->vm_end - vma->vm_start);
1074 ret = -EINVAL;
1075 goto bail;
1076 }
1077
1078 phys = dd->ipath_physaddr + piobufs;
1079
1080#if defined(__powerpc__)
1081 /* There isn't a generic way to specify writethrough mappings */
1082 pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
1083 pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
1084 pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
1085#endif
1086
1087 /*
1088 * don't allow them to later change to readable with mprotect (for when
1089 * not initially mapped readable, as is normally the case)
1090 */
1091 vma->vm_flags &= ~VM_MAYREAD;
1092 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
1093
1094 ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
1095 vma->vm_end - vma->vm_start,
1096 vma->vm_page_prot);
1097bail:
1098 return ret;
1099}
1100
1101static int mmap_rcvegrbufs(struct vm_area_struct *vma,
1102 struct ipath_portdata *pd)
1103{
1104 struct ipath_devdata *dd = pd->port_dd;
1105 unsigned long start, size;
1106 size_t total_size, i;
1107 unsigned long pfn;
1108 int ret;
1109
1110 size = pd->port_rcvegrbuf_size;
1111 total_size = pd->port_rcvegrbuf_chunks * size;
1112 if ((vma->vm_end - vma->vm_start) > total_size) {
1113 dev_info(&dd->pcidev->dev, "FAIL on egr bufs: "
1114 "reqlen %lx > actual %lx\n",
1115 vma->vm_end - vma->vm_start,
1116 (unsigned long) total_size);
1117 ret = -EINVAL;
1118 goto bail;
1119 }
1120
1121 if (vma->vm_flags & VM_WRITE) {
1122 dev_info(&dd->pcidev->dev, "Can't map eager buffers as "
1123 "writable (flags=%lx)\n", vma->vm_flags);
1124 ret = -EPERM;
1125 goto bail;
1126 }
1127 /* don't allow them to later change to writeable with mprotect */
1128 vma->vm_flags &= ~VM_MAYWRITE;
1129
1130 start = vma->vm_start;
1131
1132 for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
1133 pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
1134 ret = remap_pfn_range(vma, start, pfn, size,
1135 vma->vm_page_prot);
1136 if (ret < 0)
1137 goto bail;
1138 }
1139 ret = 0;
1140
1141bail:
1142 return ret;
1143}
1144
1145/*
1146 * ipath_file_vma_fault - handle a VMA page fault.
1147 */
1148static int ipath_file_vma_fault(struct vm_area_struct *vma,
1149 struct vm_fault *vmf)
1150{
1151 struct page *page;
1152
1153 page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
1154 if (!page)
1155 return VM_FAULT_SIGBUS;
1156 get_page(page);
1157 vmf->page = page;
1158
1159 return 0;
1160}
1161
1162static const struct vm_operations_struct ipath_file_vm_ops = {
1163 .fault = ipath_file_vma_fault,
1164};
1165
1166static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
1167 struct ipath_portdata *pd, unsigned subport)
1168{
1169 unsigned long len;
1170 struct ipath_devdata *dd;
1171 void *addr;
1172 size_t size;
1173 int ret = 0;
1174
1175 /* If the port is not shared, all addresses should be physical */
1176 if (!pd->port_subport_cnt)
1177 goto bail;
1178
1179 dd = pd->port_dd;
1180 size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
1181
1182 /*
1183 * Each process has all the subport uregbase, rcvhdrq, and
1184 * rcvegrbufs mmapped - as an array for all the processes,
1185 * and also separately for this process.
1186 */
1187 if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
1188 addr = pd->subport_uregbase;
1189 size = PAGE_SIZE * pd->port_subport_cnt;
1190 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
1191 addr = pd->subport_rcvhdr_base;
1192 size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
1193 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
1194 addr = pd->subport_rcvegrbuf;
1195 size *= pd->port_subport_cnt;
1196 } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
1197 PAGE_SIZE * subport)) {
1198 addr = pd->subport_uregbase + PAGE_SIZE * subport;
1199 size = PAGE_SIZE;
1200 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
1201 pd->port_rcvhdrq_size * subport)) {
1202 addr = pd->subport_rcvhdr_base +
1203 pd->port_rcvhdrq_size * subport;
1204 size = pd->port_rcvhdrq_size;
1205 } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
1206 size * subport)) {
1207 addr = pd->subport_rcvegrbuf + size * subport;
1208 /* rcvegrbufs are read-only on the slave */
1209 if (vma->vm_flags & VM_WRITE) {
1210 dev_info(&dd->pcidev->dev,
1211 "Can't map eager buffers as "
1212 "writable (flags=%lx)\n", vma->vm_flags);
1213 ret = -EPERM;
1214 goto bail;
1215 }
1216 /*
1217 * Don't allow permission to later change to writeable
1218 * with mprotect.
1219 */
1220 vma->vm_flags &= ~VM_MAYWRITE;
1221 } else {
1222 goto bail;
1223 }
1224 len = vma->vm_end - vma->vm_start;
1225 if (len > size) {
1226 ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
1227 ret = -EINVAL;
1228 goto bail;
1229 }
1230
1231 vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
1232 vma->vm_ops = &ipath_file_vm_ops;
1233 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
1234 ret = 1;
1235
1236bail:
1237 return ret;
1238}
1239
1240/**
1241 * ipath_mmap - mmap various structures into user space
1242 * @fp: the file pointer
1243 * @vma: the VM area
1244 *
1245 * We use this to have a shared buffer between the kernel and the user code
1246 * for the rcvhdr queue, egr buffers, and the per-port user regs and pio
1247 * buffers in the chip. We have the open and close entries so we can bump
1248 * the ref count and keep the driver from being unloaded while still mapped.
1249 */
1250static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1251{
1252 struct ipath_portdata *pd;
1253 struct ipath_devdata *dd;
1254 u64 pgaddr, ureg;
1255 unsigned piobufs, piocnt;
1256 int ret;
1257
1258 pd = port_fp(fp);
1259 if (!pd) {
1260 ret = -EINVAL;
1261 goto bail;
1262 }
1263 dd = pd->port_dd;
1264
1265 /*
1266 * This is the ipath_do_user_init() code, mapping the shared buffers
1267 * into the user process. The address referred to by vm_pgoff is the
1268 * file offset passed via mmap(). For shared ports, this is the
1269 * kernel vmalloc() address of the pages to share with the master.
1270 * For non-shared or master ports, this is a physical address.
1271 * We only do one mmap for each space mapped.
1272 */
1273 pgaddr = vma->vm_pgoff << PAGE_SHIFT;
1274
1275 /*
1276 * Check for 0 in case one of the allocations failed, but user
1277 * called mmap anyway.
1278 */
1279 if (!pgaddr) {
1280 ret = -EINVAL;
1281 goto bail;
1282 }
1283
1284 ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
1285 (unsigned long long) pgaddr, vma->vm_start,
1286 vma->vm_end - vma->vm_start, dd->ipath_unit,
1287 pd->port_port, subport_fp(fp));
1288
1289 /*
1290 * Physical addresses must fit in 40 bits for our hardware.
1291 * Check for kernel virtual addresses first, anything else must
1292 * match a HW or memory address.
1293 */
1294 ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
1295 if (ret) {
1296 if (ret > 0)
1297 ret = 0;
1298 goto bail;
1299 }
1300
1301 ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
1302 if (!pd->port_subport_cnt) {
1303 /* port is not shared */
1304 piocnt = pd->port_piocnt;
1305 piobufs = pd->port_piobufs;
1306 } else if (!subport_fp(fp)) {
1307 /* caller is the master */
1308 piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
1309 (pd->port_piocnt % pd->port_subport_cnt);
1310 piobufs = pd->port_piobufs +
1311 dd->ipath_palign * (pd->port_piocnt - piocnt);
1312 } else {
1313 unsigned slave = subport_fp(fp) - 1;
1314
1315 /* caller is a slave */
1316 piocnt = pd->port_piocnt / pd->port_subport_cnt;
1317 piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
1318 }
1319
1320 if (pgaddr == ureg)
1321 ret = mmap_ureg(vma, dd, ureg);
1322 else if (pgaddr == piobufs)
1323 ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
1324 else if (pgaddr == dd->ipath_pioavailregs_phys)
1325 /* in-memory copy of pioavail registers */
1326 ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
1327 (void *) dd->ipath_pioavailregs_dma,
1328 "pioavail registers");
1329 else if (pgaddr == pd->port_rcvegr_phys)
1330 ret = mmap_rcvegrbufs(vma, pd);
1331 else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
1332 /*
1333 * The rcvhdrq itself; readonly except on HT (so have
1334 * to allow writable mapping), multiple pages, contiguous
1335 * from an i/o perspective.
1336 */
1337 ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
1338 pd->port_rcvhdrq,
1339 "rcvhdrq");
1340 else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
1341 /* in-memory copy of rcvhdrq tail register */
1342 ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
1343 pd->port_rcvhdrtail_kvaddr,
1344 "rcvhdrq tail");
1345 else
1346 ret = -EINVAL;
1347
1348 vma->vm_private_data = NULL;
1349
1350 if (ret < 0)
1351 dev_info(&dd->pcidev->dev,
1352 "Failure %d on off %llx len %lx\n",
1353 -ret, (unsigned long long)pgaddr,
1354 vma->vm_end - vma->vm_start);
1355bail:
1356 return ret;
1357}
1358
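From user space, each region dispatched above is selected purely by the mmap() offset, which has to equal one of the addresses the driver handed back at initialization time. A hypothetical sketch of the library side; the fd and the offset value are assumptions here, not the real ipath library API:

#include <sys/mman.h>
#include <unistd.h>

/* 'fd' is an open /dev/ipathNN descriptor and 'region_offset' is one of the
 * addresses returned by the USER_INIT/base-info exchange; both are assumed
 * for illustration. */
static void *map_driver_region(int fd, off_t region_offset, size_t len,
			       int writable)
{
	int prot = PROT_READ | (writable ? PROT_WRITE : 0);
	void *p = mmap(NULL, len, prot, MAP_SHARED, fd, region_offset);

	return p == MAP_FAILED ? NULL : p;
}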
1359static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd)
1360{
1361 unsigned pollflag = 0;
1362
1363 if ((pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) &&
1364 pd->port_hdrqfull != pd->port_hdrqfull_poll) {
1365 pollflag |= POLLIN | POLLRDNORM;
1366 pd->port_hdrqfull_poll = pd->port_hdrqfull;
1367 }
1368
1369 return pollflag;
1370}
1371
1372static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
1373 struct file *fp,
1374 struct poll_table_struct *pt)
1375{
1376 unsigned pollflag = 0;
1377 struct ipath_devdata *dd;
1378
1379 dd = pd->port_dd;
1380
1381 /* variable access in ipath_poll_hdrqfull() needs this */
1382 rmb();
1383 pollflag = ipath_poll_hdrqfull(pd);
1384
1385 if (pd->port_urgent != pd->port_urgent_poll) {
1386 pollflag |= POLLIN | POLLRDNORM;
1387 pd->port_urgent_poll = pd->port_urgent;
1388 }
1389
1390 if (!pollflag) {
1391 /* this saves a spin_lock/unlock in interrupt handler... */
1392 set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
1393 /* flush waiting flag so don't miss an event... */
1394 wmb();
1395 poll_wait(fp, &pd->port_wait, pt);
1396 }
1397
1398 return pollflag;
1399}
1400
1401static unsigned int ipath_poll_next(struct ipath_portdata *pd,
1402 struct file *fp,
1403 struct poll_table_struct *pt)
1404{
1405 u32 head;
1406 u32 tail;
1407 unsigned pollflag = 0;
1408 struct ipath_devdata *dd;
1409
1410 dd = pd->port_dd;
1411
1412 /* variable access in ipath_poll_hdrqfull() needs this */
1413 rmb();
1414 pollflag = ipath_poll_hdrqfull(pd);
1415
1416 head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
1417 if (pd->port_rcvhdrtail_kvaddr)
1418 tail = ipath_get_rcvhdrtail(pd);
1419 else
1420 tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
1421
1422 if (head != tail)
1423 pollflag |= POLLIN | POLLRDNORM;
1424 else {
1425 /* this saves a spin_lock/unlock in interrupt handler */
1426 set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
1427 /* flush waiting flag so we don't miss an event */
1428 wmb();
1429
1430 set_bit(pd->port_port + dd->ipath_r_intravail_shift,
1431 &dd->ipath_rcvctrl);
1432
1433 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1434 dd->ipath_rcvctrl);
1435
1436 if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
1437 ipath_write_ureg(dd, ur_rcvhdrhead,
1438 dd->ipath_rhdrhead_intr_off | head,
1439 pd->port_port);
1440
1441 poll_wait(fp, &pd->port_wait, pt);
1442 }
1443
1444 return pollflag;
1445}
1446
1447static unsigned int ipath_poll(struct file *fp,
1448 struct poll_table_struct *pt)
1449{
1450 struct ipath_portdata *pd;
1451 unsigned pollflag;
1452
1453 pd = port_fp(fp);
1454 if (!pd)
1455 pollflag = 0;
1456 else if (pd->poll_type & IPATH_POLL_TYPE_URGENT)
1457 pollflag = ipath_poll_urgent(pd, fp, pt);
1458 else
1459 pollflag = ipath_poll_next(pd, fp, pt);
1460
1461 return pollflag;
1462}
1463
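The poll handlers above surface new receive headers (and, with IPATH_POLL_TYPE_URGENT, urgent packets) as POLLIN | POLLRDNORM, so a consumer just waits with poll(2). A minimal hypothetical user-space sketch:

#include <poll.h>

static int wait_for_rcv(int fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	int n = poll(&pfd, 1, timeout_ms);

	if (n > 0 && (pfd.revents & POLLIN))
		return 1;	/* new headers (or urgent data) available */
	return n;		/* 0 on timeout, -1 on error */
}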
1464static int ipath_supports_subports(int user_swmajor, int user_swminor)
1465{
1466 /* no subport implementation prior to software version 1.3 */
1467 return (user_swmajor > 1) || (user_swminor >= 3);
1468}
1469
1470static int ipath_compatible_subports(int user_swmajor, int user_swminor)
1471{
1472 /* this code is written long-hand for clarity */
1473 if (IPATH_USER_SWMAJOR != user_swmajor) {
1474 /* no promise of compatibility if major mismatch */
1475 return 0;
1476 }
1477 if (IPATH_USER_SWMAJOR == 1) {
1478 switch (IPATH_USER_SWMINOR) {
1479 case 0:
1480 case 1:
1481 case 2:
1482 /* no subport implementation so cannot be compatible */
1483 return 0;
1484 case 3:
1485 /* 3 is only compatible with itself */
1486 return user_swminor == 3;
1487 default:
1488 /* >= 4 are compatible (or are expected to be) */
1489 return user_swminor >= 4;
1490 }
1491 }
1492 /* make no promises yet for future major versions */
1493 return 0;
1494}
1495
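Both helpers above, like their callers later in this file, treat spu_userversion as a packed major/minor pair; the split is just a shift and a mask, sketched here as stand-alone helpers:

static inline unsigned user_major(unsigned userversion)
{
	return userversion >> 16;	/* compared against IPATH_USER_SWMAJOR */
}

static inline unsigned user_minor(unsigned userversion)
{
	return userversion & 0xffff;	/* compared against IPATH_USER_SWMINOR */
}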
1496static int init_subports(struct ipath_devdata *dd,
1497 struct ipath_portdata *pd,
1498 const struct ipath_user_info *uinfo)
1499{
1500 int ret = 0;
1501 unsigned num_subports;
1502 size_t size;
1503
1504 /*
1505 * If the user is requesting zero subports,
1506 * skip the subport allocation.
1507 */
1508 if (uinfo->spu_subport_cnt <= 0)
1509 goto bail;
1510
1511 /* Self-consistency check for ipath_compatible_subports() */
1512 if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) &&
1513 !ipath_compatible_subports(IPATH_USER_SWMAJOR,
1514 IPATH_USER_SWMINOR)) {
1515 dev_info(&dd->pcidev->dev,
1516 "Inconsistent ipath_compatible_subports()\n");
1517 goto bail;
1518 }
1519
1520 /* Check for subport compatibility */
1521 if (!ipath_compatible_subports(uinfo->spu_userversion >> 16,
1522 uinfo->spu_userversion & 0xffff)) {
1523 dev_info(&dd->pcidev->dev,
1524 "Mismatched user version (%d.%d) and driver "
1525 "version (%d.%d) while port sharing. Ensure "
1526 "that driver and library are from the same "
1527 "release.\n",
1528 (int) (uinfo->spu_userversion >> 16),
1529 (int) (uinfo->spu_userversion & 0xffff),
1530 IPATH_USER_SWMAJOR,
1531 IPATH_USER_SWMINOR);
1532 goto bail;
1533 }
1534 if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
1535 ret = -EINVAL;
1536 goto bail;
1537 }
1538
1539 num_subports = uinfo->spu_subport_cnt;
1540 pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports);
1541 if (!pd->subport_uregbase) {
1542 ret = -ENOMEM;
1543 goto bail;
1544 }
1545 /* Note: pd->port_rcvhdrq_size isn't initialized yet. */
1546 size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
1547 sizeof(u32), PAGE_SIZE) * num_subports;
1548 pd->subport_rcvhdr_base = vzalloc(size);
1549 if (!pd->subport_rcvhdr_base) {
1550 ret = -ENOMEM;
1551 goto bail_ureg;
1552 }
1553
1554 pd->subport_rcvegrbuf = vzalloc(pd->port_rcvegrbuf_chunks *
1555 pd->port_rcvegrbuf_size *
1556 num_subports);
1557 if (!pd->subport_rcvegrbuf) {
1558 ret = -ENOMEM;
1559 goto bail_rhdr;
1560 }
1561
1562 pd->port_subport_cnt = uinfo->spu_subport_cnt;
1563 pd->port_subport_id = uinfo->spu_subport_id;
1564 pd->active_slaves = 1;
1565 set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
1566 goto bail;
1567
1568bail_rhdr:
1569 vfree(pd->subport_rcvhdr_base);
1570bail_ureg:
1571 vfree(pd->subport_uregbase);
1572 pd->subport_uregbase = NULL;
1573bail:
1574 return ret;
1575}
1576
1577static int try_alloc_port(struct ipath_devdata *dd, int port,
1578 struct file *fp,
1579 const struct ipath_user_info *uinfo)
1580{
1581 struct ipath_portdata *pd;
1582 int ret;
1583
1584 if (!(pd = dd->ipath_pd[port])) {
1585 void *ptmp;
1586
1587 pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
1588
1589 /*
1590 * Allocate memory for use in ipath_tid_update() just once
1591 * at open, not per call. Reduces cost of expected send
1592 * setup.
1593 */
1594 ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
1595 dd->ipath_rcvtidcnt * sizeof(struct page **),
1596 GFP_KERNEL);
1597 if (!pd || !ptmp) {
1598 ipath_dev_err(dd, "Unable to allocate portdata "
1599 "memory, failing open\n");
1600 ret = -ENOMEM;
1601 kfree(pd);
1602 kfree(ptmp);
1603 goto bail;
1604 }
1605 dd->ipath_pd[port] = pd;
1606 dd->ipath_pd[port]->port_port = port;
1607 dd->ipath_pd[port]->port_dd = dd;
1608 dd->ipath_pd[port]->port_tid_pg_list = ptmp;
1609 init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
1610 }
1611 if (!pd->port_cnt) {
1612 pd->userversion = uinfo->spu_userversion;
1613 init_user_egr_sizes(pd);
1614 if ((ret = init_subports(dd, pd, uinfo)) != 0)
1615 goto bail;
1616 ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
1617 current->comm, current->pid, dd->ipath_unit,
1618 port);
1619 pd->port_cnt = 1;
1620 port_fp(fp) = pd;
1621 pd->port_pid = get_pid(task_pid(current));
1622 strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
1623 ipath_stats.sps_ports++;
1624 ret = 0;
1625 } else
1626 ret = -EBUSY;
1627
1628bail:
1629 return ret;
1630}
1631
1632static inline int usable(struct ipath_devdata *dd)
1633{
1634 return dd &&
1635 (dd->ipath_flags & IPATH_PRESENT) &&
1636 dd->ipath_kregbase &&
1637 dd->ipath_lid &&
1638 !(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED
1639 | IPATH_LINKUNK));
1640}
1641
1642static int find_free_port(int unit, struct file *fp,
1643 const struct ipath_user_info *uinfo)
1644{
1645 struct ipath_devdata *dd = ipath_lookup(unit);
1646 int ret, i;
1647
1648 if (!dd) {
1649 ret = -ENODEV;
1650 goto bail;
1651 }
1652
1653 if (!usable(dd)) {
1654 ret = -ENETDOWN;
1655 goto bail;
1656 }
1657
1658 for (i = 1; i < dd->ipath_cfgports; i++) {
1659 ret = try_alloc_port(dd, i, fp, uinfo);
1660 if (ret != -EBUSY)
1661 goto bail;
1662 }
1663 ret = -EBUSY;
1664
1665bail:
1666 return ret;
1667}
1668
1669static int find_best_unit(struct file *fp,
1670 const struct ipath_user_info *uinfo)
1671{
1672 int ret = 0, i, prefunit = -1, devmax;
1673 int maxofallports, npresent, nup;
1674 int ndev;
1675
1676 devmax = ipath_count_units(&npresent, &nup, &maxofallports);
1677
1678 /*
1679 * This code is present to allow a knowledgeable person to
1680 * specify the layout of processes to processors before opening
1681 * this driver, and then we'll assign the process to the "closest"
1682 * InfiniPath chip to that processor (we assume reasonable connectivity,
1683 * for now). This code assumes that if affinity has been set
1684 * before this point, that at most one cpu is set; for now this
1685 * is reasonable. I check for both cpumask_empty() and cpumask_full(),
1686 * in case some kernel variant sets none of the bits when no
1687 * affinity is set. 2.6.11 and 12 kernels have all present
1688 * cpus set. Some day we'll have to fix it up further to handle
1689 * a cpu subset. This algorithm fails for two HT chips connected
1690 * in tunnel fashion. Eventually this needs real topology
1691 * information. There may be some issues with dual core numbering
1692 * as well. This needs more work prior to release.
1693 */
1694 if (!cpumask_empty(tsk_cpus_allowed(current)) &&
1695 !cpumask_full(tsk_cpus_allowed(current))) {
1696 int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
1697 get_online_cpus();
1698 for_each_online_cpu(i)
1699 if (cpumask_test_cpu(i, tsk_cpus_allowed(current))) {
1700 ipath_cdbg(PROC, "%s[%u] affinity set for "
1701 "cpu %d/%d\n", current->comm,
1702 current->pid, i, ncpus);
1703 curcpu = i;
1704 nset++;
1705 }
1706 put_online_cpus();
1707 if (curcpu != -1 && nset != ncpus) {
1708 if (npresent) {
1709 prefunit = curcpu / (ncpus / npresent);
1710 ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, "
1711 "%d cpus/chip, select unit %d\n",
1712 current->comm, current->pid,
1713 npresent, ncpus, ncpus / npresent,
1714 prefunit);
1715 }
1716 }
1717 }
1718
1719 /*
1720 * user ports start at 1, kernel port is 0
1721 * For now, we do round-robin access across all chips
1722 */
1723
1724 if (prefunit != -1)
1725 devmax = prefunit + 1;
1726recheck:
1727 for (i = 1; i < maxofallports; i++) {
1728 for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
1729 ndev++) {
1730 struct ipath_devdata *dd = ipath_lookup(ndev);
1731
1732 if (!usable(dd))
1733 continue; /* can't use this unit */
1734 if (i >= dd->ipath_cfgports)
1735 /*
1736 * Maxed out on users of this unit. Try
1737 * next.
1738 */
1739 continue;
1740 ret = try_alloc_port(dd, i, fp, uinfo);
1741 if (!ret)
1742 goto done;
1743 }
1744 }
1745
1746 if (npresent) {
1747 if (nup == 0) {
1748 ret = -ENETDOWN;
1749 ipath_dbg("No ports available (none initialized "
1750 "and ready)\n");
1751 } else {
1752 if (prefunit > 0) {
1753 /* if started above 0, retry from 0 */
1754 ipath_cdbg(PROC,
1755 "%s[%u] no ports on prefunit "
1756 "%d, clear and re-check\n",
1757 current->comm, current->pid,
1758 prefunit);
1759 devmax = ipath_count_units(NULL, NULL,
1760 NULL);
1761 prefunit = -1;
1762 goto recheck;
1763 }
1764 ret = -EBUSY;
1765 ipath_dbg("No ports available\n");
1766 }
1767 } else {
1768 ret = -ENXIO;
1769 ipath_dbg("No boards found\n");
1770 }
1771
1772done:
1773 return ret;
1774}
1775
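The preferred-unit heuristic above divides the cpu space evenly across the present chips: prefunit = curcpu / (ncpus / npresent). A stand-alone sketch with assumed counts, just to show the mapping:

#include <stdio.h>

int main(void)
{
	int ncpus = 8, npresent = 2;	/* assumed: 8 online cpus, 2 chips */
	int curcpu;

	for (curcpu = 0; curcpu < ncpus; curcpu++)
		printf("cpu %d -> preferred unit %d\n",
		       curcpu, curcpu / (ncpus / npresent));
	return 0;
}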
1776static int find_shared_port(struct file *fp,
1777 const struct ipath_user_info *uinfo)
1778{
1779 int devmax, ndev, i;
1780 int ret = 0;
1781
1782 devmax = ipath_count_units(NULL, NULL, NULL);
1783
1784 for (ndev = 0; ndev < devmax; ndev++) {
1785 struct ipath_devdata *dd = ipath_lookup(ndev);
1786
1787 if (!usable(dd))
1788 continue;
1789 for (i = 1; i < dd->ipath_cfgports; i++) {
1790 struct ipath_portdata *pd = dd->ipath_pd[i];
1791
1792 /* Skip ports which are not yet open */
1793 if (!pd || !pd->port_cnt)
1794 continue;
1795 /* Skip port if it doesn't match the requested one */
1796 if (pd->port_subport_id != uinfo->spu_subport_id)
1797 continue;
1798 /* Verify the sharing process matches the master */
1799 if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
1800 pd->userversion != uinfo->spu_userversion ||
1801 pd->port_cnt >= pd->port_subport_cnt) {
1802 ret = -EINVAL;
1803 goto done;
1804 }
1805 port_fp(fp) = pd;
1806 subport_fp(fp) = pd->port_cnt++;
1807 pd->port_subpid[subport_fp(fp)] =
1808 get_pid(task_pid(current));
1809 tidcursor_fp(fp) = 0;
1810 pd->active_slaves |= 1 << subport_fp(fp);
1811 ipath_cdbg(PROC,
1812 "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
1813 current->comm, current->pid,
1814 subport_fp(fp),
1815 pd->port_comm, pid_nr(pd->port_pid),
1816 dd->ipath_unit, pd->port_port);
1817 ret = 1;
1818 goto done;
1819 }
1820 }
1821
1822done:
1823 return ret;
1824}
1825
1826static int ipath_open(struct inode *in, struct file *fp)
1827{
1828 /* The real work is performed later in ipath_assign_port() */
1829 fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
1830 return fp->private_data ? 0 : -ENOMEM;
1831}
1832
1833/* Get port early, so we can set affinity prior to memory allocation */
1834static int ipath_assign_port(struct file *fp,
1835 const struct ipath_user_info *uinfo)
1836{
1837 int ret;
1838 int i_minor;
1839 unsigned swmajor, swminor;
1840
1841 /* Check to be sure we haven't already initialized this file */
1842 if (port_fp(fp)) {
1843 ret = -EINVAL;
1844 goto done;
1845 }
1846
1847 /* for now, if major version is different, bail */
1848 swmajor = uinfo->spu_userversion >> 16;
1849 if (swmajor != IPATH_USER_SWMAJOR) {
1850 ipath_dbg("User major version %d not same as driver "
1851 "major %d\n", uinfo->spu_userversion >> 16,
1852 IPATH_USER_SWMAJOR);
1853 ret = -ENODEV;
1854 goto done;
1855 }
1856
1857 swminor = uinfo->spu_userversion & 0xffff;
1858 if (swminor != IPATH_USER_SWMINOR)
1859 ipath_dbg("User minor version %d not same as driver "
1860 "minor %d\n", swminor, IPATH_USER_SWMINOR);
1861
1862 mutex_lock(&ipath_mutex);
1863
1864 if (ipath_compatible_subports(swmajor, swminor) &&
1865 uinfo->spu_subport_cnt &&
1866 (ret = find_shared_port(fp, uinfo))) {
1867 if (ret > 0)
1868 ret = 0;
1869 goto done_chk_sdma;
1870 }
1871
1872 i_minor = iminor(file_inode(fp)) - IPATH_USER_MINOR_BASE;
1873 ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
1874 (long)file_inode(fp)->i_rdev, i_minor);
1875
1876 if (i_minor)
1877 ret = find_free_port(i_minor - 1, fp, uinfo);
1878 else
1879 ret = find_best_unit(fp, uinfo);
1880
1881done_chk_sdma:
1882 if (!ret) {
1883 struct ipath_filedata *fd = fp->private_data;
1884 const struct ipath_portdata *pd = fd->pd;
1885 const struct ipath_devdata *dd = pd->port_dd;
1886
1887 fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev,
1888 dd->ipath_unit,
1889 pd->port_port,
1890 fd->subport);
1891
1892 if (!fd->pq)
1893 ret = -ENOMEM;
1894 }
1895
1896 mutex_unlock(&ipath_mutex);
1897
1898done:
1899 return ret;
1900}
1901
1902
1903static int ipath_do_user_init(struct file *fp,
1904 const struct ipath_user_info *uinfo)
1905{
1906 int ret;
1907 struct ipath_portdata *pd = port_fp(fp);
1908 struct ipath_devdata *dd;
1909 u32 head32;
1910
1911 /* Subports don't need to initialize anything since master did it. */
1912 if (subport_fp(fp)) {
1913 ret = wait_event_interruptible(pd->port_wait,
1914 !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
1915 goto done;
1916 }
1917
1918 dd = pd->port_dd;
1919
1920 if (uinfo->spu_rcvhdrsize) {
1921 ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
1922 if (ret)
1923 goto done;
1924 }
1925
1926 /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
1927
1928 /* some ports may get extra buffers, calculate that here */
1929 if (pd->port_port <= dd->ipath_ports_extrabuf)
1930 pd->port_piocnt = dd->ipath_pbufsport + 1;
1931 else
1932 pd->port_piocnt = dd->ipath_pbufsport;
1933
1934 /* for right now, kernel piobufs are at end, so port 1 is at 0 */
1935 if (pd->port_port <= dd->ipath_ports_extrabuf)
1936 pd->port_pio_base = (dd->ipath_pbufsport + 1)
1937 * (pd->port_port - 1);
1938 else
1939 pd->port_pio_base = dd->ipath_ports_extrabuf +
1940 dd->ipath_pbufsport * (pd->port_port - 1);
1941 pd->port_piobufs = dd->ipath_piobufbase +
1942 pd->port_pio_base * dd->ipath_palign;
1943 ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
1944 " first pio %u\n", pd->port_port, pd->port_piobufs,
1945 pd->port_piocnt, pd->port_pio_base);
1946 ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
1947
1948 /*
1949 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
1950	 * array for the time being. If pd->port_port > chip-supported,
1951	 * we would need extra handling here, by overflowing
1952	 * through port 0, someday.
1953 */
1954 ret = ipath_create_rcvhdrq(dd, pd);
1955 if (!ret)
1956 ret = ipath_create_user_egr(pd);
1957 if (ret)
1958 goto done;
1959
1960 /*
1961 * set the eager head register for this port to the current values
1962 * of the tail pointers, since we don't know if they were
1963 * updated on last use of the port.
1964 */
1965 head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
1966 ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
1967 pd->port_lastrcvhdrqtail = -1;
1968 ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
1969 pd->port_port, head32);
1970 pd->port_tidcursor = 0; /* start at beginning after open */
1971
1972 /* initialize poll variables... */
1973 pd->port_urgent = 0;
1974 pd->port_urgent_poll = 0;
1975 pd->port_hdrqfull_poll = pd->port_hdrqfull;
1976
1977 /*
1978 * Now enable the port for receive.
1979	 * For chips that are set to DMA the tail register to memory
1980	 * when it changes (and when the update bit transitions from
1981	 * 0 to 1), we turn it off and then back on.
1982 * This will (very briefly) affect any other open ports, but the
1983 * duration is very short, and therefore isn't an issue. We
1984 * explicitly set the in-memory tail copy to 0 beforehand, so we
1985 * don't have to wait to be sure the DMA update has happened
1986 * (chip resets head/tail to 0 on transition to enable).
1987 */
1988 set_bit(dd->ipath_r_portenable_shift + pd->port_port,
1989 &dd->ipath_rcvctrl);
1990 if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
1991 if (pd->port_rcvhdrtail_kvaddr)
1992 ipath_clear_rcvhdrtail(pd);
1993 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1994 dd->ipath_rcvctrl &
1995 ~(1ULL << dd->ipath_r_tailupd_shift));
1996 }
1997 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1998 dd->ipath_rcvctrl);
1999 /* Notify any waiting slaves */
2000 if (pd->port_subport_cnt) {
2001 clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
2002 wake_up(&pd->port_wait);
2003 }
2004done:
2005 return ret;
2006}
2007
2008/**
2009 * unlock_expected_tids - unlock any expected TID entries the port still had in use
2010 * @pd: port
2011 *
2012 * We don't actually update the chip here, because we do a bulk update
2013 * below, using ipath_f_clear_tids.
2014 */
2015static void unlock_expected_tids(struct ipath_portdata *pd)
2016{
2017 struct ipath_devdata *dd = pd->port_dd;
2018 int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt;
2019 int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt;
2020
2021 ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n",
2022 pd->port_port);
2023 for (i = port_tidbase; i < maxtid; i++) {
2024 struct page *ps = dd->ipath_pageshadow[i];
2025
2026 if (!ps)
2027 continue;
2028
2029 dd->ipath_pageshadow[i] = NULL;
2030 pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
2031 PAGE_SIZE, PCI_DMA_FROMDEVICE);
2032 ipath_release_user_pages_on_close(&ps, 1);
2033 cnt++;
2034 ipath_stats.sps_pageunlocks++;
2035 }
2036 if (cnt)
2037 ipath_cdbg(VERBOSE, "Port %u locked %u expTID entries\n",
2038 pd->port_port, cnt);
2039
2040 if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks)
2041 ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n",
2042 (unsigned long long) ipath_stats.sps_pagelocks,
2043 (unsigned long long)
2044 ipath_stats.sps_pageunlocks);
2045}
2046
2047static int ipath_close(struct inode *in, struct file *fp)
2048{
2049 struct ipath_filedata *fd;
2050 struct ipath_portdata *pd;
2051 struct ipath_devdata *dd;
2052 unsigned long flags;
2053 unsigned port;
2054 struct pid *pid;
2055
2056 ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
2057 (long)in->i_rdev, fp->private_data);
2058
2059 mutex_lock(&ipath_mutex);
2060
2061 fd = fp->private_data;
2062 fp->private_data = NULL;
2063 pd = fd->pd;
2064 if (!pd) {
2065 mutex_unlock(&ipath_mutex);
2066 goto bail;
2067 }
2068
2069 dd = pd->port_dd;
2070
2071 /* drain user sdma queue */
2072 ipath_user_sdma_queue_drain(dd, fd->pq);
2073 ipath_user_sdma_queue_destroy(fd->pq);
2074
2075 if (--pd->port_cnt) {
2076 /*
2077 * XXX If the master closes the port before the slave(s),
2078 * revoke the mmap for the eager receive queue so
2079 * the slave(s) don't wait for receive data forever.
2080 */
2081 pd->active_slaves &= ~(1 << fd->subport);
2082 put_pid(pd->port_subpid[fd->subport]);
2083 pd->port_subpid[fd->subport] = NULL;
2084 mutex_unlock(&ipath_mutex);
2085 goto bail;
2086 }
2087 /* early; no interrupt users after this */
2088 spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
2089 port = pd->port_port;
2090 dd->ipath_pd[port] = NULL;
2091 pid = pd->port_pid;
2092 pd->port_pid = NULL;
2093 spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
2094
2095 if (pd->port_rcvwait_to || pd->port_piowait_to
2096 || pd->port_rcvnowait || pd->port_pionowait) {
2097 ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeo; "
2098 "%u rcv %u, pio already\n",
2099 pd->port_port, pd->port_rcvwait_to,
2100 pd->port_piowait_to, pd->port_rcvnowait,
2101 pd->port_pionowait);
2102 pd->port_rcvwait_to = pd->port_piowait_to =
2103 pd->port_rcvnowait = pd->port_pionowait = 0;
2104 }
2105 if (pd->port_flag) {
2106 ipath_cdbg(PROC, "port %u port_flag set: 0x%lx\n",
2107 pd->port_port, pd->port_flag);
2108 pd->port_flag = 0;
2109 }
2110
2111 if (dd->ipath_kregbase) {
2112 /* atomically clear receive enable port and intr avail. */
2113 clear_bit(dd->ipath_r_portenable_shift + port,
2114 &dd->ipath_rcvctrl);
2115 clear_bit(pd->port_port + dd->ipath_r_intravail_shift,
2116 &dd->ipath_rcvctrl);
2117 ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl,
2118 dd->ipath_rcvctrl);
2119 /* and read back from chip to be sure that nothing
2120 * else is in flight when we do the rest */
2121 (void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
2122
2123 /* clean up the pkeys for this port user */
2124 ipath_clean_part_key(pd, dd);
2125 /*
2126 * be paranoid, and never write 0's to these, just use an
2127 * unused part of the port 0 tail page. Of course,
2128 * rcvhdraddr points to a large chunk of memory, so this
2129 * could still trash things, but at least it won't trash
2130 * page 0, and by disabling the port, it should stop "soon",
2131		 * even if a packet or two is already in flight after we
2132 * disabled the port.
2133 */
2134 ipath_write_kreg_port(dd,
2135 dd->ipath_kregs->kr_rcvhdrtailaddr, port,
2136 dd->ipath_dummy_hdrq_phys);
2137 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
2138 pd->port_port, dd->ipath_dummy_hdrq_phys);
2139
2140 ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
2141 ipath_chg_pioavailkernel(dd, pd->port_pio_base,
2142 pd->port_piocnt, 1);
2143
2144 dd->ipath_f_clear_tids(dd, pd->port_port);
2145
2146 if (dd->ipath_pageshadow)
2147 unlock_expected_tids(pd);
2148 ipath_stats.sps_ports--;
2149 ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
2150 pd->port_comm, pid_nr(pid),
2151 dd->ipath_unit, port);
2152 }
2153
2154 put_pid(pid);
2155 mutex_unlock(&ipath_mutex);
2156 ipath_free_pddata(dd, pd); /* after releasing the mutex */
2157
2158bail:
2159 kfree(fd);
2160 return 0;
2161}
2162
2163static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
2164 struct ipath_port_info __user *uinfo)
2165{
2166 struct ipath_port_info info;
2167 int nup;
2168 int ret;
2169 size_t sz;
2170
2171 (void) ipath_count_units(NULL, &nup, NULL);
2172 info.num_active = nup;
2173 info.unit = pd->port_dd->ipath_unit;
2174 info.port = pd->port_port;
2175 info.subport = subport;
2176 /* Don't return new fields if old library opened the port. */
2177 if (ipath_supports_subports(pd->userversion >> 16,
2178 pd->userversion & 0xffff)) {
2179 /* Number of user ports available for this device. */
2180 info.num_ports = pd->port_dd->ipath_cfgports - 1;
2181 info.num_subports = pd->port_subport_cnt;
2182 sz = sizeof(info);
2183 } else
2184 sz = sizeof(info) - 2 * sizeof(u16);
2185
2186 if (copy_to_user(uinfo, &info, sz)) {
2187 ret = -EFAULT;
2188 goto bail;
2189 }
2190 ret = 0;
2191
2192bail:
2193 return ret;
2194}
2195
2196static int ipath_get_slave_info(struct ipath_portdata *pd,
2197 void __user *slave_mask_addr)
2198{
2199 int ret = 0;
2200
2201 if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
2202 ret = -EFAULT;
2203 return ret;
2204}
2205
2206static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq,
2207 u32 __user *inflightp)
2208{
2209 const u32 val = ipath_user_sdma_inflight_counter(pq);
2210
2211 if (put_user(val, inflightp))
2212 return -EFAULT;
2213
2214 return 0;
2215}
2216
2217static int ipath_sdma_get_complete(struct ipath_devdata *dd,
2218 struct ipath_user_sdma_queue *pq,
2219 u32 __user *completep)
2220{
2221 u32 val;
2222 int err;
2223
2224 err = ipath_user_sdma_make_progress(dd, pq);
2225 if (err < 0)
2226 return err;
2227
2228 val = ipath_user_sdma_complete_counter(pq);
2229 if (put_user(val, completep))
2230 return -EFAULT;
2231
2232 return 0;
2233}
2234
2235static ssize_t ipath_write(struct file *fp, const char __user *data,
2236 size_t count, loff_t *off)
2237{
2238 const struct ipath_cmd __user *ucmd;
2239 struct ipath_portdata *pd;
2240 const void __user *src;
2241 size_t consumed, copy;
2242 struct ipath_cmd cmd;
2243 ssize_t ret = 0;
2244 void *dest;
2245
2246 if (count < sizeof(cmd.type)) {
2247 ret = -EINVAL;
2248 goto bail;
2249 }
2250
2251 ucmd = (const struct ipath_cmd __user *) data;
2252
2253 if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) {
2254 ret = -EFAULT;
2255 goto bail;
2256 }
2257
2258 consumed = sizeof(cmd.type);
2259
2260 switch (cmd.type) {
2261 case IPATH_CMD_ASSIGN_PORT:
2262 case __IPATH_CMD_USER_INIT:
2263 case IPATH_CMD_USER_INIT:
2264 copy = sizeof(cmd.cmd.user_info);
2265 dest = &cmd.cmd.user_info;
2266 src = &ucmd->cmd.user_info;
2267 break;
2268 case IPATH_CMD_RECV_CTRL:
2269 copy = sizeof(cmd.cmd.recv_ctrl);
2270 dest = &cmd.cmd.recv_ctrl;
2271 src = &ucmd->cmd.recv_ctrl;
2272 break;
2273 case IPATH_CMD_PORT_INFO:
2274 copy = sizeof(cmd.cmd.port_info);
2275 dest = &cmd.cmd.port_info;
2276 src = &ucmd->cmd.port_info;
2277 break;
2278 case IPATH_CMD_TID_UPDATE:
2279 case IPATH_CMD_TID_FREE:
2280 copy = sizeof(cmd.cmd.tid_info);
2281 dest = &cmd.cmd.tid_info;
2282 src = &ucmd->cmd.tid_info;
2283 break;
2284 case IPATH_CMD_SET_PART_KEY:
2285 copy = sizeof(cmd.cmd.part_key);
2286 dest = &cmd.cmd.part_key;
2287 src = &ucmd->cmd.part_key;
2288 break;
2289 case __IPATH_CMD_SLAVE_INFO:
2290 copy = sizeof(cmd.cmd.slave_mask_addr);
2291 dest = &cmd.cmd.slave_mask_addr;
2292 src = &ucmd->cmd.slave_mask_addr;
2293 break;
2294	case IPATH_CMD_PIOAVAILUPD: /* force an update of PIOAvail reg */
2295 copy = 0;
2296 src = NULL;
2297 dest = NULL;
2298 break;
2299 case IPATH_CMD_POLL_TYPE:
2300 copy = sizeof(cmd.cmd.poll_type);
2301 dest = &cmd.cmd.poll_type;
2302 src = &ucmd->cmd.poll_type;
2303 break;
2304 case IPATH_CMD_ARMLAUNCH_CTRL:
2305 copy = sizeof(cmd.cmd.armlaunch_ctrl);
2306 dest = &cmd.cmd.armlaunch_ctrl;
2307 src = &ucmd->cmd.armlaunch_ctrl;
2308 break;
2309 case IPATH_CMD_SDMA_INFLIGHT:
2310 copy = sizeof(cmd.cmd.sdma_inflight);
2311 dest = &cmd.cmd.sdma_inflight;
2312 src = &ucmd->cmd.sdma_inflight;
2313 break;
2314 case IPATH_CMD_SDMA_COMPLETE:
2315 copy = sizeof(cmd.cmd.sdma_complete);
2316 dest = &cmd.cmd.sdma_complete;
2317 src = &ucmd->cmd.sdma_complete;
2318 break;
2319 default:
2320 ret = -EINVAL;
2321 goto bail;
2322 }
2323
2324 if (copy) {
2325 if ((count - consumed) < copy) {
2326 ret = -EINVAL;
2327 goto bail;
2328 }
2329
2330 if (copy_from_user(dest, src, copy)) {
2331 ret = -EFAULT;
2332 goto bail;
2333 }
2334
2335 consumed += copy;
2336 }
2337
2338 pd = port_fp(fp);
2339 if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
2340 cmd.type != IPATH_CMD_ASSIGN_PORT) {
2341 ret = -EINVAL;
2342 goto bail;
2343 }
2344
2345 switch (cmd.type) {
2346 case IPATH_CMD_ASSIGN_PORT:
2347 ret = ipath_assign_port(fp, &cmd.cmd.user_info);
2348 if (ret)
2349 goto bail;
2350 break;
2351 case __IPATH_CMD_USER_INIT:
2352 /* backwards compatibility, get port first */
2353 ret = ipath_assign_port(fp, &cmd.cmd.user_info);
2354 if (ret)
2355 goto bail;
2356 /* and fall through to current version. */
2357 case IPATH_CMD_USER_INIT:
2358 ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
2359 if (ret)
2360 goto bail;
2361 ret = ipath_get_base_info(
2362 fp, (void __user *) (unsigned long)
2363 cmd.cmd.user_info.spu_base_info,
2364 cmd.cmd.user_info.spu_base_info_size);
2365 break;
2366 case IPATH_CMD_RECV_CTRL:
2367 ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
2368 break;
2369 case IPATH_CMD_PORT_INFO:
2370 ret = ipath_port_info(pd, subport_fp(fp),
2371 (struct ipath_port_info __user *)
2372 (unsigned long) cmd.cmd.port_info);
2373 break;
2374 case IPATH_CMD_TID_UPDATE:
2375 ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
2376 break;
2377 case IPATH_CMD_TID_FREE:
2378 ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
2379 break;
2380 case IPATH_CMD_SET_PART_KEY:
2381 ret = ipath_set_part_key(pd, cmd.cmd.part_key);
2382 break;
2383 case __IPATH_CMD_SLAVE_INFO:
2384 ret = ipath_get_slave_info(pd,
2385 (void __user *) (unsigned long)
2386 cmd.cmd.slave_mask_addr);
2387 break;
2388 case IPATH_CMD_PIOAVAILUPD:
2389 ipath_force_pio_avail_update(pd->port_dd);
2390 break;
2391 case IPATH_CMD_POLL_TYPE:
2392 pd->poll_type = cmd.cmd.poll_type;
2393 break;
2394 case IPATH_CMD_ARMLAUNCH_CTRL:
2395 if (cmd.cmd.armlaunch_ctrl)
2396 ipath_enable_armlaunch(pd->port_dd);
2397 else
2398 ipath_disable_armlaunch(pd->port_dd);
2399 break;
2400 case IPATH_CMD_SDMA_INFLIGHT:
2401 ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp),
2402 (u32 __user *) (unsigned long)
2403 cmd.cmd.sdma_inflight);
2404 break;
2405 case IPATH_CMD_SDMA_COMPLETE:
2406 ret = ipath_sdma_get_complete(pd->port_dd,
2407 user_sdma_queue_fp(fp),
2408 (u32 __user *) (unsigned long)
2409 cmd.cmd.sdma_complete);
2410 break;
2411 }
2412
2413 if (ret >= 0)
2414 ret = consumed;
2415
2416bail:
2417 return ret;
2418}
2419
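User space drives every case above through write(2): the first bytes select the command type and the remainder carries the type-specific payload, exactly as ipath_write() consumes them. A hypothetical, simplified stand-in for the structure (the real layout is struct ipath_cmd in ipath_common.h and is not reproduced here):

#include <stdint.h>
#include <unistd.h>

struct fake_ipath_cmd {			/* illustration only, not the real ABI */
	uint32_t type;			/* e.g. IPATH_CMD_RECV_CTRL */
	union {
		uint16_t recv_ctrl;	/* 1 = enable receive, 0 = disable */
	} cmd;
};

static int toggle_rcvq(int fd, uint32_t recv_ctrl_type, int enable)
{
	struct fake_ipath_cmd c = {
		.type = recv_ctrl_type,
		.cmd.recv_ctrl = enable ? 1 : 0,
	};

	return write(fd, &c, sizeof(c)) < 0 ? -1 : 0;
}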
2420static ssize_t ipath_write_iter(struct kiocb *iocb, struct iov_iter *from)
2421{
2422 struct file *filp = iocb->ki_filp;
2423 struct ipath_filedata *fp = filp->private_data;
2424 struct ipath_portdata *pd = port_fp(filp);
2425 struct ipath_user_sdma_queue *pq = fp->pq;
2426
2427 if (!iter_is_iovec(from) || !from->nr_segs)
2428 return -EINVAL;
2429
2430 return ipath_user_sdma_writev(pd->port_dd, pq, from->iov, from->nr_segs);
2431}
2432
2433static struct class *ipath_class;
2434
2435static int init_cdev(int minor, char *name, const struct file_operations *fops,
2436 struct cdev **cdevp, struct device **devp)
2437{
2438 const dev_t dev = MKDEV(IPATH_MAJOR, minor);
2439 struct cdev *cdev = NULL;
2440 struct device *device = NULL;
2441 int ret;
2442
2443 cdev = cdev_alloc();
2444 if (!cdev) {
2445 printk(KERN_ERR IPATH_DRV_NAME
2446 ": Could not allocate cdev for minor %d, %s\n",
2447 minor, name);
2448 ret = -ENOMEM;
2449 goto done;
2450 }
2451
2452 cdev->owner = THIS_MODULE;
2453 cdev->ops = fops;
2454 kobject_set_name(&cdev->kobj, name);
2455
2456 ret = cdev_add(cdev, dev, 1);
2457 if (ret < 0) {
2458 printk(KERN_ERR IPATH_DRV_NAME
2459 ": Could not add cdev for minor %d, %s (err %d)\n",
2460 minor, name, -ret);
2461 goto err_cdev;
2462 }
2463
2464 device = device_create(ipath_class, NULL, dev, NULL, name);
2465
2466 if (IS_ERR(device)) {
2467 ret = PTR_ERR(device);
2468 printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
2469 "device for minor %d, %s (err %d)\n",
2470 minor, name, -ret);
2471 goto err_cdev;
2472 }
2473
2474 goto done;
2475
2476err_cdev:
2477 cdev_del(cdev);
2478 cdev = NULL;
2479
2480done:
2481 if (ret >= 0) {
2482 *cdevp = cdev;
2483 *devp = device;
2484 } else {
2485 *cdevp = NULL;
2486 *devp = NULL;
2487 }
2488
2489 return ret;
2490}
2491
2492int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
2493 struct cdev **cdevp, struct device **devp)
2494{
2495 return init_cdev(minor, name, fops, cdevp, devp);
2496}
2497
2498static void cleanup_cdev(struct cdev **cdevp,
2499 struct device **devp)
2500{
2501 struct device *dev = *devp;
2502
2503 if (dev) {
2504 device_unregister(dev);
2505 *devp = NULL;
2506 }
2507
2508 if (*cdevp) {
2509 cdev_del(*cdevp);
2510 *cdevp = NULL;
2511 }
2512}
2513
2514void ipath_cdev_cleanup(struct cdev **cdevp,
2515 struct device **devp)
2516{
2517 cleanup_cdev(cdevp, devp);
2518}
2519
2520static struct cdev *wildcard_cdev;
2521static struct device *wildcard_dev;
2522
2523static const dev_t dev = MKDEV(IPATH_MAJOR, 0);
2524
2525static int user_init(void)
2526{
2527 int ret;
2528
2529 ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME);
2530 if (ret < 0) {
2531 printk(KERN_ERR IPATH_DRV_NAME ": Could not register "
2532 "chrdev region (err %d)\n", -ret);
2533 goto done;
2534 }
2535
2536 ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME);
2537
2538 if (IS_ERR(ipath_class)) {
2539 ret = PTR_ERR(ipath_class);
2540 printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
2541 "device class (err %d)\n", -ret);
2542 goto bail;
2543 }
2544
2545 goto done;
2546bail:
2547 unregister_chrdev_region(dev, IPATH_NMINORS);
2548done:
2549 return ret;
2550}
2551
2552static void user_cleanup(void)
2553{
2554 if (ipath_class) {
2555 class_destroy(ipath_class);
2556 ipath_class = NULL;
2557 }
2558
2559 unregister_chrdev_region(dev, IPATH_NMINORS);
2560}
2561
2562static atomic_t user_count = ATOMIC_INIT(0);
2563static atomic_t user_setup = ATOMIC_INIT(0);
2564
2565int ipath_user_add(struct ipath_devdata *dd)
2566{
2567 char name[10];
2568 int ret;
2569
2570 if (atomic_inc_return(&user_count) == 1) {
2571 ret = user_init();
2572 if (ret < 0) {
2573 ipath_dev_err(dd, "Unable to set up user support: "
2574 "error %d\n", -ret);
2575 goto bail;
2576 }
2577 ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev,
2578 &wildcard_dev);
2579 if (ret < 0) {
2580 ipath_dev_err(dd, "Could not create wildcard "
2581 "minor: error %d\n", -ret);
2582 goto bail_user;
2583 }
2584
2585 atomic_set(&user_setup, 1);
2586 }
2587
2588 snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit);
2589
2590 ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops,
2591 &dd->user_cdev, &dd->user_dev);
2592 if (ret < 0)
2593 ipath_dev_err(dd, "Could not create user minor %d, %s\n",
2594 dd->ipath_unit + 1, name);
2595
2596 goto bail;
2597
2598bail_user:
2599 user_cleanup();
2600bail:
2601 return ret;
2602}
2603
2604void ipath_user_remove(struct ipath_devdata *dd)
2605{
2606 cleanup_cdev(&dd->user_cdev, &dd->user_dev);
2607
2608 if (atomic_dec_return(&user_count) == 0) {
2609 if (atomic_read(&user_setup) == 0)
2610 goto bail;
2611
2612 cleanup_cdev(&wildcard_cdev, &wildcard_dev);
2613 user_cleanup();
2614
2615 atomic_set(&user_setup, 0);
2616 }
2617bail:
2618 return;
2619}
diff --git a/drivers/staging/rdma/ipath/ipath_fs.c b/drivers/staging/rdma/ipath/ipath_fs.c
deleted file mode 100644
index 476fcdf05acb..000000000000
--- a/drivers/staging/rdma/ipath/ipath_fs.c
+++ /dev/null
@@ -1,415 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/module.h>
35#include <linux/fs.h>
36#include <linux/mount.h>
37#include <linux/pagemap.h>
38#include <linux/init.h>
39#include <linux/namei.h>
40#include <linux/slab.h>
41
42#include "ipath_kernel.h"
43
44#define IPATHFS_MAGIC 0x726a77
45
46static struct super_block *ipath_super;
47
48static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
49 umode_t mode, const struct file_operations *fops,
50 void *data)
51{
52 int error;
53 struct inode *inode = new_inode(dir->i_sb);
54
55 if (!inode) {
56 error = -EPERM;
57 goto bail;
58 }
59
60 inode->i_ino = get_next_ino();
61 inode->i_mode = mode;
62 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
63 inode->i_private = data;
64 if (S_ISDIR(mode)) {
65 inode->i_op = &simple_dir_inode_operations;
66 inc_nlink(inode);
67 inc_nlink(dir);
68 }
69
70 inode->i_fop = fops;
71
72 d_instantiate(dentry, inode);
73 error = 0;
74
75bail:
76 return error;
77}
78
79static int create_file(const char *name, umode_t mode,
80 struct dentry *parent, struct dentry **dentry,
81 const struct file_operations *fops, void *data)
82{
83 int error;
84
85 inode_lock(d_inode(parent));
86 *dentry = lookup_one_len(name, parent, strlen(name));
87 if (!IS_ERR(*dentry))
88 error = ipathfs_mknod(d_inode(parent), *dentry,
89 mode, fops, data);
90 else
91 error = PTR_ERR(*dentry);
92 inode_unlock(d_inode(parent));
93
94 return error;
95}
96
97static ssize_t atomic_stats_read(struct file *file, char __user *buf,
98 size_t count, loff_t *ppos)
99{
100 return simple_read_from_buffer(buf, count, ppos, &ipath_stats,
101 sizeof ipath_stats);
102}
103
104static const struct file_operations atomic_stats_ops = {
105 .read = atomic_stats_read,
106 .llseek = default_llseek,
107};
108
109static ssize_t atomic_counters_read(struct file *file, char __user *buf,
110 size_t count, loff_t *ppos)
111{
112 struct infinipath_counters counters;
113 struct ipath_devdata *dd;
114
115 dd = file_inode(file)->i_private;
116 dd->ipath_f_read_counters(dd, &counters);
117
118 return simple_read_from_buffer(buf, count, ppos, &counters,
119 sizeof counters);
120}
121
122static const struct file_operations atomic_counters_ops = {
123 .read = atomic_counters_read,
124 .llseek = default_llseek,
125};
126
127static ssize_t flash_read(struct file *file, char __user *buf,
128 size_t count, loff_t *ppos)
129{
130 struct ipath_devdata *dd;
131 ssize_t ret;
132 loff_t pos;
133 char *tmp;
134
135 pos = *ppos;
136
137	if (pos < 0) {
138 ret = -EINVAL;
139 goto bail;
140 }
141
142 if (pos >= sizeof(struct ipath_flash)) {
143 ret = 0;
144 goto bail;
145 }
146
147 if (count > sizeof(struct ipath_flash) - pos)
148 count = sizeof(struct ipath_flash) - pos;
149
150 tmp = kmalloc(count, GFP_KERNEL);
151 if (!tmp) {
152 ret = -ENOMEM;
153 goto bail;
154 }
155
156 dd = file_inode(file)->i_private;
157 if (ipath_eeprom_read(dd, pos, tmp, count)) {
158 ipath_dev_err(dd, "failed to read from flash\n");
159 ret = -ENXIO;
160 goto bail_tmp;
161 }
162
163 if (copy_to_user(buf, tmp, count)) {
164 ret = -EFAULT;
165 goto bail_tmp;
166 }
167
168 *ppos = pos + count;
169 ret = count;
170
171bail_tmp:
172 kfree(tmp);
173
174bail:
175 return ret;
176}
177
178static ssize_t flash_write(struct file *file, const char __user *buf,
179 size_t count, loff_t *ppos)
180{
181 struct ipath_devdata *dd;
182 ssize_t ret;
183 loff_t pos;
184 char *tmp;
185
186 pos = *ppos;
187
188 if (pos != 0) {
189 ret = -EINVAL;
190 goto bail;
191 }
192
193 if (count != sizeof(struct ipath_flash)) {
194 ret = -EINVAL;
195 goto bail;
196 }
197
198 tmp = memdup_user(buf, count);
199 if (IS_ERR(tmp))
200 return PTR_ERR(tmp);
201
202 dd = file_inode(file)->i_private;
203 if (ipath_eeprom_write(dd, pos, tmp, count)) {
204 ret = -ENXIO;
205 ipath_dev_err(dd, "failed to write to flash\n");
206 goto bail_tmp;
207 }
208
209 *ppos = pos + count;
210 ret = count;
211
212bail_tmp:
213 kfree(tmp);
214
215bail:
216 return ret;
217}
218
219static const struct file_operations flash_ops = {
220 .read = flash_read,
221 .write = flash_write,
222 .llseek = default_llseek,
223};
224
225static int create_device_files(struct super_block *sb,
226 struct ipath_devdata *dd)
227{
228 struct dentry *dir, *tmp;
229 char unit[10];
230 int ret;
231
232 snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
233 ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir,
234 &simple_dir_operations, dd);
235 if (ret) {
236 printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret);
237 goto bail;
238 }
239
240 ret = create_file("atomic_counters", S_IFREG|S_IRUGO, dir, &tmp,
241 &atomic_counters_ops, dd);
242 if (ret) {
243 printk(KERN_ERR "create_file(%s/atomic_counters) "
244 "failed: %d\n", unit, ret);
245 goto bail;
246 }
247
248 ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
249 &flash_ops, dd);
250 if (ret) {
251 printk(KERN_ERR "create_file(%s/flash) "
252 "failed: %d\n", unit, ret);
253 goto bail;
254 }
255
256bail:
257 return ret;
258}
259
260static int remove_file(struct dentry *parent, char *name)
261{
262 struct dentry *tmp;
263 int ret;
264
265 tmp = lookup_one_len(name, parent, strlen(name));
266
267 if (IS_ERR(tmp)) {
268 ret = PTR_ERR(tmp);
269 goto bail;
270 }
271
272 spin_lock(&tmp->d_lock);
273 if (simple_positive(tmp)) {
274 dget_dlock(tmp);
275 __d_drop(tmp);
276 spin_unlock(&tmp->d_lock);
277 simple_unlink(d_inode(parent), tmp);
278 } else
279 spin_unlock(&tmp->d_lock);
280
281 ret = 0;
282bail:
283 /*
284 * We don't expect clients to care about the return value, but
285 * it's there if they need it.
286 */
287 return ret;
288}
289
290static int remove_device_files(struct super_block *sb,
291 struct ipath_devdata *dd)
292{
293 struct dentry *dir, *root;
294 char unit[10];
295 int ret;
296
297 root = dget(sb->s_root);
298 inode_lock(d_inode(root));
299 snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
300 dir = lookup_one_len(unit, root, strlen(unit));
301
302 if (IS_ERR(dir)) {
303 ret = PTR_ERR(dir);
304 printk(KERN_ERR "Lookup of %s failed\n", unit);
305 goto bail;
306 }
307
308 remove_file(dir, "flash");
309 remove_file(dir, "atomic_counters");
310 d_delete(dir);
311 ret = simple_rmdir(d_inode(root), dir);
312
313bail:
314 inode_unlock(d_inode(root));
315 dput(root);
316 return ret;
317}
318
319static int ipathfs_fill_super(struct super_block *sb, void *data,
320 int silent)
321{
322 struct ipath_devdata *dd, *tmp;
323 unsigned long flags;
324 int ret;
325
326 static struct tree_descr files[] = {
327 [2] = {"atomic_stats", &atomic_stats_ops, S_IRUGO},
328 {""},
329 };
330
331 ret = simple_fill_super(sb, IPATHFS_MAGIC, files);
332 if (ret) {
333 printk(KERN_ERR "simple_fill_super failed: %d\n", ret);
334 goto bail;
335 }
336
337 spin_lock_irqsave(&ipath_devs_lock, flags);
338
339 list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
340 spin_unlock_irqrestore(&ipath_devs_lock, flags);
341 ret = create_device_files(sb, dd);
342 if (ret)
343 goto bail;
344 spin_lock_irqsave(&ipath_devs_lock, flags);
345 }
346
347 spin_unlock_irqrestore(&ipath_devs_lock, flags);
348
349bail:
350 return ret;
351}
352
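/*
 * Editor's note -- illustrative sketch, not part of the original driver.
 * simple_fill_super() uses the tree_descr array index as the inode
 * number and skips slots 0 and 1 (inode 1 is the root), which is why
 * the files[] table in ipathfs_fill_super() above begins with the
 * [2] = ... designator.  A hypothetical extra static file would just
 * take the next slot; "example_stats" below is made up.
 */
static struct tree_descr example_ipathfs_files[] = {
	[2] = {"atomic_stats", &atomic_stats_ops, S_IRUGO},
	[3] = {"example_stats", &atomic_stats_ops, S_IRUGO},
	{""},
};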
353static struct dentry *ipathfs_mount(struct file_system_type *fs_type,
354 int flags, const char *dev_name, void *data)
355{
356 struct dentry *ret;
357 ret = mount_single(fs_type, flags, data, ipathfs_fill_super);
358 if (!IS_ERR(ret))
359 ipath_super = ret->d_sb;
360 return ret;
361}
362
363static void ipathfs_kill_super(struct super_block *s)
364{
365 kill_litter_super(s);
366 ipath_super = NULL;
367}
368
369int ipathfs_add_device(struct ipath_devdata *dd)
370{
371 int ret;
372
373 if (ipath_super == NULL) {
374 ret = 0;
375 goto bail;
376 }
377
378 ret = create_device_files(ipath_super, dd);
379
380bail:
381 return ret;
382}
383
384int ipathfs_remove_device(struct ipath_devdata *dd)
385{
386 int ret;
387
388 if (ipath_super == NULL) {
389 ret = 0;
390 goto bail;
391 }
392
393 ret = remove_device_files(ipath_super, dd);
394
395bail:
396 return ret;
397}
398
399static struct file_system_type ipathfs_fs_type = {
400 .owner = THIS_MODULE,
401 .name = "ipathfs",
402 .mount = ipathfs_mount,
403 .kill_sb = ipathfs_kill_super,
404};
405MODULE_ALIAS_FS("ipathfs");
406
407int __init ipath_init_ipathfs(void)
408{
409 return register_filesystem(&ipathfs_fs_type);
410}
411
412void __exit ipath_exit_ipathfs(void)
413{
414 unregister_filesystem(&ipathfs_fs_type);
415}
diff --git a/drivers/staging/rdma/ipath/ipath_iba6110.c b/drivers/staging/rdma/ipath/ipath_iba6110.c
deleted file mode 100644
index 5f13572a5e24..000000000000
--- a/drivers/staging/rdma/ipath/ipath_iba6110.c
+++ /dev/null
@@ -1,1939 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34/*
35 * This file contains all of the code that is specific to the InfiniPath
36 * HT chip.
37 */
38
39#include <linux/vmalloc.h>
40#include <linux/pci.h>
41#include <linux/delay.h>
42#include <linux/htirq.h>
43#include <rdma/ib_verbs.h>
44
45#include "ipath_kernel.h"
46#include "ipath_registers.h"
47
48static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64);
49
50
51/*
52 * This lists the InfiniPath registers, in the actual chip layout.
53 * This structure should never be directly accessed.
54 *
55 * The names are in InterCap form because they're taken straight from
56 * the chip specification. Since they're only used in this file, they
57 * don't pollute the rest of the source.
58*/
59
60struct _infinipath_do_not_use_kernel_regs {
61 unsigned long long Revision;
62 unsigned long long Control;
63 unsigned long long PageAlign;
64 unsigned long long PortCnt;
65 unsigned long long DebugPortSelect;
66 unsigned long long DebugPort;
67 unsigned long long SendRegBase;
68 unsigned long long UserRegBase;
69 unsigned long long CounterRegBase;
70 unsigned long long Scratch;
71 unsigned long long ReservedMisc1;
72 unsigned long long InterruptConfig;
73 unsigned long long IntBlocked;
74 unsigned long long IntMask;
75 unsigned long long IntStatus;
76 unsigned long long IntClear;
77 unsigned long long ErrorMask;
78 unsigned long long ErrorStatus;
79 unsigned long long ErrorClear;
80 unsigned long long HwErrMask;
81 unsigned long long HwErrStatus;
82 unsigned long long HwErrClear;
83 unsigned long long HwDiagCtrl;
84 unsigned long long MDIO;
85 unsigned long long IBCStatus;
86 unsigned long long IBCCtrl;
87 unsigned long long ExtStatus;
88 unsigned long long ExtCtrl;
89 unsigned long long GPIOOut;
90 unsigned long long GPIOMask;
91 unsigned long long GPIOStatus;
92 unsigned long long GPIOClear;
93 unsigned long long RcvCtrl;
94 unsigned long long RcvBTHQP;
95 unsigned long long RcvHdrSize;
96 unsigned long long RcvHdrCnt;
97 unsigned long long RcvHdrEntSize;
98 unsigned long long RcvTIDBase;
99 unsigned long long RcvTIDCnt;
100 unsigned long long RcvEgrBase;
101 unsigned long long RcvEgrCnt;
102 unsigned long long RcvBufBase;
103 unsigned long long RcvBufSize;
104 unsigned long long RxIntMemBase;
105 unsigned long long RxIntMemSize;
106 unsigned long long RcvPartitionKey;
107 unsigned long long ReservedRcv[10];
108 unsigned long long SendCtrl;
109 unsigned long long SendPIOBufBase;
110 unsigned long long SendPIOSize;
111 unsigned long long SendPIOBufCnt;
112 unsigned long long SendPIOAvailAddr;
113 unsigned long long TxIntMemBase;
114 unsigned long long TxIntMemSize;
115 unsigned long long ReservedSend[9];
116 unsigned long long SendBufferError;
117 unsigned long long SendBufferErrorCONT1;
118 unsigned long long SendBufferErrorCONT2;
119 unsigned long long SendBufferErrorCONT3;
120 unsigned long long ReservedSBE[4];
121 unsigned long long RcvHdrAddr0;
122 unsigned long long RcvHdrAddr1;
123 unsigned long long RcvHdrAddr2;
124 unsigned long long RcvHdrAddr3;
125 unsigned long long RcvHdrAddr4;
126 unsigned long long RcvHdrAddr5;
127 unsigned long long RcvHdrAddr6;
128 unsigned long long RcvHdrAddr7;
129 unsigned long long RcvHdrAddr8;
130 unsigned long long ReservedRHA[7];
131 unsigned long long RcvHdrTailAddr0;
132 unsigned long long RcvHdrTailAddr1;
133 unsigned long long RcvHdrTailAddr2;
134 unsigned long long RcvHdrTailAddr3;
135 unsigned long long RcvHdrTailAddr4;
136 unsigned long long RcvHdrTailAddr5;
137 unsigned long long RcvHdrTailAddr6;
138 unsigned long long RcvHdrTailAddr7;
139 unsigned long long RcvHdrTailAddr8;
140 unsigned long long ReservedRHTA[7];
141 unsigned long long Sync; /* Software only */
142 unsigned long long Dump; /* Software only */
143 unsigned long long SimVer; /* Software only */
144 unsigned long long ReservedSW[5];
145 unsigned long long SerdesConfig0;
146 unsigned long long SerdesConfig1;
147 unsigned long long SerdesStatus;
148 unsigned long long XGXSConfig;
149 unsigned long long ReservedSW2[4];
150};
151
152struct _infinipath_do_not_use_counters {
153 __u64 LBIntCnt;
154 __u64 LBFlowStallCnt;
155 __u64 Reserved1;
156 __u64 TxUnsupVLErrCnt;
157 __u64 TxDataPktCnt;
158 __u64 TxFlowPktCnt;
159 __u64 TxDwordCnt;
160 __u64 TxLenErrCnt;
161 __u64 TxMaxMinLenErrCnt;
162 __u64 TxUnderrunCnt;
163 __u64 TxFlowStallCnt;
164 __u64 TxDroppedPktCnt;
165 __u64 RxDroppedPktCnt;
166 __u64 RxDataPktCnt;
167 __u64 RxFlowPktCnt;
168 __u64 RxDwordCnt;
169 __u64 RxLenErrCnt;
170 __u64 RxMaxMinLenErrCnt;
171 __u64 RxICRCErrCnt;
172 __u64 RxVCRCErrCnt;
173 __u64 RxFlowCtrlErrCnt;
174 __u64 RxBadFormatCnt;
175 __u64 RxLinkProblemCnt;
176 __u64 RxEBPCnt;
177 __u64 RxLPCRCErrCnt;
178 __u64 RxBufOvflCnt;
179 __u64 RxTIDFullErrCnt;
180 __u64 RxTIDValidErrCnt;
181 __u64 RxPKeyMismatchCnt;
182 __u64 RxP0HdrEgrOvflCnt;
183 __u64 RxP1HdrEgrOvflCnt;
184 __u64 RxP2HdrEgrOvflCnt;
185 __u64 RxP3HdrEgrOvflCnt;
186 __u64 RxP4HdrEgrOvflCnt;
187 __u64 RxP5HdrEgrOvflCnt;
188 __u64 RxP6HdrEgrOvflCnt;
189 __u64 RxP7HdrEgrOvflCnt;
190 __u64 RxP8HdrEgrOvflCnt;
191 __u64 Reserved6;
192 __u64 Reserved7;
193 __u64 IBStatusChangeCnt;
194 __u64 IBLinkErrRecoveryCnt;
195 __u64 IBLinkDownedCnt;
196 __u64 IBSymbolErrCnt;
197};
198
199#define IPATH_KREG_OFFSET(field) (offsetof( \
200 struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
201#define IPATH_CREG_OFFSET(field) (offsetof( \
202 struct _infinipath_do_not_use_counters, field) / sizeof(u64))
203
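/*
 * Editor's note -- illustrative sketch, not part of the original driver.
 * IPATH_KREG_OFFSET()/IPATH_CREG_OFFSET() above turn a field of the
 * chip-layout structs into a register index in 64-bit words: offsetof()
 * gives the byte offset and dividing by sizeof(u64) converts it to a
 * word index.  A minimal, self-contained illustration of the same
 * arithmetic (the three-field struct is a made-up stand-in, not the
 * real register file):
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct example_regs {
	uint64_t Revision;	/* word index 0 */
	uint64_t Control;	/* word index 1 */
	uint64_t IntMask;	/* word index 2 */
};

#define EXAMPLE_KREG_OFFSET(field) \
	(offsetof(struct example_regs, field) / sizeof(uint64_t))

int main(void)
{
	/* prints "IntMask lives at register word 2" */
	printf("IntMask lives at register word %zu\n",
	       EXAMPLE_KREG_OFFSET(IntMask));
	return 0;
}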
204static const struct ipath_kregs ipath_ht_kregs = {
205 .kr_control = IPATH_KREG_OFFSET(Control),
206 .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
207 .kr_debugport = IPATH_KREG_OFFSET(DebugPort),
208 .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
209 .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
210 .kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
211 .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
212 .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
213 .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
214 .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
215 .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
216 .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
217 .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
218 .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
219 .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
220 .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
221 .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
222 .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
223 .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
224 .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
225 .kr_intclear = IPATH_KREG_OFFSET(IntClear),
226 .kr_interruptconfig = IPATH_KREG_OFFSET(InterruptConfig),
227 .kr_intmask = IPATH_KREG_OFFSET(IntMask),
228 .kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
229 .kr_mdio = IPATH_KREG_OFFSET(MDIO),
230 .kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
231 .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
232 .kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
233 .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
234 .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
235 .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
236 .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
237 .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
238 .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
239 .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
240 .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
241 .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
242 .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
243 .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
244 .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
245 .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
246 .kr_revision = IPATH_KREG_OFFSET(Revision),
247 .kr_scratch = IPATH_KREG_OFFSET(Scratch),
248 .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
249 .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
250 .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr),
251 .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase),
252 .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt),
253 .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize),
254 .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
255 .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
256 .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
257 .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
258 .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0),
259 .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1),
260 .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
261 .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
262 /*
263 * These should not be used directly via ipath_write_kreg64(),
264 * use them with ipath_write_kreg64_port(),
265 */
266 .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
267 .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
268};
269
270static const struct ipath_cregs ipath_ht_cregs = {
271 .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
272 .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
273 .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
274 .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
275 .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
276 .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
277 .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
278 .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
279 .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
280 .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
281 .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
282 .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
283 /* calc from Reg_CounterRegBase + offset */
284 .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
285 .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
286 .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
287 .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
288 .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
289 .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
290 .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
291 .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
292 .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
293 .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
294 .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
295 .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
296 .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
297 .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
298 .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
299 .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
300 .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
301 .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
302 .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
303 .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
304 .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
305};
306
307/* kr_intstatus, kr_intclear, kr_intmask bits */
308#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1)
309#define INFINIPATH_I_RCVURG_SHIFT 0
310#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1)
311#define INFINIPATH_I_RCVAVAIL_SHIFT 12
312
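/*
 * Editor's note -- illustrative sketch, not part of the original driver.
 * The mask/shift pairs below describe two 9-bit per-port groups packed
 * into the interrupt status word: "receive urgent" starting at bit 0
 * and "receive available" starting at bit 12.  A self-contained sketch
 * of how such a word would be decoded (the EX_* names and the status
 * value are made up):
 */
#include <stdint.h>
#include <stdio.h>

#define EX_I_RCVURG_MASK	((1U << 9) - 1)
#define EX_I_RCVURG_SHIFT	0
#define EX_I_RCVAVAIL_MASK	((1U << 9) - 1)
#define EX_I_RCVAVAIL_SHIFT	12

int main(void)
{
	uint64_t istat = (1ULL << 12) | (1ULL << 14);	/* ports 0 and 2 */
	uint32_t avail = (uint32_t)(istat >> EX_I_RCVAVAIL_SHIFT) & EX_I_RCVAVAIL_MASK;
	uint32_t urg   = (uint32_t)(istat >> EX_I_RCVURG_SHIFT) & EX_I_RCVURG_MASK;
	int port;

	for (port = 0; port < 9; port++)
		if (avail & (1U << port))
			printf("port %d has packets available\n", port);
	printf("urgent bits: 0x%x\n", urg);
	return 0;
}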
313/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus bits */
314#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
315#define INFINIPATH_HWE_HTCMEMPARITYERR_MASK 0x3FFFFFULL
316#define INFINIPATH_HWE_HTCLNKABYTE0CRCERR 0x0000000000800000ULL
317#define INFINIPATH_HWE_HTCLNKABYTE1CRCERR 0x0000000001000000ULL
318#define INFINIPATH_HWE_HTCLNKBBYTE0CRCERR 0x0000000002000000ULL
319#define INFINIPATH_HWE_HTCLNKBBYTE1CRCERR 0x0000000004000000ULL
320#define INFINIPATH_HWE_HTCMISCERR4 0x0000000008000000ULL
321#define INFINIPATH_HWE_HTCMISCERR5 0x0000000010000000ULL
322#define INFINIPATH_HWE_HTCMISCERR6 0x0000000020000000ULL
323#define INFINIPATH_HWE_HTCMISCERR7 0x0000000040000000ULL
324#define INFINIPATH_HWE_HTCBUSTREQPARITYERR 0x0000000080000000ULL
325#define INFINIPATH_HWE_HTCBUSTRESPPARITYERR 0x0000000100000000ULL
326#define INFINIPATH_HWE_HTCBUSIREQPARITYERR 0x0000000200000000ULL
327#define INFINIPATH_HWE_COREPLL_FBSLIP 0x0080000000000000ULL
328#define INFINIPATH_HWE_COREPLL_RFSLIP 0x0100000000000000ULL
329#define INFINIPATH_HWE_HTBPLL_FBSLIP 0x0200000000000000ULL
330#define INFINIPATH_HWE_HTBPLL_RFSLIP 0x0400000000000000ULL
331#define INFINIPATH_HWE_HTAPLL_FBSLIP 0x0800000000000000ULL
332#define INFINIPATH_HWE_HTAPLL_RFSLIP 0x1000000000000000ULL
333#define INFINIPATH_HWE_SERDESPLLFAILED 0x2000000000000000ULL
334
335#define IBA6110_IBCS_LINKTRAININGSTATE_MASK 0xf
336#define IBA6110_IBCS_LINKSTATE_SHIFT 4
337
338/* kr_extstatus bits */
339#define INFINIPATH_EXTS_FREQSEL 0x2
340#define INFINIPATH_EXTS_SERDESSEL 0x4
341#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
342#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000
343
344
345/* TID entries (memory), HT-only */
346#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
347#define INFINIPATH_RT_VALID 0x8000000000000000ULL
348#define INFINIPATH_RT_ADDR_SHIFT 0
349#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
350#define INFINIPATH_RT_BUFSIZE_SHIFT 48
351
352#define INFINIPATH_R_INTRAVAIL_SHIFT 16
353#define INFINIPATH_R_TAILUPD_SHIFT 31
354
355/* kr_xgxsconfig bits */
356#define INFINIPATH_XGXS_RESET 0x7ULL
357
358/*
359 * masks and bits that are different in different chips, or present only
360 * in one
361 */
362static const ipath_err_t infinipath_hwe_htcmemparityerr_mask =
363 INFINIPATH_HWE_HTCMEMPARITYERR_MASK;
364static const ipath_err_t infinipath_hwe_htcmemparityerr_shift =
365 INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT;
366
367static const ipath_err_t infinipath_hwe_htclnkabyte0crcerr =
368 INFINIPATH_HWE_HTCLNKABYTE0CRCERR;
369static const ipath_err_t infinipath_hwe_htclnkabyte1crcerr =
370 INFINIPATH_HWE_HTCLNKABYTE1CRCERR;
371static const ipath_err_t infinipath_hwe_htclnkbbyte0crcerr =
372 INFINIPATH_HWE_HTCLNKBBYTE0CRCERR;
373static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr =
374 INFINIPATH_HWE_HTCLNKBBYTE1CRCERR;
375
376#define _IPATH_GPIO_SDA_NUM 1
377#define _IPATH_GPIO_SCL_NUM 0
378
379#define IPATH_GPIO_SDA \
380 (1ULL << (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
381#define IPATH_GPIO_SCL \
382 (1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
383
384/* keep the code below somewhat more readable; not used elsewhere */
385#define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \
386 infinipath_hwe_htclnkabyte1crcerr)
387#define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr | \
388 infinipath_hwe_htclnkbbyte1crcerr)
389#define _IPATH_HTLANE0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \
390 infinipath_hwe_htclnkbbyte0crcerr)
391#define _IPATH_HTLANE1_CRCBITS (infinipath_hwe_htclnkabyte1crcerr | \
392 infinipath_hwe_htclnkbbyte1crcerr)
393
394static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
395 char *msg, size_t msgl)
396{
397 char bitsmsg[64];
398 ipath_err_t crcbits = hwerrs &
399 (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS);
400	/* don't check the upper byte CRC if link 0 is 8 bit HT */
401 if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
402 crcbits &= ~infinipath_hwe_htclnkabyte1crcerr;
403	/* don't check the upper byte CRC if link 1 is 8 bit HT */
404 if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
405 crcbits &= ~infinipath_hwe_htclnkbbyte1crcerr;
406 /*
407	 * we'll want to ignore link errors on a link that is
408	 * not in use, if any. For now, complain about both links.
409 */
410 if (crcbits) {
411 u16 ctrl0, ctrl1;
412 snprintf(bitsmsg, sizeof bitsmsg,
413 "[HT%s lane %s CRC (%llx); powercycle to completely clear]",
414 !(crcbits & _IPATH_HTLINK1_CRCBITS) ?
415 "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
416 ? "1 (B)" : "0+1 (A+B)"),
417 !(crcbits & _IPATH_HTLANE1_CRCBITS) ? "0"
418 : (!(crcbits & _IPATH_HTLANE0_CRCBITS) ? "1" :
419 "0+1"), (unsigned long long) crcbits);
420 strlcat(msg, bitsmsg, msgl);
421
422 /*
423 * print extra info for debugging. slave/primary
424 * config word 4, 8 (link control 0, 1)
425 */
426
427 if (pci_read_config_word(dd->pcidev,
428 dd->ipath_ht_slave_off + 0x4,
429 &ctrl0))
430 dev_info(&dd->pcidev->dev, "Couldn't read "
431 "linkctrl0 of slave/primary "
432 "config block\n");
433 else if (!(ctrl0 & 1 << 6))
434 /* not if EOC bit set */
435 ipath_dbg("HT linkctrl0 0x%x%s%s\n", ctrl0,
436 ((ctrl0 >> 8) & 7) ? " CRC" : "",
437 ((ctrl0 >> 4) & 1) ? "linkfail" :
438 "");
439 if (pci_read_config_word(dd->pcidev,
440 dd->ipath_ht_slave_off + 0x8,
441 &ctrl1))
442 dev_info(&dd->pcidev->dev, "Couldn't read "
443 "linkctrl1 of slave/primary "
444 "config block\n");
445 else if (!(ctrl1 & 1 << 6))
446 /* not if EOC bit set */
447 ipath_dbg("HT linkctrl1 0x%x%s%s\n", ctrl1,
448 ((ctrl1 >> 8) & 7) ? " CRC" : "",
449 ((ctrl1 >> 4) & 1) ? "linkfail" :
450 "");
451
452 /* disable until driver reloaded */
453 dd->ipath_hwerrmask &= ~crcbits;
454 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
455 dd->ipath_hwerrmask);
456 ipath_dbg("HT crc errs: %s\n", msg);
457 } else
458 ipath_dbg("ignoring HT crc errors 0x%llx, "
459 "not in use\n", (unsigned long long)
460 (hwerrs & (_IPATH_HTLINK0_CRCBITS |
461 _IPATH_HTLINK1_CRCBITS)));
462}
463
464/* 6110 specific hardware errors... */
465static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
466 INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"),
467 INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"),
468 INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"),
469 INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"),
470 INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"),
471 INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"),
472 INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
473 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
474};
475
476#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
477 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
478 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
479#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
480 << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
481
482static void ipath_ht_txe_recover(struct ipath_devdata *dd)
483{
484 ++ipath_stats.sps_txeparity;
485 dev_info(&dd->pcidev->dev,
486 "Recovering from TXE PIO parity error\n");
487}
488
489
490/**
491 * ipath_ht_handle_hwerrors - display hardware errors.
492 * @dd: the infinipath device
493 * @msg: the output buffer
494 * @msgl: the size of the output buffer
495 *
496 * Most hardware errors are catastrophic, but for right now,
497 * we'll print them and continue.  We reuse the same message
498 * buffer as ipath_handle_errors() to avoid excessive stack
499 * usage.
500 */
501static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
502 size_t msgl)
503{
504 ipath_err_t hwerrs;
505 u32 bits, ctrl;
506 int isfatal = 0;
507 char bitsmsg[64];
508 int log_idx;
509
510 hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
511
512 if (!hwerrs) {
513 ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
514 /*
515		 * better than printing confusing messages; this seems
516		 * to be related to clearing the CRC error or the PLL
517		 * error during init.
518 */
519 goto bail;
520 } else if (hwerrs == -1LL) {
521 ipath_dev_err(dd, "Read of hardware error status failed "
522 "(all bits set); ignoring\n");
523 goto bail;
524 }
525 ipath_stats.sps_hwerrs++;
526
527 /* Always clear the error status register, except MEMBISTFAIL,
528 * regardless of whether we continue or stop using the chip.
529 * We want that set so we know it failed, even across driver reload.
530 * We'll still ignore it in the hwerrmask. We do this partly for
531 * diagnostics, but also for support */
532 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
533 hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);
534
535 hwerrs &= dd->ipath_hwerrmask;
536
537 /* We log some errors to EEPROM, check if we have any of those. */
538 for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
539 if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
540 ipath_inc_eeprom_err(dd, log_idx, 1);
541
542 /*
543 * make sure we get this much out, unless told to be quiet,
544 * it's a parity error we may recover from,
545 * or it's occurred within the last 5 seconds
546 */
547 if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
548 RXE_EAGER_PARITY)) ||
549 (ipath_debug & __IPATH_VERBDBG))
550 dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
551 "(cleared)\n", (unsigned long long) hwerrs);
552 dd->ipath_lasthwerror |= hwerrs;
553
554 if (hwerrs & ~dd->ipath_hwe_bitsextant)
555 ipath_dev_err(dd, "hwerror interrupt with unknown errors "
556 "%llx set\n", (unsigned long long)
557 (hwerrs & ~dd->ipath_hwe_bitsextant));
558
559 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
560 if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
561 /*
562 * parity errors in send memory are recoverable,
563		 * just cancel the send (if indicated in sendbuffererror),
564 * count the occurrence, unfreeze (if no other handled
565 * hardware error bits are set), and continue. They can
566 * occur if a processor speculative read is done to the PIO
567 * buffer while we are sending a packet, for example.
568 */
569 if (hwerrs & TXE_PIO_PARITY) {
570 ipath_ht_txe_recover(dd);
571 hwerrs &= ~TXE_PIO_PARITY;
572 }
573
574 if (!hwerrs) {
575 ipath_dbg("Clearing freezemode on ignored or "
576 "recovered hardware error\n");
577 ipath_clear_freeze(dd);
578 }
579 }
580
581 *msg = '\0';
582
583 /*
584	 * may someday want to decode which bits belong to which
585	 * functional area, for parity errors, etc.
586 */
587 if (hwerrs & (infinipath_hwe_htcmemparityerr_mask
588 << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT)) {
589 bits = (u32) ((hwerrs >>
590 INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) &
591 INFINIPATH_HWE_HTCMEMPARITYERR_MASK);
592 snprintf(bitsmsg, sizeof bitsmsg, "[HTC Parity Errs %x] ",
593 bits);
594 strlcat(msg, bitsmsg, msgl);
595 }
596
597 ipath_format_hwerrors(hwerrs,
598 ipath_6110_hwerror_msgs,
599 ARRAY_SIZE(ipath_6110_hwerror_msgs),
600 msg, msgl);
601
602 if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
603 hwerr_crcbits(dd, hwerrs, msg, msgl);
604
605 if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
606 strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
607 msgl);
608 /* ignore from now on, so disable until driver reloaded */
609 dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
610 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
611 dd->ipath_hwerrmask);
612 }
613#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \
614 INFINIPATH_HWE_COREPLL_RFSLIP | \
615 INFINIPATH_HWE_HTBPLL_FBSLIP | \
616 INFINIPATH_HWE_HTBPLL_RFSLIP | \
617 INFINIPATH_HWE_HTAPLL_FBSLIP | \
618 INFINIPATH_HWE_HTAPLL_RFSLIP)
619
620 if (hwerrs & _IPATH_PLL_FAIL) {
621 snprintf(bitsmsg, sizeof bitsmsg,
622 "[PLL failed (%llx), InfiniPath hardware unusable]",
623 (unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
624 strlcat(msg, bitsmsg, msgl);
625 /* ignore from now on, so disable until driver reloaded */
626 dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
627 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
628 dd->ipath_hwerrmask);
629 }
630
631 if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
632 /*
633		 * If it occurs, it is left masked since the external
634 * interface is unused
635 */
636 dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
637 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
638 dd->ipath_hwerrmask);
639 }
640
641 if (hwerrs) {
642 /*
643		 * if any bits are set that we aren't ignoring; only
644 * make the complaint once, in case it's stuck
645 * or recurring, and we get here multiple
646 * times.
647 * force link down, so switch knows, and
648 * LEDs are turned off
649 */
650 if (dd->ipath_flags & IPATH_INITTED) {
651 ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
652 ipath_setup_ht_setextled(dd,
653 INFINIPATH_IBCS_L_STATE_DOWN,
654 INFINIPATH_IBCS_LT_STATE_DISABLED);
655 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
656 "mode), no longer usable, SN %.16s\n",
657 dd->ipath_serial);
658 isfatal = 1;
659 }
660 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
661 /* mark as having had error */
662 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
663 /*
664 * mark as not usable, at a minimum until driver
665 * is reloaded, probably until reboot, since no
666 * other reset is possible.
667 */
668 dd->ipath_flags &= ~IPATH_INITTED;
669 } else {
670 *msg = 0; /* recovered from all of them */
671 }
672 if (*msg)
673 ipath_dev_err(dd, "%s hardware error\n", msg);
674 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
675 /*
676 * for status file; if no trailing brace is copied,
677 * we'll know it was truncated.
678 */
679 snprintf(dd->ipath_freezemsg,
680 dd->ipath_freezelen, "{%s}", msg);
681
682bail:;
683}
684
685/**
686 * ipath_ht_boardname - fill in the board name
687 * @dd: the infinipath device
688 * @name: the output buffer
689 * @namelen: the size of the output buffer
690 *
691 * fill in the board name, based on the board revision register
692 */
693static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
694 size_t namelen)
695{
696 char *n = NULL;
697 u8 boardrev = dd->ipath_boardrev;
698 int ret = 0;
699
700 switch (boardrev) {
701 case 5:
702 /*
703 * original production board; two production levels, with
704 * different serial number ranges. See ipath_ht_early_init() for
705 * case where we enable IPATH_GPIO_INTR for later serial # range.
706 * Original 112* serial number is no longer supported.
707 */
708 n = "InfiniPath_QHT7040";
709 break;
710 case 7:
711 /* small form factor production board */
712 n = "InfiniPath_QHT7140";
713 break;
714 default: /* don't know, just print the number */
715 ipath_dev_err(dd, "Don't yet know about board "
716 "with ID %u\n", boardrev);
717 snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
718 boardrev);
719 break;
720 }
721 if (n)
722 snprintf(name, namelen, "%s", n);
723
724 if (ret) {
725 ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name);
726 goto bail;
727 }
728 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
729 dd->ipath_minrev > 4)) {
730 /*
731 * This version of the driver only supports Rev 3.2 - 3.4
732 */
733 ipath_dev_err(dd,
734 "Unsupported InfiniPath hardware revision %u.%u!\n",
735 dd->ipath_majrev, dd->ipath_minrev);
736 ret = 1;
737 goto bail;
738 }
739 /*
740 * pkt/word counters are 32 bit, and therefore wrap fast enough
741 * that we snapshot them from a timer, and maintain 64 bit shadow
742 * copies
743 */
744 dd->ipath_flags |= IPATH_32BITCOUNTERS;
745 dd->ipath_flags |= IPATH_GPIO_INTR;
746 if (dd->ipath_lbus_speed != 800)
747 ipath_dev_err(dd,
748 "Incorrectly configured for HT @ %uMHz\n",
749 dd->ipath_lbus_speed);
750
751 /*
752 * set here, not in ipath_init_*_funcs because we have to do
753 * it after we can read chip registers.
754 */
755 dd->ipath_ureg_align =
756 ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
757
758bail:
759 return ret;
760}
761
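/*
 * Editor's note -- illustrative sketch, not part of the original driver
 * (the real shadow-counter maintenance lives elsewhere in the ipath
 * code).  The comment in ipath_ht_boardname() above notes that the
 * 32-bit packet/word counters wrap quickly, so they are sampled from a
 * timer and folded into 64-bit shadow copies.  The usual way to do
 * that, provided the counter is sampled more often than it can wrap,
 * looks roughly like this:
 */
#include <stdint.h>

struct shadow_counter {
	uint32_t last_hw;	/* last raw 32-bit hardware sample */
	uint64_t total;		/* accumulated 64-bit value */
};

/* Call periodically (e.g. from a timer) with the current raw reading. */
static uint64_t shadow_counter_update(struct shadow_counter *c, uint32_t hw)
{
	/* unsigned subtraction handles a single wrap correctly */
	c->total += (uint32_t)(hw - c->last_hw);
	c->last_hw = hw;
	return c->total;
}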
762static void ipath_check_htlink(struct ipath_devdata *dd)
763{
764 u8 linkerr, link_off, i;
765
766 for (i = 0; i < 2; i++) {
767 link_off = dd->ipath_ht_slave_off + i * 4 + 0xd;
768 if (pci_read_config_byte(dd->pcidev, link_off, &linkerr))
769 dev_info(&dd->pcidev->dev, "Couldn't read "
770 "linkerror%d of HT slave/primary block\n",
771 i);
772 else if (linkerr & 0xf0) {
773 ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
774				   "clearing\n", i, linkerr >> 4);
775 /*
776 * writing the linkerr bits that are set should
777 * clear them
778 */
779 if (pci_write_config_byte(dd->pcidev, link_off,
780 linkerr))
781 ipath_dbg("Failed write to clear HT "
782 "linkerror%d\n", i);
783 if (pci_read_config_byte(dd->pcidev, link_off,
784 &linkerr))
785 dev_info(&dd->pcidev->dev,
786 "Couldn't reread linkerror%d of "
787 "HT slave/primary block\n", i);
788 else if (linkerr & 0xf0)
789 dev_info(&dd->pcidev->dev,
790 "HT linkerror%d bits 0x%x "
791 "couldn't be cleared\n",
792 i, linkerr >> 4);
793 }
794 }
795}
796
797static int ipath_setup_ht_reset(struct ipath_devdata *dd)
798{
799 ipath_dbg("No reset possible for this InfiniPath hardware\n");
800 return 0;
801}
802
803#define HT_INTR_DISC_CONFIG 0x80 /* HT interrupt and discovery cap */
804#define HT_INTR_REG_INDEX 2 /* intconfig requires indirect accesses */
805
806/*
807 * Bits 13-15 of the command word being 0 marks the slave/primary block. Clear any HT CRC
808 * errors. We only bother to do this at load time, because it's OK if
809 * it happened before we were loaded (first time after boot/reset),
810 * but any time after that, it's fatal anyway. Also need to not check
811 * for upper byte errors if we are in 8 bit mode, so figure out
812 * our width. For now, at least, also complain if it's 8 bit.
813 */
814static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
815 int pos, u8 cap_type)
816{
817 u8 linkwidth = 0, linkerr, link_a_b_off, link_off;
818 u16 linkctrl = 0;
819 int i;
820
821 dd->ipath_ht_slave_off = pos;
822 /* command word, master_host bit */
823 /* master host || slave */
824 if ((cap_type >> 2) & 1)
825 link_a_b_off = 4;
826 else
827 link_a_b_off = 0;
828 ipath_cdbg(VERBOSE, "HT%u (Link %c) connected to processor\n",
829 link_a_b_off ? 1 : 0,
830 link_a_b_off ? 'B' : 'A');
831
832 link_a_b_off += pos;
833
834 /*
835 * check both link control registers; clear both HT CRC sets if
836 * necessary.
837 */
838 for (i = 0; i < 2; i++) {
839 link_off = pos + i * 4 + 0x4;
840 if (pci_read_config_word(pdev, link_off, &linkctrl))
841 ipath_dev_err(dd, "Couldn't read HT link control%d "
842 "register\n", i);
843 else if (linkctrl & (0xf << 8)) {
844 ipath_cdbg(VERBOSE, "Clear linkctrl%d CRC Error "
845 "bits %x\n", i, linkctrl & (0xf << 8));
846 /*
847 * now write them back to clear the error.
848 */
849 pci_write_config_word(pdev, link_off,
850 linkctrl & (0xf << 8));
851 }
852 }
853
854 /*
855 * As with HT CRC bits, same for protocol errors that might occur
856 * during boot.
857 */
858 for (i = 0; i < 2; i++) {
859 link_off = pos + i * 4 + 0xd;
860 if (pci_read_config_byte(pdev, link_off, &linkerr))
861 dev_info(&pdev->dev, "Couldn't read linkerror%d "
862 "of HT slave/primary block\n", i);
863 else if (linkerr & 0xf0) {
864 ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
865				   "clearing\n", i, linkerr >> 4);
866 /*
867 * writing the linkerr bits that are set will clear
868 * them
869 */
870 if (pci_write_config_byte
871 (pdev, link_off, linkerr))
872 ipath_dbg("Failed write to clear HT "
873 "linkerror%d\n", i);
874 if (pci_read_config_byte(pdev, link_off, &linkerr))
875 dev_info(&pdev->dev, "Couldn't reread "
876 "linkerror%d of HT slave/primary "
877 "block\n", i);
878 else if (linkerr & 0xf0)
879 dev_info(&pdev->dev, "HT linkerror%d bits "
880 "0x%x couldn't be cleared\n",
881 i, linkerr >> 4);
882 }
883 }
884
885 /*
886 * this is just for our link to the host, not devices connected
887	 * through the tunnel.
888 */
889
890 if (pci_read_config_byte(pdev, link_a_b_off + 7, &linkwidth))
891 ipath_dev_err(dd, "Couldn't read HT link width "
892 "config register\n");
893 else {
894 u32 width;
895 switch (linkwidth & 7) {
896 case 5:
897 width = 4;
898 break;
899 case 4:
900 width = 2;
901 break;
902 case 3:
903 width = 32;
904 break;
905 case 1:
906 width = 16;
907 break;
908 case 0:
909 default: /* if wrong, assume 8 bit */
910 width = 8;
911 break;
912 }
913
914 dd->ipath_lbus_width = width;
915
916 if (linkwidth != 0x11) {
917 ipath_dev_err(dd, "Not configured for 16 bit HT "
918 "(%x)\n", linkwidth);
919 if (!(linkwidth & 0xf)) {
920 ipath_dbg("Will ignore HT lane1 errors\n");
921 dd->ipath_flags |= IPATH_8BIT_IN_HT0;
922 }
923 }
924 }
925
926 /*
927 * this is just for our link to the host, not devices connected
928	 * through the tunnel.
929 */
930 if (pci_read_config_byte(pdev, link_a_b_off + 0xd, &linkwidth))
931 ipath_dev_err(dd, "Couldn't read HT link frequency "
932 "config register\n");
933 else {
934 u32 speed;
935 switch (linkwidth & 0xf) {
936 case 6:
937 speed = 1000;
938 break;
939 case 5:
940 speed = 800;
941 break;
942 case 4:
943 speed = 600;
944 break;
945 case 3:
946 speed = 500;
947 break;
948 case 2:
949 speed = 400;
950 break;
951 case 1:
952 speed = 300;
953 break;
954 default:
955 /*
956 * assume reserved and vendor-specific are 200...
957 */
958 case 0:
959 speed = 200;
960 break;
961 }
962 dd->ipath_lbus_speed = speed;
963 }
964
965 snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info),
966 "HyperTransport,%uMHz,x%u\n",
967 dd->ipath_lbus_speed,
968 dd->ipath_lbus_width);
969}
970
971static int ipath_ht_intconfig(struct ipath_devdata *dd)
972{
973 int ret;
974
975 if (dd->ipath_intconfig) {
976 ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig,
977 dd->ipath_intconfig); /* interrupt address */
978 ret = 0;
979 } else {
980 ipath_dev_err(dd, "No interrupts enabled, couldn't setup "
981 "interrupt address\n");
982 ret = -EINVAL;
983 }
984
985 return ret;
986}
987
988static void ipath_ht_irq_update(struct pci_dev *dev, int irq,
989 struct ht_irq_msg *msg)
990{
991 struct ipath_devdata *dd = pci_get_drvdata(dev);
992 u64 prev_intconfig = dd->ipath_intconfig;
993
994 dd->ipath_intconfig = msg->address_lo;
995 dd->ipath_intconfig |= ((u64) msg->address_hi) << 32;
996
997 /*
998 * If the previous value of dd->ipath_intconfig is zero, we're
999 * getting configured for the first time, and must not program the
1000 * intconfig register here (it will be programmed later, when the
1001 * hardware is ready). Otherwise, we should.
1002 */
1003 if (prev_intconfig)
1004 ipath_ht_intconfig(dd);
1005}
1006
1007/**
1008 * ipath_setup_ht_config - setup the interruptconfig register
1009 * @dd: the infinipath device
1010 * @pdev: the PCI device
1011 *
1012 * setup the interruptconfig register from the HT config info.
1013 * Also clear CRC errors in HT linkcontrol, if necessary.
1014 * This is done only for the real hardware. It is done before
1015 * chip address space is initted, so we can't touch infinipath registers
1016 */
1017static int ipath_setup_ht_config(struct ipath_devdata *dd,
1018 struct pci_dev *pdev)
1019{
1020 int pos, ret;
1021
1022 ret = __ht_create_irq(pdev, 0, ipath_ht_irq_update);
1023 if (ret < 0) {
1024 ipath_dev_err(dd, "Couldn't create interrupt handler: "
1025 "err %d\n", ret);
1026 goto bail;
1027 }
1028 dd->ipath_irq = ret;
1029 ret = 0;
1030
1031 /*
1032 * Handle clearing CRC errors in linkctrl register if necessary. We
1033 * do this early, before we ever enable errors or hardware errors,
1034 * mostly to avoid causing the chip to enter freeze mode.
1035 */
1036 pos = pci_find_capability(pdev, PCI_CAP_ID_HT);
1037 if (!pos) {
1038 ipath_dev_err(dd, "Couldn't find HyperTransport "
1039 "capability; no interrupts\n");
1040 ret = -ENODEV;
1041 goto bail;
1042 }
1043 do {
1044 u8 cap_type;
1045
1046 /*
1047 * The HT capability type byte is 3 bytes after the
1048 * capability byte.
1049 */
1050 if (pci_read_config_byte(pdev, pos + 3, &cap_type)) {
1051 dev_info(&pdev->dev, "Couldn't read config "
1052 "command @ %d\n", pos);
1053 continue;
1054 }
1055 if (!(cap_type & 0xE0))
1056 slave_or_pri_blk(dd, pdev, pos, cap_type);
1057 } while ((pos = pci_find_next_capability(pdev, pos,
1058 PCI_CAP_ID_HT)));
1059
1060 dd->ipath_flags |= IPATH_SWAP_PIOBUFS;
1061
1062bail:
1063 return ret;
1064}
1065
1066/**
1067 * ipath_setup_ht_cleanup - clean up any per-chip chip-specific stuff
1068 * @dd: the infinipath device
1069 *
1070 * Called during driver unload.
1071 * This is currently a nop for the HT chip, though not for all chips.
1072 */
1073static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
1074{
1075}
1076
1077/**
1078 * ipath_setup_ht_setextled - set the state of the two external LEDs
1079 * @dd: the infinipath device
1080 * @lst: the L state
1081 * @ltst: the LT state
1082 *
1083 * Set the state of the two external LEDs, to indicate physical and
1084 * logical state of IB link. For this chip (at least with recommended
1085 * board pinouts), LED1 is Green (physical state), and LED2 is Yellow
1086 * (logical state)
1087 *
1088 * Note: We try to match the Mellanox HCA LED behavior as best
1089 * we can. Green indicates physical link state is OK (something is
1090 * plugged in, and we can train).
1091 * Amber indicates the link is logically up (ACTIVE).
1092 * Mellanox further blinks the amber LED to indicate data packet
1093 * activity, but we have no hardware support for that, so it would
1094 * require waking up every 10-20 msecs and checking the counters
1095 * on the chip, and then turning the LED off if appropriate. That's
1096 * visible overhead, so not something we will do.
1097 *
1098 */
1099static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
1100 u64 lst, u64 ltst)
1101{
1102 u64 extctl;
1103 unsigned long flags = 0;
1104
1105 /* the diags use the LED to indicate diag info, so we leave
1106 * the external LED alone when the diags are running */
1107 if (ipath_diag_inuse)
1108 return;
1109
1110	/* Allow override of LED display for, e.g., locating the system in a rack */
1111 if (dd->ipath_led_override) {
1112 ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
1113 ? INFINIPATH_IBCS_LT_STATE_LINKUP
1114 : INFINIPATH_IBCS_LT_STATE_DISABLED;
1115 lst = (dd->ipath_led_override & IPATH_LED_LOG)
1116 ? INFINIPATH_IBCS_L_STATE_ACTIVE
1117 : INFINIPATH_IBCS_L_STATE_DOWN;
1118 }
1119
1120 spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
1121 /*
1122 * start by setting both LED control bits to off, then turn
1123 * on the appropriate bit(s).
1124 */
1125 if (dd->ipath_boardrev == 8) { /* LS/X-1 uses different pins */
1126 /*
1127 * major difference is that INFINIPATH_EXTC_LEDGBLERR_OFF
1128 * is inverted, because it is normally used to indicate
1129 * a hardware fault at reset, if there were errors
1130 */
1131 extctl = (dd->ipath_extctrl & ~INFINIPATH_EXTC_LEDGBLOK_ON)
1132 | INFINIPATH_EXTC_LEDGBLERR_OFF;
1133 if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
1134 extctl &= ~INFINIPATH_EXTC_LEDGBLERR_OFF;
1135 if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
1136 extctl |= INFINIPATH_EXTC_LEDGBLOK_ON;
1137 } else {
1138 extctl = dd->ipath_extctrl &
1139 ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
1140 INFINIPATH_EXTC_LED2PRIPORT_ON);
1141 if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
1142 extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
1143 if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
1144 extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
1145 }
1146 dd->ipath_extctrl = extctl;
1147 ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
1148 spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
1149}
1150
1151static void ipath_init_ht_variables(struct ipath_devdata *dd)
1152{
1153 /*
1154 * setup the register offsets, since they are different for each
1155 * chip
1156 */
1157 dd->ipath_kregs = &ipath_ht_kregs;
1158 dd->ipath_cregs = &ipath_ht_cregs;
1159
1160 dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
1161 dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
1162 dd->ipath_gpio_sda = IPATH_GPIO_SDA;
1163 dd->ipath_gpio_scl = IPATH_GPIO_SCL;
1164
1165 /*
1166 * Fill in data for field-values that change in newer chips.
1167 * We dynamically specify only the mask for LINKTRAININGSTATE
1168 * and only the shift for LINKSTATE, as they are the only ones
1169 * that change. Also precalculate the 3 link states of interest
1170 * and the combined mask.
1171 */
1172 dd->ibcs_ls_shift = IBA6110_IBCS_LINKSTATE_SHIFT;
1173 dd->ibcs_lts_mask = IBA6110_IBCS_LINKTRAININGSTATE_MASK;
1174 dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK <<
1175 dd->ibcs_ls_shift) | dd->ibcs_lts_mask;
1176 dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
1177 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
1178 (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift);
1179 dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
1180 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
1181 (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift);
1182 dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
1183 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
1184 (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift);
1185
1186 /*
1187 * Fill in data for ibcc field-values that change in newer chips.
1188 * We dynamically specify only the mask for LINKINITCMD
1189 * and only the shift for LINKCMD and MAXPKTLEN, as they are
1190 * the only ones that change.
1191 */
1192 dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK;
1193 dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT;
1194 dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
1195
1196 /* Fill in shifts for RcvCtrl. */
1197 dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT;
1198 dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT;
1199 dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT;
1200 dd->ipath_r_portcfg_shift = 0; /* Not on IBA6110 */
1201
1202 dd->ipath_i_bitsextant =
1203 (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
1204 (INFINIPATH_I_RCVAVAIL_MASK <<
1205 INFINIPATH_I_RCVAVAIL_SHIFT) |
1206 INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
1207 INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
1208
1209 dd->ipath_e_bitsextant =
1210 INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
1211 INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
1212 INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
1213 INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
1214 INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
1215 INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
1216 INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
1217 INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
1218 INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
1219 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
1220 INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
1221 INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
1222 INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
1223 INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
1224 INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
1225 INFINIPATH_E_HARDWARE;
1226
1227 dd->ipath_hwe_bitsextant =
1228 (INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
1229 INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
1230 (INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
1231 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
1232 (INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
1233 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
1234 INFINIPATH_HWE_HTCLNKABYTE0CRCERR |
1235 INFINIPATH_HWE_HTCLNKABYTE1CRCERR |
1236 INFINIPATH_HWE_HTCLNKBBYTE0CRCERR |
1237 INFINIPATH_HWE_HTCLNKBBYTE1CRCERR |
1238 INFINIPATH_HWE_HTCMISCERR4 |
1239 INFINIPATH_HWE_HTCMISCERR5 | INFINIPATH_HWE_HTCMISCERR6 |
1240 INFINIPATH_HWE_HTCMISCERR7 |
1241 INFINIPATH_HWE_HTCBUSTREQPARITYERR |
1242 INFINIPATH_HWE_HTCBUSTRESPPARITYERR |
1243 INFINIPATH_HWE_HTCBUSIREQPARITYERR |
1244 INFINIPATH_HWE_RXDSYNCMEMPARITYERR |
1245 INFINIPATH_HWE_MEMBISTFAILED |
1246 INFINIPATH_HWE_COREPLL_FBSLIP |
1247 INFINIPATH_HWE_COREPLL_RFSLIP |
1248 INFINIPATH_HWE_HTBPLL_FBSLIP |
1249 INFINIPATH_HWE_HTBPLL_RFSLIP |
1250 INFINIPATH_HWE_HTAPLL_FBSLIP |
1251 INFINIPATH_HWE_HTAPLL_RFSLIP |
1252 INFINIPATH_HWE_SERDESPLLFAILED |
1253 INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
1254 INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
1255
1256 dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
1257 dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
1258 dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT;
1259 dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT;
1260
1261 /*
1262	 * EEPROM error log 0 is TXE parity errors, 1 is RXE parity,
1263	 * 2 is miscellaneous, and 3 is reserved for future use.
1264 */
1265 dd->ipath_eep_st_masks[0].hwerrs_to_log =
1266 INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
1267 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
1268
1269 dd->ipath_eep_st_masks[1].hwerrs_to_log =
1270 INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
1271 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
1272
1273 dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET;
1274
1275 dd->delay_mult = 2; /* SDR, 4X, can't change */
1276
1277 dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
1278 dd->ipath_link_speed_supported = IPATH_IB_SDR;
1279 dd->ipath_link_width_enabled = IB_WIDTH_4X;
1280 dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported;
1281 /* these can't change for this chip, so set once */
1282 dd->ipath_link_width_active = dd->ipath_link_width_enabled;
1283 dd->ipath_link_speed_active = dd->ipath_link_speed_enabled;
1284}
1285
1286/**
1287 * ipath_ht_init_hwerrors - enable hardware errors
1288 * @dd: the infinipath device
1289 *
1290 * now that we have finished initializing everything that might reasonably
1291 * cause a hardware error, and cleared those error bits as they occur,
1292 * we can enable hardware errors in the mask (potentially enabling
1293 * freeze mode), and enable hardware errors as errors (along with
1294 * everything else) in errormask
1295 */
1296static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
1297{
1298 ipath_err_t val;
1299 u64 extsval;
1300
1301 extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
1302
1303 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
1304 ipath_dev_err(dd, "MemBIST did not complete!\n");
1305 if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
1306 ipath_dbg("MemBIST corrected\n");
1307
1308 ipath_check_htlink(dd);
1309
1310 /* barring bugs, all hwerrors become interrupts, which can */
1311 val = -1LL;
1312 /* don't look at crc lane1 if 8 bit */
1313 if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
1314 val &= ~infinipath_hwe_htclnkabyte1crcerr;
1315 /* don't look at crc lane1 if 8 bit */
1316 if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
1317 val &= ~infinipath_hwe_htclnkbbyte1crcerr;
1318
1319 /*
1320 * disable RXDSYNCMEMPARITY because external serdes is unused,
1321 * and therefore the logic will never be used or initialized,
1322 * and uninitialized state will normally result in this error
1323	 * being asserted. Similarly for the external serdes pll
1324 * lock signal.
1325 */
1326 val &= ~(INFINIPATH_HWE_SERDESPLLFAILED |
1327 INFINIPATH_HWE_RXDSYNCMEMPARITYERR);
1328
1329 /*
1330 * Disable MISCERR4 because of an inversion in the HT core
1331 * logic checking for errors that cause this bit to be set.
1332 * The errata can also cause the protocol error bit to be set
1333 * in the HT config space linkerror register(s).
1334 */
1335 val &= ~INFINIPATH_HWE_HTCMISCERR4;
1336
1337 /*
1338 * PLL ignored because unused MDIO interface has a logic problem
1339 */
1340 if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9)
1341 val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
1342 dd->ipath_hwerrmask = val;
1343}
1344
1345
1346
1347
1348/**
1349 * ipath_ht_bringup_serdes - bring up the serdes
1350 * @dd: the infinipath device
1351 */
1352static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
1353{
1354 u64 val, config1;
1355 int ret = 0, change = 0;
1356
1357 ipath_dbg("Trying to bringup serdes\n");
1358
1359 if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
1360 INFINIPATH_HWE_SERDESPLLFAILED)
1361 {
1362 ipath_dbg("At start, serdes PLL failed bit set in "
1363 "hwerrstatus, clearing and continuing\n");
1364 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
1365 INFINIPATH_HWE_SERDESPLLFAILED);
1366 }
1367
1368 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
1369 config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
1370
1371 ipath_cdbg(VERBOSE, "Initial serdes status is config0=%llx "
1372 "config1=%llx, sstatus=%llx xgxs %llx\n",
1373 (unsigned long long) val, (unsigned long long) config1,
1374 (unsigned long long)
1375 ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
1376 (unsigned long long)
1377 ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
1378
1379 /* force reset on */
1380 val |= INFINIPATH_SERDC0_RESET_PLL
1381 /* | INFINIPATH_SERDC0_RESET_MASK */
1382 ;
1383 ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
1384 udelay(15); /* need pll reset set at least for a bit */
1385
1386 if (val & INFINIPATH_SERDC0_RESET_PLL) {
1387 u64 val2 = val &= ~INFINIPATH_SERDC0_RESET_PLL;
1388 /* set lane resets, and tx idle, during pll reset */
1389 val2 |= INFINIPATH_SERDC0_RESET_MASK |
1390 INFINIPATH_SERDC0_TXIDLE;
1391 ipath_cdbg(VERBOSE, "Clearing serdes PLL reset (writing "
1392 "%llx)\n", (unsigned long long) val2);
1393 ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
1394 val2);
1395 /*
1396 * be sure chip saw it
1397 */
1398 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1399 /*
1400 * need pll reset clear at least 11 usec before lane
1401 * resets cleared; give it a few more
1402 */
1403 udelay(15);
1404 val = val2; /* for check below */
1405 }
1406
1407 if (val & (INFINIPATH_SERDC0_RESET_PLL |
1408 INFINIPATH_SERDC0_RESET_MASK |
1409 INFINIPATH_SERDC0_TXIDLE)) {
1410 val &= ~(INFINIPATH_SERDC0_RESET_PLL |
1411 INFINIPATH_SERDC0_RESET_MASK |
1412 INFINIPATH_SERDC0_TXIDLE);
1413 /* clear them */
1414 ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
1415 val);
1416 }
1417
1418 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
1419 if (val & INFINIPATH_XGXS_RESET) {
1420 /* normally true after boot */
1421 val &= ~INFINIPATH_XGXS_RESET;
1422 change = 1;
1423 }
1424 if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
1425	    INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv) {
1426 /* need to compensate for Tx inversion in partner */
1427 val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
1428 INFINIPATH_XGXS_RX_POL_SHIFT);
1429 val |= dd->ipath_rx_pol_inv <<
1430 INFINIPATH_XGXS_RX_POL_SHIFT;
1431 change = 1;
1432 }
1433 if (change)
1434 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
1435
1436 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
1437
1438 /* clear current and de-emphasis bits */
1439 config1 &= ~0x0ffffffff00ULL;
1440 /* set current to 20ma */
1441 config1 |= 0x00000000000ULL;
1442 /* set de-emphasis to -5.68dB */
1443 config1 |= 0x0cccc000000ULL;
1444 ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1);
1445
1446 ipath_cdbg(VERBOSE, "After setup: serdes status is config0=%llx "
1447 "config1=%llx, sstatus=%llx xgxs %llx\n",
1448 (unsigned long long) val, (unsigned long long) config1,
1449 (unsigned long long)
1450 ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
1451 (unsigned long long)
1452 ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
1453
1454 return ret; /* for now, say we always succeeded */
1455}
1456
1457/**
1458 * ipath_ht_quiet_serdes - set serdes to txidle
1459 * @dd: the infinipath device
1460 * Called when the driver is being unloaded.
1461 */
1462static void ipath_ht_quiet_serdes(struct ipath_devdata *dd)
1463{
1464 u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
1465
1466 val |= INFINIPATH_SERDC0_TXIDLE;
1467 ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
1468 (unsigned long long) val);
1469 ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
1470}
1471
1472/**
1473 * ipath_ht_put_tid - write a TID in chip
1474 * @dd: the infinipath device
1475 * @tidptr: pointer to the expected TID (in chip) to update
1476 * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
1477 * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
1478 *
1479 * This exists as a separate routine to allow for special locking etc.
1480 * It's used both for the full cleanup on exit and for the normal
1481 * setup and teardown.
1482 */
1483static void ipath_ht_put_tid(struct ipath_devdata *dd,
1484 u64 __iomem *tidptr, u32 type,
1485 unsigned long pa)
1486{
1487 if (!dd->ipath_kregbase)
1488 return;
1489
1490 if (pa != dd->ipath_tidinvalid) {
1491 if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
1492 dev_info(&dd->pcidev->dev,
1493 "physaddr %lx has more than "
1494 "40 bits, using only 40!!!\n", pa);
1495 pa &= INFINIPATH_RT_ADDR_MASK;
1496 }
1497 if (type == RCVHQ_RCV_TYPE_EAGER)
1498 pa |= dd->ipath_tidtemplate;
1499 else {
1500 /* in words (fixed, full page). */
1501 u64 lenvalid = PAGE_SIZE >> 2;
1502 lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT;
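			/* e.g. with 4 KB pages this is 1024 words, placed in the buffer-size field */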
1503 pa |= lenvalid | INFINIPATH_RT_VALID;
1504 }
1505 }
1506
1507 writeq(pa, tidptr);
1508}
1509
1510
1511/**
1512 * ipath_ht_clear_tids - clear all TID entries for a port, expected and eager
1513 * @dd: the infinipath device
1514 * @port: the port
1515 *
1516 * Used from ipath_close(), and at chip initialization.
1517 */
1518static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
1519{
1520 u64 __iomem *tidbase;
1521 int i;
1522
1523 if (!dd->ipath_kregbase)
1524 return;
1525
1526 ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
1527
1528 /*
1529 * need to invalidate all of the expected TID entries for this
1530 * port, so we don't have valid entries that might somehow get
1531 * used (early in next use of this port, or through some bug)
1532 */
1533 tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
1534 dd->ipath_rcvtidbase +
1535 port * dd->ipath_rcvtidcnt *
1536 sizeof(*tidbase));
1537 for (i = 0; i < dd->ipath_rcvtidcnt; i++)
1538 ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
1539 dd->ipath_tidinvalid);
1540
1541 tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
1542 dd->ipath_rcvegrbase +
1543 port * dd->ipath_rcvegrcnt *
1544 sizeof(*tidbase));
1545
1546 for (i = 0; i < dd->ipath_rcvegrcnt; i++)
1547 ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
1548 dd->ipath_tidinvalid);
1549}
1550
1551/**
1552 * ipath_ht_tidtemplate - setup constants for TID updates
1553 * @dd: the infinipath device
1554 *
1555 * We set up values that are used often, to avoid recalculating them each time
1556 */
1557static void ipath_ht_tidtemplate(struct ipath_devdata *dd)
1558{
1559 dd->ipath_tidtemplate = dd->ipath_ibmaxlen >> 2;
1560 dd->ipath_tidtemplate <<= INFINIPATH_RT_BUFSIZE_SHIFT;
1561 dd->ipath_tidtemplate |= INFINIPATH_RT_VALID;
1562
1563 /*
1564 * work around chip errata bug 7358, by marking invalid tids
1565 * as having max length
1566 */
1567 dd->ipath_tidinvalid = (-1LL & INFINIPATH_RT_BUFSIZE_MASK) <<
1568 INFINIPATH_RT_BUFSIZE_SHIFT;
1569}
1570
1571static int ipath_ht_early_init(struct ipath_devdata *dd)
1572{
1573 u32 __iomem *piobuf;
1574 u32 pioincr, val32;
1575 int i;
1576
1577 /*
1578 * one cache line; long IB headers will spill over into received
1579 * buffer
1580 */
1581 dd->ipath_rcvhdrentsize = 16;
1582 dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
1583
1584 /*
1585 * For HT, we allocate a somewhat overly large eager buffer,
1586 * such that we can guarantee that we can receive the largest
1587 * packet that we can send out. To truly support a 4KB MTU,
1588 * we need to bump this to a large value. To date, other than
1589 * testing, we have never encountered an HCA that can really
1590 * send 4KB MTU packets, so we do not handle that (we'll get
1591 * error interrupts if we ever see one).
1592 */
1593 dd->ipath_rcvegrbufsize = dd->ipath_piosize2k;
1594
1595 /*
1596 * the min() check here is currently a nop, but it may not
1597 * always be, depending on just how we do ipath_rcvegrbufsize
1598 */
1599 dd->ipath_ibmaxlen = min(dd->ipath_piosize2k,
1600 dd->ipath_rcvegrbufsize);
1601 dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
1602 ipath_ht_tidtemplate(dd);
1603
1604 /*
1605 * zero all the TID entries at startup. We do this for sanity,
1606 * in case of a previous driver crash of some kind, and also
1607 * because the chip powers up with these memories in an unknown
1608 * state. Use portcnt, not cfgports, since this is for the
1609 * full chip, not for current (possibly different) configuration
1610 * value.
1611 * Chip Errata bug 6447
1612 */
1613 for (val32 = 0; val32 < dd->ipath_portcnt; val32++)
1614 ipath_ht_clear_tids(dd, val32);
1615
1616 /*
1617 * write the pbc of each buffer, to be sure it's initialized, then
1618 * cancel all the buffers, and also abort any packets that might
1619 * have been in flight for some reason (the latter is for driver
1620 * unload/reload, but isn't a bad idea at first init). PIO send
1621 * isn't enabled at this point, so there is no danger of sending
1622 * these out on the wire.
1623 * Chip Errata bug 6610
1624 */
1625 piobuf = (u32 __iomem *) (((char __iomem *)(dd->ipath_kregbase)) +
1626 dd->ipath_piobufbase);
1627 pioincr = dd->ipath_palign / sizeof(*piobuf);
1628 for (i = 0; i < dd->ipath_piobcnt2k; i++) {
1629 /*
1630 * reasonable word count, just to init pbc
1631 */
1632 writel(16, piobuf);
1633 piobuf += pioincr;
1634 }
1635
1636 ipath_get_eeprom_info(dd);
1637 if (dd->ipath_boardrev == 5) {
1638 /*
1639 * Later production QHT7040 has same changes as QHT7140, so
1640 * can use GPIO interrupts. They have serial #'s starting
1641 * with 128, rather than 112.
1642 */
1643 if (dd->ipath_serial[0] == '1' &&
1644 dd->ipath_serial[1] == '2' &&
1645 dd->ipath_serial[2] == '8')
1646 dd->ipath_flags |= IPATH_GPIO_INTR;
1647 else {
1648 ipath_dev_err(dd, "Unsupported InfiniPath board "
1649 "(serial number %.16s)!\n",
1650 dd->ipath_serial);
1651 return 1;
1652 }
1653 }
1654
1655 if (dd->ipath_minrev >= 4) {
1656 /* Rev4+ reports extra errors via internal GPIO pins */
1657 dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
1658 dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK;
1659 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1660 dd->ipath_gpio_mask);
1661 }
1662
1663 return 0;
1664}
1665
1666
1667/**
1668 * ipath_ht_get_base_info - set chip-specific flags for user code
1669 * @pd: the infinipath port data
1670 * @kbase: ipath_base_info pointer
1671 *
1672 * We set the HT flag because the difference in bandwidth between
1673 * HyperTransport and PCIe can affect some user packet algorithms.
1674 */
1675static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
1676{
1677 struct ipath_base_info *kinfo = kbase;
1678
1679 kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT |
1680 IPATH_RUNTIME_PIO_REGSWAPPED;
1681
1682 if (pd->port_dd->ipath_minrev < 4)
1683 kinfo->spi_runtime_flags |= IPATH_RUNTIME_RCVHDR_COPY;
1684
1685 return 0;
1686}
1687
1688static void ipath_ht_free_irq(struct ipath_devdata *dd)
1689{
1690 free_irq(dd->ipath_irq, dd);
1691 ht_destroy_irq(dd->ipath_irq);
1692 dd->ipath_irq = 0;
1693 dd->ipath_intconfig = 0;
1694}
1695
1696static struct ipath_message_header *
1697ipath_ht_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr)
1698{
1699 return (struct ipath_message_header *)
1700 &rhf_addr[sizeof(u64) / sizeof(u32)];
1701}
1702
1703static void ipath_ht_config_ports(struct ipath_devdata *dd, ushort cfgports)
1704{
1705 dd->ipath_portcnt =
1706 ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
1707 dd->ipath_p0_rcvegrcnt =
1708 ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
1709}
1710
1711static void ipath_ht_read_counters(struct ipath_devdata *dd,
1712 struct infinipath_counters *cntrs)
1713{
1714 cntrs->LBIntCnt =
1715 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt));
1716 cntrs->LBFlowStallCnt =
1717 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt));
1718 cntrs->TxSDmaDescCnt = 0;
1719 cntrs->TxUnsupVLErrCnt =
1720 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt));
1721 cntrs->TxDataPktCnt =
1722 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt));
1723 cntrs->TxFlowPktCnt =
1724 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt));
1725 cntrs->TxDwordCnt =
1726 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt));
1727 cntrs->TxLenErrCnt =
1728 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt));
1729 cntrs->TxMaxMinLenErrCnt =
1730 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt));
1731 cntrs->TxUnderrunCnt =
1732 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt));
1733 cntrs->TxFlowStallCnt =
1734 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt));
1735 cntrs->TxDroppedPktCnt =
1736 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt));
1737 cntrs->RxDroppedPktCnt =
1738 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt));
1739 cntrs->RxDataPktCnt =
1740 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt));
1741 cntrs->RxFlowPktCnt =
1742 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt));
1743 cntrs->RxDwordCnt =
1744 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt));
1745 cntrs->RxLenErrCnt =
1746 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt));
1747 cntrs->RxMaxMinLenErrCnt =
1748 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt));
1749 cntrs->RxICRCErrCnt =
1750 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt));
1751 cntrs->RxVCRCErrCnt =
1752 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt));
1753 cntrs->RxFlowCtrlErrCnt =
1754 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt));
1755 cntrs->RxBadFormatCnt =
1756 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt));
1757 cntrs->RxLinkProblemCnt =
1758 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt));
1759 cntrs->RxEBPCnt =
1760 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt));
1761 cntrs->RxLPCRCErrCnt =
1762 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt));
1763 cntrs->RxBufOvflCnt =
1764 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt));
1765 cntrs->RxTIDFullErrCnt =
1766 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt));
1767 cntrs->RxTIDValidErrCnt =
1768 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt));
1769 cntrs->RxPKeyMismatchCnt =
1770 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt));
1771 cntrs->RxP0HdrEgrOvflCnt =
1772 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt));
1773 cntrs->RxP1HdrEgrOvflCnt =
1774 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt));
1775 cntrs->RxP2HdrEgrOvflCnt =
1776 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt));
1777 cntrs->RxP3HdrEgrOvflCnt =
1778 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt));
1779 cntrs->RxP4HdrEgrOvflCnt =
1780 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt));
1781 cntrs->RxP5HdrEgrOvflCnt =
1782 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP5HdrEgrOvflCnt));
1783 cntrs->RxP6HdrEgrOvflCnt =
1784 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP6HdrEgrOvflCnt));
1785 cntrs->RxP7HdrEgrOvflCnt =
1786 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP7HdrEgrOvflCnt));
1787 cntrs->RxP8HdrEgrOvflCnt =
1788 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP8HdrEgrOvflCnt));
1789 cntrs->RxP9HdrEgrOvflCnt = 0;
1790 cntrs->RxP10HdrEgrOvflCnt = 0;
1791 cntrs->RxP11HdrEgrOvflCnt = 0;
1792 cntrs->RxP12HdrEgrOvflCnt = 0;
1793 cntrs->RxP13HdrEgrOvflCnt = 0;
1794 cntrs->RxP14HdrEgrOvflCnt = 0;
1795 cntrs->RxP15HdrEgrOvflCnt = 0;
1796 cntrs->RxP16HdrEgrOvflCnt = 0;
1797 cntrs->IBStatusChangeCnt =
1798 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt));
1799 cntrs->IBLinkErrRecoveryCnt =
1800 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt));
1801 cntrs->IBLinkDownedCnt =
1802 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt));
1803 cntrs->IBSymbolErrCnt =
1804 ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt));
1805 cntrs->RxVL15DroppedPktCnt = 0;
1806 cntrs->RxOtherLocalPhyErrCnt = 0;
1807 cntrs->PcieRetryBufDiagQwordCnt = 0;
1808 cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs;
1809 cntrs->LocalLinkIntegrityErrCnt =
1810 (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
1811 dd->ipath_lli_errs : dd->ipath_lli_errors;
1812 cntrs->RxVlErrCnt = 0;
1813 cntrs->RxDlidFltrCnt = 0;
1814}
1815
1816
1817/* no interrupt fallback for these chips */
1818static int ipath_ht_nointr_fallback(struct ipath_devdata *dd)
1819{
1820 return 0;
1821}
1822
1823
1824/*
1825 * reset the XGXS (between serdes and IBC). Slightly less intrusive
1826 * than resetting the IBC or external link state, and useful in some
1827 * cases to cause some retraining. To do this right, we reset IBC
1828 * as well.
1829 */
1830static void ipath_ht_xgxs_reset(struct ipath_devdata *dd)
1831{
1832 u64 val, prev_val;
1833
1834 prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
1835 val = prev_val | INFINIPATH_XGXS_RESET;
1836 prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */
1837 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
1838 dd->ipath_control & ~INFINIPATH_C_LINKENABLE);
1839 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
1840 ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
1841 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val);
1842 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
1843 dd->ipath_control);
1844}
1845
1846
1847static int ipath_ht_get_ib_cfg(struct ipath_devdata *dd, int which)
1848{
1849 int ret;
1850
1851 switch (which) {
1852 case IPATH_IB_CFG_LWID:
1853 ret = dd->ipath_link_width_active;
1854 break;
1855 case IPATH_IB_CFG_SPD:
1856 ret = dd->ipath_link_speed_active;
1857 break;
1858 case IPATH_IB_CFG_LWID_ENB:
1859 ret = dd->ipath_link_width_enabled;
1860 break;
1861 case IPATH_IB_CFG_SPD_ENB:
1862 ret = dd->ipath_link_speed_enabled;
1863 break;
1864 default:
1865 ret = -ENOTSUPP;
1866 break;
1867 }
1868 return ret;
1869}
1870
1871
1872/* we assume range checking is already done, if needed */
1873static int ipath_ht_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val)
1874{
1875 int ret = 0;
1876
1877 if (which == IPATH_IB_CFG_LWID_ENB)
1878 dd->ipath_link_width_enabled = val;
1879 else if (which == IPATH_IB_CFG_SPD_ENB)
1880 dd->ipath_link_speed_enabled = val;
1881 else
1882 ret = -ENOTSUPP;
1883 return ret;
1884}
1885
1886
1887static void ipath_ht_config_jint(struct ipath_devdata *dd, u16 a, u16 b)
1888{
1889}
1890
1891
1892static int ipath_ht_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
1893{
1894 ipath_setup_ht_setextled(dd, ipath_ib_linkstate(dd, ibcs),
1895 ipath_ib_linktrstate(dd, ibcs));
1896 return 0;
1897}
1898
1899
1900/**
1901 * ipath_init_iba6110_funcs - set up the chip-specific function pointers
1902 * @dd: the infinipath device
1903 *
1904 * This is global, and is called directly at init to set up the
1905 * chip-specific function pointers for later use.
1906 */
1907void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
1908{
1909 dd->ipath_f_intrsetup = ipath_ht_intconfig;
1910 dd->ipath_f_bus = ipath_setup_ht_config;
1911 dd->ipath_f_reset = ipath_setup_ht_reset;
1912 dd->ipath_f_get_boardname = ipath_ht_boardname;
1913 dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors;
1914 dd->ipath_f_early_init = ipath_ht_early_init;
1915 dd->ipath_f_handle_hwerrors = ipath_ht_handle_hwerrors;
1916 dd->ipath_f_quiet_serdes = ipath_ht_quiet_serdes;
1917 dd->ipath_f_bringup_serdes = ipath_ht_bringup_serdes;
1918 dd->ipath_f_clear_tids = ipath_ht_clear_tids;
1919 dd->ipath_f_put_tid = ipath_ht_put_tid;
1920 dd->ipath_f_cleanup = ipath_setup_ht_cleanup;
1921 dd->ipath_f_setextled = ipath_setup_ht_setextled;
1922 dd->ipath_f_get_base_info = ipath_ht_get_base_info;
1923 dd->ipath_f_free_irq = ipath_ht_free_irq;
1924 dd->ipath_f_tidtemplate = ipath_ht_tidtemplate;
1925 dd->ipath_f_intr_fallback = ipath_ht_nointr_fallback;
1926 dd->ipath_f_get_msgheader = ipath_ht_get_msgheader;
1927 dd->ipath_f_config_ports = ipath_ht_config_ports;
1928 dd->ipath_f_read_counters = ipath_ht_read_counters;
1929 dd->ipath_f_xgxs_reset = ipath_ht_xgxs_reset;
1930 dd->ipath_f_get_ib_cfg = ipath_ht_get_ib_cfg;
1931 dd->ipath_f_set_ib_cfg = ipath_ht_set_ib_cfg;
1932 dd->ipath_f_config_jint = ipath_ht_config_jint;
1933 dd->ipath_f_ib_updown = ipath_ht_ib_updown;
1934
1935 /*
1936 * initialize chip-specific variables
1937 */
1938 ipath_init_ht_variables(dd);
1939}
diff --git a/drivers/staging/rdma/ipath/ipath_init_chip.c b/drivers/staging/rdma/ipath/ipath_init_chip.c
deleted file mode 100644
index a5eea199f733..000000000000
--- a/drivers/staging/rdma/ipath/ipath_init_chip.c
+++ /dev/null
@@ -1,1062 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/pci.h>
35#include <linux/netdevice.h>
36#include <linux/moduleparam.h>
37#include <linux/slab.h>
38#include <linux/stat.h>
39#include <linux/vmalloc.h>
40
41#include "ipath_kernel.h"
42#include "ipath_common.h"
43
44/*
45 * minimum number of buffers we want per user port, after the driver takes its share
46 */
47#define IPATH_MIN_USER_PORT_BUFCNT 7
48
49/*
50 * Number of ports we are configured to use (to allow for more pio
51 * buffers per port, etc.) Zero means use chip value.
52 */
53static ushort ipath_cfgports;
54
55module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO);
56MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
57
58/*
59 * Number of buffers reserved for the driver (verbs and layered drivers).
60 * Initialized based on number of PIO buffers if not set via module interface.
61 * The problem with this is that it's global, but we'll use different
62 * numbers for different chip types.
63 */
64static ushort ipath_kpiobufs;
65
66static int ipath_set_kpiobufs(const char *val, struct kernel_param *kp);
67
68module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_ushort,
69 &ipath_kpiobufs, S_IWUSR | S_IRUGO);
70MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
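/*
 * Illustrative usage only (assuming the driver module is loaded as
 * ib_ipath): "modprobe ib_ipath cfgports=4 kpiobufs=64" would cap the
 * configured ports at 4 and reserve 64 PIO buffers for the kernel.
 */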
71
72/**
73 * create_port0_egr - allocate the eager TID buffers
74 * @dd: the infinipath device
75 *
76 * This code is now quite different for user and kernel, because
77 * the kernel uses skbs for accelerated network performance.
78 * This is the kernel (port0) version.
79 *
80 * Allocate the eager TID buffers and program them into infinipath.
81 * We use the network layer alloc_skb() allocator to allocate the
82 * memory, and either use the buffers as is for things like verbs
83 * packets, or pass the buffers up to the ipath layered driver and
84 * thence the network layer, replacing them as we do so (see
85 * ipath_rcv_layer()).
86 */
87static int create_port0_egr(struct ipath_devdata *dd)
88{
89 unsigned e, egrcnt;
90 struct ipath_skbinfo *skbinfo;
91 int ret;
92
93 egrcnt = dd->ipath_p0_rcvegrcnt;
94
95 skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt);
96 if (skbinfo == NULL) {
97 ipath_dev_err(dd, "allocation error for eager TID "
98 "skb array\n");
99 ret = -ENOMEM;
100 goto bail;
101 }
102 for (e = 0; e < egrcnt; e++) {
103 /*
104 * This is a bit tricky in that we allocate extra
105 * space for 2 bytes of the 14 byte ethernet header.
106 * These two bytes are passed in the ipath header so
107 * the rest of the data is word aligned. We allocate
108 * 4 bytes so that the data buffer stays word aligned.
109 * See ipath_kreceive() for more details.
110 */
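		/*
		 * i.e. 14 - 2 = 12 bytes is a multiple of 4, so the remaining
		 * header/data lands word aligned, and rounding the pad up from
		 * 2 to 4 bytes keeps the buffer start word aligned as well.
		 */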
111 skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL);
112 if (!skbinfo[e].skb) {
113 ipath_dev_err(dd, "SKB allocation error for "
114 "eager TID %u\n", e);
115 while (e != 0)
116 dev_kfree_skb(skbinfo[--e].skb);
117 vfree(skbinfo);
118 ret = -ENOMEM;
119 goto bail;
120 }
121 }
122 /*
123 * After loop above, so we can test non-NULL to see if ready
124 * to use at receive, etc.
125 */
126 dd->ipath_port0_skbinfo = skbinfo;
127
128 for (e = 0; e < egrcnt; e++) {
129 dd->ipath_port0_skbinfo[e].phys =
130 ipath_map_single(dd->pcidev,
131 dd->ipath_port0_skbinfo[e].skb->data,
132 dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
133 dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
134 ((char __iomem *) dd->ipath_kregbase +
135 dd->ipath_rcvegrbase),
136 RCVHQ_RCV_TYPE_EAGER,
137 dd->ipath_port0_skbinfo[e].phys);
138 }
139
140 ret = 0;
141
142bail:
143 return ret;
144}
145
146static int bringup_link(struct ipath_devdata *dd)
147{
148 u64 val, ibc;
149 int ret = 0;
150
151 /* hold IBC in reset */
152 dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
153 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
154 dd->ipath_control);
155
156 /*
157 * set initial max size pkt IBC will send, including ICRC; it's the
158 * PIO buffer size in dwords, less 1; also see ipath_set_mtu()
159 */
160 val = (dd->ipath_ibmaxlen >> 2) + 1;
161 ibc = val << dd->ibcc_mpl_shift;
162
163 /* flowcontrolwatermark is in units of KBytes */
164 ibc |= 0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT;
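	/* i.e. a 5 KByte flow control watermark */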
165 /*
166 * How often flowctrl sent. More or less in usecs; balance against
167 * watermark value, so that in theory senders always get a flow
168 * control update in time to not let the IB link go idle.
169 */
170 ibc |= 0x3ULL << INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT;
171 /* max error tolerance */
172 ibc |= 0xfULL << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
173 /* use "real" buffer space for */
174 ibc |= 4ULL << INFINIPATH_IBCC_CREDITSCALE_SHIFT;
175 /* IB credit flow control. */
176 ibc |= 0xfULL << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
177 /* initially come up waiting for TS1, without sending anything. */
178 dd->ipath_ibcctrl = ibc;
179 /*
180 * Want to start out with both LINKCMD and LINKINITCMD in NOP
181 * (0 and 0). Don't put linkinitcmd in ipath_ibcctrl, want that
182 * to stay a NOP. Flag that we are disabled, for the (unlikely)
183 * case that some recovery path is trying to bring the link up
184 * before we are ready.
185 */
186 ibc |= INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
187 INFINIPATH_IBCC_LINKINITCMD_SHIFT;
188 dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
189 ipath_cdbg(VERBOSE, "Writing 0x%llx to ibcctrl\n",
190 (unsigned long long) ibc);
191 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, ibc);
192
193	/* be sure chip saw it */
194 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
195
196 ret = dd->ipath_f_bringup_serdes(dd);
197
198 if (ret)
199 dev_info(&dd->pcidev->dev, "Could not initialize SerDes, "
200 "not usable\n");
201 else {
202 /* enable IBC */
203 dd->ipath_control |= INFINIPATH_C_LINKENABLE;
204 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
205 dd->ipath_control);
206 }
207
208 return ret;
209}
210
211static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd)
212{
213 struct ipath_portdata *pd;
214
215 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
216 if (pd) {
217 pd->port_dd = dd;
218 pd->port_cnt = 1;
219 /* The port 0 pkey table is used by the layer interface. */
220 pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
221 pd->port_seq_cnt = 1;
222 }
223 return pd;
224}
225
226static int init_chip_first(struct ipath_devdata *dd)
227{
228 struct ipath_portdata *pd;
229 int ret = 0;
230 u64 val;
231
232 spin_lock_init(&dd->ipath_kernel_tid_lock);
233 spin_lock_init(&dd->ipath_user_tid_lock);
234 spin_lock_init(&dd->ipath_sendctrl_lock);
235 spin_lock_init(&dd->ipath_uctxt_lock);
236 spin_lock_init(&dd->ipath_sdma_lock);
237 spin_lock_init(&dd->ipath_gpio_lock);
238 spin_lock_init(&dd->ipath_eep_st_lock);
239 spin_lock_init(&dd->ipath_sdepb_lock);
240 mutex_init(&dd->ipath_eep_lock);
241
242 /*
243 * skip cfgports stuff because we are not allocating memory,
244 * and we don't want problems if the portcnt changed due to
245 * cfgports. We do still check and report a difference, if
246 * not same (should be impossible).
247 */
248 dd->ipath_f_config_ports(dd, ipath_cfgports);
249 if (!ipath_cfgports)
250 dd->ipath_cfgports = dd->ipath_portcnt;
251 else if (ipath_cfgports <= dd->ipath_portcnt) {
252 dd->ipath_cfgports = ipath_cfgports;
253 ipath_dbg("Configured to use %u ports out of %u in chip\n",
254 dd->ipath_cfgports, ipath_read_kreg32(dd,
255 dd->ipath_kregs->kr_portcnt));
256 } else {
257 dd->ipath_cfgports = dd->ipath_portcnt;
258		ipath_dbg("Tried to configure %u ports; chip "
259 "only supports %u\n", ipath_cfgports,
260 ipath_read_kreg32(dd,
261 dd->ipath_kregs->kr_portcnt));
262 }
263 /*
264 * Allocate full portcnt array, rather than just cfgports, because
265 * cleanup iterates across all possible ports.
266 */
267 dd->ipath_pd = kcalloc(dd->ipath_portcnt, sizeof(*dd->ipath_pd),
268 GFP_KERNEL);
269
270 if (!dd->ipath_pd) {
271 ipath_dev_err(dd, "Unable to allocate portdata array, "
272 "failing\n");
273 ret = -ENOMEM;
274 goto done;
275 }
276
277 pd = create_portdata0(dd);
278 if (!pd) {
279 ipath_dev_err(dd, "Unable to allocate portdata for port "
280 "0, failing\n");
281 ret = -ENOMEM;
282 goto done;
283 }
284 dd->ipath_pd[0] = pd;
285
286 dd->ipath_rcvtidcnt =
287 ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
288 dd->ipath_rcvtidbase =
289 ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
290 dd->ipath_rcvegrcnt =
291 ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
292 dd->ipath_rcvegrbase =
293 ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
294 dd->ipath_palign =
295 ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
296 dd->ipath_piobufbase =
297 ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufbase);
298 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize);
299 dd->ipath_piosize2k = val & ~0U;
300 dd->ipath_piosize4k = val >> 32;
301 if (dd->ipath_piosize4k == 0 && ipath_mtu4096)
302 ipath_mtu4096 = 0; /* 4KB not supported by this chip */
303 dd->ipath_ibmtu = ipath_mtu4096 ? 4096 : 2048;
304 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt);
305 dd->ipath_piobcnt2k = val & ~0U;
306 dd->ipath_piobcnt4k = val >> 32;
307 dd->ipath_pio2kbase =
308 (u32 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
309 (dd->ipath_piobufbase & 0xffffffff));
310 if (dd->ipath_piobcnt4k) {
311 dd->ipath_pio4kbase = (u32 __iomem *)
312 (((char __iomem *) dd->ipath_kregbase) +
313 (dd->ipath_piobufbase >> 32));
314 /*
315 * 4K buffers take 2 pages; we use roundup just to be
316 * paranoid; we calculate it once here, rather than on
317		 * every buf allocate
318 */
319 dd->ipath_4kalign = ALIGN(dd->ipath_piosize4k,
320 dd->ipath_palign);
321 ipath_dbg("%u 2k(%x) piobufs @ %p, %u 4k(%x) @ %p "
322 "(%x aligned)\n",
323 dd->ipath_piobcnt2k, dd->ipath_piosize2k,
324 dd->ipath_pio2kbase, dd->ipath_piobcnt4k,
325 dd->ipath_piosize4k, dd->ipath_pio4kbase,
326 dd->ipath_4kalign);
327 } else {
328 ipath_dbg("%u 2k piobufs @ %p\n",
329 dd->ipath_piobcnt2k, dd->ipath_pio2kbase);
330 }
331done:
332 return ret;
333}
334
335/**
336 * init_chip_reset - re-initialize after a reset, or enable
337 * @dd: the infinipath device
338 *
339 * sanity check at least some of the values after reset, and
340 * ensure that no receive or transmit is active (explicitly, in case the
341 * reset failed)
342 */
343static int init_chip_reset(struct ipath_devdata *dd)
344{
345 u32 rtmp;
346 int i;
347 unsigned long flags;
348
349 /*
350 * ensure chip does no sends or receives, tail updates, or
351 * pioavail updates while we re-initialize
352 */
353 dd->ipath_rcvctrl &= ~(1ULL << dd->ipath_r_tailupd_shift);
354 for (i = 0; i < dd->ipath_portcnt; i++) {
355 clear_bit(dd->ipath_r_portenable_shift + i,
356 &dd->ipath_rcvctrl);
357 clear_bit(dd->ipath_r_intravail_shift + i,
358 &dd->ipath_rcvctrl);
359 }
360 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
361 dd->ipath_rcvctrl);
362
363 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
364 dd->ipath_sendctrl = 0U; /* no sdma, etc */
365 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
366 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
367 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
368
369 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
370
371 rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
372 if (rtmp != dd->ipath_rcvtidcnt)
373 dev_info(&dd->pcidev->dev, "tidcnt was %u before "
374 "reset, now %u, using original\n",
375 dd->ipath_rcvtidcnt, rtmp);
376 rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
377 if (rtmp != dd->ipath_rcvtidbase)
378 dev_info(&dd->pcidev->dev, "tidbase was %u before "
379 "reset, now %u, using original\n",
380 dd->ipath_rcvtidbase, rtmp);
381 rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
382 if (rtmp != dd->ipath_rcvegrcnt)
383 dev_info(&dd->pcidev->dev, "egrcnt was %u before "
384 "reset, now %u, using original\n",
385 dd->ipath_rcvegrcnt, rtmp);
386 rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
387 if (rtmp != dd->ipath_rcvegrbase)
388 dev_info(&dd->pcidev->dev, "egrbase was %u before "
389 "reset, now %u, using original\n",
390 dd->ipath_rcvegrbase, rtmp);
391
392 return 0;
393}
394
395static int init_pioavailregs(struct ipath_devdata *dd)
396{
397 int ret;
398
399 dd->ipath_pioavailregs_dma = dma_alloc_coherent(
400 &dd->pcidev->dev, PAGE_SIZE, &dd->ipath_pioavailregs_phys,
401 GFP_KERNEL);
402 if (!dd->ipath_pioavailregs_dma) {
403 ipath_dev_err(dd, "failed to allocate PIOavail reg area "
404 "in memory\n");
405 ret = -ENOMEM;
406 goto done;
407 }
408
409 /*
410 * we really want L2 cache aligned, but for current CPUs of
411 * interest, they are the same.
412 */
413 dd->ipath_statusp = (u64 *)
414 ((char *)dd->ipath_pioavailregs_dma +
415 ((2 * L1_CACHE_BYTES +
416 dd->ipath_pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES));
417 /* copy the current value now that it's really allocated */
418 *dd->ipath_statusp = dd->_ipath_status;
419 /*
420 * setup buffer to hold freeze msg, accessible to apps,
421 * following statusp
422 */
423 dd->ipath_freezemsg = (char *)&dd->ipath_statusp[1];
424 /* and its length */
425 dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]);
426
427 ret = 0;
428
429done:
430 return ret;
431}
432
433/**
434 * init_shadow_tids - allocate the shadow TID array
435 * @dd: the infinipath device
436 *
437 * allocate the shadow TID array, so we can ipath_munlock previous
438 * entries. It may make more sense to move the pageshadow to the
439 * port data structure, so we only allocate memory for ports actually
440 * in use, since we are now at 8k per port.
441 */
442static void init_shadow_tids(struct ipath_devdata *dd)
443{
444 struct page **pages;
445 dma_addr_t *addrs;
446
447 pages = vzalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
448 sizeof(struct page *));
449 if (!pages) {
450 ipath_dev_err(dd, "failed to allocate shadow page * "
451 "array, no expected sends!\n");
452 dd->ipath_pageshadow = NULL;
453 return;
454 }
455
456 addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
457 sizeof(dma_addr_t));
458 if (!addrs) {
459 ipath_dev_err(dd, "failed to allocate shadow dma handle "
460 "array, no expected sends!\n");
461 vfree(pages);
462 dd->ipath_pageshadow = NULL;
463 return;
464 }
465
466 dd->ipath_pageshadow = pages;
467 dd->ipath_physshadow = addrs;
468}
469
470static void enable_chip(struct ipath_devdata *dd, int reinit)
471{
472 u32 val;
473 u64 rcvmask;
474 unsigned long flags;
475 int i;
476
477 if (!reinit)
478 init_waitqueue_head(&ipath_state_wait);
479
480 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
481 dd->ipath_rcvctrl);
482
483 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
484 /* Enable PIO send, and update of PIOavail regs to memory. */
485 dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE |
486 INFINIPATH_S_PIOBUFAVAILUPD;
487
488 /*
489 * Set the PIO avail update threshold to host memory
490 * on chips that support it.
491 */
492 if (dd->ipath_pioupd_thresh)
493 dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
494 << INFINIPATH_S_UPDTHRESH_SHIFT;
495 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
496 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
497 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
498
499 /*
500 * Enable kernel ports' receive and receive interrupt.
501 * Other ports done as user opens and inits them.
502 */
503 rcvmask = 1ULL;
504 dd->ipath_rcvctrl |= (rcvmask << dd->ipath_r_portenable_shift) |
505 (rcvmask << dd->ipath_r_intravail_shift);
506 if (!(dd->ipath_flags & IPATH_NODMA_RTAIL))
507 dd->ipath_rcvctrl |= (1ULL << dd->ipath_r_tailupd_shift);
508
509 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
510 dd->ipath_rcvctrl);
511
512 /*
513 * now ready for use. this should be cleared whenever we
514 * detect a reset, or initiate one.
515 */
516 dd->ipath_flags |= IPATH_INITTED;
517
518 /*
519 * Init our shadow copies of head from tail values,
520 * and write head values to match.
521 */
522 val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0);
523 ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0);
524
525 /* Initialize so we interrupt on next packet received */
526 ipath_write_ureg(dd, ur_rcvhdrhead,
527 dd->ipath_rhdrhead_intr_off |
528 dd->ipath_pd[0]->port_head, 0);
529
530 /*
531 * by now pioavail updates to memory should have occurred, so
532 * copy them into our working/shadow registers; this is in
533 * case something went wrong with abort, but mostly to get the
534 * initial values of the generation bit correct.
535 */
536 for (i = 0; i < dd->ipath_pioavregs; i++) {
537 __le64 pioavail;
538
539 /*
540 * Chip Errata bug 6641; even and odd qwords>3 are swapped.
541 */
542 if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
543 pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
544 else
545 pioavail = dd->ipath_pioavailregs_dma[i];
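		/* e.g. with the swap, shadow index 4 loads from dma[5] and 5 from dma[4] */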
546 /*
547 * don't need to worry about ipath_pioavailkernel here
548 * because we will call ipath_chg_pioavailkernel() later
549 * in initialization, to busy out buffers as needed
550 */
551 dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail);
552 }
553 /* can get counters, stats, etc. */
554 dd->ipath_flags |= IPATH_PRESENT;
555}
556
557static int init_housekeeping(struct ipath_devdata *dd, int reinit)
558{
559 char boardn[40];
560 int ret = 0;
561
562 /*
563 * have to clear shadow copies of registers at init that are
564 * not otherwise set here, or all kinds of bizarre things
565 * happen with the driver on chip reset
566 */
567 dd->ipath_rcvhdrsize = 0;
568
569 /*
570 * Don't clear ipath_flags as 8bit mode was set before
571 * entering this func. However, we do set the linkstate to
572 * unknown, so we can watch for a transition.
573 * PRESENT is set because we want register reads to work,
574 * and the kernel infrastructure saw it in config space;
575 * We clear it if we have failures.
576 */
577 dd->ipath_flags |= IPATH_LINKUNK | IPATH_PRESENT;
578 dd->ipath_flags &= ~(IPATH_LINKACTIVE | IPATH_LINKARMED |
579 IPATH_LINKDOWN | IPATH_LINKINIT);
580
581 ipath_cdbg(VERBOSE, "Try to read spc chip revision\n");
582 dd->ipath_revision =
583 ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
584
585 /*
586 * set up fundamental info we need to use the chip; we assume
587 * if the revision reg and these regs are OK, we don't need to
588 * special case the rest
589 */
590 dd->ipath_sregbase =
591 ipath_read_kreg32(dd, dd->ipath_kregs->kr_sendregbase);
592 dd->ipath_cregbase =
593 ipath_read_kreg32(dd, dd->ipath_kregs->kr_counterregbase);
594 dd->ipath_uregbase =
595 ipath_read_kreg32(dd, dd->ipath_kregs->kr_userregbase);
596 ipath_cdbg(VERBOSE, "ipath_kregbase %p, sendbase %x usrbase %x, "
597 "cntrbase %x\n", dd->ipath_kregbase, dd->ipath_sregbase,
598 dd->ipath_uregbase, dd->ipath_cregbase);
599 if ((dd->ipath_revision & 0xffffffff) == 0xffffffff
600 || (dd->ipath_sregbase & 0xffffffff) == 0xffffffff
601 || (dd->ipath_cregbase & 0xffffffff) == 0xffffffff
602 || (dd->ipath_uregbase & 0xffffffff) == 0xffffffff) {
603 ipath_dev_err(dd, "Register read failures from chip, "
604 "giving up initialization\n");
605 dd->ipath_flags &= ~IPATH_PRESENT;
606 ret = -ENODEV;
607 goto done;
608 }
609
610
611 /* clear diagctrl register, in case diags were running and crashed */
612	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, 0);
613
614 /* clear the initial reset flag, in case first driver load */
615 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
616 INFINIPATH_E_RESET);
617
618 ipath_cdbg(VERBOSE, "Revision %llx (PCI %x)\n",
619 (unsigned long long) dd->ipath_revision,
620 dd->ipath_pcirev);
621
622 if (((dd->ipath_revision >> INFINIPATH_R_SOFTWARE_SHIFT) &
623 INFINIPATH_R_SOFTWARE_MASK) != IPATH_CHIP_SWVERSION) {
624 ipath_dev_err(dd, "Driver only handles version %d, "
625			      "chip swversion is %d (%llx), failing\n",
626 IPATH_CHIP_SWVERSION,
627 (int)(dd->ipath_revision >>
628 INFINIPATH_R_SOFTWARE_SHIFT) &
629 INFINIPATH_R_SOFTWARE_MASK,
630 (unsigned long long) dd->ipath_revision);
631 ret = -ENOSYS;
632 goto done;
633 }
634 dd->ipath_majrev = (u8) ((dd->ipath_revision >>
635 INFINIPATH_R_CHIPREVMAJOR_SHIFT) &
636 INFINIPATH_R_CHIPREVMAJOR_MASK);
637 dd->ipath_minrev = (u8) ((dd->ipath_revision >>
638 INFINIPATH_R_CHIPREVMINOR_SHIFT) &
639 INFINIPATH_R_CHIPREVMINOR_MASK);
640 dd->ipath_boardrev = (u8) ((dd->ipath_revision >>
641 INFINIPATH_R_BOARDID_SHIFT) &
642 INFINIPATH_R_BOARDID_MASK);
643
644 ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn);
645
646 snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion),
647 "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
648 "SW Compat %u\n",
649 IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn,
650 (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) &
651 INFINIPATH_R_ARCH_MASK,
652 dd->ipath_majrev, dd->ipath_minrev, dd->ipath_pcirev,
653 (unsigned)(dd->ipath_revision >>
654 INFINIPATH_R_SOFTWARE_SHIFT) &
655 INFINIPATH_R_SOFTWARE_MASK);
656
657 ipath_dbg("%s", dd->ipath_boardversion);
658
659 if (ret)
660 goto done;
661
662 if (reinit)
663 ret = init_chip_reset(dd);
664 else
665 ret = init_chip_first(dd);
666
667done:
668 return ret;
669}
670
671static void verify_interrupt(unsigned long opaque)
672{
673 struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
674
675 if (!dd)
676 return; /* being torn down */
677
678 /*
679 * If we don't have any interrupts, let the user know and
680 * don't bother checking again.
681 */
682 if (dd->ipath_int_counter == 0) {
683 if (!dd->ipath_f_intr_fallback(dd))
684 dev_err(&dd->pcidev->dev, "No interrupts detected, "
685 "not usable.\n");
686 else /* re-arm the timer to see if fallback works */
687 mod_timer(&dd->ipath_intrchk_timer, jiffies + HZ/2);
688 } else
689 ipath_cdbg(VERBOSE, "%u interrupts at timer check\n",
690 dd->ipath_int_counter);
691}
692
693/**
694 * ipath_init_chip - do the actual initialization sequence on the chip
695 * @dd: the infinipath device
696 * @reinit: reinitializing, so don't allocate new memory
697 *
698 * Do the actual initialization sequence on the chip. This is done
699 * both from the init routine called from the PCI infrastructure, and
700 * when we reset the chip, or detect that it was reset internally,
701 * or it's administratively re-enabled.
702 *
703 * Memory allocation here and in called routines is only done in
704 * the first case (reinit == 0). We have to be careful, because even
705 * without memory allocation, we need to re-write all the chip registers
706 * TIDs, etc. after the reset or enable has completed.
707 */
708int ipath_init_chip(struct ipath_devdata *dd, int reinit)
709{
710 int ret = 0;
711 u32 kpiobufs, defkbufs;
712 u32 piobufs, uports;
713 u64 val;
714 struct ipath_portdata *pd;
715 gfp_t gfp_flags = GFP_USER | __GFP_COMP;
716
717 ret = init_housekeeping(dd, reinit);
718 if (ret)
719 goto done;
720
721 /*
722 * We could bump this to allow for full rcvegrcnt + rcvtidcnt,
723 * but then it no longer nicely fits a power of two, and since
724 * we now use routines that backend onto __get_free_pages, the
725 * rest would be wasted.
726 */
727 dd->ipath_rcvhdrcnt = max(dd->ipath_p0_rcvegrcnt, dd->ipath_rcvegrcnt);
728 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrcnt,
729 dd->ipath_rcvhdrcnt);
730
731 /*
732 * Set up the shadow copies of the piobufavail registers,
733 * which we compare against the chip registers for now, and
734 * the in memory DMA'ed copies of the registers. This has to
735 * be done early, before we calculate lastport, etc.
736 */
737 piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
738 /*
739 * calc number of pioavail registers, and save it; we have 2
740 * bits per buffer.
741 */
742 dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
743 / (sizeof(u64) * BITS_PER_BYTE / 2);
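	/*
	 * e.g. at 2 bits per buffer a 64-bit register covers 32 buffers, so
	 * 144 PIO buffers would need ALIGN(144, 32) / 32 = 5 pioavail registers
	 */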
744 uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
745 if (piobufs > 144)
746 defkbufs = 32 + dd->ipath_pioreserved;
747 else
748 defkbufs = 16 + dd->ipath_pioreserved;
749
750 if (ipath_kpiobufs && (ipath_kpiobufs +
751 (uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) {
752 int i = (int) piobufs -
753 (int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
754 if (i < 1)
755 i = 1;
756 dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
757 "%d for kernel leaves too few for %d user ports "
758 "(%d each); using %u\n", ipath_kpiobufs,
759 piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
760 /*
761 * shouldn't change ipath_kpiobufs, because could be
762 * different for different devices...
763 */
764 kpiobufs = i;
765 } else if (ipath_kpiobufs)
766 kpiobufs = ipath_kpiobufs;
767 else
768 kpiobufs = defkbufs;
769 dd->ipath_lastport_piobuf = piobufs - kpiobufs;
770 dd->ipath_pbufsport =
771 uports ? dd->ipath_lastport_piobuf / uports : 0;
772 /* if not an even divisor, some user ports get extra buffers */
773 dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf -
774 (dd->ipath_pbufsport * uports);
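	/*
	 * e.g. (illustrative numbers) 112 user buffers over 5 user ports is
	 * 22 each with 2 left over, so the first 2 user ports get 23
	 */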
775 if (dd->ipath_ports_extrabuf)
776 ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to "
777 "ports <= %u\n", dd->ipath_pbufsport,
778 dd->ipath_ports_extrabuf);
779 dd->ipath_lastpioindex = 0;
780 dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
781 /* ipath_pioavailshadow initialized earlier */
782 ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
783 "each for %u user ports\n", kpiobufs,
784 piobufs, dd->ipath_pbufsport, uports);
785 ret = dd->ipath_f_early_init(dd);
786 if (ret) {
787 ipath_dev_err(dd, "Early initialization failure\n");
788 goto done;
789 }
790
791 /*
792 * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
793 * done after early_init.
794 */
795 dd->ipath_hdrqlast =
796 dd->ipath_rcvhdrentsize * (dd->ipath_rcvhdrcnt - 1);
797 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrentsize,
798 dd->ipath_rcvhdrentsize);
799 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
800 dd->ipath_rcvhdrsize);
801
802 if (!reinit) {
803 ret = init_pioavailregs(dd);
804 init_shadow_tids(dd);
805 if (ret)
806 goto done;
807 }
808
809 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
810 dd->ipath_pioavailregs_phys);
811
812 /*
813 * this is to detect s/w errors, which the h/w works around by
814 * ignoring the low 6 bits of address, if it wasn't aligned.
815 */
816 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpioavailaddr);
817 if (val != dd->ipath_pioavailregs_phys) {
818 ipath_dev_err(dd, "Catastrophic software error, "
819 "SendPIOAvailAddr written as %lx, "
820 "read back as %llx\n",
821 (unsigned long) dd->ipath_pioavailregs_phys,
822 (unsigned long long) val);
823 ret = -EINVAL;
824 goto done;
825 }
826
827 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP);
828
829 /*
830 * make sure we are not in freeze, and PIO send enabled, so
831 * writes to pbc happen
832 */
833 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, 0ULL);
834 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
835 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
836 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
837
838 /*
839 * before error clears, since we expect serdes pll errors during
840 * this, the first time after reset
841 */
842 if (bringup_link(dd)) {
843 dev_info(&dd->pcidev->dev, "Failed to bringup IB link\n");
844 ret = -ENETDOWN;
845 goto done;
846 }
847
848 /*
849 * clear any "expected" hwerrs from reset and/or initialization
850 * clear any that aren't enabled (at least this once), and then
851 * set the enable mask
852 */
853 dd->ipath_f_init_hwerrors(dd);
854 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
855 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
856 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
857 dd->ipath_hwerrmask);
858
859 /* clear all */
860 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
861 /* enable errors that are masked, at least this first time. */
862 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
863 ~dd->ipath_maskederrs);
864 dd->ipath_maskederrs = 0; /* don't re-enable ignored in timer */
865 dd->ipath_errormask =
866 ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
867 /* clear any interrupts up to this point (ints still not enabled) */
868 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
869
870 dd->ipath_f_tidtemplate(dd);
871
872 /*
873 * Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing
874 * re-init, the simplest way to handle this is to free
875 * existing, and re-allocate.
876 * Need to re-create rest of port 0 portdata as well.
877 */
878 pd = dd->ipath_pd[0];
879 if (reinit) {
880 struct ipath_portdata *npd;
881
882 /*
883		 * Alloc and init a new ipath_portdata for port 0,
884		 * then free the old pd. Could lead to fragmentation, but also
885 * makes later support for hot-swap easier.
886 */
887 npd = create_portdata0(dd);
888 if (npd) {
889 ipath_free_pddata(dd, pd);
890 dd->ipath_pd[0] = npd;
891 pd = npd;
892 } else {
893 ipath_dev_err(dd, "Unable to allocate portdata"
894 " for port 0, failing\n");
895 ret = -ENOMEM;
896 goto done;
897 }
898 }
899 ret = ipath_create_rcvhdrq(dd, pd);
900 if (!ret)
901 ret = create_port0_egr(dd);
902 if (ret) {
903 ipath_dev_err(dd, "failed to allocate kernel port's "
904 "rcvhdrq and/or egr bufs\n");
905 goto done;
906 } else {
907 enable_chip(dd, reinit);
908 }
909
910 /* after enable_chip, so pioavailshadow setup */
911 ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
912
913 /*
914 * Cancel any possible active sends from early driver load.
915 * Follows early_init because some chips have to initialize
916 * PIO buffers in early_init to avoid false parity errors.
917 * After enable and ipath_chg_pioavailkernel so we can safely
918 * enable pioavail updates and PIOENABLE; packets are now
919 * ready to go out.
920 */
921 ipath_cancel_sends(dd, 1);
922
923 if (!reinit) {
924 /*
925 * Used when we close a port, for DMA already in flight
926 * at close.
927 */
928 dd->ipath_dummy_hdrq = dma_alloc_coherent(
929 &dd->pcidev->dev, dd->ipath_pd[0]->port_rcvhdrq_size,
930 &dd->ipath_dummy_hdrq_phys,
931 gfp_flags);
932 if (!dd->ipath_dummy_hdrq) {
933 dev_info(&dd->pcidev->dev,
934 "Couldn't allocate 0x%lx bytes for dummy hdrq\n",
935 dd->ipath_pd[0]->port_rcvhdrq_size);
936 /* fallback to just 0'ing */
937 dd->ipath_dummy_hdrq_phys = 0UL;
938 }
939 }
940
941 /*
942 * cause retrigger of pending interrupts ignored during init,
943 * even if we had errors
944 */
945 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
946
947 if (!dd->ipath_stats_timer_active) {
948 /*
949 * first init, or after an admin disable/enable
950 * set up stats retrieval timer, even if we had errors
951 * in last portion of setup
952 */
953 setup_timer(&dd->ipath_stats_timer, ipath_get_faststats,
954 (unsigned long)dd);
955 /* every 5 seconds; */
956 dd->ipath_stats_timer.expires = jiffies + 5 * HZ;
957		/* takes ~16 seconds to overflow at full IB 4x bandwidth */
958 add_timer(&dd->ipath_stats_timer);
959 dd->ipath_stats_timer_active = 1;
960 }
961
962 /* Set up SendDMA if chip supports it */
963 if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
964 ret = setup_sdma(dd);
965
966 /* Set up HoL state */
967 setup_timer(&dd->ipath_hol_timer, ipath_hol_event, (unsigned long)dd);
968
969 dd->ipath_hol_state = IPATH_HOL_UP;
970
971done:
972 if (!ret) {
973 *dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT;
974 if (!dd->ipath_f_intrsetup(dd)) {
975 /* now we can enable all interrupts from the chip */
976 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
977 -1LL);
978 /* force re-interrupt of any pending interrupts. */
979 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear,
980 0ULL);
981 /* chip is usable; mark it as initialized */
982 *dd->ipath_statusp |= IPATH_STATUS_INITTED;
983
984 /*
985 * setup to verify we get an interrupt, and fallback
986 * to an alternate if necessary and possible
987 */
988 if (!reinit) {
989 setup_timer(&dd->ipath_intrchk_timer,
990 verify_interrupt,
991 (unsigned long)dd);
992 }
993 dd->ipath_intrchk_timer.expires = jiffies + HZ/2;
994 add_timer(&dd->ipath_intrchk_timer);
995 } else
996 ipath_dev_err(dd, "No interrupts enabled, couldn't "
997 "setup interrupt address\n");
998
999 if (dd->ipath_cfgports > ipath_stats.sps_nports)
1000 /*
1001 * sps_nports is a global, so, we set it to
1002 * the highest number of ports of any of the
1003 * chips we find; we never decrement it, at
1004 * least for now. Since this might have changed
1005 * over disable/enable or prior to reset, always
1006 * do the check and potentially adjust.
1007 */
1008 ipath_stats.sps_nports = dd->ipath_cfgports;
1009 } else
1010 ipath_dbg("Failed (%d) to initialize chip\n", ret);
1011
1012 /* if ret is non-zero, we probably should do some cleanup
1013 here... */
1014 return ret;
1015}
1016
1017static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
1018{
1019 struct ipath_devdata *dd;
1020 unsigned long flags;
1021 unsigned short val;
1022 int ret;
1023
1024 ret = ipath_parse_ushort(str, &val);
1025
1026 spin_lock_irqsave(&ipath_devs_lock, flags);
1027
1028 if (ret < 0)
1029 goto bail;
1030
1031 if (val == 0) {
1032 ret = -EINVAL;
1033 goto bail;
1034 }
1035
1036 list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
1037 if (dd->ipath_kregbase)
1038 continue;
1039 if (val > (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
1040 (dd->ipath_cfgports *
1041 IPATH_MIN_USER_PORT_BUFCNT)))
1042 {
1043 ipath_dev_err(
1044 dd,
1045 "Allocating %d PIO bufs for kernel leaves "
1046 "too few for %d user ports (%d each)\n",
1047 val, dd->ipath_cfgports - 1,
1048 IPATH_MIN_USER_PORT_BUFCNT);
1049 ret = -EINVAL;
1050 goto bail;
1051 }
1052 dd->ipath_lastport_piobuf =
1053 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
1054 }
1055
1056 ipath_kpiobufs = val;
1057 ret = 0;
1058bail:
1059 spin_unlock_irqrestore(&ipath_devs_lock, flags);
1060
1061 return ret;
1062}
diff --git a/drivers/staging/rdma/ipath/ipath_intr.c b/drivers/staging/rdma/ipath/ipath_intr.c
deleted file mode 100644
index 0403fa28ed8d..000000000000
--- a/drivers/staging/rdma/ipath/ipath_intr.c
+++ /dev/null
@@ -1,1271 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/pci.h>
35#include <linux/delay.h>
36
37#include "ipath_kernel.h"
38#include "ipath_verbs.h"
39#include "ipath_common.h"
40
41
42/*
43 * Called when we might have an error that is specific to a particular
44 * PIO buffer, and may need to cancel that buffer, so it can be re-used.
45 */
46void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
47{
48 u32 piobcnt;
49 unsigned long sbuf[4];
50 /*
51 * it's possible that sendbuffererror could have bits set; might
52 * have already done this as a result of hardware error handling
53 */
54 piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
55 /* read these before writing errorclear */
56 sbuf[0] = ipath_read_kreg64(
57 dd, dd->ipath_kregs->kr_sendbuffererror);
58 sbuf[1] = ipath_read_kreg64(
59 dd, dd->ipath_kregs->kr_sendbuffererror + 1);
60 if (piobcnt > 128)
61 sbuf[2] = ipath_read_kreg64(
62 dd, dd->ipath_kregs->kr_sendbuffererror + 2);
63 if (piobcnt > 192)
64 sbuf[3] = ipath_read_kreg64(
65 dd, dd->ipath_kregs->kr_sendbuffererror + 3);
66 else
67 sbuf[3] = 0;
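	/*
	 * each 64-bit sendbuffererror register covers 64 PIO buffers, so the
	 * four registers read above are enough for up to 256 buffers
	 */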
68
69 if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
70 int i;
71 if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
72 time_after(dd->ipath_lastcancel, jiffies)) {
73 __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
74 "SendbufErrs %lx %lx", sbuf[0],
75 sbuf[1]);
76 if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
77 printk(" %lx %lx ", sbuf[2], sbuf[3]);
78 printk("\n");
79 }
80
81 for (i = 0; i < piobcnt; i++)
82 if (test_bit(i, sbuf))
83 ipath_disarm_piobufs(dd, i, 1);
84 /* ignore armlaunch errs for a bit */
85 dd->ipath_lastcancel = jiffies+3;
86 }
87}
88
89
90/* These are all rcv-related errors which we want to count for stats */
91#define E_SUM_PKTERRS \
92 (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
93 INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
94 INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \
95 INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \
96 INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
97 INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)
98
99/* These are all send-related errors which we want to count for stats */
100#define E_SUM_ERRS \
101 (INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
102 INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
103 INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \
104 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
105 INFINIPATH_E_INVALIDADDR)
106
107/*
108 * This is similar to E_SUM_ERRS, but we can't ignore armlaunch and we
109 * don't ignore errors unrelated to freeze and cancelling buffers. We
110 * can't ignore armlaunch because more could arrive while we are still
111 * cleaning up, and we need to cancel those as they happen.
112 */
113#define E_SPKT_ERRS_IGNORE \
114 (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
115 INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \
116 INFINIPATH_E_SPKTLEN)
117
118/*
119 * these are errors that can occur when the link changes state while
120 * a packet is being sent or received. This doesn't cover things
121 * like EBP or VCRC, which can result from the sender having the
122 * link change state, so that we receive a "known bad" packet.
123 */
124#define E_SUM_LINK_PKTERRS \
125 (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
126 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
127 INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RMINPKTLEN | \
128 INFINIPATH_E_RUNEXPCHAR)
129
130static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
131{
132 u64 ignore_this_time = 0;
133
134 ipath_disarm_senderrbufs(dd);
135 if ((errs & E_SUM_LINK_PKTERRS) &&
136 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
137 /*
138 * This can happen when SMA is trying to bring the link
139 * up, but the IB link changes state at the "wrong" time.
140 * The IB logic then complains that the packet isn't
141 * valid. We don't want to confuse people, so we just
142		 * don't print them, except at debug level.
143 */
144 ipath_dbg("Ignoring packet errors %llx, because link not "
145 "ACTIVE\n", (unsigned long long) errs);
146 ignore_this_time = errs & E_SUM_LINK_PKTERRS;
147 }
148
149 return ignore_this_time;
150}
151
152/* generic hw error messages... */
153#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
154 { \
155 .mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a << \
156 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ), \
157 .msg = "TXE " #a " Memory Parity" \
158 }
159#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
160 { \
161 .mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a << \
162 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ), \
163 .msg = "RXE " #a " Memory Parity" \
164 }
165
166static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
167 INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
168 INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),
169
170 INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
171 INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
172 INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),
173
174 INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
175 INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
176 INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
177 INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
178 INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
179 INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
180 INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
181};
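As a worked example of the helper macros above, INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF) pastes the argument onto the bit-field name and stringifies it, so the first table entry expands to roughly:

	{
		.mask = (INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF <<
			 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT),
		.msg = "TXE PIOBUF Memory Parity"
	}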
182
183/**
184 * ipath_format_hwmsg - format a single hwerror message
185 * @msg: message buffer
186 * @msgl: length of message buffer
187 * @hwmsg: message to add to message buffer
188 */
189static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
190{
191 strlcat(msg, "[", msgl);
192 strlcat(msg, hwmsg, msgl);
193 strlcat(msg, "]", msgl);
194}
195
196/**
197 * ipath_format_hwerrors - format hardware error messages for display
198 * @hwerrs: hardware errors bit vector
199 * @hwerrmsgs: hardware error descriptions
200 * @nhwerrmsgs: number of hwerrmsgs
201 * @msg: message buffer
202 * @msgl: message buffer length
203 */
204void ipath_format_hwerrors(u64 hwerrs,
205 const struct ipath_hwerror_msgs *hwerrmsgs,
206 size_t nhwerrmsgs,
207 char *msg, size_t msgl)
208{
209 int i;
210 const int glen =
211 ARRAY_SIZE(ipath_generic_hwerror_msgs);
212
213	for (i = 0; i < glen; i++) {
214 if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
215 ipath_format_hwmsg(msg, msgl,
216 ipath_generic_hwerror_msgs[i].msg);
217 }
218 }
219
220	for (i = 0; i < nhwerrmsgs; i++) {
221 if (hwerrs & hwerrmsgs[i].mask) {
222 ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
223 }
224 }
225}
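A self-contained sketch of the table-driven formatting done by ipath_format_hwerrors(); the append() helper stands in for strlcat(), and the table contents are invented for illustration:

#include <stdio.h>
#include <string.h>

struct hwerror_msg {
	unsigned long long mask;
	const char *msg;
};

/* bounded append, in the spirit of the strlcat() calls above */
static void append(char *dst, size_t dstsz, const char *src)
{
	size_t len = strlen(dst);

	if (len + 1 < dstsz)
		snprintf(dst + len, dstsz - len, "%s", src);
}

int main(void)
{
	static const struct hwerror_msg msgs[] = {
		{ 1ULL << 0, "TXE PIOBUF Memory Parity" },
		{ 1ULL << 1, "RXE RCVBUF Memory Parity" },
	};
	unsigned long long hwerrs = (1ULL << 0) | (1ULL << 1);
	char buf[128] = "";
	size_t i;

	for (i = 0; i < sizeof(msgs) / sizeof(msgs[0]); i++)
		if (hwerrs & msgs[i].mask) {
			append(buf, sizeof(buf), "[");
			append(buf, sizeof(buf), msgs[i].msg);
			append(buf, sizeof(buf), "]");
		}

	/* prints: [TXE PIOBUF Memory Parity][RXE RCVBUF Memory Parity] */
	printf("%s\n", buf);
	return 0;
}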
226
227/* return the strings for the most common link states */
228static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
229{
230 char *ret;
231 u32 state;
232
233 state = ipath_ib_state(dd, ibcs);
234 if (state == dd->ib_init)
235 ret = "Init";
236 else if (state == dd->ib_arm)
237 ret = "Arm";
238 else if (state == dd->ib_active)
239 ret = "Active";
240 else
241 ret = "Down";
242 return ret;
243}
244
245void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev)
246{
247 struct ib_event event;
248
249 event.device = &dd->verbs_dev->ibdev;
250 event.element.port_num = 1;
251 event.event = ev;
252 ib_dispatch_event(&event);
253}
254
255static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
256 ipath_err_t errs)
257{
258 u32 ltstate, lstate, ibstate, lastlstate;
259 u32 init = dd->ib_init;
260 u32 arm = dd->ib_arm;
261 u32 active = dd->ib_active;
262 const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
263
264 lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */
265 ibstate = ipath_ib_state(dd, ibcs);
266 /* linkstate at last interrupt */
267 lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
268	ltstate = ipath_ib_linktrstate(dd, ibcs); /* link training state */
269
270 /*
271 * Since going into a recovery state causes the link state to go
272 * down and since recovery is transitory, it is better if we "miss"
273 * ever seeing the link training state go into recovery (i.e.,
274 * ignore this transition for link state special handling purposes)
275 * without even updating ipath_lastibcstat.
276 */
277 if ((ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN) ||
278 (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT) ||
279 (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERIDLE))
280 goto done;
281
282 /*
283 * if linkstate transitions into INIT from any of the various down
284 * states, or if it transitions from any of the up (INIT or better)
285 * states into any of the down states (except link recovery), then
286 * call the chip-specific code to take appropriate actions.
287 */
288 if (lstate >= INFINIPATH_IBCS_L_STATE_INIT &&
289 lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) {
290 /* transitioned to UP */
291 if (dd->ipath_f_ib_updown(dd, 1, ibcs)) {
292 /* link came up, so we must no longer be disabled */
293 dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
294 ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n");
295 goto skip_ibchange; /* chip-code handled */
296 }
297 } else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT ||
298 (dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) &&
299 ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT &&
300 ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
301 int handled;
302 handled = dd->ipath_f_ib_updown(dd, 0, ibcs);
303 dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY;
304 if (handled) {
305 ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n");
306 goto skip_ibchange; /* chip-code handled */
307 }
308 }
309
310 /*
311 * Significant enough to always print and get into logs, if it was
312 * unexpected. If it was a requested state change, we'll have
313 * already cleared the flags, so we won't print this warning
314 */
315 if ((ibstate != arm && ibstate != active) &&
316 (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
317 dev_info(&dd->pcidev->dev, "Link state changed from %s "
318 "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ?
319 "ARM" : "ACTIVE", ib_linkstate(dd, ibcs));
320 }
321
322 if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
323 ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
324 u32 lastlts;
325 lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
326 /*
327 * Ignore cycling back and forth from Polling.Active to
328 * Polling.Quiet while waiting for the other end of the link
329 * to come up, except to try and decide if we are connected
330 * to a live IB device or not. We will cycle back and
331 * forth between them if no cable is plugged in, the other
332 * device is powered off or disabled, etc.
333 */
334 if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
335 lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
336 if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) &&
337 (++dd->ipath_ibpollcnt == 40)) {
338 dd->ipath_flags |= IPATH_NOCABLE;
339 *dd->ipath_statusp |=
340 IPATH_STATUS_IB_NOCABLE;
341 ipath_cdbg(LINKVERB, "Set NOCABLE\n");
342 }
343 ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n",
344 ipath_ibcstatus_str[ltstate], ibstate);
345 goto skip_ibchange;
346 }
347 }
348
349 dd->ipath_ibpollcnt = 0; /* not poll*, now */
350 ipath_stats.sps_iblink++;
351
352 if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) {
353 u64 linkrecov;
354 linkrecov = ipath_snap_cntr(dd,
355 dd->ipath_cregs->cr_iblinkerrrecovcnt);
356 if (linkrecov != dd->ipath_lastlinkrecov) {
357 ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n",
358 (unsigned long long) ibcs,
359 ib_linkstate(dd, ibcs),
360 ipath_ibcstatus_str[ltstate],
361 (unsigned long long) linkrecov);
362 /* and no more until active again */
363 dd->ipath_lastlinkrecov = 0;
364 ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
365 goto skip_ibchange;
366 }
367 }
368
369 if (ibstate == init || ibstate == arm || ibstate == active) {
370 *dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
371 if (ibstate == init || ibstate == arm) {
372 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
373 if (dd->ipath_flags & IPATH_LINKACTIVE)
374 signal_ib_event(dd, IB_EVENT_PORT_ERR);
375 }
376 if (ibstate == arm) {
377 dd->ipath_flags |= IPATH_LINKARMED;
378 dd->ipath_flags &= ~(IPATH_LINKUNK |
379 IPATH_LINKINIT | IPATH_LINKDOWN |
380 IPATH_LINKACTIVE | IPATH_NOCABLE);
381 ipath_hol_down(dd);
382 } else if (ibstate == init) {
383 /*
384 * set INIT and DOWN. Down is checked by
385 * most of the other code, but INIT is
386 * useful to know in a few places.
387 */
388 dd->ipath_flags |= IPATH_LINKINIT |
389 IPATH_LINKDOWN;
390 dd->ipath_flags &= ~(IPATH_LINKUNK |
391 IPATH_LINKARMED | IPATH_LINKACTIVE |
392 IPATH_NOCABLE);
393 ipath_hol_down(dd);
394 } else { /* active */
395 dd->ipath_lastlinkrecov = ipath_snap_cntr(dd,
396 dd->ipath_cregs->cr_iblinkerrrecovcnt);
397 *dd->ipath_statusp |=
398 IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
399 dd->ipath_flags |= IPATH_LINKACTIVE;
400 dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
401 | IPATH_LINKDOWN | IPATH_LINKARMED |
402 IPATH_NOCABLE);
403 if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
404 ipath_restart_sdma(dd);
405 signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
406 /* LED active not handled in chip _f_updown */
407 dd->ipath_f_setextled(dd, lstate, ltstate);
408 ipath_hol_up(dd);
409 }
410
411 /*
412 * print after we've already done the work, so as not to
413 * delay the state changes and notifications, for debugging
414 */
415 if (lstate == lastlstate)
416 ipath_cdbg(LINKVERB, "Unchanged from last: %s "
417 "(%x)\n", ib_linkstate(dd, ibcs), ibstate);
418 else
419 ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n",
420 dd->ipath_unit, ib_linkstate(dd, ibcs),
421 ipath_ibcstatus_str[ltstate], ibstate);
422 } else { /* down */
423 if (dd->ipath_flags & IPATH_LINKACTIVE)
424 signal_ib_event(dd, IB_EVENT_PORT_ERR);
425 dd->ipath_flags |= IPATH_LINKDOWN;
426 dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
427 | IPATH_LINKACTIVE |
428 IPATH_LINKARMED);
429 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
430 dd->ipath_lli_counter = 0;
431
432 if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN)
433 ipath_cdbg(VERBOSE, "Unit %u link state down "
434 "(state 0x%x), from %s\n",
435 dd->ipath_unit, lstate,
436 ib_linkstate(dd, dd->ipath_lastibcstat));
437 else
438 ipath_cdbg(LINKVERB, "Unit %u link state changed "
439 "to %s (0x%x) from down (%x)\n",
440 dd->ipath_unit,
441 ipath_ibcstatus_str[ltstate],
442 ibstate, lastlstate);
443 }
444
445skip_ibchange:
446 dd->ipath_lastibcstat = ibcs;
447done:
448 return;
449}
450
451static void handle_supp_msgs(struct ipath_devdata *dd,
452 unsigned supp_msgs, char *msg, u32 msgsz)
453{
454 /*
455 * Print the message unless it's ibc status change only, which
456 * happens so often we never want to count it.
457 */
458 if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
459 int iserr;
460 ipath_err_t mask;
461 iserr = ipath_decode_err(dd, msg, msgsz,
462 dd->ipath_lasterror &
463 ~INFINIPATH_E_IBSTATUSCHANGED);
464
465 mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
466 INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED;
467
468 /* if we're in debug, then don't mask SDMADISABLED msgs */
469 if (ipath_debug & __IPATH_DBG)
470 mask &= ~INFINIPATH_E_SDMADISABLED;
471
472 if (dd->ipath_lasterror & ~mask)
473 ipath_dev_err(dd, "Suppressed %u messages for "
474 "fast-repeating errors (%s) (%llx)\n",
475 supp_msgs, msg,
476 (unsigned long long)
477 dd->ipath_lasterror);
478 else {
479 /*
480 * rcvegrfull and rcvhdrqfull are "normal", for some
481 * types of processes (mostly benchmarks) that send
482 * huge numbers of messages, while not processing
483 * them. So only complain about these at debug
484 * level.
485 */
486 if (iserr)
487 ipath_dbg("Suppressed %u messages for %s\n",
488 supp_msgs, msg);
489 else
490 ipath_cdbg(ERRPKT,
491 "Suppressed %u messages for %s\n",
492 supp_msgs, msg);
493 }
494 }
495}
496
497static unsigned handle_frequent_errors(struct ipath_devdata *dd,
498 ipath_err_t errs, char *msg,
499 u32 msgsz, int *noprint)
500{
501 unsigned long nc;
502 static unsigned long nextmsg_time;
503 static unsigned nmsgs, supp_msgs;
504
505 /*
506 * Throttle back "fast" messages to no more than 10 per 5 seconds.
507 * This isn't perfect, but it's a reasonable heuristic. If we get
508 * more than 10, give a 6x longer delay.
509 */
510 nc = jiffies;
511 if (nmsgs > 10) {
512 if (time_before(nc, nextmsg_time)) {
513 *noprint = 1;
514 if (!supp_msgs++)
515 nextmsg_time = nc + HZ * 3;
516 } else if (supp_msgs) {
517 handle_supp_msgs(dd, supp_msgs, msg, msgsz);
518 supp_msgs = 0;
519 nmsgs = 0;
520 }
521 } else if (!nmsgs++ || time_after(nc, nextmsg_time)) {
522 nextmsg_time = nc + HZ / 2;
523 }
524
525 return supp_msgs;
526}
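The throttling in handle_frequent_errors() can be modelled in isolation; this sketch (with an invented HZ and error cadence) mirrors the same state machine, suppressing output once messages repeat quickly and reporting how many were dropped when the window expires:

#include <stdio.h>

#define HZ 100				/* pretend tick rate for the sketch */

/* returns 1 if a message at time 'now' (in ticks) should be suppressed */
static int throttle(unsigned long now)
{
	static unsigned long nextmsg_time;
	static unsigned int nmsgs, supp_msgs;
	int suppress = 0;

	if (nmsgs > 10) {
		if ((long)(nextmsg_time - now) > 0) {	/* time_before() */
			suppress = 1;
			if (!supp_msgs++)
				nextmsg_time = now + HZ * 3;
		} else if (supp_msgs) {
			printf("suppressed %u fast-repeating messages\n",
			       supp_msgs);
			supp_msgs = 0;
			nmsgs = 0;
		}
	} else if (!nmsgs++ || (long)(now - nextmsg_time) > 0) {
		nextmsg_time = now + HZ / 2;
	}
	return suppress;
}

int main(void)
{
	unsigned long t;

	for (t = 0; t < 4 * HZ; t++)	/* one error per tick, for 4 seconds */
		if (!throttle(t))
			printf("t=%lu: error message printed\n", t);
	return 0;
}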
527
528static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs)
529{
530 unsigned long flags;
531 int expected;
532
533 if (ipath_debug & __IPATH_DBG) {
534 char msg[128];
535 ipath_decode_err(dd, msg, sizeof msg, errs &
536 INFINIPATH_E_SDMAERRS);
537 ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg);
538 }
539 if (ipath_debug & __IPATH_VERBDBG) {
540 unsigned long tl, hd, status, lengen;
541 tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
542 hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
543		status = ipath_read_kreg64(dd,
544			dd->ipath_kregs->kr_senddmastatus);
545 lengen = ipath_read_kreg64(dd,
546 dd->ipath_kregs->kr_senddmalengen);
547 ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx "
548 "lengen 0x%lx\n", tl, hd, status, lengen);
549 }
550
551 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
552 __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
553 expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
554 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
555 if (!expected)
556 ipath_cancel_sends(dd, 1);
557}
558
559static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat)
560{
561 unsigned long flags;
562 int expected;
563
564 if ((istat & INFINIPATH_I_SDMAINT) &&
565 !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
566 ipath_sdma_intr(dd);
567
568 if (istat & INFINIPATH_I_SDMADISABLED) {
569 expected = test_bit(IPATH_SDMA_ABORTING,
570 &dd->ipath_sdma_status);
571 ipath_dbg("%s SDmaDisabled intr\n",
572 expected ? "expected" : "unexpected");
573 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
574 __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
575 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
576 if (!expected)
577 ipath_cancel_sends(dd, 1);
578 if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
579 tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
580 }
581}
582
583static int handle_hdrq_full(struct ipath_devdata *dd)
584{
585 int chkerrpkts = 0;
586 u32 hd, tl;
587 u32 i;
588
589 ipath_stats.sps_hdrqfull++;
590 for (i = 0; i < dd->ipath_cfgports; i++) {
591 struct ipath_portdata *pd = dd->ipath_pd[i];
592
593 if (i == 0) {
594 /*
595 * For kernel receive queues, we just want to know
596 * if there are packets in the queue that we can
597 * process.
598 */
599 if (pd->port_head != ipath_get_hdrqtail(pd))
600 chkerrpkts |= 1 << i;
601 continue;
602 }
603
604 /* Skip if user context is not open */
605 if (!pd || !pd->port_cnt)
606 continue;
607
608 /* Don't report the same point multiple times. */
609 if (dd->ipath_flags & IPATH_NODMA_RTAIL)
610 tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i);
611 else
612 tl = ipath_get_rcvhdrtail(pd);
613 if (tl == pd->port_lastrcvhdrqtail)
614 continue;
615
616 hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i);
617 if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) {
618 pd->port_lastrcvhdrqtail = tl;
619 pd->port_hdrqfull++;
620 /* flush hdrqfull so that poll() sees it */
621 wmb();
622 wake_up_interruptible(&pd->port_wait);
623 }
624 }
625
626 return chkerrpkts;
627}
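The "queue is stuck full" test in handle_hdrq_full() treats the receive header queue as a ring; a small standalone version of the same check, with invented indices:

#include <stdio.h>

/*
 * The ring has qlast + 1 entries.  It is considered full when the head
 * sits just behind the tail, including the wrap-around case, which is
 * the test applied above.
 */
static int hdrq_is_full(unsigned int hd, unsigned int tl, unsigned int qlast)
{
	return hd == tl + 1 || (hd == 0 && tl == qlast);
}

int main(void)
{
	unsigned int qlast = 7;		/* 8-entry queue, indices 0..7 */

	printf("%d\n", hdrq_is_full(5, 4, qlast));	/* 1: full mid-ring */
	printf("%d\n", hdrq_is_full(0, 7, qlast));	/* 1: full at wrap  */
	printf("%d\n", hdrq_is_full(3, 6, qlast));	/* 0: space left    */
	return 0;
}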
628
629static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
630{
631 char msg[128];
632 u64 ignore_this_time = 0;
633 u64 iserr = 0;
634 int chkerrpkts = 0, noprint = 0;
635 unsigned supp_msgs;
636 int log_idx;
637
638 /*
639 * don't report errors that are masked, either at init
640 * (not set in ipath_errormask), or temporarily (set in
641 * ipath_maskederrs)
642 */
643 errs &= dd->ipath_errormask & ~dd->ipath_maskederrs;
644
645 supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg,
646 &noprint);
647
648 /* do these first, they are most important */
649 if (errs & INFINIPATH_E_HARDWARE) {
650 /* reuse same msg buf */
651 dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
652 } else {
653 u64 mask;
654 for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
655 mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
656 if (errs & mask)
657 ipath_inc_eeprom_err(dd, log_idx, 1);
658 }
659 }
660
661 if (errs & INFINIPATH_E_SDMAERRS)
662 handle_sdma_errors(dd, errs);
663
664 if (!noprint && (errs & ~dd->ipath_e_bitsextant))
665 ipath_dev_err(dd, "error interrupt with unknown errors "
666 "%llx set\n", (unsigned long long)
667 (errs & ~dd->ipath_e_bitsextant));
668
669 if (errs & E_SUM_ERRS)
670 ignore_this_time = handle_e_sum_errs(dd, errs);
671 else if ((errs & E_SUM_LINK_PKTERRS) &&
672 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
673 /*
674 * This can happen when SMA is trying to bring the link
675 * up, but the IB link changes state at the "wrong" time.
676 * The IB logic then complains that the packet isn't
677 * valid. We don't want to confuse people, so we just
678		 * don't print them, except at debug level.
679 */
680 ipath_dbg("Ignoring packet errors %llx, because link not "
681 "ACTIVE\n", (unsigned long long) errs);
682 ignore_this_time = errs & E_SUM_LINK_PKTERRS;
683 }
684
685 if (supp_msgs == 250000) {
686 int s_iserr;
687 /*
688		 * It's not entirely reasonable to assume that the errors set
689		 * in the last clear period are all responsible for the
690		 * problem, but the alternative is to assume that only the
691		 * ones in this particular interrupt are, which also isn't great.
692 */
693 dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
694
695 dd->ipath_errormask &= ~dd->ipath_maskederrs;
696 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
697 dd->ipath_errormask);
698 s_iserr = ipath_decode_err(dd, msg, sizeof msg,
699 dd->ipath_maskederrs);
700
701 if (dd->ipath_maskederrs &
702 ~(INFINIPATH_E_RRCVEGRFULL |
703 INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
704 ipath_dev_err(dd, "Temporarily disabling "
705 "error(s) %llx reporting; too frequent (%s)\n",
706 (unsigned long long) dd->ipath_maskederrs,
707 msg);
708 else {
709 /*
710 * rcvegrfull and rcvhdrqfull are "normal",
711 * for some types of processes (mostly benchmarks)
712 * that send huge numbers of messages, while not
713 * processing them. So only complain about
714 * these at debug level.
715 */
716 if (s_iserr)
717 ipath_dbg("Temporarily disabling reporting "
718 "too frequent queue full errors (%s)\n",
719 msg);
720 else
721 ipath_cdbg(ERRPKT,
722 "Temporarily disabling reporting too"
723 " frequent packet errors (%s)\n",
724 msg);
725 }
726
727 /*
728		 * The masked errors are re-enabled after around 3 minutes,
729		 * in ipath_get_faststats().  If we have a series of fast-
730		 * repeating but different errors, the interval will keep
731		 * stretching out, but that's OK, since that situation is
732		 * pretty catastrophic anyway.
733 */
734 dd->ipath_unmasktime = jiffies + HZ * 180;
735 }
736
737 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs);
738 if (ignore_this_time)
739 errs &= ~ignore_this_time;
740 if (errs & ~dd->ipath_lasterror) {
741 errs &= ~dd->ipath_lasterror;
742 /* never suppress duplicate hwerrors or ibstatuschange */
743 dd->ipath_lasterror |= errs &
744 ~(INFINIPATH_E_HARDWARE |
745 INFINIPATH_E_IBSTATUSCHANGED);
746 }
747
748 if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) {
749 dd->ipath_spectriggerhit++;
750 ipath_dbg("%lu special trigger hits\n",
751 dd->ipath_spectriggerhit);
752 }
753
754 /* likely due to cancel; so suppress message unless verbose */
755 if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
756 time_after(dd->ipath_lastcancel, jiffies)) {
757 /* armlaunch takes precedence; it often causes both. */
758 ipath_cdbg(VERBOSE,
759 "Suppressed %s error (%llx) after sendbuf cancel\n",
760 (errs & INFINIPATH_E_SPIOARMLAUNCH) ?
761 "armlaunch" : "sendpktlen", (unsigned long long)errs);
762 errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
763 }
764
765 if (!errs)
766 return 0;
767
768 if (!noprint) {
769 ipath_err_t mask;
770 /*
771 * The ones we mask off are handled specially below
772 * or above. Also mask SDMADISABLED by default as it
773 * is too chatty.
774 */
775 mask = INFINIPATH_E_IBSTATUSCHANGED |
776 INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
777 INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED;
778
779 /* if we're in debug, then don't mask SDMADISABLED msgs */
780 if (ipath_debug & __IPATH_DBG)
781 mask &= ~INFINIPATH_E_SDMADISABLED;
782
783 ipath_decode_err(dd, msg, sizeof msg, errs & ~mask);
784 } else
785 /* so we don't need if (!noprint) at strlcat's below */
786 *msg = 0;
787
788 if (errs & E_SUM_PKTERRS) {
789 ipath_stats.sps_pkterrs++;
790 chkerrpkts = 1;
791 }
792 if (errs & E_SUM_ERRS)
793 ipath_stats.sps_errs++;
794
795 if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) {
796 ipath_stats.sps_crcerrs++;
797 chkerrpkts = 1;
798 }
799 iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
800
801
802 /*
803 * We don't want to print these two as they happen, or we can make
804 * the situation even worse, because it takes so long to print
805 * messages to serial consoles. Kernel ports get printed from
806 * fast_stats, no more than every 5 seconds, user ports get printed
807 * on close
808 */
809 if (errs & INFINIPATH_E_RRCVHDRFULL)
810 chkerrpkts |= handle_hdrq_full(dd);
811 if (errs & INFINIPATH_E_RRCVEGRFULL) {
812 struct ipath_portdata *pd = dd->ipath_pd[0];
813
814 /*
815 * since this is of less importance and not likely to
816 * happen without also getting hdrfull, only count
817 * occurrences; don't check each port (or even the kernel
818 * vs user)
819 */
820 ipath_stats.sps_etidfull++;
821 if (pd->port_head != ipath_get_hdrqtail(pd))
822 chkerrpkts |= 1;
823 }
824
825 /*
826	 * do this before IBSTATUSCHANGED, in case both bits are set in a single
827	 * interrupt; we want the STATUSCHANGE to "win", so that our internal
828	 * copy of the state machine is updated correctly
829 */
830 if (errs & INFINIPATH_E_RIBLOSTLINK) {
831 /*
832 * force through block below
833 */
834 errs |= INFINIPATH_E_IBSTATUSCHANGED;
835 ipath_stats.sps_iblink++;
836 dd->ipath_flags |= IPATH_LINKDOWN;
837 dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
838 | IPATH_LINKARMED | IPATH_LINKACTIVE);
839 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
840
841 ipath_dbg("Lost link, link now down (%s)\n",
842 ipath_ibcstatus_str[ipath_read_kreg64(dd,
843 dd->ipath_kregs->kr_ibcstatus) & 0xf]);
844 }
845 if (errs & INFINIPATH_E_IBSTATUSCHANGED)
846 handle_e_ibstatuschanged(dd, errs);
847
848 if (errs & INFINIPATH_E_RESET) {
849 if (!noprint)
850 ipath_dev_err(dd, "Got reset, requires re-init "
851 "(unload and reload driver)\n");
852 dd->ipath_flags &= ~IPATH_INITTED; /* needs re-init */
853 /* mark as having had error */
854 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
855 *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
856 }
857
858 if (!noprint && *msg) {
859 if (iserr)
860 ipath_dev_err(dd, "%s error\n", msg);
861 }
862 if (dd->ipath_state_wanted & dd->ipath_flags) {
863 ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
864 "waking\n", dd->ipath_state_wanted,
865 dd->ipath_flags);
866 wake_up_interruptible(&ipath_state_wait);
867 }
868
869 return chkerrpkts;
870}
871
872/*
873 * try to cleanup as much as possible for anything that might have gone
874 * wrong while in freeze mode, such as pio buffers being written by user
875 * processes (causing armlaunch), send errors due to going into freeze mode,
876 * etc., and try to avoid causing extra interrupts while doing so.
877 * Forcibly update the in-memory pioavail register copies after cleanup
878 * because the chip won't do it while in freeze mode (the register values
879 * themselves are kept correct).
880 * Make sure that we don't lose any important interrupts by using the chip
881 * feature that says that writing 0 to a bit in *clear that is set in
882 * *status will cause an interrupt to be generated again (if allowed by
883 * the *mask value).
884 */
885void ipath_clear_freeze(struct ipath_devdata *dd)
886{
887 /* disable error interrupts, to avoid confusion */
888 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
889
890	/* also disable interrupts; errormask is sometimes overwritten */
891 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
892
893 ipath_cancel_sends(dd, 1);
894
895 /* clear the freeze, and be sure chip saw it */
896 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
897 dd->ipath_control);
898 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
899
900 /* force in-memory update now we are out of freeze */
901 ipath_force_pio_avail_update(dd);
902
903 /*
904 * force new interrupt if any hwerr, error or interrupt bits are
905 * still set, and clear "safe" send packet errors related to freeze
906 * and cancelling sends. Re-enable error interrupts before possible
907 * force of re-interrupt on pending interrupts.
908 */
909 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
910 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
911 E_SPKT_ERRS_IGNORE);
912 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
913 dd->ipath_errormask);
914 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
915 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
916}
917
918
919/* this is separate to allow for better optimization of ipath_intr() */
920
921static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp)
922{
923 /*
924	 * Unexpected interrupts sometimes happen during driver init and
925	 * unload; we don't want to process any interrupts at that point.
926 */
927
928 /* this is just a bandaid, not a fix, if something goes badly
929 * wrong */
930 if (++*unexpectp > 100) {
931 if (++*unexpectp > 105) {
932 /*
933 * ok, we must be taking somebody else's interrupts,
934 * due to a messed up mptable and/or PIRQ table, so
935 * unregister the interrupt. We've seen this during
936 * linuxbios development work, and it may happen in
937 * the future again.
938 */
939 if (dd->pcidev && dd->ipath_irq) {
940 ipath_dev_err(dd, "Now %u unexpected "
941 "interrupts, unregistering "
942 "interrupt handler\n",
943 *unexpectp);
944 ipath_dbg("free_irq of irq %d\n",
945 dd->ipath_irq);
946 dd->ipath_f_free_irq(dd);
947 }
948 }
949 if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) {
950 ipath_dev_err(dd, "%u unexpected interrupts, "
951 "disabling interrupts completely\n",
952 *unexpectp);
953 /*
954 * disable all interrupts, something is very wrong
955 */
956 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
957 0ULL);
958 }
959 } else if (*unexpectp > 1)
960 ipath_dbg("Interrupt when not ready, should not happen, "
961 "ignoring\n");
962}
963
964static noinline void ipath_bad_regread(struct ipath_devdata *dd)
965{
966 static int allbits;
967
968 /* separate routine, for better optimization of ipath_intr() */
969
970 /*
971 * We print the message and disable interrupts, in hope of
972 * having a better chance of debugging the problem.
973 */
974 ipath_dev_err(dd,
975 "Read of interrupt status failed (all bits set)\n");
976 if (allbits++) {
977 /* disable all interrupts, something is very wrong */
978 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
979 if (allbits == 2) {
980 ipath_dev_err(dd, "Still bad interrupt status, "
981 "unregistering interrupt\n");
982 dd->ipath_f_free_irq(dd);
983 } else if (allbits > 2) {
984 if ((allbits % 10000) == 0)
985 printk(".");
986 } else
987 ipath_dev_err(dd, "Disabling interrupts, "
988 "multiple errors\n");
989 }
990}
991
992static void handle_layer_pioavail(struct ipath_devdata *dd)
993{
994 unsigned long flags;
995 int ret;
996
997 ret = ipath_ib_piobufavail(dd->verbs_dev);
998 if (ret > 0)
999 goto set;
1000
1001 return;
1002set:
1003 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1004 dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
1005 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1006 dd->ipath_sendctrl);
1007 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1008 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1009}
1010
1011/*
1012 * Handle receive interrupts for user ports; this means a user
1013 * process was waiting for a packet to arrive, and didn't want
1014 * to poll
1015 */
1016static void handle_urcv(struct ipath_devdata *dd, u64 istat)
1017{
1018 u64 portr;
1019 int i;
1020 int rcvdint = 0;
1021
1022 /*
1023 * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and
1024 * test_and_clear_bit(IPATH_PORT_WAITING_URG) below
1025 * would both like timely updates of the bits so that
1026 * we don't pass them by unnecessarily. the rmb()
1027 * here ensures that we see them promptly -- the
1028 * corresponding wmb()'s are in ipath_poll_urgent()
1029 * and ipath_poll_next()...
1030 */
1031 rmb();
1032 portr = ((istat >> dd->ipath_i_rcvavail_shift) &
1033 dd->ipath_i_rcvavail_mask) |
1034 ((istat >> dd->ipath_i_rcvurg_shift) &
1035 dd->ipath_i_rcvurg_mask);
1036 for (i = 1; i < dd->ipath_cfgports; i++) {
1037 struct ipath_portdata *pd = dd->ipath_pd[i];
1038
1039 if (portr & (1 << i) && pd && pd->port_cnt) {
1040 if (test_and_clear_bit(IPATH_PORT_WAITING_RCV,
1041 &pd->port_flag)) {
1042 clear_bit(i + dd->ipath_r_intravail_shift,
1043 &dd->ipath_rcvctrl);
1044 wake_up_interruptible(&pd->port_wait);
1045 rcvdint = 1;
1046 } else if (test_and_clear_bit(IPATH_PORT_WAITING_URG,
1047 &pd->port_flag)) {
1048 pd->port_urgent++;
1049 wake_up_interruptible(&pd->port_wait);
1050 }
1051 }
1052 }
1053 if (rcvdint) {
1054		/* only want to take one interrupt, so turn off the rcv
1055		 * interrupt for all the ports that had rcv_waiting set
1056		 * (but never for the kernel port)
1057 */
1058 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1059 dd->ipath_rcvctrl);
1060 }
1061}
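The per-port dispatch above starts by folding the rcv-available and rcv-urgent fields of the interrupt status into one bit per port; a small sketch of that extraction, with invented shift and mask values:

#include <stdio.h>

int main(void)
{
	const unsigned int rcvavail_shift = 0, rcvurg_shift = 16;
	const unsigned long long rcvavail_mask = 0x1f, rcvurg_mask = 0x1f;
	/* pretend port 2 has data available and port 4 has an urgent packet */
	unsigned long long istat = (1ULL << 2) | (1ULL << (16 + 4));
	unsigned long long portr;
	int i;

	portr = ((istat >> rcvavail_shift) & rcvavail_mask) |
		((istat >> rcvurg_shift) & rcvurg_mask);

	for (i = 1; i < 5; i++)		/* port 0 is the kernel port */
		if (portr & (1ULL << i))
			printf("user port %d has receive work\n", i);
	return 0;
}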
1062
1063irqreturn_t ipath_intr(int irq, void *data)
1064{
1065 struct ipath_devdata *dd = data;
1066 u64 istat, chk0rcv = 0;
1067 ipath_err_t estat = 0;
1068 irqreturn_t ret;
1069 static unsigned unexpected = 0;
1070 u64 kportrbits;
1071
1072 ipath_stats.sps_ints++;
1073
1074 if (dd->ipath_int_counter != (u32) -1)
1075 dd->ipath_int_counter++;
1076
1077 if (!(dd->ipath_flags & IPATH_PRESENT)) {
1078 /*
1079 * This return value is not great, but we do not want the
1080 * interrupt core code to remove our interrupt handler
1081 * because we don't appear to be handling an interrupt
1082 * during a chip reset.
1083 */
1084 return IRQ_HANDLED;
1085 }
1086
1087 /*
1088 * this needs to be flags&initted, not statusp, so we keep
1089 * taking interrupts even after link goes down, etc.
1090 * Also, we *must* clear the interrupt at some point, or we won't
1091 * take it again, which can be real bad for errors, etc...
1092 */
1093
1094 if (!(dd->ipath_flags & IPATH_INITTED)) {
1095 ipath_bad_intr(dd, &unexpected);
1096 ret = IRQ_NONE;
1097 goto bail;
1098 }
1099
1100 istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus);
1101
1102 if (unlikely(!istat)) {
1103 ipath_stats.sps_nullintr++;
1104 ret = IRQ_NONE; /* not our interrupt, or already handled */
1105 goto bail;
1106 }
1107 if (unlikely(istat == -1)) {
1108 ipath_bad_regread(dd);
1109 /* don't know if it was our interrupt or not */
1110 ret = IRQ_NONE;
1111 goto bail;
1112 }
1113
1114 if (unexpected)
1115 unexpected = 0;
1116
1117 if (unlikely(istat & ~dd->ipath_i_bitsextant))
1118 ipath_dev_err(dd,
1119 "interrupt with unknown interrupts %Lx set\n",
1120 (unsigned long long)
1121 istat & ~dd->ipath_i_bitsextant);
1122 else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */
1123 ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n",
1124 (unsigned long long) istat);
1125
1126 if (istat & INFINIPATH_I_ERROR) {
1127 ipath_stats.sps_errints++;
1128 estat = ipath_read_kreg64(dd,
1129 dd->ipath_kregs->kr_errorstatus);
1130 if (!estat)
1131 dev_info(&dd->pcidev->dev, "error interrupt (%Lx), "
1132 "but no error bits set!\n",
1133 (unsigned long long) istat);
1134 else if (estat == -1LL)
1135 /*
1136 * should we try clearing all, or hope next read
1137 * works?
1138 */
1139 ipath_dev_err(dd, "Read of error status failed "
1140 "(all bits set); ignoring\n");
1141 else
1142 chk0rcv |= handle_errors(dd, estat);
1143 }
1144
1145 if (istat & INFINIPATH_I_GPIO) {
1146 /*
1147 * GPIO interrupts fall in two broad classes:
1148 * GPIO_2 indicates (on some HT4xx boards) that a packet
1149 * has arrived for Port 0. Checking for this
1150 * is controlled by flag IPATH_GPIO_INTR.
1151 * GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate
1152 * errors that we need to count. Checking for this
1153 * is controlled by flag IPATH_GPIO_ERRINTRS.
1154 */
1155 u32 gpiostatus;
1156 u32 to_clear = 0;
1157
1158 gpiostatus = ipath_read_kreg32(
1159 dd, dd->ipath_kregs->kr_gpio_status);
1160 /* First the error-counter case. */
1161 if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
1162 (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
1163 /* want to clear the bits we see asserted. */
1164 to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);
1165
1166 /*
1167 * Count appropriately, clear bits out of our copy,
1168 * as they have been "handled".
1169 */
1170 if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
1171 ipath_dbg("FlowCtl on UnsupVL\n");
1172 dd->ipath_rxfc_unsupvl_errs++;
1173 }
1174 if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
1175 ipath_dbg("Overrun Threshold exceeded\n");
1176 dd->ipath_overrun_thresh_errs++;
1177 }
1178 if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
1179 ipath_dbg("Local Link Integrity error\n");
1180 dd->ipath_lli_errs++;
1181 }
1182 gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
1183 }
1184 /* Now the Port0 Receive case */
1185 if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
1186 (dd->ipath_flags & IPATH_GPIO_INTR)) {
1187 /*
1188 * GPIO status bit 2 is set, and we expected it.
1189			 * Clear it and note it via chk0rcv.
1190			 * This probably only happens if a Port0 pkt
1191			 * arrives at _just_ the wrong time, and we
1192			 * handle that by setting chk0rcv;
1193 */
1194 to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
1195 gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
1196 chk0rcv = 1;
1197 }
1198 if (gpiostatus) {
1199 /*
1200 * Some unexpected bits remain. If they could have
1201 * caused the interrupt, complain and clear.
1202 * To avoid repetition of this condition, also clear
1203 * the mask. It is almost certainly due to error.
1204 */
1205 const u32 mask = (u32) dd->ipath_gpio_mask;
1206
1207 if (mask & gpiostatus) {
1208 ipath_dbg("Unexpected GPIO IRQ bits %x\n",
1209 gpiostatus & mask);
1210 to_clear |= (gpiostatus & mask);
1211 dd->ipath_gpio_mask &= ~(gpiostatus & mask);
1212 ipath_write_kreg(dd,
1213 dd->ipath_kregs->kr_gpio_mask,
1214 dd->ipath_gpio_mask);
1215 }
1216 }
1217 if (to_clear) {
1218 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
1219 (u64) to_clear);
1220 }
1221 }
1222
1223 /*
1224 * Clear the interrupt bits we found set, unless they are receive
1225 * related, in which case we already cleared them above, and don't
1226 * want to clear them again, because we might lose an interrupt.
1227	 * Clear it early, so we "know" the chip will have seen this by
1228 * the time we process the queue, and will re-interrupt if necessary.
1229 * The processor itself won't take the interrupt again until we return.
1230 */
1231 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);
1232
1233 /*
1234 * Handle kernel receive queues before checking for pio buffers
1235 * available since receives can overflow; piobuf waiters can afford
1236 * a few extra cycles, since they were waiting anyway, and user's
1237 * waiting for receive are at the bottom.
1238 */
1239 kportrbits = (1ULL << dd->ipath_i_rcvavail_shift) |
1240 (1ULL << dd->ipath_i_rcvurg_shift);
1241 if (chk0rcv || (istat & kportrbits)) {
1242 istat &= ~kportrbits;
1243 ipath_kreceive(dd->ipath_pd[0]);
1244 }
1245
1246 if (istat & ((dd->ipath_i_rcvavail_mask << dd->ipath_i_rcvavail_shift) |
1247 (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift)))
1248 handle_urcv(dd, istat);
1249
1250 if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED))
1251 handle_sdma_intr(dd, istat);
1252
1253 if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
1254 unsigned long flags;
1255
1256 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1257 dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL;
1258 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1259 dd->ipath_sendctrl);
1260 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1261 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1262
1263 /* always process; sdma verbs uses PIO for acks and VL15 */
1264 handle_layer_pioavail(dd);
1265 }
1266
1267 ret = IRQ_HANDLED;
1268
1269bail:
1270 return ret;
1271}
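Before dispatching, ipath_intr() sanity-checks the raw interrupt status: zero means the interrupt was not ours, all-ones means the register read failed, and bits outside the known-extant set are flagged as unknown. A standalone model of that triage follows; the extant mask and values are invented:

#include <stdio.h>
#include <stdint.h>

enum intr_verdict { INTR_NONE, INTR_BAD_READ, INTR_HANDLE };

static enum intr_verdict classify(uint64_t istat, uint64_t bits_extant)
{
	if (!istat)
		return INTR_NONE;	/* not ours, or already handled */
	if (istat == ~0ULL)
		return INTR_BAD_READ;	/* register read came back all 1s */
	if (istat & ~bits_extant)
		printf("unknown interrupt bits 0x%llx set\n",
		       (unsigned long long)(istat & ~bits_extant));
	return INTR_HANDLE;
}

int main(void)
{
	const uint64_t extant = 0x00000000ffffffffULL;

	printf("%d\n", classify(0, extant));			/* 0 */
	printf("%d\n", classify(~0ULL, extant));		/* 1 */
	printf("%d\n", classify(0x100000001ULL, extant));	/* warns, then 2 */
	return 0;
}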
diff --git a/drivers/staging/rdma/ipath/ipath_kernel.h b/drivers/staging/rdma/ipath/ipath_kernel.h
deleted file mode 100644
index 66c934a5f839..000000000000
--- a/drivers/staging/rdma/ipath/ipath_kernel.h
+++ /dev/null
@@ -1,1374 +0,0 @@
1#ifndef _IPATH_KERNEL_H
2#define _IPATH_KERNEL_H
3/*
4 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
5 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36/*
37 * This header file is the base header file for infinipath kernel code;
38 * ipath_user.h serves a similar purpose for user code.
39 */
40
41#include <linux/interrupt.h>
42#include <linux/pci.h>
43#include <linux/dma-mapping.h>
44#include <linux/mutex.h>
45#include <linux/list.h>
46#include <linux/scatterlist.h>
47#include <linux/sched.h>
48#include <asm/io.h>
49#include <rdma/ib_verbs.h>
50
51#include "ipath_common.h"
52#include "ipath_debug.h"
53#include "ipath_registers.h"
54
55/* only s/w major version of InfiniPath we can handle */
56#define IPATH_CHIP_VERS_MAJ 2U
57
58/* don't care about this except printing */
59#define IPATH_CHIP_VERS_MIN 0U
60
61/* temporary, maybe always */
62extern struct infinipath_stats ipath_stats;
63
64#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
65/*
66 * First-cut criterion for "device is active" is
67 * two thousand dwords combined Tx, Rx traffic per
68 * 5-second interval. SMA packets are 64 dwords,
69 * and occur "a few per second", presumably each way.
70 */
71#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000)
72/*
73 * Struct used to indicate which errors are logged in each of the
74 * error-counters that are logged to EEPROM. A counter is incremented
75 * _once_ (saturating at 255) for each event with any bits set in
76 * the error or hwerror register masks below.
77 */
78#define IPATH_EEP_LOG_CNT (4)
79struct ipath_eep_log_mask {
80 u64 errs_to_log;
81 u64 hwerrs_to_log;
82};
83
84struct ipath_portdata {
85 void **port_rcvegrbuf;
86 dma_addr_t *port_rcvegrbuf_phys;
87 /* rcvhdrq base, needs mmap before useful */
88 void *port_rcvhdrq;
89 /* kernel virtual address where hdrqtail is updated */
90 void *port_rcvhdrtail_kvaddr;
91 /*
92 * temp buffer for expected send setup, allocated at open, instead
93 * of each setup call
94 */
95 void *port_tid_pg_list;
96 /* when waiting for rcv or pioavail */
97 wait_queue_head_t port_wait;
98 /*
99	 * rcvegr bufs base, physical; must fit in 44 bits so that
100	 * mmap64 of 44 bit addresses from 32 bit programs works
101 */
102 dma_addr_t port_rcvegr_phys;
103 /* mmap of hdrq, must fit in 44 bits */
104 dma_addr_t port_rcvhdrq_phys;
105 dma_addr_t port_rcvhdrqtailaddr_phys;
106 /*
107 * number of opens (including slave subports) on this instance
108 * (ignoring forks, dup, etc. for now)
109 */
110 int port_cnt;
111 /*
112 * how much space to leave at start of eager TID entries for
113 * protocol use, on each TID
114 */
115 /* instead of calculating it */
116 unsigned port_port;
117 /* non-zero if port is being shared. */
118 u16 port_subport_cnt;
119 /* non-zero if port is being shared. */
120 u16 port_subport_id;
121 /* number of pio bufs for this port (all procs, if shared) */
122 u32 port_piocnt;
123 /* first pio buffer for this port */
124 u32 port_pio_base;
125 /* chip offset of PIO buffers for this port */
126 u32 port_piobufs;
127 /* how many alloc_pages() chunks in port_rcvegrbuf_pages */
128 u32 port_rcvegrbuf_chunks;
129 /* how many egrbufs per chunk */
130 u32 port_rcvegrbufs_perchunk;
131 /* order for port_rcvegrbuf_pages */
132 size_t port_rcvegrbuf_size;
133 /* rcvhdrq size (for freeing) */
134 size_t port_rcvhdrq_size;
135 /* next expected TID to check when looking for free */
136 u32 port_tidcursor;
137	/* port status flag bits (e.g. IPATH_PORT_WAITING_RCV) */
138 unsigned long port_flag;
139 /* what happened */
140 unsigned long int_flag;
141 /* WAIT_RCV that timed out, no interrupt */
142 u32 port_rcvwait_to;
143 /* WAIT_PIO that timed out, no interrupt */
144 u32 port_piowait_to;
145 /* WAIT_RCV already happened, no wait */
146 u32 port_rcvnowait;
147 /* WAIT_PIO already happened, no wait */
148 u32 port_pionowait;
149 /* total number of rcvhdrqfull errors */
150 u32 port_hdrqfull;
151 /*
152 * Used to suppress multiple instances of same
153 * port staying stuck at same point.
154 */
155 u32 port_lastrcvhdrqtail;
156 /* saved total number of rcvhdrqfull errors for poll edge trigger */
157 u32 port_hdrqfull_poll;
158 /* total number of polled urgent packets */
159 u32 port_urgent;
160 /* saved total number of polled urgent packets for poll edge trigger */
161 u32 port_urgent_poll;
162 /* pid of process using this port */
163 struct pid *port_pid;
164 struct pid *port_subpid[INFINIPATH_MAX_SUBPORT];
165 /* same size as task_struct .comm[] */
166 char port_comm[TASK_COMM_LEN];
167 /* pkeys set by this use of this port */
168 u16 port_pkeys[4];
169 /* so file ops can get at unit */
170 struct ipath_devdata *port_dd;
171 /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
172 void *subport_uregbase;
173 /* An array of pages for the eager receive buffers * N */
174 void *subport_rcvegrbuf;
175 /* An array of pages for the eager header queue entries * N */
176 void *subport_rcvhdr_base;
177 /* The version of the library which opened this port */
178 u32 userversion;
179 /* Bitmask of active slaves */
180 u32 active_slaves;
181 /* Type of packets or conditions we want to poll for */
182 u16 poll_type;
183 /* port rcvhdrq head offset */
184 u32 port_head;
185 /* receive packet sequence counter */
186 u32 port_seq_cnt;
187};
188
189struct sk_buff;
190struct ipath_sge_state;
191struct ipath_verbs_txreq;
192
193/*
194 * control information for layered drivers
195 */
196struct _ipath_layer {
197 void *l_arg;
198};
199
200struct ipath_skbinfo {
201 struct sk_buff *skb;
202 dma_addr_t phys;
203};
204
205struct ipath_sdma_txreq {
206 int flags;
207 int sg_count;
208 union {
209 struct scatterlist *sg;
210 void *map_addr;
211 };
212 void (*callback)(void *, int);
213 void *callback_cookie;
214 int callback_status;
215 u16 start_idx; /* sdma private */
216 u16 next_descq_idx; /* sdma private */
217 struct list_head list; /* sdma private */
218};
219
220struct ipath_sdma_desc {
221 __le64 qw[2];
222};
223
224#define IPATH_SDMA_TXREQ_F_USELARGEBUF 0x1
225#define IPATH_SDMA_TXREQ_F_HEADTOHOST 0x2
226#define IPATH_SDMA_TXREQ_F_INTREQ 0x4
227#define IPATH_SDMA_TXREQ_F_FREEBUF 0x8
228#define IPATH_SDMA_TXREQ_F_FREEDESC 0x10
229#define IPATH_SDMA_TXREQ_F_VL15 0x20
230
231#define IPATH_SDMA_TXREQ_S_OK 0
232#define IPATH_SDMA_TXREQ_S_SENDERROR 1
233#define IPATH_SDMA_TXREQ_S_ABORTED 2
234#define IPATH_SDMA_TXREQ_S_SHUTDOWN 3
235
236#define IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG (1ull << 63)
237#define IPATH_SDMA_STATUS_ABORT_IN_PROG (1ull << 62)
238#define IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE (1ull << 61)
239#define IPATH_SDMA_STATUS_SCB_EMPTY (1ull << 30)
240
241/* max dwords in small buffer packet */
242#define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2)
243
244/*
245 * Possible IB config parameters for ipath_f_get/set_ib_cfg()
246 */
247#define IPATH_IB_CFG_LIDLMC 0 /* Get/set LID (LS16b) and Mask (MS16b) */
248#define IPATH_IB_CFG_HRTBT 1 /* Get/set Heartbeat off/enable/auto */
249#define IPATH_IB_HRTBT_ON 3 /* Heartbeat enabled, sent every 100msec */
250#define IPATH_IB_HRTBT_OFF 0 /* Heartbeat off */
251#define IPATH_IB_CFG_LWID_ENB 2 /* Get/set allowed Link-width */
252#define IPATH_IB_CFG_LWID 3 /* Get currently active Link-width */
253#define IPATH_IB_CFG_SPD_ENB 4 /* Get/set allowed Link speeds */
254#define IPATH_IB_CFG_SPD 5 /* Get current Link spd */
255#define IPATH_IB_CFG_RXPOL_ENB 6 /* Get/set Auto-RX-polarity enable */
256#define IPATH_IB_CFG_LREV_ENB 7 /* Get/set Auto-Lane-reversal enable */
257#define IPATH_IB_CFG_LINKLATENCY 8 /* Get link latency */
258
259
260struct ipath_devdata {
261 struct list_head ipath_list;
262
263 struct ipath_kregs const *ipath_kregs;
264 struct ipath_cregs const *ipath_cregs;
265
266 /* mem-mapped pointer to base of chip regs */
267 u64 __iomem *ipath_kregbase;
268 /* end of mem-mapped chip space; range checking */
269 u64 __iomem *ipath_kregend;
270 /* physical address of chip for io_remap, etc. */
271 unsigned long ipath_physaddr;
272 /* base of memory alloced for ipath_kregbase, for free */
273 u64 *ipath_kregalloc;
274 /* ipath_cfgports pointers */
275 struct ipath_portdata **ipath_pd;
276 /* sk_buffs used by port 0 eager receive queue */
277 struct ipath_skbinfo *ipath_port0_skbinfo;
278 /* kvirt address of 1st 2k pio buffer */
279 void __iomem *ipath_pio2kbase;
280 /* kvirt address of 1st 4k pio buffer */
281 void __iomem *ipath_pio4kbase;
282 /*
283 * points to area where PIOavail registers will be DMA'ed.
284	 * Has to be on a page of its own, because the page will be
285 * mapped into user program space. This copy is *ONLY* ever
286 * written by DMA, not by the driver! Need a copy per device
287 * when we get to multiple devices
288 */
289 volatile __le64 *ipath_pioavailregs_dma;
290 /* physical address where updates occur */
291 dma_addr_t ipath_pioavailregs_phys;
292 struct _ipath_layer ipath_layer;
293 /* setup intr */
294 int (*ipath_f_intrsetup)(struct ipath_devdata *);
295 /* fallback to alternate interrupt type if possible */
296 int (*ipath_f_intr_fallback)(struct ipath_devdata *);
297 /* setup on-chip bus config */
298 int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *);
299 /* hard reset chip */
300 int (*ipath_f_reset)(struct ipath_devdata *);
301 int (*ipath_f_get_boardname)(struct ipath_devdata *, char *,
302 size_t);
303 void (*ipath_f_init_hwerrors)(struct ipath_devdata *);
304 void (*ipath_f_handle_hwerrors)(struct ipath_devdata *, char *,
305 size_t);
306 void (*ipath_f_quiet_serdes)(struct ipath_devdata *);
307 int (*ipath_f_bringup_serdes)(struct ipath_devdata *);
308 int (*ipath_f_early_init)(struct ipath_devdata *);
309 void (*ipath_f_clear_tids)(struct ipath_devdata *, unsigned);
310 void (*ipath_f_put_tid)(struct ipath_devdata *, u64 __iomem*,
311 u32, unsigned long);
312 void (*ipath_f_tidtemplate)(struct ipath_devdata *);
313 void (*ipath_f_cleanup)(struct ipath_devdata *);
314 void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
315 /* fill out chip-specific fields */
316 int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
317 /* free irq */
318 void (*ipath_f_free_irq)(struct ipath_devdata *);
319 struct ipath_message_header *(*ipath_f_get_msgheader)
320 (struct ipath_devdata *, __le32 *);
321 void (*ipath_f_config_ports)(struct ipath_devdata *, ushort);
322 int (*ipath_f_get_ib_cfg)(struct ipath_devdata *, int);
323 int (*ipath_f_set_ib_cfg)(struct ipath_devdata *, int, u32);
324 void (*ipath_f_config_jint)(struct ipath_devdata *, u16 , u16);
325 void (*ipath_f_read_counters)(struct ipath_devdata *,
326 struct infinipath_counters *);
327 void (*ipath_f_xgxs_reset)(struct ipath_devdata *);
328 /* per chip actions needed for IB Link up/down changes */
329 int (*ipath_f_ib_updown)(struct ipath_devdata *, int, u64);
330
331 unsigned ipath_lastegr_idx;
332 struct ipath_ibdev *verbs_dev;
333 struct timer_list verbs_timer;
334 /* total dwords sent (summed from counter) */
335 u64 ipath_sword;
336 /* total dwords rcvd (summed from counter) */
337 u64 ipath_rword;
338 /* total packets sent (summed from counter) */
339 u64 ipath_spkts;
340 /* total packets rcvd (summed from counter) */
341 u64 ipath_rpkts;
342 /* ipath_statusp initially points to this. */
343 u64 _ipath_status;
344 /* GUID for this interface, in network order */
345 __be64 ipath_guid;
346 /*
347	 * aggregate of error bits reported since last cleared, for
348 * limiting of error reporting
349 */
350 ipath_err_t ipath_lasterror;
351 /*
352	 * aggregate of error bits reported since last cleared, for
353 * limiting of hwerror reporting
354 */
355 ipath_err_t ipath_lasthwerror;
356 /* errors masked because they occur too fast */
357 ipath_err_t ipath_maskederrs;
358 u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */
359 /* these 5 fields are used to establish deltas for IB Symbol
360 * errors and linkrecovery errors. They can be reported on
361 * some chips during link negotiation prior to INIT, and with
362 * DDR when faking DDR negotiations with non-IBTA switches.
363 * The chip counters are adjusted at driver unload if there is
364 * a non-zero delta.
365 */
366 u64 ibdeltainprog;
367 u64 ibsymdelta;
368 u64 ibsymsnap;
369 u64 iblnkerrdelta;
370 u64 iblnkerrsnap;
371
372 /* time in jiffies at which to re-enable maskederrs */
373 unsigned long ipath_unmasktime;
374 /* count of egrfull errors, combined for all ports */
375 u64 ipath_last_tidfull;
376 /* for ipath_qcheck() */
377 u64 ipath_lastport0rcv_cnt;
378 /* template for writing TIDs */
379 u64 ipath_tidtemplate;
380 /* value to write to free TIDs */
381 u64 ipath_tidinvalid;
382 /* IBA6120 rcv interrupt setup */
383 u64 ipath_rhdrhead_intr_off;
384
385 /* size of memory at ipath_kregbase */
386 u32 ipath_kregsize;
387 /* number of registers used for pioavail */
388 u32 ipath_pioavregs;
389 /* IPATH_POLL, etc. */
390 u32 ipath_flags;
391 /* ipath_flags driver is waiting for */
392 u32 ipath_state_wanted;
393 /* last buffer for user use, first buf for kernel use is this
394 * index. */
395 u32 ipath_lastport_piobuf;
396 /* is a stats timer active */
397 u32 ipath_stats_timer_active;
398 /* number of interrupts for this device -- saturates... */
399 u32 ipath_int_counter;
400 /* dwords sent read from counter */
401 u32 ipath_lastsword;
402 /* dwords received read from counter */
403 u32 ipath_lastrword;
404 /* sent packets read from counter */
405 u32 ipath_lastspkts;
406 /* received packets read from counter */
407 u32 ipath_lastrpkts;
408 /* pio bufs allocated per port */
409 u32 ipath_pbufsport;
410 /* if remainder on bufs/port, ports < extrabuf get 1 extra */
411 u32 ipath_ports_extrabuf;
412 u32 ipath_pioupd_thresh; /* update threshold, some chips */
413 /*
414	 * maximum number of ports to configure; zero means use the number
415	 * the chip supports, fewer gives more pio bufs/port, etc.
416 */
417 u32 ipath_cfgports;
418 /* count of port 0 hdrqfull errors */
419 u32 ipath_p0_hdrqfull;
420 /* port 0 number of receive eager buffers */
421 u32 ipath_p0_rcvegrcnt;
422
423 /*
424	 * index of the last piobuffer we used.  Speeds up searching by
425	 * starting at this point.  It doesn't matter if multiple CPUs use
426	 * and update it; the last updater's write is the only one that
427	 * matters.  Whenever it wraps, we update the shadow copies.  Need
428	 * a copy per device when we get to multiple devices
429 */
430 u32 ipath_lastpioindex;
431 u32 ipath_lastpioindexl;
432 /* max length of freezemsg */
433 u32 ipath_freezelen;
434 /*
435 * consecutive times we wanted a PIO buffer but were unable to
436 * get one
437 */
438 u32 ipath_consec_nopiobuf;
439 /*
440 * hint that we should update ipath_pioavailshadow before
441 * looking for a PIO buffer
442 */
443 u32 ipath_upd_pio_shadow;
444 /* so we can rewrite it after a chip reset */
445 u32 ipath_pcibar0;
446 /* so we can rewrite it after a chip reset */
447 u32 ipath_pcibar1;
448 u32 ipath_x1_fix_tries;
449 u32 ipath_autoneg_tries;
450 u32 serdes_first_init_done;
451
452 struct ipath_relock {
453 atomic_t ipath_relock_timer_active;
454 struct timer_list ipath_relock_timer;
455 unsigned int ipath_relock_interval; /* in jiffies */
456 } ipath_relock_singleton;
457
458 /* interrupt number */
459 int ipath_irq;
460 /* HT/PCI Vendor ID (here for NodeInfo) */
461 u16 ipath_vendorid;
462 /* HT/PCI Device ID (here for NodeInfo) */
463 u16 ipath_deviceid;
464 /* offset in HT config space of slave/primary interface block */
465 u8 ipath_ht_slave_off;
466 /* for write combining settings */
467 int wc_cookie;
468 /* ref count for each pkey */
469 atomic_t ipath_pkeyrefs[4];
470 /* shadow copy of struct page *'s for exp tid pages */
471 struct page **ipath_pageshadow;
472 /* shadow copy of dma handles for exp tid pages */
473 dma_addr_t *ipath_physshadow;
474 u64 __iomem *ipath_egrtidbase;
475	/* lock to work around chip bug 9437 and others */
476 spinlock_t ipath_kernel_tid_lock;
477 spinlock_t ipath_user_tid_lock;
478 spinlock_t ipath_sendctrl_lock;
479 /* around ipath_pd and (user ports) port_cnt use (intr vs free) */
480 spinlock_t ipath_uctxt_lock;
481
482 /*
483 * IPATH_STATUS_*,
484 * this address is mapped readonly into user processes so they can
485 * get status cheaply, whenever they want.
486 */
487 u64 *ipath_statusp;
488 /* freeze msg if hw error put chip in freeze */
489 char *ipath_freezemsg;
490 /* pci access data structure */
491 struct pci_dev *pcidev;
492 struct cdev *user_cdev;
493 struct cdev *diag_cdev;
494 struct device *user_dev;
495 struct device *diag_dev;
496 /* timer used to prevent stats overflow, error throttling, etc. */
497 struct timer_list ipath_stats_timer;
498 /* timer to verify interrupts work, and fallback if possible */
499 struct timer_list ipath_intrchk_timer;
500 void *ipath_dummy_hdrq; /* used after port close */
501 dma_addr_t ipath_dummy_hdrq_phys;
502
503 /* SendDMA related entries */
504 spinlock_t ipath_sdma_lock;
505 unsigned long ipath_sdma_status;
506 unsigned long ipath_sdma_abort_jiffies;
507 unsigned long ipath_sdma_abort_intr_timeout;
508 unsigned long ipath_sdma_buf_jiffies;
509 struct ipath_sdma_desc *ipath_sdma_descq;
510 u64 ipath_sdma_descq_added;
511 u64 ipath_sdma_descq_removed;
512 int ipath_sdma_desc_nreserved;
513 u16 ipath_sdma_descq_cnt;
514 u16 ipath_sdma_descq_tail;
515 u16 ipath_sdma_descq_head;
516 u16 ipath_sdma_next_intr;
517 u16 ipath_sdma_reset_wait;
518 u8 ipath_sdma_generation;
519 struct tasklet_struct ipath_sdma_abort_task;
520 struct tasklet_struct ipath_sdma_notify_task;
521 struct list_head ipath_sdma_activelist;
522 struct list_head ipath_sdma_notifylist;
523 atomic_t ipath_sdma_vl15_count;
524 struct timer_list ipath_sdma_vl15_timer;
525
526 dma_addr_t ipath_sdma_descq_phys;
527 volatile __le64 *ipath_sdma_head_dma;
528 dma_addr_t ipath_sdma_head_phys;
529
530 unsigned long ipath_ureg_align; /* user register alignment */
531
532 struct delayed_work ipath_autoneg_work;
533 wait_queue_head_t ipath_autoneg_wait;
534
535 /* HoL blocking / user app forward-progress state */
536 unsigned ipath_hol_state;
537 unsigned ipath_hol_next;
538 struct timer_list ipath_hol_timer;
539
540 /*
541 * Shadow copies of registers; size indicates read access size.
542	 * Most of them are readonly, but some are write-only registers,
543 * where we manipulate the bits in the shadow copy, and then write
544 * the shadow copy to infinipath.
545 *
546 * We deliberately make most of these 32 bits, since they have
547	 * restricted range.  For any that we read, we want to generate 32
548 * bit accesses, since Opteron will generate 2 separate 32 bit HT
549 * transactions for a 64 bit read, and we want to avoid unnecessary
550 * HT transactions.
551 */
552
553 /* This is the 64 bit group */
554
555 /*
556 * shadow of pioavail, check to be sure it's large enough at
557 * init time.
558 */
559 unsigned long ipath_pioavailshadow[8];
560 /* bitmap of send buffers available for the kernel to use with PIO. */
561 unsigned long ipath_pioavailkernel[8];
562 /* shadow of kr_gpio_out, for rmw ops */
563 u64 ipath_gpio_out;
564 /* shadow the gpio mask register */
565 u64 ipath_gpio_mask;
566 /* shadow the gpio output enable, etc... */
567 u64 ipath_extctrl;
568 /* kr_revision shadow */
569 u64 ipath_revision;
570 /*
571 * shadow of ibcctrl, for interrupt handling of link changes,
572 * etc.
573 */
574 u64 ipath_ibcctrl;
575 /*
576 * last ibcstatus, to suppress "duplicate" status change messages,
577 * mostly from 2 to 3
578 */
579 u64 ipath_lastibcstat;
580 /* hwerrmask shadow */
581 ipath_err_t ipath_hwerrmask;
582 ipath_err_t ipath_errormask; /* errormask shadow */
583 /* interrupt config reg shadow */
584 u64 ipath_intconfig;
585 /* kr_sendpiobufbase value */
586 u64 ipath_piobufbase;
587 /* kr_ibcddrctrl shadow */
588 u64 ipath_ibcddrctrl;
589
590 /* these are the "32 bit" regs */
591
592 /*
593 * number of GUIDs in the flash for this interface; may need some
594 * rethinking for setting on other ifaces
595 */
596 u32 ipath_nguid;
597 /*
598 * the following two are 32-bit bitmasks, but {test,clear,set}_bit
599 * all expect bit fields to be "unsigned long"
600 */
601 /* shadow kr_rcvctrl */
602 unsigned long ipath_rcvctrl;
603 /* shadow kr_sendctrl */
604 unsigned long ipath_sendctrl;
605 /* to not count armlaunch after cancel */
606 unsigned long ipath_lastcancel;
607 /* count cases where special trigger was needed (double write) */
608 unsigned long ipath_spectriggerhit;
609
610 /* value we put in kr_rcvhdrcnt */
611 u32 ipath_rcvhdrcnt;
612 /* value we put in kr_rcvhdrsize */
613 u32 ipath_rcvhdrsize;
614 /* value we put in kr_rcvhdrentsize */
615 u32 ipath_rcvhdrentsize;
616 /* offset of last entry in rcvhdrq */
617 u32 ipath_hdrqlast;
618 /* kr_portcnt value */
619 u32 ipath_portcnt;
620 /* kr_pagealign value */
621 u32 ipath_palign;
622 /* number of "2KB" PIO buffers */
623 u32 ipath_piobcnt2k;
624 /* size in bytes of "2KB" PIO buffers */
625 u32 ipath_piosize2k;
626 /* number of "4KB" PIO buffers */
627 u32 ipath_piobcnt4k;
628 /* size in bytes of "4KB" PIO buffers */
629 u32 ipath_piosize4k;
630 u32 ipath_pioreserved; /* reserved special-inkernel; */
631 /* kr_rcvegrbase value */
632 u32 ipath_rcvegrbase;
633 /* kr_rcvegrcnt value */
634 u32 ipath_rcvegrcnt;
635 /* kr_rcvtidbase value */
636 u32 ipath_rcvtidbase;
637 /* kr_rcvtidcnt value */
638 u32 ipath_rcvtidcnt;
639 /* kr_sendregbase */
640 u32 ipath_sregbase;
641 /* kr_userregbase */
642 u32 ipath_uregbase;
643 /* kr_counterregbase */
644 u32 ipath_cregbase;
645 /* shadow the control register contents */
646 u32 ipath_control;
647 /* PCI revision register (HTC rev on FPGA) */
648 u32 ipath_pcirev;
649
650 /* chip address space used by 4k pio buffers */
651 u32 ipath_4kalign;
652 /* The MTU programmed for this unit */
653 u32 ipath_ibmtu;
654 /*
655	 * The max size IB packet, including IB headers, that we can send.
656 * Starts same as ipath_piosize, but is affected when ibmtu is
657 * changed, or by size of eager buffers
658 */
659 u32 ipath_ibmaxlen;
660 /*
661 * ibmaxlen at init time, limited by chip and by receive buffer
662 * size. Not changed after init.
663 */
664 u32 ipath_init_ibmaxlen;
665 /* size of each rcvegrbuffer */
666 u32 ipath_rcvegrbufsize;
667	/* localbus width (1, 2, 4, 8, 16, 32) from config space  */
668 u32 ipath_lbus_width;
669 /* localbus speed (HT: 200,400,800,1000; PCIe 2500) */
670 u32 ipath_lbus_speed;
671 /*
672	 * number of sequential ibcstatus changes for polling active/quiet
673 * (i.e., link not coming up).
674 */
675 u32 ipath_ibpollcnt;
676 /* low and high portions of MSI capability/vector */
677 u32 ipath_msi_lo;
678 /* saved after PCIe init for restore after reset */
679 u32 ipath_msi_hi;
680 /* MSI data (vector) saved for restore */
681 u16 ipath_msi_data;
682 /* MLID programmed for this instance */
683 u16 ipath_mlid;
684 /* LID programmed for this instance */
685 u16 ipath_lid;
686 /* list of pkeys programmed; 0 if not set */
687 u16 ipath_pkeys[4];
688 /*
689 * ASCII serial number, from flash, large enough for original
690 * all digit strings, and longer QLogic serial number format
691 */
692 u8 ipath_serial[16];
693 /* human readable board version */
694 u8 ipath_boardversion[96];
695 u8 ipath_lbus_info[32]; /* human readable localbus info */
696 /* chip major rev, from ipath_revision */
697 u8 ipath_majrev;
698 /* chip minor rev, from ipath_revision */
699 u8 ipath_minrev;
700 /* board rev, from ipath_revision */
701 u8 ipath_boardrev;
702 /* saved for restore after reset */
703 u8 ipath_pci_cacheline;
704 /* LID mask control */
705 u8 ipath_lmc;
706 /* link width supported */
707 u8 ipath_link_width_supported;
708 /* link speed supported */
709 u8 ipath_link_speed_supported;
710 u8 ipath_link_width_enabled;
711 u8 ipath_link_speed_enabled;
712 u8 ipath_link_width_active;
713 u8 ipath_link_speed_active;
714 /* Rx Polarity inversion (compensate for ~tx on partner) */
715 u8 ipath_rx_pol_inv;
716
717 u8 ipath_r_portenable_shift;
718 u8 ipath_r_intravail_shift;
719 u8 ipath_r_tailupd_shift;
720 u8 ipath_r_portcfg_shift;
721
722 /* unit # of this chip, if present */
723 int ipath_unit;
724
725 /* local link integrity counter */
726 u32 ipath_lli_counter;
727 /* local link integrity errors */
728 u32 ipath_lli_errors;
729 /*
730 * Above counts only cases where _successive_ LocalLinkIntegrity
731 * errors were seen in the receive headers of kern-packets.
732 * Below are the three (monotonically increasing) counters
733 * maintained via GPIO interrupts on iba6120-rev2.
734 */
735 u32 ipath_rxfc_unsupvl_errs;
736 u32 ipath_overrun_thresh_errs;
737 u32 ipath_lli_errs;
738
739 /*
740 * Not all devices managed by a driver instance are the same
741 * type, so these fields must be per-device.
742 */
743 u64 ipath_i_bitsextant;
744 ipath_err_t ipath_e_bitsextant;
745 ipath_err_t ipath_hwe_bitsextant;
746
747 /*
748 * Below should be computable from number of ports,
749 * since they are never modified.
750 */
751 u64 ipath_i_rcvavail_mask;
752 u64 ipath_i_rcvurg_mask;
753 u16 ipath_i_rcvurg_shift;
754 u16 ipath_i_rcvavail_shift;
755
756 /*
757 * Register bits for selecting i2c direction and values, used for
758 * I2C serial flash.
759 */
760 u8 ipath_gpio_sda_num;
761 u8 ipath_gpio_scl_num;
762 u8 ipath_i2c_chain_type;
763 u64 ipath_gpio_sda;
764 u64 ipath_gpio_scl;
765
766 /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */
767 spinlock_t ipath_gpio_lock;
768
769 /*
770 * IB link and linktraining states and masks that vary per chip in
771 * some way. Set at init, to avoid each IB status change interrupt
772 */
773 u8 ibcs_ls_shift;
774 u8 ibcs_lts_mask;
775 u32 ibcs_mask;
776 u32 ib_init;
777 u32 ib_arm;
778 u32 ib_active;
779
780 u16 ipath_rhf_offset; /* offset of RHF within receive header entry */
781
782 /*
783	 * shift/mask for linkcmd, linkinitcmd, maxpktlen in ibccontrol
784 * reg. Changes for IBA7220
785 */
786 u8 ibcc_lic_mask; /* LinkInitCmd */
787 u8 ibcc_lc_shift; /* LinkCmd */
788 u8 ibcc_mpl_shift; /* Maxpktlen */
789
790 u8 delay_mult;
791
792 /* used to override LED behavior */
793 u8 ipath_led_override; /* Substituted for normal value, if non-zero */
794 u16 ipath_led_override_timeoff; /* delta to next timer event */
795 u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */
796 u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */
797 atomic_t ipath_led_override_timer_active;
798 /* Used to flash LEDs in override mode */
799 struct timer_list ipath_led_override_timer;
800
801 /* Support (including locks) for EEPROM logging of errors and time */
802 /* control access to actual counters, timer */
803 spinlock_t ipath_eep_st_lock;
804 /* control high-level access to EEPROM */
805 struct mutex ipath_eep_lock;
806 /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */
807 uint64_t ipath_traffic_wds;
808 /* active time is kept in seconds, but logged in hours */
809 atomic_t ipath_active_time;
810 /* Below are nominal shadow of EEPROM, new since last EEPROM update */
811 uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT];
812 uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT];
813 uint16_t ipath_eep_hrs;
814 /*
815 * masks for which bits of errs, hwerrs that cause
816 * each of the counters to increment.
817 */
818 struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT];
819
820 /* interrupt mitigation reload register info */
821 u16 ipath_jint_idle_ticks; /* idle clock ticks */
822 u16 ipath_jint_max_packets; /* max packets across all ports */
823
824 /*
825 * lock for access to SerDes, and flags to sequence preset
826 * versus steady-state. 7220-only at the moment.
827 */
828 spinlock_t ipath_sdepb_lock;
829 u8 ipath_presets_needed; /* Set if presets to be restored next DOWN */
830};
831
832/* ipath_hol_state values (stopping/starting user proc, send flushing) */
833#define IPATH_HOL_UP 0
834#define IPATH_HOL_DOWN 1
835/* ipath_hol_next toggle values, used when hol_state IPATH_HOL_DOWN */
836#define IPATH_HOL_DOWNSTOP 0
837#define IPATH_HOL_DOWNCONT 1
838
839/* bit positions for sdma_status */
840#define IPATH_SDMA_ABORTING 0
841#define IPATH_SDMA_DISARMED 1
842#define IPATH_SDMA_DISABLED 2
843#define IPATH_SDMA_LAYERBUF 3
844#define IPATH_SDMA_RUNNING 30
845#define IPATH_SDMA_SHUTDOWN 31
846
847/* bit combinations that correspond to abort states */
848#define IPATH_SDMA_ABORT_NONE 0
849#define IPATH_SDMA_ABORT_ABORTING (1UL << IPATH_SDMA_ABORTING)
850#define IPATH_SDMA_ABORT_DISARMED ((1UL << IPATH_SDMA_ABORTING) | \
851 (1UL << IPATH_SDMA_DISARMED))
852#define IPATH_SDMA_ABORT_DISABLED ((1UL << IPATH_SDMA_ABORTING) | \
853 (1UL << IPATH_SDMA_DISABLED))
854#define IPATH_SDMA_ABORT_ABORTED ((1UL << IPATH_SDMA_ABORTING) | \
855 (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
856#define IPATH_SDMA_ABORT_MASK ((1UL<<IPATH_SDMA_ABORTING) | \
857 (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
858
859#define IPATH_SDMA_BUF_NONE 0
860#define IPATH_SDMA_BUF_MASK (1UL<<IPATH_SDMA_LAYERBUF)
861
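The abort-state macros above build recognizable progressions out of the low status bits (ABORTING, then ABORTING|DISARMED, then all three once the engine is disabled). A minimal stand-alone sketch of classifying a status word against these masks; the bit values are copied from the definitions above, while the classifier and test value are only illustrative:

#include <stdio.h>

#define IPATH_SDMA_ABORTING	0
#define IPATH_SDMA_DISARMED	1
#define IPATH_SDMA_DISABLED	2

#define IPATH_SDMA_ABORT_NONE		0
#define IPATH_SDMA_ABORT_ABORTING	(1UL << IPATH_SDMA_ABORTING)
#define IPATH_SDMA_ABORT_DISARMED	((1UL << IPATH_SDMA_ABORTING) | \
	(1UL << IPATH_SDMA_DISARMED))
#define IPATH_SDMA_ABORT_DISABLED	((1UL << IPATH_SDMA_ABORTING) | \
	(1UL << IPATH_SDMA_DISABLED))
#define IPATH_SDMA_ABORT_ABORTED	((1UL << IPATH_SDMA_ABORTING) | \
	(1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
#define IPATH_SDMA_ABORT_MASK		((1UL << IPATH_SDMA_ABORTING) | \
	(1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))

/* Name the abort phase encoded in the low bits of a status word. */
static const char *sdma_abort_phase(unsigned long status)
{
	switch (status & IPATH_SDMA_ABORT_MASK) {
	case IPATH_SDMA_ABORT_NONE:	return "none";
	case IPATH_SDMA_ABORT_ABORTING:	return "aborting";
	case IPATH_SDMA_ABORT_DISARMED:	return "disarmed";
	case IPATH_SDMA_ABORT_DISABLED:	return "disabled";
	case IPATH_SDMA_ABORT_ABORTED:	return "aborted";
	default:			return "intermediate";
	}
}

int main(void)
{
	unsigned long status = (1UL << IPATH_SDMA_ABORTING) |
			       (1UL << IPATH_SDMA_DISARMED);

	printf("phase: %s\n", sdma_abort_phase(status));	/* disarmed */
	return 0;
}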
862/* Private data for file operations */
863struct ipath_filedata {
864 struct ipath_portdata *pd;
865 unsigned subport;
866 unsigned tidcursor;
867 struct ipath_user_sdma_queue *pq;
868};
869extern struct list_head ipath_dev_list;
870extern spinlock_t ipath_devs_lock;
871extern struct ipath_devdata *ipath_lookup(int unit);
872
873int ipath_init_chip(struct ipath_devdata *, int);
874int ipath_enable_wc(struct ipath_devdata *dd);
875void ipath_disable_wc(struct ipath_devdata *dd);
876int ipath_count_units(int *npresentp, int *nupp, int *maxportsp);
877void ipath_shutdown_device(struct ipath_devdata *);
878void ipath_clear_freeze(struct ipath_devdata *);
879
880struct file_operations;
881int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
882 struct cdev **cdevp, struct device **devp);
883void ipath_cdev_cleanup(struct cdev **cdevp,
884 struct device **devp);
885
886int ipath_diag_add(struct ipath_devdata *);
887void ipath_diag_remove(struct ipath_devdata *);
888
889extern wait_queue_head_t ipath_state_wait;
890
891int ipath_user_add(struct ipath_devdata *dd);
892void ipath_user_remove(struct ipath_devdata *dd);
893
894struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
895
896extern int ipath_diag_inuse;
897
898irqreturn_t ipath_intr(int irq, void *devid);
899int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
900 ipath_err_t err);
901#if __IPATH_INFO || __IPATH_DBG
902extern const char *ipath_ibcstatus_str[];
903#endif
904
905/* clean up any per-chip chip-specific stuff */
906void ipath_chip_cleanup(struct ipath_devdata *);
907/* clean up any chip type-specific stuff */
908void ipath_chip_done(void);
909
910void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
911 unsigned cnt);
912void ipath_cancel_sends(struct ipath_devdata *, int);
913
914int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
915void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
916
917int ipath_parse_ushort(const char *str, unsigned short *valp);
918
919void ipath_kreceive(struct ipath_portdata *);
920int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
921int ipath_reset_device(int);
922void ipath_get_faststats(unsigned long);
923int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
924int ipath_set_linkstate(struct ipath_devdata *, u8);
925int ipath_set_mtu(struct ipath_devdata *, u16);
926int ipath_set_lid(struct ipath_devdata *, u32, u8);
927int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
928void ipath_enable_armlaunch(struct ipath_devdata *);
929void ipath_disable_armlaunch(struct ipath_devdata *);
930void ipath_hol_down(struct ipath_devdata *);
931void ipath_hol_up(struct ipath_devdata *);
932void ipath_hol_event(unsigned long);
933void ipath_toggle_rclkrls(struct ipath_devdata *);
934void ipath_sd7220_clr_ibpar(struct ipath_devdata *);
935void ipath_set_relock_poll(struct ipath_devdata *, int);
936void ipath_shutdown_relock_poll(struct ipath_devdata *);
937
938/* for use in system calls, where we want to know device type, etc. */
939#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
940#define subport_fp(fp) \
941 ((struct ipath_filedata *)(fp)->private_data)->subport
942#define tidcursor_fp(fp) \
943 ((struct ipath_filedata *)(fp)->private_data)->tidcursor
944#define user_sdma_queue_fp(fp) \
945 ((struct ipath_filedata *)(fp)->private_data)->pq
946
947/*
948 * values for ipath_flags
949 */
950 /* chip can report link latency (IB 1.2) */
951#define IPATH_HAS_LINK_LATENCY 0x1
952 /* The chip is up and initted */
953#define IPATH_INITTED 0x2
954 /* set if any user code has set kr_rcvhdrsize */
955#define IPATH_RCVHDRSZ_SET 0x4
956 /* The chip is present and valid for accesses */
957#define IPATH_PRESENT 0x8
958 /* HT link0 is only 8 bits wide, ignore upper byte crc
959 * errors, etc. */
960#define IPATH_8BIT_IN_HT0 0x10
961 /* HT link1 is only 8 bits wide, ignore upper byte crc
962 * errors, etc. */
963#define IPATH_8BIT_IN_HT1 0x20
964 /* The link is down */
965#define IPATH_LINKDOWN 0x40
966 /* The link level is up (0x11) */
967#define IPATH_LINKINIT 0x80
968 /* The link is in the armed (0x21) state */
969#define IPATH_LINKARMED 0x100
970 /* The link is in the active (0x31) state */
971#define IPATH_LINKACTIVE 0x200
972 /* link current state is unknown */
973#define IPATH_LINKUNK 0x400
974 /* Write combining flush needed for PIO */
975#define IPATH_PIO_FLUSH_WC 0x1000
976 /* DMA Receive tail pointer */
977#define IPATH_NODMA_RTAIL 0x2000
978 /* no IB cable, or no device on IB cable */
979#define IPATH_NOCABLE 0x4000
980 /* Supports port zero per packet receive interrupts via
981 * GPIO */
982#define IPATH_GPIO_INTR 0x8000
983 /* uses the coded 4byte TID, not 8 byte */
984#define IPATH_4BYTE_TID 0x10000
985 /* packet/word counters are 32 bit, else those 4 counters
986 * are 64bit */
987#define IPATH_32BITCOUNTERS 0x20000
988 /* Interrupt register is 64 bits */
989#define IPATH_INTREG_64 0x40000
990 /* can miss port0 rx interrupts */
991#define IPATH_DISABLED 0x80000 /* administratively disabled */
992 /* Use GPIO interrupts for new counters */
993#define IPATH_GPIO_ERRINTRS 0x100000
994#define IPATH_SWAP_PIOBUFS 0x200000
995 /* Supports Send DMA */
996#define IPATH_HAS_SEND_DMA 0x400000
997 /* Supports Send Count (not just word count) in PBC */
998#define IPATH_HAS_PBC_CNT 0x800000
999 /* Suppress heartbeat, even if turning off loopback */
1000#define IPATH_NO_HRTBT 0x1000000
1001#define IPATH_HAS_THRESH_UPDATE 0x4000000
1002#define IPATH_HAS_MULT_IB_SPEED 0x8000000
1003#define IPATH_IB_AUTONEG_INPROG 0x10000000
1004#define IPATH_IB_AUTONEG_FAILED 0x20000000
1005 /* Linkdown-disable intentionally, Do not attempt to bring up */
1006#define IPATH_IB_LINK_DISABLED 0x40000000
1007#define IPATH_IB_FORCE_NOTIFY 0x80000000 /* force notify on next ib change */
1008
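The link-state flags above (LINKDOWN, LINKINIT, LINKARMED, LINKACTIVE, LINKUNK) describe the current IB link state and are normally set one at a time, alongside unrelated capability bits. A small stand-alone sketch of decoding them; the flag values are copied from the list above, but the decode helper and its priority order are an assumption for illustration:

#include <stdio.h>

#define IPATH_LINKDOWN		0x40
#define IPATH_LINKINIT		0x80
#define IPATH_LINKARMED		0x100
#define IPATH_LINKACTIVE	0x200
#define IPATH_LINKUNK		0x400

/* Map the link-state portion of an ipath_flags word to a printable name. */
static const char *link_state_name(unsigned long flags)
{
	if (flags & IPATH_LINKACTIVE)
		return "active";
	if (flags & IPATH_LINKARMED)
		return "armed";
	if (flags & IPATH_LINKINIT)
		return "init";
	if (flags & IPATH_LINKDOWN)
		return "down";
	if (flags & IPATH_LINKUNK)
		return "unknown";
	return "none";
}

int main(void)
{
	/* 0x8 (IPATH_PRESENT) is ignored; only the link bits matter here. */
	printf("%s\n", link_state_name(0x200 | 0x8));	/* active */
	return 0;
}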
1009/* Bits in GPIO for the added interrupts */
1010#define IPATH_GPIO_PORT0_BIT 2
1011#define IPATH_GPIO_RXUVL_BIT 3
1012#define IPATH_GPIO_OVRUN_BIT 4
1013#define IPATH_GPIO_LLI_BIT 5
1014#define IPATH_GPIO_ERRINTR_MASK 0x38
1015
1016/* portdata flag bit offsets */
1017 /* waiting for a packet to arrive */
1018#define IPATH_PORT_WAITING_RCV 2
1019 /* master has not finished initializing */
1020#define IPATH_PORT_MASTER_UNINIT 4
1021 /* waiting for an urgent packet to arrive */
1022#define IPATH_PORT_WAITING_URG 5
1023
1024/* free up any allocated data at closes */
1025void ipath_free_data(struct ipath_portdata *dd);
1026u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32, u32 *);
1027void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
1028 unsigned len, int avail);
1029void ipath_init_iba6110_funcs(struct ipath_devdata *);
1030void ipath_get_eeprom_info(struct ipath_devdata *);
1031int ipath_update_eeprom_log(struct ipath_devdata *dd);
1032void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
1033u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
1034void ipath_disarm_senderrbufs(struct ipath_devdata *);
1035void ipath_force_pio_avail_update(struct ipath_devdata *);
1036void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
1037
1038/*
1039 * Set LED override, only the two LSBs have "public" meaning, but
1040 * any non-zero value substitutes them for the Link and LinkTrain
1041 * LED states.
1042 */
1043#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
1044#define IPATH_LED_LOG 2 /* Logical (link) YELLOW LED */
1045void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val);
1046
1047/* send dma routines */
1048int setup_sdma(struct ipath_devdata *);
1049void teardown_sdma(struct ipath_devdata *);
1050void ipath_restart_sdma(struct ipath_devdata *);
1051void ipath_sdma_intr(struct ipath_devdata *);
1052int ipath_sdma_verbs_send(struct ipath_devdata *, struct ipath_sge_state *,
1053 u32, struct ipath_verbs_txreq *);
1054/* ipath_sdma_lock should be locked before calling this. */
1055int ipath_sdma_make_progress(struct ipath_devdata *dd);
1056
1057/* must be called under ipath_sdma_lock */
1058static inline u16 ipath_sdma_descq_freecnt(const struct ipath_devdata *dd)
1059{
1060 return dd->ipath_sdma_descq_cnt -
1061 (dd->ipath_sdma_descq_added - dd->ipath_sdma_descq_removed) -
1062 1 - dd->ipath_sdma_desc_nreserved;
1063}
1064
1065static inline void ipath_sdma_desc_reserve(struct ipath_devdata *dd, u16 cnt)
1066{
1067 dd->ipath_sdma_desc_nreserved += cnt;
1068}
1069
1070static inline void ipath_sdma_desc_unreserve(struct ipath_devdata *dd, u16 cnt)
1071{
1072 dd->ipath_sdma_desc_nreserved -= cnt;
1073}
1074
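ipath_sdma_descq_freecnt() above is the usual producer/consumer ring accounting: descq_added and descq_removed are free-running counters, one slot is always kept empty, and reserved descriptors are subtracted. A stand-alone sketch of the same arithmetic on a mock structure; the field names mirror the driver's, the test values are invented:

#include <stdio.h>

struct mock_sdma {
	unsigned long long descq_added;		/* descriptors queued so far */
	unsigned long long descq_removed;	/* descriptors completed so far */
	unsigned short descq_cnt;		/* total slots in the ring */
	int desc_nreserved;			/* slots set aside for special use */
};

/*
 * Same formula as ipath_sdma_descq_freecnt(): ring size minus in-flight
 * descriptors, minus the one always-empty slot, minus reservations.
 */
static unsigned short descq_freecnt(const struct mock_sdma *s)
{
	return s->descq_cnt -
		(unsigned short)(s->descq_added - s->descq_removed) -
		1 - s->desc_nreserved;
}

int main(void)
{
	struct mock_sdma s = {
		.descq_added = 1000, .descq_removed = 990,
		.descq_cnt = 256, .desc_nreserved = 2,
	};

	/* 256 total - 10 in flight - 1 empty slot - 2 reserved = 243 */
	printf("free descriptors: %u\n", descq_freecnt(&s));
	return 0;
}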
1075/*
1076	 * number of words used for protocol header if not set by ipath_userinit()
1077 */
1078#define IPATH_DFLT_RCVHDRSIZE 9
1079
1080int ipath_get_user_pages(unsigned long, size_t, struct page **);
1081void ipath_release_user_pages(struct page **, size_t);
1082void ipath_release_user_pages_on_close(struct page **, size_t);
1083int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int);
1084int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
1085int ipath_tempsense_read(struct ipath_devdata *, u8 regnum);
1086int ipath_tempsense_write(struct ipath_devdata *, u8 regnum, u8 data);
1087
1088/* these are used for the registers that vary with port */
1089void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
1090 unsigned, u64);
1091
1092/*
1093 * We could have a single register get/put routine, that takes a group type,
1094 * but this is somewhat clearer and cleaner. It also gives us some error
1095 * checking. 64 bit register reads should always work, but are inefficient
1096 * on opteron (the northbridge always generates 2 separate HT 32 bit reads),
1097 * so we use kreg32 wherever possible. User register and counter register
1098 * reads are always 32 bit reads, so only one form of those routines.
1099 */
1100
1101/*
1102 * At the moment, none of the s-registers are writable, so no
1103 * ipath_write_sreg().
1104 */
1105
1106/**
1107 * ipath_read_ureg32 - read 32-bit virtualized per-port register
1108 * @dd: device
1109 * @regno: register number
1110 * @port: port number
1111 *
1112 * Return the contents of a register that is virtualized to be per port.
1113	 * Returns 0 if the chip is not present (not distinguishable from valid
1114	 * contents at runtime; we may add a separate error variable at some point).
1115 */
1116static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd,
1117 ipath_ureg regno, int port)
1118{
1119 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
1120 return 0;
1121
1122 return readl(regno + (u64 __iomem *)
1123 (dd->ipath_uregbase +
1124 (char __iomem *)dd->ipath_kregbase +
1125 dd->ipath_ureg_align * port));
1126}
1127
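Note that ipath_read_ureg32() adds regno to a u64 __iomem pointer, so the final byte offset into the BAR is kregbase + uregbase + ureg_align * port + 8 * regno. A sketch of just that offset computation, with no MMIO involved; the base, alignment, port, and register values below are invented for illustration:

#include <stdio.h>
#include <stdint.h>

/*
 * Byte offset of a per-port ("user") register within the chip's address
 * space, mirroring the pointer arithmetic in ipath_read_ureg32().
 */
static uintptr_t ureg_offset(uintptr_t uregbase, unsigned ureg_align,
			     unsigned port, unsigned regno)
{
	return uregbase + (uintptr_t)ureg_align * port + 8u * regno;
}

int main(void)
{
	/*
	 * Hypothetical layout: user registers start at 0x200000, each port's
	 * block is 4 KiB, and we want register 3 of port 2.
	 */
	printf("offset = 0x%lx\n",
	       (unsigned long)ureg_offset(0x200000, 4096, 2, 3));
	return 0;
}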
1128/**
1129 * ipath_write_ureg - write a 64-bit virtualized per-port register
1130 * @dd: device
1131 * @regno: register number
1132 * @value: value
1133 * @port: port
1134 *
1135 * Write the contents of a register that is virtualized to be per port.
1136 */
1137static inline void ipath_write_ureg(const struct ipath_devdata *dd,
1138 ipath_ureg regno, u64 value, int port)
1139{
1140 u64 __iomem *ubase = (u64 __iomem *)
1141 (dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase +
1142 dd->ipath_ureg_align * port);
1143 if (dd->ipath_kregbase)
1144 writeq(value, &ubase[regno]);
1145}
1146
1147static inline u32 ipath_read_kreg32(const struct ipath_devdata *dd,
1148 ipath_kreg regno)
1149{
1150 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
1151 return -1;
1152 return readl((u32 __iomem *) & dd->ipath_kregbase[regno]);
1153}
1154
1155static inline u64 ipath_read_kreg64(const struct ipath_devdata *dd,
1156 ipath_kreg regno)
1157{
1158 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
1159 return -1;
1160
1161 return readq(&dd->ipath_kregbase[regno]);
1162}
1163
1164static inline void ipath_write_kreg(const struct ipath_devdata *dd,
1165 ipath_kreg regno, u64 value)
1166{
1167 if (dd->ipath_kregbase)
1168 writeq(value, &dd->ipath_kregbase[regno]);
1169}
1170
1171static inline u64 ipath_read_creg(const struct ipath_devdata *dd,
1172 ipath_sreg regno)
1173{
1174 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
1175 return 0;
1176
1177 return readq(regno + (u64 __iomem *)
1178 (dd->ipath_cregbase +
1179 (char __iomem *)dd->ipath_kregbase));
1180}
1181
1182static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
1183 ipath_sreg regno)
1184{
1185 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
1186 return 0;
1187 return readl(regno + (u64 __iomem *)
1188 (dd->ipath_cregbase +
1189 (char __iomem *)dd->ipath_kregbase));
1190}
1191
1192static inline void ipath_write_creg(const struct ipath_devdata *dd,
1193 ipath_creg regno, u64 value)
1194{
1195 if (dd->ipath_kregbase)
1196 writeq(value, regno + (u64 __iomem *)
1197 (dd->ipath_cregbase +
1198 (char __iomem *)dd->ipath_kregbase));
1199}
1200
1201static inline void ipath_clear_rcvhdrtail(const struct ipath_portdata *pd)
1202{
1203 *((u64 *) pd->port_rcvhdrtail_kvaddr) = 0ULL;
1204}
1205
1206static inline u32 ipath_get_rcvhdrtail(const struct ipath_portdata *pd)
1207{
1208 return (u32) le64_to_cpu(*((volatile __le64 *)
1209 pd->port_rcvhdrtail_kvaddr));
1210}
1211
1212static inline u32 ipath_get_hdrqtail(const struct ipath_portdata *pd)
1213{
1214 const struct ipath_devdata *dd = pd->port_dd;
1215 u32 hdrqtail;
1216
1217 if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
1218 __le32 *rhf_addr;
1219 u32 seq;
1220
1221 rhf_addr = (__le32 *) pd->port_rcvhdrq +
1222 pd->port_head + dd->ipath_rhf_offset;
1223 seq = ipath_hdrget_seq(rhf_addr);
1224 hdrqtail = pd->port_head;
1225 if (seq == pd->port_seq_cnt)
1226 hdrqtail++;
1227 } else
1228 hdrqtail = ipath_get_rcvhdrtail(pd);
1229
1230 return hdrqtail;
1231}
1232
1233static inline u64 ipath_read_ireg(const struct ipath_devdata *dd, ipath_kreg r)
1234{
1235 return (dd->ipath_flags & IPATH_INTREG_64) ?
1236 ipath_read_kreg64(dd, r) : ipath_read_kreg32(dd, r);
1237}
1238
1239/*
1240 * from contents of IBCStatus (or a saved copy), return linkstate
1241 * Report ACTIVE_DEFER as ACTIVE, because we treat them the same
1242 * everywhere, anyway (and should be, for almost all purposes).
1243 */
1244static inline u32 ipath_ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
1245{
1246 u32 state = (u32)(ibcs >> dd->ibcs_ls_shift) &
1247 INFINIPATH_IBCS_LINKSTATE_MASK;
1248 if (state == INFINIPATH_IBCS_L_STATE_ACT_DEFER)
1249 state = INFINIPATH_IBCS_L_STATE_ACTIVE;
1250 return state;
1251}
1252
1253/* from contents of IBCStatus (or a saved copy), return linktrainingstate */
1254static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs)
1255{
1256 return (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
1257 dd->ibcs_lts_mask;
1258}
1259
1260/*
1261 * from contents of IBCStatus (or a saved copy), return logical link state
1262 * combination of link state and linktraining state (down, active, init,
1263 * arm, etc.)
1264 */
1265static inline u32 ipath_ib_state(struct ipath_devdata *dd, u64 ibcs)
1266{
1267 u32 ibs;
1268 ibs = (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
1269 dd->ibcs_lts_mask;
1270 ibs |= (u32)(ibcs &
1271 (INFINIPATH_IBCS_LINKSTATE_MASK << dd->ibcs_ls_shift));
1272 return ibs;
1273}
1274
1275/*
1276 * sysfs interface.
1277 */
1278
1279struct device_driver;
1280
1281extern const char ib_ipath_version[];
1282
1283extern const struct attribute_group *ipath_driver_attr_groups[];
1284
1285int ipath_device_create_group(struct device *, struct ipath_devdata *);
1286void ipath_device_remove_group(struct device *, struct ipath_devdata *);
1287int ipath_expose_reset(struct device *);
1288
1289int ipath_init_ipathfs(void);
1290void ipath_exit_ipathfs(void);
1291int ipathfs_add_device(struct ipath_devdata *);
1292int ipathfs_remove_device(struct ipath_devdata *);
1293
1294/*
1295 * dma_addr wrappers - all 0's invalid for hw
1296 */
1297dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long,
1298 size_t, int);
1299dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
1300const char *ipath_get_unit_name(int unit);
1301
1302/*
1303 * Flush write combining store buffers (if present) and perform a write
1304 * barrier.
1305 */
1306#if defined(CONFIG_X86_64)
1307#define ipath_flush_wc() asm volatile("sfence" ::: "memory")
1308#else
1309#define ipath_flush_wc() wmb()
1310#endif
1311
1312extern unsigned ipath_debug; /* debugging bit mask */
1313extern unsigned ipath_linkrecovery;
1314extern unsigned ipath_mtu4096;
1315extern struct mutex ipath_mutex;
1316
1317#define IPATH_DRV_NAME "ib_ipath"
1318#define IPATH_MAJOR 233
1319#define IPATH_USER_MINOR_BASE 0
1320#define IPATH_DIAGPKT_MINOR 127
1321#define IPATH_DIAG_MINOR_BASE 129
1322#define IPATH_NMINORS 255
1323
1324#define ipath_dev_err(dd,fmt,...) \
1325 do { \
1326 const struct ipath_devdata *__dd = (dd); \
1327 if (__dd->pcidev) \
1328 dev_err(&__dd->pcidev->dev, "%s: " fmt, \
1329 ipath_get_unit_name(__dd->ipath_unit), \
1330 ##__VA_ARGS__); \
1331 else \
1332 printk(KERN_ERR IPATH_DRV_NAME ": %s: " fmt, \
1333 ipath_get_unit_name(__dd->ipath_unit), \
1334 ##__VA_ARGS__); \
1335 } while (0)
1336
1337#if _IPATH_DEBUGGING
1338
1339# define __IPATH_DBG_WHICH(which,fmt,...) \
1340 do { \
1341 if (unlikely(ipath_debug & (which))) \
1342 printk(KERN_DEBUG IPATH_DRV_NAME ": %s: " fmt, \
1343 __func__,##__VA_ARGS__); \
1344 } while(0)
1345
1346# define ipath_dbg(fmt,...) \
1347 __IPATH_DBG_WHICH(__IPATH_DBG,fmt,##__VA_ARGS__)
1348# define ipath_cdbg(which,fmt,...) \
1349 __IPATH_DBG_WHICH(__IPATH_##which##DBG,fmt,##__VA_ARGS__)
1350
1351#else /* ! _IPATH_DEBUGGING */
1352
1353# define ipath_dbg(fmt,...)
1354# define ipath_cdbg(which,fmt,...)
1355
1356#endif /* _IPATH_DEBUGGING */
1357
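The debug macros above gate every message on a category bit in the global ipath_debug mask, so classes of output can be switched on individually at run time. A user-space analogue of the same pattern; the category names and the dbg_which() helper are invented for illustration:

#include <stdio.h>

/* Invented category bits standing in for the __IPATH_*DBG values. */
#define DBG_GENERAL	0x1
#define DBG_VERBOSE	0x2
#define DBG_PKT		0x4

static unsigned debug_mask = DBG_GENERAL | DBG_PKT;

/* Print only when the message's category is enabled, like __IPATH_DBG_WHICH. */
#define dbg_which(which, fmt, ...)					\
	do {								\
		if (debug_mask & (which))				\
			fprintf(stderr, "%s: " fmt, __func__,		\
				##__VA_ARGS__);				\
	} while (0)

int main(void)
{
	dbg_which(DBG_GENERAL, "general message %d\n", 1);	/* printed */
	dbg_which(DBG_VERBOSE, "verbose message %d\n", 2);	/* suppressed */
	return 0;
}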
1358/*
1359 * this is used for formatting hw error messages...
1360 */
1361struct ipath_hwerror_msgs {
1362 u64 mask;
1363 const char *msg;
1364};
1365
1366#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b }
1367
1368/* in ipath_intr.c... */
1369void ipath_format_hwerrors(u64 hwerrs,
1370 const struct ipath_hwerror_msgs *hwerrmsgs,
1371 size_t nhwerrmsgs,
1372 char *msg, size_t lmsg);
1373
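struct ipath_hwerror_msgs pairs a hardware-error bit mask with a human-readable string, and ipath_format_hwerrors() (in ipath_intr.c) walks such a table to build a message. A stand-alone sketch of that kind of table-driven formatter, under the assumption that matching entries are simply concatenated into the buffer; the error bits and names in the test table are invented:

#include <stdio.h>

struct hwerror_msg {
	unsigned long long mask;
	const char *msg;
};

/*
 * Append the message for every error bit present in hwerrs, comma
 * separated, staying within the buffer.
 */
static void format_hwerrors(unsigned long long hwerrs,
			    const struct hwerror_msg *tbl, size_t n,
			    char *buf, size_t len)
{
	size_t i, used = 0;

	if (!len)
		return;
	buf[0] = '\0';
	for (i = 0; i < n && used < len; i++) {
		int r;

		if (!(hwerrs & tbl[i].mask))
			continue;
		r = snprintf(buf + used, len - used, "%s%s",
			     used ? ", " : "", tbl[i].msg);
		if (r < 0)
			break;
		used += (size_t)r;
	}
}

int main(void)
{
	static const struct hwerror_msg tbl[] = {
		{ 0x1ULL,  "memory parity error" },	/* invented bit/name */
		{ 0x10ULL, "PCIe poisoned TLP" },	/* invented bit/name */
	};
	char buf[128];

	format_hwerrors(0x11ULL, tbl, 2, buf, sizeof(buf));
	printf("%s\n", buf);	/* memory parity error, PCIe poisoned TLP */
	return 0;
}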
1374#endif /* _IPATH_KERNEL_H */
diff --git a/drivers/staging/rdma/ipath/ipath_keys.c b/drivers/staging/rdma/ipath/ipath_keys.c
deleted file mode 100644
index c0e933fec218..000000000000
--- a/drivers/staging/rdma/ipath/ipath_keys.c
+++ /dev/null
@@ -1,270 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <asm/io.h>
35
36#include "ipath_verbs.h"
37#include "ipath_kernel.h"
38
39/**
40 * ipath_alloc_lkey - allocate an lkey
41 * @rkt: lkey table in which to allocate the lkey
42 * @mr: memory region that this lkey protects
43 *
44 * Returns 1 if successful, otherwise returns 0.
45 */
46
47int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
48{
49 unsigned long flags;
50 u32 r;
51 u32 n;
52 int ret;
53
54 spin_lock_irqsave(&rkt->lock, flags);
55
56 /* Find the next available LKEY */
57 r = n = rkt->next;
58 for (;;) {
59 if (rkt->table[r] == NULL)
60 break;
61 r = (r + 1) & (rkt->max - 1);
62 if (r == n) {
63 spin_unlock_irqrestore(&rkt->lock, flags);
64 ipath_dbg("LKEY table full\n");
65 ret = 0;
66 goto bail;
67 }
68 }
69 rkt->next = (r + 1) & (rkt->max - 1);
70 /*
71 * Make sure lkey is never zero which is reserved to indicate an
72 * unrestricted LKEY.
73 */
74 rkt->gen++;
75 mr->lkey = (r << (32 - ib_ipath_lkey_table_size)) |
76 ((((1 << (24 - ib_ipath_lkey_table_size)) - 1) & rkt->gen)
77 << 8);
78 if (mr->lkey == 0) {
79 mr->lkey |= 1 << 8;
80 rkt->gen++;
81 }
82 rkt->table[r] = mr;
83 spin_unlock_irqrestore(&rkt->lock, flags);
84
85 ret = 1;
86
87bail:
88 return ret;
89}
90
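ipath_alloc_lkey() packs the chosen table slot into the top bits of the key and a generation counter into the bits above the low byte, bumping the key if the combination would be zero (zero is reserved for the "unrestricted" LKEY). A stand-alone sketch of just that packing; the table-size exponent of 16 stands in for the module parameter ib_ipath_lkey_table_size and is only an assumed example value:

#include <stdio.h>
#include <stdint.h>

#define LKEY_TABLE_SIZE_BITS	16	/* assumed ib_ipath_lkey_table_size */

/*
 * Build an lkey from a table slot and a generation counter, as in
 * ipath_alloc_lkey(): slot in the top bits, generation above the low
 * byte, and never zero.
 */
static uint32_t make_lkey(uint32_t slot, uint32_t *gen)
{
	uint32_t lkey;

	(*gen)++;
	lkey = (slot << (32 - LKEY_TABLE_SIZE_BITS)) |
	       ((((1u << (24 - LKEY_TABLE_SIZE_BITS)) - 1) & *gen) << 8);
	if (lkey == 0) {			/* zero is reserved */
		lkey |= 1u << 8;
		(*gen)++;
	}
	return lkey;
}

int main(void)
{
	uint32_t gen = 0;

	printf("lkey for slot 5: 0x%08x\n", make_lkey(5, &gen));
	printf("lkey for slot 0: 0x%08x\n", make_lkey(0, &gen));
	return 0;
}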
91/**
92 * ipath_free_lkey - free an lkey
93 * @rkt: table from which to free the lkey
94 * @lkey: lkey id to free
95 */
96void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey)
97{
98 unsigned long flags;
99 u32 r;
100
101 if (lkey == 0)
102 return;
103 r = lkey >> (32 - ib_ipath_lkey_table_size);
104 spin_lock_irqsave(&rkt->lock, flags);
105 rkt->table[r] = NULL;
106 spin_unlock_irqrestore(&rkt->lock, flags);
107}
108
109/**
110 * ipath_lkey_ok - check IB SGE for validity and initialize
111 * @rkt: table containing lkey to check SGE against
112 * @isge: outgoing internal SGE
113 * @sge: SGE to check
114 * @acc: access flags
115 *
116 * Return 1 if valid and successful, otherwise returns 0.
117 *
118 * Check the IB SGE for validity and initialize our internal version
119 * of it.
120 */
121int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
122 struct ib_sge *sge, int acc)
123{
124 struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
125 struct ipath_mregion *mr;
126 unsigned n, m;
127 size_t off;
128 int ret;
129
130 /*
131 * We use LKEY == zero for kernel virtual addresses
132 * (see ipath_get_dma_mr and ipath_dma.c).
133 */
134 if (sge->lkey == 0) {
135 /* always a kernel port, no locking needed */
136 struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
137
138 if (pd->user) {
139 ret = 0;
140 goto bail;
141 }
142 isge->mr = NULL;
143 isge->vaddr = (void *) sge->addr;
144 isge->length = sge->length;
145 isge->sge_length = sge->length;
146 ret = 1;
147 goto bail;
148 }
149 mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
150 if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
151 qp->ibqp.pd != mr->pd)) {
152 ret = 0;
153 goto bail;
154 }
155
156 off = sge->addr - mr->user_base;
157 if (unlikely(sge->addr < mr->user_base ||
158 off + sge->length > mr->length ||
159 (mr->access_flags & acc) != acc)) {
160 ret = 0;
161 goto bail;
162 }
163
164 off += mr->offset;
165 m = 0;
166 n = 0;
167 while (off >= mr->map[m]->segs[n].length) {
168 off -= mr->map[m]->segs[n].length;
169 n++;
170 if (n >= IPATH_SEGSZ) {
171 m++;
172 n = 0;
173 }
174 }
175 isge->mr = mr;
176 isge->vaddr = mr->map[m]->segs[n].vaddr + off;
177 isge->length = mr->map[m]->segs[n].length - off;
178 isge->sge_length = sge->length;
179 isge->m = m;
180 isge->n = n;
181
182 ret = 1;
183
184bail:
185 return ret;
186}
187
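The loop near the end of ipath_lkey_ok() (repeated in ipath_rkey_ok() below) converts a byte offset into (map index, segment index, residual offset) coordinates by walking variable-length segments. A stand-alone sketch of that walk; it flattens the driver's mr->map[m]->segs[n] structure into one array, and the value 4 merely stands in for IPATH_SEGSZ:

#include <stdio.h>
#include <stddef.h>

#define SEGSZ	4	/* segments per map entry; stands in for IPATH_SEGSZ */

struct seg { size_t length; };

/*
 * Walk segments until off lands inside one, as in ipath_lkey_ok():
 * returns the map index m, segment index n, and remaining offset.
 */
static void seg_walk(const struct seg *segs, size_t off,
		     unsigned *m, unsigned *n, size_t *rem)
{
	*m = 0;
	*n = 0;
	while (off >= segs[*m * SEGSZ + *n].length) {
		off -= segs[*m * SEGSZ + *n].length;
		(*n)++;
		if (*n >= SEGSZ) {
			(*m)++;
			*n = 0;
		}
	}
	*rem = off;
}

int main(void)
{
	/* Two map entries of four segments each, with uneven lengths. */
	const struct seg segs[8] = {
		{ 4096 }, { 4096 }, { 2048 }, { 4096 },
		{ 4096 }, { 4096 }, { 4096 }, { 4096 },
	};
	unsigned m, n;
	size_t rem;

	seg_walk(segs, 15000, &m, &n, &rem);
	printf("m=%u n=%u rem=%zu\n", m, n, rem);	/* m=1 n=0 rem=664 */
	return 0;
}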
188/**
189 * ipath_rkey_ok - check the IB virtual address, length, and RKEY
190 * @dev: infiniband device
191 * @ss: SGE state
192 * @len: length of data
193 * @vaddr: virtual address to place data
194 * @rkey: rkey to check
195 * @acc: access flags
196 *
197 * Return 1 if successful, otherwise 0.
198 */
199int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
200 u32 len, u64 vaddr, u32 rkey, int acc)
201{
202 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
203 struct ipath_lkey_table *rkt = &dev->lk_table;
204 struct ipath_sge *sge = &ss->sge;
205 struct ipath_mregion *mr;
206 unsigned n, m;
207 size_t off;
208 int ret;
209
210 /*
211 * We use RKEY == zero for kernel virtual addresses
212 * (see ipath_get_dma_mr and ipath_dma.c).
213 */
214 if (rkey == 0) {
215 /* always a kernel port, no locking needed */
216 struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
217
218 if (pd->user) {
219 ret = 0;
220 goto bail;
221 }
222 sge->mr = NULL;
223 sge->vaddr = (void *) vaddr;
224 sge->length = len;
225 sge->sge_length = len;
226 ss->sg_list = NULL;
227 ss->num_sge = 1;
228 ret = 1;
229 goto bail;
230 }
231
232 mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
233 if (unlikely(mr == NULL || mr->lkey != rkey ||
234 qp->ibqp.pd != mr->pd)) {
235 ret = 0;
236 goto bail;
237 }
238
239 off = vaddr - mr->iova;
240 if (unlikely(vaddr < mr->iova || off + len > mr->length ||
241 (mr->access_flags & acc) == 0)) {
242 ret = 0;
243 goto bail;
244 }
245
246 off += mr->offset;
247 m = 0;
248 n = 0;
249 while (off >= mr->map[m]->segs[n].length) {
250 off -= mr->map[m]->segs[n].length;
251 n++;
252 if (n >= IPATH_SEGSZ) {
253 m++;
254 n = 0;
255 }
256 }
257 sge->mr = mr;
258 sge->vaddr = mr->map[m]->segs[n].vaddr + off;
259 sge->length = mr->map[m]->segs[n].length - off;
260 sge->sge_length = len;
261 sge->m = m;
262 sge->n = n;
263 ss->sg_list = NULL;
264 ss->num_sge = 1;
265
266 ret = 1;
267
268bail:
269 return ret;
270}
diff --git a/drivers/staging/rdma/ipath/ipath_mad.c b/drivers/staging/rdma/ipath/ipath_mad.c
deleted file mode 100644
index ad3a926ab3c5..000000000000
--- a/drivers/staging/rdma/ipath/ipath_mad.c
+++ /dev/null
@@ -1,1521 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <rdma/ib_smi.h>
35#include <rdma/ib_pma.h>
36
37#include "ipath_kernel.h"
38#include "ipath_verbs.h"
39#include "ipath_common.h"
40
41#define IB_SMP_UNSUP_VERSION cpu_to_be16(0x0004)
42#define IB_SMP_UNSUP_METHOD cpu_to_be16(0x0008)
43#define IB_SMP_UNSUP_METH_ATTR cpu_to_be16(0x000C)
44#define IB_SMP_INVALID_FIELD cpu_to_be16(0x001C)
45
46static int reply(struct ib_smp *smp)
47{
48 /*
49 * The verbs framework will handle the directed/LID route
50 * packet changes.
51 */
52 smp->method = IB_MGMT_METHOD_GET_RESP;
53 if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
54 smp->status |= IB_SMP_DIRECTION;
55 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
56}
57
58static int recv_subn_get_nodedescription(struct ib_smp *smp,
59 struct ib_device *ibdev)
60{
61 if (smp->attr_mod)
62 smp->status |= IB_SMP_INVALID_FIELD;
63
64 memcpy(smp->data, ibdev->node_desc, sizeof(smp->data));
65
66 return reply(smp);
67}
68
69struct nodeinfo {
70 u8 base_version;
71 u8 class_version;
72 u8 node_type;
73 u8 num_ports;
74 __be64 sys_guid;
75 __be64 node_guid;
76 __be64 port_guid;
77 __be16 partition_cap;
78 __be16 device_id;
79 __be32 revision;
80 u8 local_port_num;
81 u8 vendor_id[3];
82} __attribute__ ((packed));
83
84static int recv_subn_get_nodeinfo(struct ib_smp *smp,
85 struct ib_device *ibdev, u8 port)
86{
87 struct nodeinfo *nip = (struct nodeinfo *)&smp->data;
88 struct ipath_devdata *dd = to_idev(ibdev)->dd;
89 u32 vendor, majrev, minrev;
90
91 /* GUID 0 is illegal */
92 if (smp->attr_mod || (dd->ipath_guid == 0))
93 smp->status |= IB_SMP_INVALID_FIELD;
94
95 nip->base_version = 1;
96 nip->class_version = 1;
97 nip->node_type = 1; /* channel adapter */
98 /*
99 * XXX The num_ports value will need a layer function to get
100 * the value if we ever have more than one IB port on a chip.
101 * We will also need to get the GUID for the port.
102 */
103 nip->num_ports = ibdev->phys_port_cnt;
104 /* This is already in network order */
105 nip->sys_guid = to_idev(ibdev)->sys_image_guid;
106 nip->node_guid = dd->ipath_guid;
107 nip->port_guid = dd->ipath_guid;
108 nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
109 nip->device_id = cpu_to_be16(dd->ipath_deviceid);
110 majrev = dd->ipath_majrev;
111 minrev = dd->ipath_minrev;
112 nip->revision = cpu_to_be32((majrev << 16) | minrev);
113 nip->local_port_num = port;
114 vendor = dd->ipath_vendorid;
115 nip->vendor_id[0] = IPATH_SRC_OUI_1;
116 nip->vendor_id[1] = IPATH_SRC_OUI_2;
117 nip->vendor_id[2] = IPATH_SRC_OUI_3;
118
119 return reply(smp);
120}
121
122static int recv_subn_get_guidinfo(struct ib_smp *smp,
123 struct ib_device *ibdev)
124{
125 u32 startgx = 8 * be32_to_cpu(smp->attr_mod);
126 __be64 *p = (__be64 *) smp->data;
127
128 /* 32 blocks of 8 64-bit GUIDs per block */
129
130 memset(smp->data, 0, sizeof(smp->data));
131
132 /*
133 * We only support one GUID for now. If this changes, the
134 * portinfo.guid_cap field needs to be updated too.
135 */
136 if (startgx == 0) {
137 __be64 g = to_idev(ibdev)->dd->ipath_guid;
138 if (g == 0)
139 /* GUID 0 is illegal */
140 smp->status |= IB_SMP_INVALID_FIELD;
141 else
142 /* The first is a copy of the read-only HW GUID. */
143 *p = g;
144 } else
145 smp->status |= IB_SMP_INVALID_FIELD;
146
147 return reply(smp);
148}
149
150static void set_link_width_enabled(struct ipath_devdata *dd, u32 w)
151{
152 (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, w);
153}
154
155static void set_link_speed_enabled(struct ipath_devdata *dd, u32 s)
156{
157 (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, s);
158}
159
160static int get_overrunthreshold(struct ipath_devdata *dd)
161{
162 return (dd->ipath_ibcctrl >>
163 INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
164 INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
165}
166
167/**
168 * set_overrunthreshold - set the overrun threshold
169 * @dd: the infinipath device
170 * @n: the new threshold
171 *
172 * Note that this will only take effect when the link state changes.
173 */
174static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
175{
176 unsigned v;
177
178 v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
179 INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
180 if (v != n) {
181 dd->ipath_ibcctrl &=
182 ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
183 INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
184 dd->ipath_ibcctrl |=
185 (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
186 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
187 dd->ipath_ibcctrl);
188 }
189 return 0;
190}
191
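get_overrunthreshold()/set_overrunthreshold() above (and the phy-error pair below) follow one pattern: extract a field from the ibcctrl shadow with shift and mask, and on change, clear and re-insert the field in the shadow before writing the whole shadow back to the chip. A generic sketch of that read-modify-write pattern on a plain 64-bit shadow; no register write is shown and the field layout is invented:

#include <stdio.h>
#include <stdint.h>

/* Invented field layout for illustration: a 4-bit field at bit 20. */
#define FIELD_MASK	0xFULL
#define FIELD_SHIFT	20

static uint64_t get_field(uint64_t shadow)
{
	return (shadow >> FIELD_SHIFT) & FIELD_MASK;
}

/*
 * Returns the updated shadow; a driver would also write it back to the
 * chip when the value actually changed, as set_overrunthreshold() does.
 */
static uint64_t set_field(uint64_t shadow, uint64_t n)
{
	if (get_field(shadow) != n) {
		shadow &= ~(FIELD_MASK << FIELD_SHIFT);
		shadow |= (n & FIELD_MASK) << FIELD_SHIFT;
	}
	return shadow;
}

int main(void)
{
	uint64_t shadow = 0;

	shadow = set_field(shadow, 5);
	printf("field = %llu\n", (unsigned long long)get_field(shadow));
	return 0;
}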
192static int get_phyerrthreshold(struct ipath_devdata *dd)
193{
194 return (dd->ipath_ibcctrl >>
195 INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
196 INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
197}
198
199/**
200 * set_phyerrthreshold - set the physical error threshold
201 * @dd: the infinipath device
202 * @n: the new threshold
203 *
204 * Note that this will only take effect when the link state changes.
205 */
206static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
207{
208 unsigned v;
209
210 v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
211 INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
212 if (v != n) {
213 dd->ipath_ibcctrl &=
214 ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
215 INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
216 dd->ipath_ibcctrl |=
217 (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
218 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
219 dd->ipath_ibcctrl);
220 }
221 return 0;
222}
223
224/**
225 * get_linkdowndefaultstate - get the default linkdown state
226 * @dd: the infinipath device
227 *
228 * Returns zero if the default is POLL, 1 if the default is SLEEP.
229 */
230static int get_linkdowndefaultstate(struct ipath_devdata *dd)
231{
232 return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
233}
234
235static int recv_subn_get_portinfo(struct ib_smp *smp,
236 struct ib_device *ibdev, u8 port)
237{
238 struct ipath_ibdev *dev;
239 struct ipath_devdata *dd;
240 struct ib_port_info *pip = (struct ib_port_info *)smp->data;
241 u16 lid;
242 u8 ibcstat;
243 u8 mtu;
244 int ret;
245
246 if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) {
247 smp->status |= IB_SMP_INVALID_FIELD;
248 ret = reply(smp);
249 goto bail;
250 }
251
252 dev = to_idev(ibdev);
253 dd = dev->dd;
254
255 /* Clear all fields. Only set the non-zero fields. */
256 memset(smp->data, 0, sizeof(smp->data));
257
258 /* Only return the mkey if the protection field allows it. */
259 if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey ||
260 dev->mkeyprot == 0)
261 pip->mkey = dev->mkey;
262 pip->gid_prefix = dev->gid_prefix;
263 lid = dd->ipath_lid;
264 pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
265 pip->sm_lid = cpu_to_be16(dev->sm_lid);
266 pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
267 /* pip->diag_code; */
268 pip->mkey_lease_period = cpu_to_be16(dev->mkey_lease_period);
269 pip->local_port_num = port;
270 pip->link_width_enabled = dd->ipath_link_width_enabled;
271 pip->link_width_supported = dd->ipath_link_width_supported;
272 pip->link_width_active = dd->ipath_link_width_active;
273 pip->linkspeed_portstate = dd->ipath_link_speed_supported << 4;
274 ibcstat = dd->ipath_lastibcstat;
275 /* map LinkState to IB portinfo values. */
276 pip->linkspeed_portstate |= ipath_ib_linkstate(dd, ibcstat) + 1;
277
278 pip->portphysstate_linkdown =
279 (ipath_cvt_physportstate[ibcstat & dd->ibcs_lts_mask] << 4) |
280 (get_linkdowndefaultstate(dd) ? 1 : 2);
281 pip->mkeyprot_resv_lmc = (dev->mkeyprot << 6) | dd->ipath_lmc;
282 pip->linkspeedactive_enabled = (dd->ipath_link_speed_active << 4) |
283 dd->ipath_link_speed_enabled;
284 switch (dd->ipath_ibmtu) {
285 case 4096:
286 mtu = IB_MTU_4096;
287 break;
288 case 2048:
289 mtu = IB_MTU_2048;
290 break;
291 case 1024:
292 mtu = IB_MTU_1024;
293 break;
294 case 512:
295 mtu = IB_MTU_512;
296 break;
297 case 256:
298 mtu = IB_MTU_256;
299 break;
300 default: /* oops, something is wrong */
301 mtu = IB_MTU_2048;
302 break;
303 }
304 pip->neighbormtu_mastersmsl = (mtu << 4) | dev->sm_sl;
305 pip->vlcap_inittype = 0x10; /* VLCap = VL0, InitType = 0 */
306 pip->vl_high_limit = dev->vl_high_limit;
307 /* pip->vl_arb_high_cap; // only one VL */
308 /* pip->vl_arb_low_cap; // only one VL */
309 /* InitTypeReply = 0 */
310 /* our mtu cap depends on whether 4K MTU enabled or not */
311 pip->inittypereply_mtucap = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
312 /* HCAs ignore VLStallCount and HOQLife */
313 /* pip->vlstallcnt_hoqlife; */
314 pip->operationalvl_pei_peo_fpi_fpo = 0x10; /* OVLs = 1 */
315 pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
316 /* P_KeyViolations are counted by hardware. */
317 pip->pkey_violations =
318 cpu_to_be16((ipath_get_cr_errpkey(dd) -
319 dev->z_pkey_violations) & 0xFFFF);
320 pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
321 /* Only the hardware GUID is supported for now */
322 pip->guid_cap = 1;
323 pip->clientrereg_resv_subnetto = dev->subnet_timeout;
324 /* 32.768 usec. response time (guessing) */
325 pip->resv_resptimevalue = 3;
326 pip->localphyerrors_overrunerrors =
327 (get_phyerrthreshold(dd) << 4) |
328 get_overrunthreshold(dd);
329 /* pip->max_credit_hint; */
330 if (dev->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
331 u32 v;
332
333 v = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LINKLATENCY);
334 pip->link_roundtrip_latency[0] = v >> 16;
335 pip->link_roundtrip_latency[1] = v >> 8;
336 pip->link_roundtrip_latency[2] = v;
337 }
338
339 ret = reply(smp);
340
341bail:
342 return ret;
343}
344
345/**
346 * get_pkeys - return the PKEY table for port 0
347 * @dd: the infinipath device
348 * @pkeys: the pkey table is placed here
349 */
350static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
351{
352 /* always a kernel port, no locking needed */
353 struct ipath_portdata *pd = dd->ipath_pd[0];
354
355 memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
356
357 return 0;
358}
359
360static int recv_subn_get_pkeytable(struct ib_smp *smp,
361 struct ib_device *ibdev)
362{
363 u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
364 u16 *p = (u16 *) smp->data;
365 __be16 *q = (__be16 *) smp->data;
366
367 /* 64 blocks of 32 16-bit P_Key entries */
368
369 memset(smp->data, 0, sizeof(smp->data));
370 if (startpx == 0) {
371 struct ipath_ibdev *dev = to_idev(ibdev);
372 unsigned i, n = ipath_get_npkeys(dev->dd);
373
374 get_pkeys(dev->dd, p);
375
376 for (i = 0; i < n; i++)
377 q[i] = cpu_to_be16(p[i]);
378 } else
379 smp->status |= IB_SMP_INVALID_FIELD;
380
381 return reply(smp);
382}
383
384static int recv_subn_set_guidinfo(struct ib_smp *smp,
385 struct ib_device *ibdev)
386{
387 /* The only GUID we support is the first read-only entry. */
388 return recv_subn_get_guidinfo(smp, ibdev);
389}
390
391/**
392 * set_linkdowndefaultstate - set the default linkdown state
393 * @dd: the infinipath device
394 * @sleep: the new state
395 *
396 * Note that this will only take effect when the link state changes.
397 */
398static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep)
399{
400 if (sleep)
401 dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
402 else
403 dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
404 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
405 dd->ipath_ibcctrl);
406 return 0;
407}
408
409/**
410 * recv_subn_set_portinfo - set port information
411 * @smp: the incoming SM packet
412 * @ibdev: the infiniband device
413 * @port: the port on the device
414 *
415 * Set Portinfo (see ch. 14.2.5.6).
416 */
417static int recv_subn_set_portinfo(struct ib_smp *smp,
418 struct ib_device *ibdev, u8 port)
419{
420 struct ib_port_info *pip = (struct ib_port_info *)smp->data;
421 struct ib_event event;
422 struct ipath_ibdev *dev;
423 struct ipath_devdata *dd;
424 char clientrereg = 0;
425 u16 lid, smlid;
426 u8 lwe;
427 u8 lse;
428 u8 state;
429 u16 lstate;
430 u32 mtu;
431 int ret, ore;
432
433 if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
434 goto err;
435
436 dev = to_idev(ibdev);
437 dd = dev->dd;
438 event.device = ibdev;
439 event.element.port_num = port;
440
441 dev->mkey = pip->mkey;
442 dev->gid_prefix = pip->gid_prefix;
443 dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
444
445 lid = be16_to_cpu(pip->lid);
446 if (dd->ipath_lid != lid ||
447 dd->ipath_lmc != (pip->mkeyprot_resv_lmc & 7)) {
448 /* Must be a valid unicast LID address. */
449 if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
450 goto err;
451 ipath_set_lid(dd, lid, pip->mkeyprot_resv_lmc & 7);
452 event.event = IB_EVENT_LID_CHANGE;
453 ib_dispatch_event(&event);
454 }
455
456 smlid = be16_to_cpu(pip->sm_lid);
457 if (smlid != dev->sm_lid) {
458 /* Must be a valid unicast LID address. */
459 if (smlid == 0 || smlid >= IPATH_MULTICAST_LID_BASE)
460 goto err;
461 dev->sm_lid = smlid;
462 event.event = IB_EVENT_SM_CHANGE;
463 ib_dispatch_event(&event);
464 }
465
466 /* Allow 1x or 4x to be set (see 14.2.6.6). */
467 lwe = pip->link_width_enabled;
468 if (lwe) {
469 if (lwe == 0xFF)
470 lwe = dd->ipath_link_width_supported;
471 else if (lwe >= 16 || (lwe & ~dd->ipath_link_width_supported))
472 goto err;
473 set_link_width_enabled(dd, lwe);
474 }
475
476	/* Allow 2.5 or 5.0 Gb/s. */
477 lse = pip->linkspeedactive_enabled & 0xF;
478 if (lse) {
479 if (lse == 15)
480 lse = dd->ipath_link_speed_supported;
481 else if (lse >= 8 || (lse & ~dd->ipath_link_speed_supported))
482 goto err;
483 set_link_speed_enabled(dd, lse);
484 }
485
486 /* Set link down default state. */
487 switch (pip->portphysstate_linkdown & 0xF) {
488 case 0: /* NOP */
489 break;
490 case 1: /* SLEEP */
491 if (set_linkdowndefaultstate(dd, 1))
492 goto err;
493 break;
494 case 2: /* POLL */
495 if (set_linkdowndefaultstate(dd, 0))
496 goto err;
497 break;
498 default:
499 goto err;
500 }
501
502 dev->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
503 dev->vl_high_limit = pip->vl_high_limit;
504
505 switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) {
506 case IB_MTU_256:
507 mtu = 256;
508 break;
509 case IB_MTU_512:
510 mtu = 512;
511 break;
512 case IB_MTU_1024:
513 mtu = 1024;
514 break;
515 case IB_MTU_2048:
516 mtu = 2048;
517 break;
518 case IB_MTU_4096:
519 if (!ipath_mtu4096)
520 goto err;
521 mtu = 4096;
522 break;
523 default:
524 /* XXX We have already partially updated our state! */
525 goto err;
526 }
527 ipath_set_mtu(dd, mtu);
528
529 dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
530
531 /* We only support VL0 */
532 if (((pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF) > 1)
533 goto err;
534
535 if (pip->mkey_violations == 0)
536 dev->mkey_violations = 0;
537
538 /*
539 * Hardware counter can't be reset so snapshot and subtract
540 * later.
541 */
542 if (pip->pkey_violations == 0)
543 dev->z_pkey_violations = ipath_get_cr_errpkey(dd);
544
545 if (pip->qkey_violations == 0)
546 dev->qkey_violations = 0;
547
548 ore = pip->localphyerrors_overrunerrors;
549 if (set_phyerrthreshold(dd, (ore >> 4) & 0xF))
550 goto err;
551
552 if (set_overrunthreshold(dd, (ore & 0xF)))
553 goto err;
554
555 dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
556
557 if (pip->clientrereg_resv_subnetto & 0x80) {
558 clientrereg = 1;
559 event.event = IB_EVENT_CLIENT_REREGISTER;
560 ib_dispatch_event(&event);
561 }
562
563 /*
564 * Do the port state change now that the other link parameters
565 * have been set.
566 * Changing the port physical state only makes sense if the link
567 * is down or is being set to down.
568 */
569 state = pip->linkspeed_portstate & 0xF;
570 lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
571 if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
572 goto err;
573
574 /*
575 * Only state changes of DOWN, ARM, and ACTIVE are valid
576 * and must be in the correct state to take effect (see 7.2.6).
577 */
578 switch (state) {
579 case IB_PORT_NOP:
580 if (lstate == 0)
581 break;
582 /* FALLTHROUGH */
583 case IB_PORT_DOWN:
584 if (lstate == 0)
585 lstate = IPATH_IB_LINKDOWN_ONLY;
586 else if (lstate == 1)
587 lstate = IPATH_IB_LINKDOWN_SLEEP;
588 else if (lstate == 2)
589 lstate = IPATH_IB_LINKDOWN;
590 else if (lstate == 3)
591 lstate = IPATH_IB_LINKDOWN_DISABLE;
592 else
593 goto err;
594 ipath_set_linkstate(dd, lstate);
595 if (lstate == IPATH_IB_LINKDOWN_DISABLE) {
596 ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
597 goto done;
598 }
599 ipath_wait_linkstate(dd, IPATH_LINKINIT | IPATH_LINKARMED |
600 IPATH_LINKACTIVE, 1000);
601 break;
602 case IB_PORT_ARMED:
603 ipath_set_linkstate(dd, IPATH_IB_LINKARM);
604 break;
605 case IB_PORT_ACTIVE:
606 ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
607 break;
608 default:
609 /* XXX We have already partially updated our state! */
610 goto err;
611 }
612
613 ret = recv_subn_get_portinfo(smp, ibdev, port);
614
615 if (clientrereg)
616 pip->clientrereg_resv_subnetto |= 0x80;
617
618 goto done;
619
620err:
621 smp->status |= IB_SMP_INVALID_FIELD;
622 ret = recv_subn_get_portinfo(smp, ibdev, port);
623
624done:
625 return ret;
626}
627
628/**
629 * rm_pkey - decrement the reference count for the given PKEY
630 * @dd: the infinipath device
631 * @key: the PKEY index
632 *
633 * Return true if this was the last reference and the hardware table entry
634 * needs to be changed.
635 */
636static int rm_pkey(struct ipath_devdata *dd, u16 key)
637{
638 int i;
639 int ret;
640
641 for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
642 if (dd->ipath_pkeys[i] != key)
643 continue;
644 if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
645 dd->ipath_pkeys[i] = 0;
646 ret = 1;
647 goto bail;
648 }
649 break;
650 }
651
652 ret = 0;
653
654bail:
655 return ret;
656}
657
658/**
659 * add_pkey - add the given PKEY to the hardware table
660 * @dd: the infinipath device
661 * @key: the PKEY
662 *
663 * Return an error code if unable to add the entry, zero if no change,
664 * or 1 if the hardware PKEY register needs to be updated.
665 */
666static int add_pkey(struct ipath_devdata *dd, u16 key)
667{
668 int i;
669 u16 lkey = key & 0x7FFF;
670 int any = 0;
671 int ret;
672
673 if (lkey == 0x7FFF) {
674 ret = 0;
675 goto bail;
676 }
677
678 /* Look for an empty slot or a matching PKEY. */
679 for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
680 if (!dd->ipath_pkeys[i]) {
681 any++;
682 continue;
683 }
684 /* If it matches exactly, try to increment the ref count */
685 if (dd->ipath_pkeys[i] == key) {
686 if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
687 ret = 0;
688 goto bail;
689 }
690 /* Lost the race. Look for an empty slot below. */
691 atomic_dec(&dd->ipath_pkeyrefs[i]);
692 any++;
693 }
694 /*
695 * It makes no sense to have both the limited and unlimited
696 * PKEY set at the same time since the unlimited one will
697 * disable the limited one.
698 */
699 if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
700 ret = -EEXIST;
701 goto bail;
702 }
703 }
704 if (!any) {
705 ret = -EBUSY;
706 goto bail;
707 }
708 for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
709 if (!dd->ipath_pkeys[i] &&
710 atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
711 /* for ipathstats, etc. */
712 ipath_stats.sps_pkeys[i] = lkey;
713 dd->ipath_pkeys[i] = key;
714 ret = 1;
715 goto bail;
716 }
717 }
718 ret = -EBUSY;
719
720bail:
721 return ret;
722}
723
724/**
725 * set_pkeys - set the PKEY table for port 0
726 * @dd: the infinipath device
727 * @pkeys: the PKEY table
 * @port: the IB port number (used when dispatching the PKEY change event)
728 */
729static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys, u8 port)
730{
731 struct ipath_portdata *pd;
732 int i;
733 int changed = 0;
734
735 /* always a kernel port, no locking needed */
736 pd = dd->ipath_pd[0];
737
738 for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
739 u16 key = pkeys[i];
740 u16 okey = pd->port_pkeys[i];
741
742 if (key == okey)
743 continue;
744 /*
745 * The value of this PKEY table entry is changing.
746 * Remove the old entry in the hardware's array of PKEYs.
747 */
748 if (okey & 0x7FFF)
749 changed |= rm_pkey(dd, okey);
750 if (key & 0x7FFF) {
751 int ret = add_pkey(dd, key);
752
753 if (ret < 0)
754 key = 0;
755 else
756 changed |= ret;
757 }
758 pd->port_pkeys[i] = key;
759 }
760 if (changed) {
761 u64 pkey;
762 struct ib_event event;
763
764 pkey = (u64) dd->ipath_pkeys[0] |
765 ((u64) dd->ipath_pkeys[1] << 16) |
766 ((u64) dd->ipath_pkeys[2] << 32) |
767 ((u64) dd->ipath_pkeys[3] << 48);
768 ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
769 (unsigned long long) pkey);
770 ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
771 pkey);
772
773 event.event = IB_EVENT_PKEY_CHANGE;
774 event.device = &dd->verbs_dev->ibdev;
775 event.element.port_num = port;
776 ib_dispatch_event(&event);
777 }
778 return 0;
779}
780
781static int recv_subn_set_pkeytable(struct ib_smp *smp,
782 struct ib_device *ibdev, u8 port)
783{
784 u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
785 __be16 *p = (__be16 *) smp->data;
786 u16 *q = (u16 *) smp->data;
787 struct ipath_ibdev *dev = to_idev(ibdev);
788 unsigned i, n = ipath_get_npkeys(dev->dd);
789
790 for (i = 0; i < n; i++)
791 q[i] = be16_to_cpu(p[i]);
792
793 if (startpx != 0 || set_pkeys(dev->dd, q, port) != 0)
794 smp->status |= IB_SMP_INVALID_FIELD;
795
796 return recv_subn_get_pkeytable(smp, ibdev);
797}
798
799static int recv_pma_get_classportinfo(struct ib_pma_mad *pmp)
800{
801 struct ib_class_port_info *p =
802 (struct ib_class_port_info *)pmp->data;
803
804 memset(pmp->data, 0, sizeof(pmp->data));
805
806 if (pmp->mad_hdr.attr_mod != 0)
807 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
808
809 /* Indicate AllPortSelect is valid (only one port anyway) */
810 p->capability_mask = cpu_to_be16(1 << 8);
811 p->base_version = 1;
812 p->class_version = 1;
813 /*
814 * Expected response time is 4.096 usec. * 2^18 == 1.073741824
815 * sec.
816 */
817 p->resp_time_value = 18;
818
819 return reply((struct ib_smp *) pmp);
820}
821
822/*
823 * The PortSamplesControl.CounterMasks field is an array of 3 bit fields
824 * which specify the N'th counter's capabilities. See ch. 16.1.3.2.
825 * We support 5 counters which only count the mandatory quantities.
826 */
827#define COUNTER_MASK(q, n) (q << ((9 - n) * 3))
828#define COUNTER_MASK0_9 cpu_to_be32(COUNTER_MASK(1, 0) | \
829 COUNTER_MASK(1, 1) | \
830 COUNTER_MASK(1, 2) | \
831 COUNTER_MASK(1, 3) | \
832 COUNTER_MASK(1, 4))
833
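
To make the packing concrete: COUNTER_MASK(q, n) places the 3-bit capability value q for counter n at bit position (9 - n) * 3, so the five mandatory counters above occupy bits 27, 24, 21, 18 and 15 of the 32-bit word. A small standalone sketch (plain userspace C, not driver code) that reproduces the host-order value before the cpu_to_be32() conversion:

#include <stdio.h>

/* Same packing rule as the driver's COUNTER_MASK(): counter n's 3-bit
 * capability field sits at bits [(9 - n) * 3 .. (9 - n) * 3 + 2]. */
#define COUNTER_MASK(q, n) ((q) << ((9 - (n)) * 3))

int main(void)
{
	unsigned int mask = 0;
	int n;

	/* Mark the five mandatory counters with capability value 1. */
	for (n = 0; n < 5; n++)
		mask |= COUNTER_MASK(1, n);

	printf("counter_mask0_9 (host order) = 0x%08x\n", mask);
	/* prints 0x09248000 */
	return 0;
}
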
834static int recv_pma_get_portsamplescontrol(struct ib_pma_mad *pmp,
835 struct ib_device *ibdev, u8 port)
836{
837 struct ib_pma_portsamplescontrol *p =
838 (struct ib_pma_portsamplescontrol *)pmp->data;
839 struct ipath_ibdev *dev = to_idev(ibdev);
840 struct ipath_cregs const *crp = dev->dd->ipath_cregs;
841 unsigned long flags;
842 u8 port_select = p->port_select;
843
844 memset(pmp->data, 0, sizeof(pmp->data));
845
846 p->port_select = port_select;
847 if (pmp->mad_hdr.attr_mod != 0 ||
848 (port_select != port && port_select != 0xFF))
849 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
850 /*
851 * Ticks are 10x the link transfer period which for 2.5Gbs is 4
852 * nsec. 0 == 4 nsec., 1 == 8 nsec., ..., 255 == 1020 nsec. Sample
853 * intervals are counted in ticks. Since we use Linux timers, that
854 * count in jiffies, we can't sample for less than 1000 ticks if HZ
855 * == 1000 (4000 ticks if HZ is 250). link_speed_active returns 2 for
856 * DDR, 1 for SDR, set the tick to 1 for DDR, 0 for SDR on chips that
857 * have hardware support for delaying packets.
858 */
859 if (crp->cr_psstat)
860 p->tick = dev->dd->ipath_link_speed_active - 1;
861 else
862 p->tick = 250; /* 1 usec. */
863 p->counter_width = 4; /* 32 bit counters */
864 p->counter_mask0_9 = COUNTER_MASK0_9;
865 spin_lock_irqsave(&dev->pending_lock, flags);
866 if (crp->cr_psstat)
867 p->sample_status = ipath_read_creg32(dev->dd, crp->cr_psstat);
868 else
869 p->sample_status = dev->pma_sample_status;
870 p->sample_start = cpu_to_be32(dev->pma_sample_start);
871 p->sample_interval = cpu_to_be32(dev->pma_sample_interval);
872 p->tag = cpu_to_be16(dev->pma_tag);
873 p->counter_select[0] = dev->pma_counter_select[0];
874 p->counter_select[1] = dev->pma_counter_select[1];
875 p->counter_select[2] = dev->pma_counter_select[2];
876 p->counter_select[3] = dev->pma_counter_select[3];
877 p->counter_select[4] = dev->pma_counter_select[4];
878 spin_unlock_irqrestore(&dev->pending_lock, flags);
879
880 return reply((struct ib_smp *) pmp);
881}
882
883static int recv_pma_set_portsamplescontrol(struct ib_pma_mad *pmp,
884 struct ib_device *ibdev, u8 port)
885{
886 struct ib_pma_portsamplescontrol *p =
887 (struct ib_pma_portsamplescontrol *)pmp->data;
888 struct ipath_ibdev *dev = to_idev(ibdev);
889 struct ipath_cregs const *crp = dev->dd->ipath_cregs;
890 unsigned long flags;
891 u8 status;
892 int ret;
893
894 if (pmp->mad_hdr.attr_mod != 0 ||
895 (p->port_select != port && p->port_select != 0xFF)) {
896 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
897 ret = reply((struct ib_smp *) pmp);
898 goto bail;
899 }
900
901 spin_lock_irqsave(&dev->pending_lock, flags);
902 if (crp->cr_psstat)
903 status = ipath_read_creg32(dev->dd, crp->cr_psstat);
904 else
905 status = dev->pma_sample_status;
906 if (status == IB_PMA_SAMPLE_STATUS_DONE) {
907 dev->pma_sample_start = be32_to_cpu(p->sample_start);
908 dev->pma_sample_interval = be32_to_cpu(p->sample_interval);
909 dev->pma_tag = be16_to_cpu(p->tag);
910 dev->pma_counter_select[0] = p->counter_select[0];
911 dev->pma_counter_select[1] = p->counter_select[1];
912 dev->pma_counter_select[2] = p->counter_select[2];
913 dev->pma_counter_select[3] = p->counter_select[3];
914 dev->pma_counter_select[4] = p->counter_select[4];
915 if (crp->cr_psstat) {
916 ipath_write_creg(dev->dd, crp->cr_psinterval,
917 dev->pma_sample_interval);
918 ipath_write_creg(dev->dd, crp->cr_psstart,
919 dev->pma_sample_start);
920 } else
921 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED;
922 }
923 spin_unlock_irqrestore(&dev->pending_lock, flags);
924
925 ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port);
926
927bail:
928 return ret;
929}
930
931static u64 get_counter(struct ipath_ibdev *dev,
932 struct ipath_cregs const *crp,
933 __be16 sel)
934{
935 u64 ret;
936
937 switch (sel) {
938 case IB_PMA_PORT_XMIT_DATA:
939 ret = (crp->cr_psxmitdatacount) ?
940 ipath_read_creg32(dev->dd, crp->cr_psxmitdatacount) :
941 dev->ipath_sword;
942 break;
943 case IB_PMA_PORT_RCV_DATA:
944 ret = (crp->cr_psrcvdatacount) ?
945 ipath_read_creg32(dev->dd, crp->cr_psrcvdatacount) :
946 dev->ipath_rword;
947 break;
948 case IB_PMA_PORT_XMIT_PKTS:
949 ret = (crp->cr_psxmitpktscount) ?
950 ipath_read_creg32(dev->dd, crp->cr_psxmitpktscount) :
951 dev->ipath_spkts;
952 break;
953 case IB_PMA_PORT_RCV_PKTS:
954 ret = (crp->cr_psrcvpktscount) ?
955 ipath_read_creg32(dev->dd, crp->cr_psrcvpktscount) :
956 dev->ipath_rpkts;
957 break;
958 case IB_PMA_PORT_XMIT_WAIT:
959 ret = (crp->cr_psxmitwaitcount) ?
960 ipath_read_creg32(dev->dd, crp->cr_psxmitwaitcount) :
961 dev->ipath_xmit_wait;
962 break;
963 default:
964 ret = 0;
965 }
966
967 return ret;
968}
969
970static int recv_pma_get_portsamplesresult(struct ib_pma_mad *pmp,
971 struct ib_device *ibdev)
972{
973 struct ib_pma_portsamplesresult *p =
974 (struct ib_pma_portsamplesresult *)pmp->data;
975 struct ipath_ibdev *dev = to_idev(ibdev);
976 struct ipath_cregs const *crp = dev->dd->ipath_cregs;
977 u8 status;
978 int i;
979
980 memset(pmp->data, 0, sizeof(pmp->data));
981 p->tag = cpu_to_be16(dev->pma_tag);
982 if (crp->cr_psstat)
983 status = ipath_read_creg32(dev->dd, crp->cr_psstat);
984 else
985 status = dev->pma_sample_status;
986 p->sample_status = cpu_to_be16(status);
987 for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
988 p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
989 cpu_to_be32(
990 get_counter(dev, crp, dev->pma_counter_select[i]));
991
992 return reply((struct ib_smp *) pmp);
993}
994
995static int recv_pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp,
996 struct ib_device *ibdev)
997{
998 struct ib_pma_portsamplesresult_ext *p =
999 (struct ib_pma_portsamplesresult_ext *)pmp->data;
1000 struct ipath_ibdev *dev = to_idev(ibdev);
1001 struct ipath_cregs const *crp = dev->dd->ipath_cregs;
1002 u8 status;
1003 int i;
1004
1005 memset(pmp->data, 0, sizeof(pmp->data));
1006 p->tag = cpu_to_be16(dev->pma_tag);
1007 if (crp->cr_psstat)
1008 status = ipath_read_creg32(dev->dd, crp->cr_psstat);
1009 else
1010 status = dev->pma_sample_status;
1011 p->sample_status = cpu_to_be16(status);
1012 /* 64 bits */
1013 p->extended_width = cpu_to_be32(0x80000000);
1014 for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
1015 p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
1016 cpu_to_be64(
1017 get_counter(dev, crp, dev->pma_counter_select[i]));
1018
1019 return reply((struct ib_smp *) pmp);
1020}
1021
1022static int recv_pma_get_portcounters(struct ib_pma_mad *pmp,
1023 struct ib_device *ibdev, u8 port)
1024{
1025 struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
1026 pmp->data;
1027 struct ipath_ibdev *dev = to_idev(ibdev);
1028 struct ipath_verbs_counters cntrs;
1029 u8 port_select = p->port_select;
1030
1031 ipath_get_counters(dev->dd, &cntrs);
1032
1033 /* Adjust counters for any resets done. */
1034 cntrs.symbol_error_counter -= dev->z_symbol_error_counter;
1035 cntrs.link_error_recovery_counter -=
1036 dev->z_link_error_recovery_counter;
1037 cntrs.link_downed_counter -= dev->z_link_downed_counter;
1038 cntrs.port_rcv_errors += dev->rcv_errors;
1039 cntrs.port_rcv_errors -= dev->z_port_rcv_errors;
1040 cntrs.port_rcv_remphys_errors -= dev->z_port_rcv_remphys_errors;
1041 cntrs.port_xmit_discards -= dev->z_port_xmit_discards;
1042 cntrs.port_xmit_data -= dev->z_port_xmit_data;
1043 cntrs.port_rcv_data -= dev->z_port_rcv_data;
1044 cntrs.port_xmit_packets -= dev->z_port_xmit_packets;
1045 cntrs.port_rcv_packets -= dev->z_port_rcv_packets;
1046 cntrs.local_link_integrity_errors -=
1047 dev->z_local_link_integrity_errors;
1048 cntrs.excessive_buffer_overrun_errors -=
1049 dev->z_excessive_buffer_overrun_errors;
1050 cntrs.vl15_dropped -= dev->z_vl15_dropped;
1051 cntrs.vl15_dropped += dev->n_vl15_dropped;
1052
1053 memset(pmp->data, 0, sizeof(pmp->data));
1054
1055 p->port_select = port_select;
1056 if (pmp->mad_hdr.attr_mod != 0 ||
1057 (port_select != port && port_select != 0xFF))
1058 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
1059
1060 if (cntrs.symbol_error_counter > 0xFFFFUL)
1061 p->symbol_error_counter = cpu_to_be16(0xFFFF);
1062 else
1063 p->symbol_error_counter =
1064 cpu_to_be16((u16)cntrs.symbol_error_counter);
1065 if (cntrs.link_error_recovery_counter > 0xFFUL)
1066 p->link_error_recovery_counter = 0xFF;
1067 else
1068 p->link_error_recovery_counter =
1069 (u8)cntrs.link_error_recovery_counter;
1070 if (cntrs.link_downed_counter > 0xFFUL)
1071 p->link_downed_counter = 0xFF;
1072 else
1073 p->link_downed_counter = (u8)cntrs.link_downed_counter;
1074 if (cntrs.port_rcv_errors > 0xFFFFUL)
1075 p->port_rcv_errors = cpu_to_be16(0xFFFF);
1076 else
1077 p->port_rcv_errors =
1078 cpu_to_be16((u16) cntrs.port_rcv_errors);
1079 if (cntrs.port_rcv_remphys_errors > 0xFFFFUL)
1080 p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
1081 else
1082 p->port_rcv_remphys_errors =
1083 cpu_to_be16((u16)cntrs.port_rcv_remphys_errors);
1084 if (cntrs.port_xmit_discards > 0xFFFFUL)
1085 p->port_xmit_discards = cpu_to_be16(0xFFFF);
1086 else
1087 p->port_xmit_discards =
1088 cpu_to_be16((u16)cntrs.port_xmit_discards);
1089 if (cntrs.local_link_integrity_errors > 0xFUL)
1090 cntrs.local_link_integrity_errors = 0xFUL;
1091 if (cntrs.excessive_buffer_overrun_errors > 0xFUL)
1092 cntrs.excessive_buffer_overrun_errors = 0xFUL;
1093 p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) |
1094 cntrs.excessive_buffer_overrun_errors;
1095 if (cntrs.vl15_dropped > 0xFFFFUL)
1096 p->vl15_dropped = cpu_to_be16(0xFFFF);
1097 else
1098 p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped);
1099 if (cntrs.port_xmit_data > 0xFFFFFFFFUL)
1100 p->port_xmit_data = cpu_to_be32(0xFFFFFFFF);
1101 else
1102 p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data);
1103 if (cntrs.port_rcv_data > 0xFFFFFFFFUL)
1104 p->port_rcv_data = cpu_to_be32(0xFFFFFFFF);
1105 else
1106 p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data);
1107 if (cntrs.port_xmit_packets > 0xFFFFFFFFUL)
1108 p->port_xmit_packets = cpu_to_be32(0xFFFFFFFF);
1109 else
1110 p->port_xmit_packets =
1111 cpu_to_be32((u32)cntrs.port_xmit_packets);
1112 if (cntrs.port_rcv_packets > 0xFFFFFFFFUL)
1113 p->port_rcv_packets = cpu_to_be32(0xFFFFFFFF);
1114 else
1115 p->port_rcv_packets =
1116 cpu_to_be32((u32) cntrs.port_rcv_packets);
1117
1118 return reply((struct ib_smp *) pmp);
1119}
1120
1121static int recv_pma_get_portcounters_ext(struct ib_pma_mad *pmp,
1122 struct ib_device *ibdev, u8 port)
1123{
1124 struct ib_pma_portcounters_ext *p =
1125 (struct ib_pma_portcounters_ext *)pmp->data;
1126 struct ipath_ibdev *dev = to_idev(ibdev);
1127 u64 swords, rwords, spkts, rpkts, xwait;
1128 u8 port_select = p->port_select;
1129
1130 ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
1131 &rpkts, &xwait);
1132
1133 /* Adjust counters for any resets done. */
1134 swords -= dev->z_port_xmit_data;
1135 rwords -= dev->z_port_rcv_data;
1136 spkts -= dev->z_port_xmit_packets;
1137 rpkts -= dev->z_port_rcv_packets;
1138
1139 memset(pmp->data, 0, sizeof(pmp->data));
1140
1141 p->port_select = port_select;
1142 if (pmp->mad_hdr.attr_mod != 0 ||
1143 (port_select != port && port_select != 0xFF))
1144 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
1145
1146 p->port_xmit_data = cpu_to_be64(swords);
1147 p->port_rcv_data = cpu_to_be64(rwords);
1148 p->port_xmit_packets = cpu_to_be64(spkts);
1149 p->port_rcv_packets = cpu_to_be64(rpkts);
1150 p->port_unicast_xmit_packets = cpu_to_be64(dev->n_unicast_xmit);
1151 p->port_unicast_rcv_packets = cpu_to_be64(dev->n_unicast_rcv);
1152 p->port_multicast_xmit_packets = cpu_to_be64(dev->n_multicast_xmit);
1153 p->port_multicast_rcv_packets = cpu_to_be64(dev->n_multicast_rcv);
1154
1155 return reply((struct ib_smp *) pmp);
1156}
1157
1158static int recv_pma_set_portcounters(struct ib_pma_mad *pmp,
1159 struct ib_device *ibdev, u8 port)
1160{
1161 struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
1162 pmp->data;
1163 struct ipath_ibdev *dev = to_idev(ibdev);
1164 struct ipath_verbs_counters cntrs;
1165
1166 /*
1167 * Since the HW doesn't support clearing counters, we save the
1168 * current count and subtract it from future responses.
1169 */
1170 ipath_get_counters(dev->dd, &cntrs);
1171
1172 if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
1173 dev->z_symbol_error_counter = cntrs.symbol_error_counter;
1174
1175 if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY)
1176 dev->z_link_error_recovery_counter =
1177 cntrs.link_error_recovery_counter;
1178
1179 if (p->counter_select & IB_PMA_SEL_LINK_DOWNED)
1180 dev->z_link_downed_counter = cntrs.link_downed_counter;
1181
1182 if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS)
1183 dev->z_port_rcv_errors =
1184 cntrs.port_rcv_errors + dev->rcv_errors;
1185
1186 if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS)
1187 dev->z_port_rcv_remphys_errors =
1188 cntrs.port_rcv_remphys_errors;
1189
1190 if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS)
1191 dev->z_port_xmit_discards = cntrs.port_xmit_discards;
1192
1193 if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS)
1194 dev->z_local_link_integrity_errors =
1195 cntrs.local_link_integrity_errors;
1196
1197 if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS)
1198 dev->z_excessive_buffer_overrun_errors =
1199 cntrs.excessive_buffer_overrun_errors;
1200
1201 if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) {
1202 dev->n_vl15_dropped = 0;
1203 dev->z_vl15_dropped = cntrs.vl15_dropped;
1204 }
1205
1206 if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA)
1207 dev->z_port_xmit_data = cntrs.port_xmit_data;
1208
1209 if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA)
1210 dev->z_port_rcv_data = cntrs.port_rcv_data;
1211
1212 if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS)
1213 dev->z_port_xmit_packets = cntrs.port_xmit_packets;
1214
1215 if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS)
1216 dev->z_port_rcv_packets = cntrs.port_rcv_packets;
1217
1218 return recv_pma_get_portcounters(pmp, ibdev, port);
1219}
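
The z_* fields updated above are the driver's substitute for clearing counters: the hardware counters are free-running, so a Set(PortCounters) merely snapshots the current values and every later Get reports the difference. A minimal standalone sketch of that snapshot-and-subtract scheme (struct pma_ctr and hw_read_counter() are illustrative names, not the driver's):

#include <stdint.h>

struct pma_ctr {
	uint64_t zero_snapshot;		/* value captured at the last "clear" */
};

/* Hypothetical read of a free-running hardware counter. */
extern uint64_t hw_read_counter(int id);

/* "Clear": remember where the hardware counter currently stands. */
static void pma_clear(struct pma_ctr *c, int id)
{
	c->zero_snapshot = hw_read_counter(id);
}

/* Read: report only what accumulated since the last "clear". */
static uint64_t pma_read(const struct pma_ctr *c, int id)
{
	return hw_read_counter(id) - c->zero_snapshot;
}
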
1220
1221static int recv_pma_set_portcounters_ext(struct ib_pma_mad *pmp,
1222 struct ib_device *ibdev, u8 port)
1223{
1224 struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
1225 pmp->data;
1226 struct ipath_ibdev *dev = to_idev(ibdev);
1227 u64 swords, rwords, spkts, rpkts, xwait;
1228
1229 ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
1230 &rpkts, &xwait);
1231
1232 if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
1233 dev->z_port_xmit_data = swords;
1234
1235 if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA)
1236 dev->z_port_rcv_data = rwords;
1237
1238 if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS)
1239 dev->z_port_xmit_packets = spkts;
1240
1241 if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
1242 dev->z_port_rcv_packets = rpkts;
1243
1244 if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
1245 dev->n_unicast_xmit = 0;
1246
1247 if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS)
1248 dev->n_unicast_rcv = 0;
1249
1250 if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS)
1251 dev->n_multicast_xmit = 0;
1252
1253 if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS)
1254 dev->n_multicast_rcv = 0;
1255
1256 return recv_pma_get_portcounters_ext(pmp, ibdev, port);
1257}
1258
1259static int process_subn(struct ib_device *ibdev, int mad_flags,
1260 u8 port_num, const struct ib_mad *in_mad,
1261 struct ib_mad *out_mad)
1262{
1263 struct ib_smp *smp = (struct ib_smp *)out_mad;
1264 struct ipath_ibdev *dev = to_idev(ibdev);
1265 int ret;
1266
1267 *out_mad = *in_mad;
1268 if (smp->class_version != 1) {
1269 smp->status |= IB_SMP_UNSUP_VERSION;
1270 ret = reply(smp);
1271 goto bail;
1272 }
1273
1274 /* Is the mkey in the process of expiring? */
1275 if (dev->mkey_lease_timeout &&
1276 time_after_eq(jiffies, dev->mkey_lease_timeout)) {
1277 /* Clear timeout and mkey protection field. */
1278 dev->mkey_lease_timeout = 0;
1279 dev->mkeyprot = 0;
1280 }
1281
1282 /*
1283 * M_Key checking depends on
1284 * Portinfo:M_Key_protect_bits
1285 */
1286 if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && dev->mkey != 0 &&
1287 dev->mkey != smp->mkey &&
1288 (smp->method == IB_MGMT_METHOD_SET ||
1289 (smp->method == IB_MGMT_METHOD_GET &&
1290 dev->mkeyprot >= 2))) {
1291 if (dev->mkey_violations != 0xFFFF)
1292 ++dev->mkey_violations;
1293 if (dev->mkey_lease_timeout ||
1294 dev->mkey_lease_period == 0) {
1295 ret = IB_MAD_RESULT_SUCCESS |
1296 IB_MAD_RESULT_CONSUMED;
1297 goto bail;
1298 }
1299 dev->mkey_lease_timeout = jiffies +
1300 dev->mkey_lease_period * HZ;
1301 /* Future: Generate a trap notice. */
1302 ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
1303 goto bail;
1304 } else if (dev->mkey_lease_timeout)
1305 dev->mkey_lease_timeout = 0;
1306
1307 switch (smp->method) {
1308 case IB_MGMT_METHOD_GET:
1309 switch (smp->attr_id) {
1310 case IB_SMP_ATTR_NODE_DESC:
1311 ret = recv_subn_get_nodedescription(smp, ibdev);
1312 goto bail;
1313 case IB_SMP_ATTR_NODE_INFO:
1314 ret = recv_subn_get_nodeinfo(smp, ibdev, port_num);
1315 goto bail;
1316 case IB_SMP_ATTR_GUID_INFO:
1317 ret = recv_subn_get_guidinfo(smp, ibdev);
1318 goto bail;
1319 case IB_SMP_ATTR_PORT_INFO:
1320 ret = recv_subn_get_portinfo(smp, ibdev, port_num);
1321 goto bail;
1322 case IB_SMP_ATTR_PKEY_TABLE:
1323 ret = recv_subn_get_pkeytable(smp, ibdev);
1324 goto bail;
1325 case IB_SMP_ATTR_SM_INFO:
1326 if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
1327 ret = IB_MAD_RESULT_SUCCESS |
1328 IB_MAD_RESULT_CONSUMED;
1329 goto bail;
1330 }
1331 if (dev->port_cap_flags & IB_PORT_SM) {
1332 ret = IB_MAD_RESULT_SUCCESS;
1333 goto bail;
1334 }
1335 /* FALLTHROUGH */
1336 default:
1337 smp->status |= IB_SMP_UNSUP_METH_ATTR;
1338 ret = reply(smp);
1339 goto bail;
1340 }
1341
1342 case IB_MGMT_METHOD_SET:
1343 switch (smp->attr_id) {
1344 case IB_SMP_ATTR_GUID_INFO:
1345 ret = recv_subn_set_guidinfo(smp, ibdev);
1346 goto bail;
1347 case IB_SMP_ATTR_PORT_INFO:
1348 ret = recv_subn_set_portinfo(smp, ibdev, port_num);
1349 goto bail;
1350 case IB_SMP_ATTR_PKEY_TABLE:
1351 ret = recv_subn_set_pkeytable(smp, ibdev, port_num);
1352 goto bail;
1353 case IB_SMP_ATTR_SM_INFO:
1354 if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
1355 ret = IB_MAD_RESULT_SUCCESS |
1356 IB_MAD_RESULT_CONSUMED;
1357 goto bail;
1358 }
1359 if (dev->port_cap_flags & IB_PORT_SM) {
1360 ret = IB_MAD_RESULT_SUCCESS;
1361 goto bail;
1362 }
1363 /* FALLTHROUGH */
1364 default:
1365 smp->status |= IB_SMP_UNSUP_METH_ATTR;
1366 ret = reply(smp);
1367 goto bail;
1368 }
1369
1370 case IB_MGMT_METHOD_TRAP:
1371 case IB_MGMT_METHOD_REPORT:
1372 case IB_MGMT_METHOD_REPORT_RESP:
1373 case IB_MGMT_METHOD_TRAP_REPRESS:
1374 case IB_MGMT_METHOD_GET_RESP:
1375 /*
1376 * The ib_mad module will call us to process responses
1377 * before checking for other consumers.
1378 * Just tell the caller to process it normally.
1379 */
1380 ret = IB_MAD_RESULT_SUCCESS;
1381 goto bail;
1382 default:
1383 smp->status |= IB_SMP_UNSUP_METHOD;
1384 ret = reply(smp);
1385 }
1386
1387bail:
1388 return ret;
1389}
1390
1391static int process_perf(struct ib_device *ibdev, u8 port_num,
1392 const struct ib_mad *in_mad,
1393 struct ib_mad *out_mad)
1394{
1395 struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
1396 int ret;
1397
1398 *out_mad = *in_mad;
1399 if (pmp->mad_hdr.class_version != 1) {
1400 pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
1401 ret = reply((struct ib_smp *) pmp);
1402 goto bail;
1403 }
1404
1405 switch (pmp->mad_hdr.method) {
1406 case IB_MGMT_METHOD_GET:
1407 switch (pmp->mad_hdr.attr_id) {
1408 case IB_PMA_CLASS_PORT_INFO:
1409 ret = recv_pma_get_classportinfo(pmp);
1410 goto bail;
1411 case IB_PMA_PORT_SAMPLES_CONTROL:
1412 ret = recv_pma_get_portsamplescontrol(pmp, ibdev,
1413 port_num);
1414 goto bail;
1415 case IB_PMA_PORT_SAMPLES_RESULT:
1416 ret = recv_pma_get_portsamplesresult(pmp, ibdev);
1417 goto bail;
1418 case IB_PMA_PORT_SAMPLES_RESULT_EXT:
1419 ret = recv_pma_get_portsamplesresult_ext(pmp,
1420 ibdev);
1421 goto bail;
1422 case IB_PMA_PORT_COUNTERS:
1423 ret = recv_pma_get_portcounters(pmp, ibdev,
1424 port_num);
1425 goto bail;
1426 case IB_PMA_PORT_COUNTERS_EXT:
1427 ret = recv_pma_get_portcounters_ext(pmp, ibdev,
1428 port_num);
1429 goto bail;
1430 default:
1431 pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
1432 ret = reply((struct ib_smp *) pmp);
1433 goto bail;
1434 }
1435
1436 case IB_MGMT_METHOD_SET:
1437 switch (pmp->mad_hdr.attr_id) {
1438 case IB_PMA_PORT_SAMPLES_CONTROL:
1439 ret = recv_pma_set_portsamplescontrol(pmp, ibdev,
1440 port_num);
1441 goto bail;
1442 case IB_PMA_PORT_COUNTERS:
1443 ret = recv_pma_set_portcounters(pmp, ibdev,
1444 port_num);
1445 goto bail;
1446 case IB_PMA_PORT_COUNTERS_EXT:
1447 ret = recv_pma_set_portcounters_ext(pmp, ibdev,
1448 port_num);
1449 goto bail;
1450 default:
1451 pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
1452 ret = reply((struct ib_smp *) pmp);
1453 goto bail;
1454 }
1455
1456 case IB_MGMT_METHOD_GET_RESP:
1457 /*
1458 * The ib_mad module will call us to process responses
1459 * before checking for other consumers.
1460 * Just tell the caller to process it normally.
1461 */
1462 ret = IB_MAD_RESULT_SUCCESS;
1463 goto bail;
1464 default:
1465 pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
1466 ret = reply((struct ib_smp *) pmp);
1467 }
1468
1469bail:
1470 return ret;
1471}
1472
1473/**
1474 * ipath_process_mad - process an incoming MAD packet
1475 * @ibdev: the infiniband device this packet came in on
1476 * @mad_flags: MAD flags
1477 * @port_num: the port number this packet came in on
1478 * @in_wc: the work completion entry for this packet
1479 * @in_grh: the global route header for this packet
1480 * @in_mad: the incoming MAD
1481 * @out_mad: any outgoing MAD reply
1482 *
1483 * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
1484 * interested in processing.
1485 *
1486 * Note that the verbs framework has already done the MAD sanity checks,
1487 * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
1488 * MADs.
1489 *
1490 * This is called by the ib_mad module.
1491 */
1492int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
1493 const struct ib_wc *in_wc, const struct ib_grh *in_grh,
1494 const struct ib_mad_hdr *in, size_t in_mad_size,
1495 struct ib_mad_hdr *out, size_t *out_mad_size,
1496 u16 *out_mad_pkey_index)
1497{
1498 int ret;
1499 const struct ib_mad *in_mad = (const struct ib_mad *)in;
1500 struct ib_mad *out_mad = (struct ib_mad *)out;
1501
1502 if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
1503 *out_mad_size != sizeof(*out_mad)))
1504 return IB_MAD_RESULT_FAILURE;
1505
1506 switch (in_mad->mad_hdr.mgmt_class) {
1507 case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
1508 case IB_MGMT_CLASS_SUBN_LID_ROUTED:
1509 ret = process_subn(ibdev, mad_flags, port_num,
1510 in_mad, out_mad);
1511 goto bail;
1512 case IB_MGMT_CLASS_PERF_MGMT:
1513 ret = process_perf(ibdev, port_num, in_mad, out_mad);
1514 goto bail;
1515 default:
1516 ret = IB_MAD_RESULT_SUCCESS;
1517 }
1518
1519bail:
1520 return ret;
1521}
diff --git a/drivers/staging/rdma/ipath/ipath_mmap.c b/drivers/staging/rdma/ipath/ipath_mmap.c
deleted file mode 100644
index e73274229404..000000000000
--- a/drivers/staging/rdma/ipath/ipath_mmap.c
+++ /dev/null
@@ -1,174 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/module.h>
34#include <linux/vmalloc.h>
35#include <linux/slab.h>
36#include <linux/mm.h>
37#include <linux/errno.h>
38#include <asm/pgtable.h>
39
40#include "ipath_verbs.h"
41
42/**
43 * ipath_release_mmap_info - free mmap info structure
44 * @ref: a pointer to the kref within struct ipath_mmap_info
45 */
46void ipath_release_mmap_info(struct kref *ref)
47{
48 struct ipath_mmap_info *ip =
49 container_of(ref, struct ipath_mmap_info, ref);
50 struct ipath_ibdev *dev = to_idev(ip->context->device);
51
52 spin_lock_irq(&dev->pending_lock);
53 list_del(&ip->pending_mmaps);
54 spin_unlock_irq(&dev->pending_lock);
55
56 vfree(ip->obj);
57 kfree(ip);
58}
59
60/*
61 * open and close keep track of how many times the CQ is mapped,
62 * to avoid releasing it.
63 */
64static void ipath_vma_open(struct vm_area_struct *vma)
65{
66 struct ipath_mmap_info *ip = vma->vm_private_data;
67
68 kref_get(&ip->ref);
69}
70
71static void ipath_vma_close(struct vm_area_struct *vma)
72{
73 struct ipath_mmap_info *ip = vma->vm_private_data;
74
75 kref_put(&ip->ref, ipath_release_mmap_info);
76}
77
78static const struct vm_operations_struct ipath_vm_ops = {
79 .open = ipath_vma_open,
80 .close = ipath_vma_close,
81};
82
83/**
84 * ipath_mmap - create a new mmap region
85 * @context: the IB user context of the process making the mmap() call
86 * @vma: the VMA to be initialized
87 * Return zero if the mmap is OK. Otherwise, return an errno.
88 */
89int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
90{
91 struct ipath_ibdev *dev = to_idev(context->device);
92 unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
93 unsigned long size = vma->vm_end - vma->vm_start;
94 struct ipath_mmap_info *ip, *pp;
95 int ret = -EINVAL;
96
97 /*
98 * Search the device's list of objects waiting for a mmap call.
99 * Normally, this list is very short since a call to create a
100 * CQ, QP, or SRQ is soon followed by a call to mmap().
101 */
102 spin_lock_irq(&dev->pending_lock);
103 list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
104 pending_mmaps) {
105 /* Only the creator is allowed to mmap the object */
106 if (context != ip->context || (__u64) offset != ip->offset)
107 continue;
108 /* Don't allow a mmap larger than the object. */
109 if (size > ip->size)
110 break;
111
112 list_del_init(&ip->pending_mmaps);
113 spin_unlock_irq(&dev->pending_lock);
114
115 ret = remap_vmalloc_range(vma, ip->obj, 0);
116 if (ret)
117 goto done;
118 vma->vm_ops = &ipath_vm_ops;
119 vma->vm_private_data = ip;
120 ipath_vma_open(vma);
121 goto done;
122 }
123 spin_unlock_irq(&dev->pending_lock);
124done:
125 return ret;
126}
127
128/*
129 * Allocate information for ipath_mmap
130 */
131struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
132 u32 size,
133 struct ib_ucontext *context,
134 void *obj) {
135 struct ipath_mmap_info *ip;
136
137 ip = kmalloc(sizeof *ip, GFP_KERNEL);
138 if (!ip)
139 goto bail;
140
141 size = PAGE_ALIGN(size);
142
143 spin_lock_irq(&dev->mmap_offset_lock);
144 if (dev->mmap_offset == 0)
145 dev->mmap_offset = PAGE_SIZE;
146 ip->offset = dev->mmap_offset;
147 dev->mmap_offset += size;
148 spin_unlock_irq(&dev->mmap_offset_lock);
149
150 INIT_LIST_HEAD(&ip->pending_mmaps);
151 ip->size = size;
152 ip->context = context;
153 ip->obj = obj;
154 kref_init(&ip->ref);
155
156bail:
157 return ip;
158}
159
160void ipath_update_mmap_info(struct ipath_ibdev *dev,
161 struct ipath_mmap_info *ip,
162 u32 size, void *obj) {
163 size = PAGE_ALIGN(size);
164
165 spin_lock_irq(&dev->mmap_offset_lock);
166 if (dev->mmap_offset == 0)
167 dev->mmap_offset = PAGE_SIZE;
168 ip->offset = dev->mmap_offset;
169 dev->mmap_offset += size;
170 spin_unlock_irq(&dev->mmap_offset_lock);
171
172 ip->size = size;
173 ip->obj = obj;
174}
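
The mmap path above hinges on the offset handed back by ipath_create_mmap_info()/ipath_update_mmap_info(): every object that userspace may map gets a unique, page-aligned offset, and the later mmap() call is matched against the pending list purely by (context, offset). A standalone sketch of that offset bookkeeping (locking omitted; names are illustrative, not the driver's):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

/* Mirrors dev->mmap_offset: a monotonically growing cursor. */
static unsigned long mmap_offset;

/* Hand out a unique page-aligned offset for an object of 'size' bytes;
 * userspace passes it back as the mmap() offset so the driver can find
 * the matching object. */
static unsigned long alloc_mmap_offset(unsigned long size)
{
	unsigned long off;

	if (mmap_offset == 0)
		mmap_offset = PAGE_SIZE;	/* offset 0 stays unused */
	off = mmap_offset;
	mmap_offset += PAGE_ALIGN(size);
	return off;
}

int main(void)
{
	printf("cq wq: %#lx\n", alloc_mmap_offset(3000));	/* 0x1000 */
	printf("qp rq: %#lx\n", alloc_mmap_offset(9000));	/* 0x2000 */
	return 0;
}
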
diff --git a/drivers/staging/rdma/ipath/ipath_mr.c b/drivers/staging/rdma/ipath/ipath_mr.c
deleted file mode 100644
index b76b0ce66709..000000000000
--- a/drivers/staging/rdma/ipath/ipath_mr.c
+++ /dev/null
@@ -1,370 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/slab.h>
35
36#include <rdma/ib_umem.h>
37#include <rdma/ib_pack.h>
38#include <rdma/ib_smi.h>
39
40#include "ipath_verbs.h"
41
42/* Fast memory region */
43struct ipath_fmr {
44 struct ib_fmr ibfmr;
45 u8 page_shift;
46 struct ipath_mregion mr; /* must be last */
47};
48
49static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
50{
51 return container_of(ibfmr, struct ipath_fmr, ibfmr);
52}
53
54/**
55 * ipath_get_dma_mr - get a DMA memory region
56 * @pd: protection domain for this memory region
57 * @acc: access flags
58 *
59 * Returns the memory region on success, otherwise returns an errno.
60 * Note that all DMA addresses should be created via the
61 * struct ib_dma_mapping_ops functions (see ipath_dma.c).
62 */
63struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc)
64{
65 struct ipath_mr *mr;
66 struct ib_mr *ret;
67
68 mr = kzalloc(sizeof *mr, GFP_KERNEL);
69 if (!mr) {
70 ret = ERR_PTR(-ENOMEM);
71 goto bail;
72 }
73
74 mr->mr.access_flags = acc;
75 ret = &mr->ibmr;
76
77bail:
78 return ret;
79}
80
81static struct ipath_mr *alloc_mr(int count,
82 struct ipath_lkey_table *lk_table)
83{
84 struct ipath_mr *mr;
85 int m, i = 0;
86
87 /* Allocate struct plus pointers to first level page tables. */
88 m = (count + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
89 mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
90 if (!mr)
91 goto done;
92
93 /* Allocate first level page tables. */
94 for (; i < m; i++) {
95 mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL);
96 if (!mr->mr.map[i])
97 goto bail;
98 }
99 mr->mr.mapsz = m;
100
101 if (!ipath_alloc_lkey(lk_table, &mr->mr))
102 goto bail;
103 mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey;
104
105 goto done;
106
107bail:
108 while (i) {
109 i--;
110 kfree(mr->mr.map[i]);
111 }
112 kfree(mr);
113 mr = NULL;
114
115done:
116 return mr;
117}
118
119/**
120 * ipath_reg_user_mr - register a userspace memory region
121 * @pd: protection domain for this memory region
122 * @start: starting userspace address
123 * @length: length of region to register
124 * @virt_addr: virtual address to use (from HCA's point of view)
125 * @mr_access_flags: access flags for this memory region
126 * @udata: unused by the InfiniPath driver
127 *
128 * Returns the memory region on success, otherwise returns an errno.
129 */
130struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
131 u64 virt_addr, int mr_access_flags,
132 struct ib_udata *udata)
133{
134 struct ipath_mr *mr;
135 struct ib_umem *umem;
136 int n, m, entry;
137 struct scatterlist *sg;
138 struct ib_mr *ret;
139
140 if (length == 0) {
141 ret = ERR_PTR(-EINVAL);
142 goto bail;
143 }
144
145 umem = ib_umem_get(pd->uobject->context, start, length,
146 mr_access_flags, 0);
147 if (IS_ERR(umem))
148 return (void *) umem;
149
150 n = umem->nmap;
151 mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
152 if (!mr) {
153 ret = ERR_PTR(-ENOMEM);
154 ib_umem_release(umem);
155 goto bail;
156 }
157
158 mr->mr.pd = pd;
159 mr->mr.user_base = start;
160 mr->mr.iova = virt_addr;
161 mr->mr.length = length;
162 mr->mr.offset = ib_umem_offset(umem);
163 mr->mr.access_flags = mr_access_flags;
164 mr->mr.max_segs = n;
165 mr->umem = umem;
166
167 m = 0;
168 n = 0;
169 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
170 void *vaddr;
171
172 vaddr = page_address(sg_page(sg));
173 if (!vaddr) {
174 ret = ERR_PTR(-EINVAL);
175 goto bail;
176 }
177 mr->mr.map[m]->segs[n].vaddr = vaddr;
178 mr->mr.map[m]->segs[n].length = umem->page_size;
179 n++;
180 if (n == IPATH_SEGSZ) {
181 m++;
182 n = 0;
183 }
184 }
185 ret = &mr->ibmr;
186
187bail:
188 return ret;
189}
190
191/**
192 * ipath_dereg_mr - unregister and free a memory region
193 * @ibmr: the memory region to free
194 *
195 * Returns 0 on success.
196 *
197 * Note that this is called to free MRs created by ipath_get_dma_mr()
198 * or ipath_reg_user_mr().
199 */
200int ipath_dereg_mr(struct ib_mr *ibmr)
201{
202 struct ipath_mr *mr = to_imr(ibmr);
203 int i;
204
205 ipath_free_lkey(&to_idev(ibmr->device)->lk_table, ibmr->lkey);
206 i = mr->mr.mapsz;
207 while (i) {
208 i--;
209 kfree(mr->mr.map[i]);
210 }
211
212 if (mr->umem)
213 ib_umem_release(mr->umem);
214
215 kfree(mr);
216 return 0;
217}
218
219/**
220 * ipath_alloc_fmr - allocate a fast memory region
221 * @pd: the protection domain for this memory region
222 * @mr_access_flags: access flags for this memory region
223 * @fmr_attr: fast memory region attributes
224 *
225 * Returns the memory region on success, otherwise returns an errno.
226 */
227struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
228 struct ib_fmr_attr *fmr_attr)
229{
230 struct ipath_fmr *fmr;
231 int m, i = 0;
232 struct ib_fmr *ret;
233
234 /* Allocate struct plus pointers to first level page tables. */
235 m = (fmr_attr->max_pages + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
236 fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
237 if (!fmr)
238 goto bail;
239
240 /* Allocate first level page tables. */
241 for (; i < m; i++) {
242 fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0],
243 GFP_KERNEL);
244 if (!fmr->mr.map[i])
245 goto bail;
246 }
247 fmr->mr.mapsz = m;
248
249 /*
250 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
251 * rkey.
252 */
253 if (!ipath_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr))
254 goto bail;
255 fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mr.lkey;
256 /*
257 * Resources are allocated but no valid mapping (RKEY can't be
258 * used).
259 */
260 fmr->mr.pd = pd;
261 fmr->mr.user_base = 0;
262 fmr->mr.iova = 0;
263 fmr->mr.length = 0;
264 fmr->mr.offset = 0;
265 fmr->mr.access_flags = mr_access_flags;
266 fmr->mr.max_segs = fmr_attr->max_pages;
267 fmr->page_shift = fmr_attr->page_shift;
268
269 ret = &fmr->ibfmr;
270 goto done;
271
272bail:
273 while (i)
274 kfree(fmr->mr.map[--i]);
275 kfree(fmr);
276 ret = ERR_PTR(-ENOMEM);
277
278done:
279 return ret;
280}
281
282/**
283 * ipath_map_phys_fmr - set up a fast memory region
284 * @ibfmr: the fast memory region to set up
285 * @page_list: the list of pages to associate with the fast memory region
286 * @list_len: the number of pages to associate with the fast memory region
287 * @iova: the virtual address of the start of the fast memory region
288 *
289 * This may be called from interrupt context.
290 */
291
292int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
293 int list_len, u64 iova)
294{
295 struct ipath_fmr *fmr = to_ifmr(ibfmr);
296 struct ipath_lkey_table *rkt;
297 unsigned long flags;
298 int m, n, i;
299 u32 ps;
300 int ret;
301
302 if (list_len > fmr->mr.max_segs) {
303 ret = -EINVAL;
304 goto bail;
305 }
306 rkt = &to_idev(ibfmr->device)->lk_table;
307 spin_lock_irqsave(&rkt->lock, flags);
308 fmr->mr.user_base = iova;
309 fmr->mr.iova = iova;
310 ps = 1 << fmr->page_shift;
311 fmr->mr.length = list_len * ps;
312 m = 0;
313 n = 0;
314 ps = 1 << fmr->page_shift;
315 for (i = 0; i < list_len; i++) {
316 fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
317 fmr->mr.map[m]->segs[n].length = ps;
318 if (++n == IPATH_SEGSZ) {
319 m++;
320 n = 0;
321 }
322 }
323 spin_unlock_irqrestore(&rkt->lock, flags);
324 ret = 0;
325
326bail:
327 return ret;
328}
329
330/**
331 * ipath_unmap_fmr - unmap fast memory regions
332 * @fmr_list: the list of fast memory regions to unmap
333 *
334 * Returns 0 on success.
335 */
336int ipath_unmap_fmr(struct list_head *fmr_list)
337{
338 struct ipath_fmr *fmr;
339 struct ipath_lkey_table *rkt;
340 unsigned long flags;
341
342 list_for_each_entry(fmr, fmr_list, ibfmr.list) {
343 rkt = &to_idev(fmr->ibfmr.device)->lk_table;
344 spin_lock_irqsave(&rkt->lock, flags);
345 fmr->mr.user_base = 0;
346 fmr->mr.iova = 0;
347 fmr->mr.length = 0;
348 spin_unlock_irqrestore(&rkt->lock, flags);
349 }
350 return 0;
351}
352
353/**
354 * ipath_dealloc_fmr - deallocate a fast memory region
355 * @ibfmr: the fast memory region to deallocate
356 *
357 * Returns 0 on success.
358 */
359int ipath_dealloc_fmr(struct ib_fmr *ibfmr)
360{
361 struct ipath_fmr *fmr = to_ifmr(ibfmr);
362 int i;
363
364 ipath_free_lkey(&to_idev(ibfmr->device)->lk_table, ibfmr->lkey);
365 i = fmr->mr.mapsz;
366 while (i)
367 kfree(fmr->mr.map[--i]);
368 kfree(fmr);
369 return 0;
370}
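
Both alloc_mr() and ipath_alloc_fmr() above use the same two-level layout: one allocation holds the region struct plus an array of first-level pointers, and each pointer refers to a table of IPATH_SEGSZ page segments, with a careful unwind if any first-level table fails to allocate. A condensed standalone sketch of that pattern (SEGSZ and the type names are illustrative):

#include <stdlib.h>

#define SEGSZ 128				/* stand-in for IPATH_SEGSZ */

struct seg	{ void *vaddr; size_t length; };
struct seg_map	{ struct seg segs[SEGSZ]; };

struct region {
	int mapsz;				/* number of first-level tables */
	struct seg_map *map[];			/* flexible array, like mr.map[] */
};

static struct region *region_alloc(int npages)
{
	int m = (npages + SEGSZ - 1) / SEGSZ;	/* round up */
	int i = 0;
	struct region *r;

	/* Region struct plus the first-level pointer array in one shot. */
	r = malloc(sizeof(*r) + m * sizeof(r->map[0]));
	if (!r)
		return NULL;

	/* One second-level table per SEGSZ pages; unwind on failure. */
	for (; i < m; i++) {
		r->map[i] = malloc(sizeof(*r->map[i]));
		if (!r->map[i])
			goto unwind;
	}
	r->mapsz = m;
	return r;

unwind:
	while (i)
		free(r->map[--i]);
	free(r);
	return NULL;
}
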
diff --git a/drivers/staging/rdma/ipath/ipath_qp.c b/drivers/staging/rdma/ipath/ipath_qp.c
deleted file mode 100644
index 280cd2d638e4..000000000000
--- a/drivers/staging/rdma/ipath/ipath_qp.c
+++ /dev/null
@@ -1,1079 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/err.h>
35#include <linux/slab.h>
36#include <linux/vmalloc.h>
37
38#include "ipath_verbs.h"
39#include "ipath_kernel.h"
40
41#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE)
42#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
43#define mk_qpn(qpt, map, off) (((map) - (qpt)->map) * BITS_PER_PAGE + \
44 (off))
45#define find_next_offset(map, off) find_next_zero_bit((map)->page, \
46 BITS_PER_PAGE, off)
47
48/*
49 * Convert the AETH credit code into the number of credits.
50 */
51static u32 credit_table[31] = {
52 0, /* 0 */
53 1, /* 1 */
54 2, /* 2 */
55 3, /* 3 */
56 4, /* 4 */
57 6, /* 5 */
58 8, /* 6 */
59 12, /* 7 */
60 16, /* 8 */
61 24, /* 9 */
62 32, /* A */
63 48, /* B */
64 64, /* C */
65 96, /* D */
66 128, /* E */
67 192, /* F */
68 256, /* 10 */
69 384, /* 11 */
70 512, /* 12 */
71 768, /* 13 */
72 1024, /* 14 */
73 1536, /* 15 */
74 2048, /* 16 */
75 3072, /* 17 */
76 4096, /* 18 */
77 6144, /* 19 */
78 8192, /* 1A */
79 12288, /* 1B */
80 16384, /* 1C */
81 24576, /* 1D */
82 32768 /* 1E */
83};
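
The table is indexed by the 5-bit credit code carried in the AETH of RC acknowledgements. A rough illustration of the lookup (the bit position of the credit field and the out-of-range handling here are assumptions for the sketch; the driver's own header defines the real constants):

/* Illustrative only: extract a 5-bit credit code from an AETH word and
 * translate it via credit_table[] above.  The field position (bits 28:24)
 * is an assumption in this sketch. */
static inline u32 aeth_to_credits(u32 aeth)
{
	u32 code = (aeth >> 24) & 0x1F;

	return (code < ARRAY_SIZE(credit_table)) ? credit_table[code] : 0;
}
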
84
85
86static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map)
87{
88 unsigned long page = get_zeroed_page(GFP_KERNEL);
89 unsigned long flags;
90
91 /*
92 * Free the page if someone raced with us installing it.
93 */
94
95 spin_lock_irqsave(&qpt->lock, flags);
96 if (map->page)
97 free_page(page);
98 else
99 map->page = (void *)page;
100 spin_unlock_irqrestore(&qpt->lock, flags);
101}
102
103
104static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type)
105{
106 u32 i, offset, max_scan, qpn;
107 struct qpn_map *map;
108 u32 ret = -1;
109
110 if (type == IB_QPT_SMI)
111 ret = 0;
112 else if (type == IB_QPT_GSI)
113 ret = 1;
114
115 if (ret != -1) {
116 map = &qpt->map[0];
117 if (unlikely(!map->page)) {
118 get_map_page(qpt, map);
119 if (unlikely(!map->page)) {
120 ret = -ENOMEM;
121 goto bail;
122 }
123 }
124 if (!test_and_set_bit(ret, map->page))
125 atomic_dec(&map->n_free);
126 else
127 ret = -EBUSY;
128 goto bail;
129 }
130
131 qpn = qpt->last + 1;
132 if (qpn >= QPN_MAX)
133 qpn = 2;
134 offset = qpn & BITS_PER_PAGE_MASK;
135 map = &qpt->map[qpn / BITS_PER_PAGE];
136 max_scan = qpt->nmaps - !offset;
137 for (i = 0;;) {
138 if (unlikely(!map->page)) {
139 get_map_page(qpt, map);
140 if (unlikely(!map->page))
141 break;
142 }
143 if (likely(atomic_read(&map->n_free))) {
144 do {
145 if (!test_and_set_bit(offset, map->page)) {
146 atomic_dec(&map->n_free);
147 qpt->last = qpn;
148 ret = qpn;
149 goto bail;
150 }
151 offset = find_next_offset(map, offset);
152 qpn = mk_qpn(qpt, map, offset);
153 /*
154 * This test differs from alloc_pidmap().
155 * If find_next_offset() does find a zero
156 * bit, we don't need to check for QPN
157 * wrapping around past our starting QPN.
158 * We just need to be sure we don't loop
159 * forever.
160 */
161 } while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
162 }
163 /*
164 * In order to keep the number of pages allocated to a
165 * minimum, we scan all the existing pages before increasing
166 * the size of the bitmap table.
167 */
168 if (++i > max_scan) {
169 if (qpt->nmaps == QPNMAP_ENTRIES)
170 break;
171 map = &qpt->map[qpt->nmaps++];
172 offset = 0;
173 } else if (map < &qpt->map[qpt->nmaps]) {
174 ++map;
175 offset = 0;
176 } else {
177 map = &qpt->map[0];
178 offset = 2;
179 }
180 qpn = mk_qpn(qpt, map, offset);
181 }
182
183 ret = -ENOMEM;
184
185bail:
186 return ret;
187}
188
189static void free_qpn(struct ipath_qp_table *qpt, u32 qpn)
190{
191 struct qpn_map *map;
192
193 map = qpt->map + qpn / BITS_PER_PAGE;
194 if (map->page)
195 clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
196 atomic_inc(&map->n_free);
197}
198
199/**
200 * ipath_alloc_qpn - allocate a QP number
201 * @qpt: the QP table
202 * @qp: the QP
203 * @type: the QP type (IB_QPT_SMI and IB_QPT_GSI are special)
204 *
205 * Allocate the next available QPN and put the QP into the hash table.
206 * The hash table holds a reference to the QP.
207 */
208static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
209 enum ib_qp_type type)
210{
211 unsigned long flags;
212 int ret;
213
214 ret = alloc_qpn(qpt, type);
215 if (ret < 0)
216 goto bail;
217 qp->ibqp.qp_num = ret;
218
219 /* Add the QP to the hash table. */
220 spin_lock_irqsave(&qpt->lock, flags);
221
222 ret %= qpt->max;
223 qp->next = qpt->table[ret];
224 qpt->table[ret] = qp;
225 atomic_inc(&qp->refcount);
226
227 spin_unlock_irqrestore(&qpt->lock, flags);
228 ret = 0;
229
230bail:
231 return ret;
232}
233
234/**
235 * ipath_free_qp - remove a QP from the QP table
236 * @qpt: the QP table
237 * @qp: the QP to remove
238 *
239 * Remove the QP from the table so it can't be found asynchronously by
240 * the receive interrupt routine.
241 */
242static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
243{
244 struct ipath_qp *q, **qpp;
245 unsigned long flags;
246
247 spin_lock_irqsave(&qpt->lock, flags);
248
249 /* Remove QP from the hash table. */
250 qpp = &qpt->table[qp->ibqp.qp_num % qpt->max];
251 for (; (q = *qpp) != NULL; qpp = &q->next) {
252 if (q == qp) {
253 *qpp = qp->next;
254 qp->next = NULL;
255 atomic_dec(&qp->refcount);
256 break;
257 }
258 }
259
260 spin_unlock_irqrestore(&qpt->lock, flags);
261}
262
263/**
264 * ipath_free_all_qps - check for QPs still in use
265 * @qpt: the QP table to empty
266 *
267 * There should not be any QPs still in use.
268 * Free memory for table.
269 */
270unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
271{
272 unsigned long flags;
273 struct ipath_qp *qp;
274 u32 n, qp_inuse = 0;
275
276 spin_lock_irqsave(&qpt->lock, flags);
277 for (n = 0; n < qpt->max; n++) {
278 qp = qpt->table[n];
279 qpt->table[n] = NULL;
280
281 for (; qp; qp = qp->next)
282 qp_inuse++;
283 }
284 spin_unlock_irqrestore(&qpt->lock, flags);
285
286 for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
287 if (qpt->map[n].page)
288 free_page((unsigned long) qpt->map[n].page);
289 return qp_inuse;
290}
291
292/**
293 * ipath_lookup_qpn - return the QP with the given QPN
294 * @qpt: the QP table
295 * @qpn: the QP number to look up
296 *
297 * The caller is responsible for decrementing the QP reference count
298 * when done.
299 */
300struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn)
301{
302 unsigned long flags;
303 struct ipath_qp *qp;
304
305 spin_lock_irqsave(&qpt->lock, flags);
306
307 for (qp = qpt->table[qpn % qpt->max]; qp; qp = qp->next) {
308 if (qp->ibqp.qp_num == qpn) {
309 atomic_inc(&qp->refcount);
310 break;
311 }
312 }
313
314 spin_unlock_irqrestore(&qpt->lock, flags);
315 return qp;
316}
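
ipath_alloc_qpn() and ipath_lookup_qpn() above implement a small chained hash table keyed by qp_num % qpt->max, with insertion at the head of the bucket and a linear walk on lookup. A condensed standalone sketch of that pattern (locking and reference counting stripped out; names are illustrative):

#include <stddef.h>

struct node {
	unsigned int  qpn;
	struct node  *next;
};

struct table {
	unsigned int  max;		/* number of hash buckets */
	struct node **slots;
};

/* Insert at the head of the bucket, as ipath_alloc_qpn() does (the
 * driver additionally takes qpt->lock and bumps qp->refcount here). */
static void tbl_insert(struct table *t, struct node *n)
{
	unsigned int b = n->qpn % t->max;

	n->next = t->slots[b];
	t->slots[b] = n;
}

/* Walk the bucket chain, as ipath_lookup_qpn() does. */
static struct node *tbl_lookup(struct table *t, unsigned int qpn)
{
	struct node *n;

	for (n = t->slots[qpn % t->max]; n; n = n->next)
		if (n->qpn == qpn)
			return n;
	return NULL;
}
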
317
318/**
319 * ipath_reset_qp - initialize the QP state to the reset state
320 * @qp: the QP to reset
321 * @type: the QP type
322 */
323static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
324{
325 qp->remote_qpn = 0;
326 qp->qkey = 0;
327 qp->qp_access_flags = 0;
328 atomic_set(&qp->s_dma_busy, 0);
329 qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
330 qp->s_hdrwords = 0;
331 qp->s_wqe = NULL;
332 qp->s_pkt_delay = 0;
333 qp->s_draining = 0;
334 qp->s_psn = 0;
335 qp->r_psn = 0;
336 qp->r_msn = 0;
337 if (type == IB_QPT_RC) {
338 qp->s_state = IB_OPCODE_RC_SEND_LAST;
339 qp->r_state = IB_OPCODE_RC_SEND_LAST;
340 } else {
341 qp->s_state = IB_OPCODE_UC_SEND_LAST;
342 qp->r_state = IB_OPCODE_UC_SEND_LAST;
343 }
344 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
345 qp->r_nak_state = 0;
346 qp->r_aflags = 0;
347 qp->r_flags = 0;
348 qp->s_rnr_timeout = 0;
349 qp->s_head = 0;
350 qp->s_tail = 0;
351 qp->s_cur = 0;
352 qp->s_last = 0;
353 qp->s_ssn = 1;
354 qp->s_lsn = 0;
355 memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
356 qp->r_head_ack_queue = 0;
357 qp->s_tail_ack_queue = 0;
358 qp->s_num_rd_atomic = 0;
359 if (qp->r_rq.wq) {
360 qp->r_rq.wq->head = 0;
361 qp->r_rq.wq->tail = 0;
362 }
363}
364
365/**
366 * ipath_error_qp - put a QP into the error state
367 * @qp: the QP to put into the error state
368 * @err: the receive completion error to signal if a RWQE is active
369 *
370 * Flushes both send and receive work queues.
371 * Returns true if last WQE event should be generated.
372 * The QP s_lock should be held and interrupts disabled.
373 * If we are already in error state, just return.
374 */
375
376int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
377{
378 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
379 struct ib_wc wc;
380 int ret = 0;
381
382 if (qp->state == IB_QPS_ERR)
383 goto bail;
384
385 qp->state = IB_QPS_ERR;
386
387 spin_lock(&dev->pending_lock);
388 if (!list_empty(&qp->timerwait))
389 list_del_init(&qp->timerwait);
390 if (!list_empty(&qp->piowait))
391 list_del_init(&qp->piowait);
392 spin_unlock(&dev->pending_lock);
393
394 /* Schedule the sending tasklet to drain the send work queue. */
395 if (qp->s_last != qp->s_head)
396 ipath_schedule_send(qp);
397
398 memset(&wc, 0, sizeof(wc));
399 wc.qp = &qp->ibqp;
400 wc.opcode = IB_WC_RECV;
401
402 if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
403 wc.wr_id = qp->r_wr_id;
404 wc.status = err;
405 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
406 }
407 wc.status = IB_WC_WR_FLUSH_ERR;
408
409 if (qp->r_rq.wq) {
410 struct ipath_rwq *wq;
411 u32 head;
412 u32 tail;
413
414 spin_lock(&qp->r_rq.lock);
415
416 /* sanity check pointers before trusting them */
417 wq = qp->r_rq.wq;
418 head = wq->head;
419 if (head >= qp->r_rq.size)
420 head = 0;
421 tail = wq->tail;
422 if (tail >= qp->r_rq.size)
423 tail = 0;
424 while (tail != head) {
425 wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
426 if (++tail >= qp->r_rq.size)
427 tail = 0;
428 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
429 }
430 wq->tail = tail;
431
432 spin_unlock(&qp->r_rq.lock);
433 } else if (qp->ibqp.event_handler)
434 ret = 1;
435
436bail:
437 return ret;
438}
439
440/**
441 * ipath_modify_qp - modify the attributes of a queue pair
442 * @ibqp: the queue pair whose attributes we're modifying
443 * @attr: the new attributes
444 * @attr_mask: the mask of attributes to modify
445 * @udata: user data for ipathverbs.so
446 *
447 * Returns 0 on success, otherwise returns an errno.
448 */
449int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
450 int attr_mask, struct ib_udata *udata)
451{
452 struct ipath_ibdev *dev = to_idev(ibqp->device);
453 struct ipath_qp *qp = to_iqp(ibqp);
454 enum ib_qp_state cur_state, new_state;
455 int lastwqe = 0;
456 int ret;
457
458 spin_lock_irq(&qp->s_lock);
459
460 cur_state = attr_mask & IB_QP_CUR_STATE ?
461 attr->cur_qp_state : qp->state;
462 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
463
464 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
465 attr_mask, IB_LINK_LAYER_UNSPECIFIED))
466 goto inval;
467
468 if (attr_mask & IB_QP_AV) {
469 if (attr->ah_attr.dlid == 0 ||
470 attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE)
471 goto inval;
472
473 if ((attr->ah_attr.ah_flags & IB_AH_GRH) &&
474 (attr->ah_attr.grh.sgid_index > 1))
475 goto inval;
476 }
477
478 if (attr_mask & IB_QP_PKEY_INDEX)
479 if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
480 goto inval;
481
482 if (attr_mask & IB_QP_MIN_RNR_TIMER)
483 if (attr->min_rnr_timer > 31)
484 goto inval;
485
486 if (attr_mask & IB_QP_PORT)
487 if (attr->port_num == 0 ||
488 attr->port_num > ibqp->device->phys_port_cnt)
489 goto inval;
490
491 /*
492 * don't allow invalid Path MTU values or greater than 2048
493 * unless we are configured for a 4KB MTU
494 */
495 if ((attr_mask & IB_QP_PATH_MTU) &&
496 (ib_mtu_enum_to_int(attr->path_mtu) == -1 ||
497 (attr->path_mtu > IB_MTU_2048 && !ipath_mtu4096)))
498 goto inval;
499
500 if (attr_mask & IB_QP_PATH_MIG_STATE)
501 if (attr->path_mig_state != IB_MIG_MIGRATED &&
502 attr->path_mig_state != IB_MIG_REARM)
503 goto inval;
504
505 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
506 if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC)
507 goto inval;
508
509 switch (new_state) {
510 case IB_QPS_RESET:
511 if (qp->state != IB_QPS_RESET) {
512 qp->state = IB_QPS_RESET;
513 spin_lock(&dev->pending_lock);
514 if (!list_empty(&qp->timerwait))
515 list_del_init(&qp->timerwait);
516 if (!list_empty(&qp->piowait))
517 list_del_init(&qp->piowait);
518 spin_unlock(&dev->pending_lock);
519 qp->s_flags &= ~IPATH_S_ANY_WAIT;
520 spin_unlock_irq(&qp->s_lock);
521 /* Stop the sending tasklet */
522 tasklet_kill(&qp->s_task);
523 wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
524 spin_lock_irq(&qp->s_lock);
525 }
526 ipath_reset_qp(qp, ibqp->qp_type);
527 break;
528
529 case IB_QPS_SQD:
530 qp->s_draining = qp->s_last != qp->s_cur;
531 qp->state = new_state;
532 break;
533
534 case IB_QPS_SQE:
535 if (qp->ibqp.qp_type == IB_QPT_RC)
536 goto inval;
537 qp->state = new_state;
538 break;
539
540 case IB_QPS_ERR:
541 lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
542 break;
543
544 default:
545 qp->state = new_state;
546 break;
547 }
548
549 if (attr_mask & IB_QP_PKEY_INDEX)
550 qp->s_pkey_index = attr->pkey_index;
551
552 if (attr_mask & IB_QP_DEST_QPN)
553 qp->remote_qpn = attr->dest_qp_num;
554
555 if (attr_mask & IB_QP_SQ_PSN) {
556 qp->s_psn = qp->s_next_psn = attr->sq_psn;
557 qp->s_last_psn = qp->s_next_psn - 1;
558 }
559
560 if (attr_mask & IB_QP_RQ_PSN)
561 qp->r_psn = attr->rq_psn;
562
563 if (attr_mask & IB_QP_ACCESS_FLAGS)
564 qp->qp_access_flags = attr->qp_access_flags;
565
566 if (attr_mask & IB_QP_AV) {
567 qp->remote_ah_attr = attr->ah_attr;
568 qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate);
569 }
570
571 if (attr_mask & IB_QP_PATH_MTU)
572 qp->path_mtu = attr->path_mtu;
573
574 if (attr_mask & IB_QP_RETRY_CNT)
575 qp->s_retry = qp->s_retry_cnt = attr->retry_cnt;
576
577 if (attr_mask & IB_QP_RNR_RETRY) {
578 qp->s_rnr_retry = attr->rnr_retry;
579 if (qp->s_rnr_retry > 7)
580 qp->s_rnr_retry = 7;
581 qp->s_rnr_retry_cnt = qp->s_rnr_retry;
582 }
583
584 if (attr_mask & IB_QP_MIN_RNR_TIMER)
585 qp->r_min_rnr_timer = attr->min_rnr_timer;
586
587 if (attr_mask & IB_QP_TIMEOUT)
588 qp->timeout = attr->timeout;
589
590 if (attr_mask & IB_QP_QKEY)
591 qp->qkey = attr->qkey;
592
593 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
594 qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
595
596 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
597 qp->s_max_rd_atomic = attr->max_rd_atomic;
598
599 spin_unlock_irq(&qp->s_lock);
600
601 if (lastwqe) {
602 struct ib_event ev;
603
604 ev.device = qp->ibqp.device;
605 ev.element.qp = &qp->ibqp;
606 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
607 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
608 }
609 ret = 0;
610 goto bail;
611
612inval:
613 spin_unlock_irq(&qp->s_lock);
614 ret = -EINVAL;
615
616bail:
617 return ret;
618}
619
620int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
621 int attr_mask, struct ib_qp_init_attr *init_attr)
622{
623 struct ipath_qp *qp = to_iqp(ibqp);
624
625 attr->qp_state = qp->state;
626 attr->cur_qp_state = attr->qp_state;
627 attr->path_mtu = qp->path_mtu;
628 attr->path_mig_state = 0;
629 attr->qkey = qp->qkey;
630 attr->rq_psn = qp->r_psn;
631 attr->sq_psn = qp->s_next_psn;
632 attr->dest_qp_num = qp->remote_qpn;
633 attr->qp_access_flags = qp->qp_access_flags;
634 attr->cap.max_send_wr = qp->s_size - 1;
635 attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
636 attr->cap.max_send_sge = qp->s_max_sge;
637 attr->cap.max_recv_sge = qp->r_rq.max_sge;
638 attr->cap.max_inline_data = 0;
639 attr->ah_attr = qp->remote_ah_attr;
640 memset(&attr->alt_ah_attr, 0, sizeof(attr->alt_ah_attr));
641 attr->pkey_index = qp->s_pkey_index;
642 attr->alt_pkey_index = 0;
643 attr->en_sqd_async_notify = 0;
644 attr->sq_draining = qp->s_draining;
645 attr->max_rd_atomic = qp->s_max_rd_atomic;
646 attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
647 attr->min_rnr_timer = qp->r_min_rnr_timer;
648 attr->port_num = 1;
649 attr->timeout = qp->timeout;
650 attr->retry_cnt = qp->s_retry_cnt;
651 attr->rnr_retry = qp->s_rnr_retry_cnt;
652 attr->alt_port_num = 0;
653 attr->alt_timeout = 0;
654
655 init_attr->event_handler = qp->ibqp.event_handler;
656 init_attr->qp_context = qp->ibqp.qp_context;
657 init_attr->send_cq = qp->ibqp.send_cq;
658 init_attr->recv_cq = qp->ibqp.recv_cq;
659 init_attr->srq = qp->ibqp.srq;
660 init_attr->cap = attr->cap;
661 if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR)
662 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
663 else
664 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
665 init_attr->qp_type = qp->ibqp.qp_type;
666 init_attr->port_num = 1;
667 return 0;
668}
669
670/**
671 * ipath_compute_aeth - compute the AETH (syndrome + MSN)
672 * @qp: the queue pair to compute the AETH for
673 *
674 * Returns the AETH.
675 */
676__be32 ipath_compute_aeth(struct ipath_qp *qp)
677{
678 u32 aeth = qp->r_msn & IPATH_MSN_MASK;
679
680 if (qp->ibqp.srq) {
681 /*
682 * Shared receive queues don't generate credits.
683 * Set the credit field to the invalid value.
684 */
685 aeth |= IPATH_AETH_CREDIT_INVAL << IPATH_AETH_CREDIT_SHIFT;
686 } else {
687 u32 min, max, x;
688 u32 credits;
689 struct ipath_rwq *wq = qp->r_rq.wq;
690 u32 head;
691 u32 tail;
692
693 /* sanity check pointers before trusting them */
694 head = wq->head;
695 if (head >= qp->r_rq.size)
696 head = 0;
697 tail = wq->tail;
698 if (tail >= qp->r_rq.size)
699 tail = 0;
700 /*
701 * Compute the number of credits available (RWQEs).
702 * XXX Not holding the r_rq.lock here so there is a small
703 * chance that the pair of reads are not atomic.
704 */
705 credits = head - tail;
706 if ((int)credits < 0)
707 credits += qp->r_rq.size;
708 /*
709 * Binary search the credit table to find the code to
710 * use.
711 */
712 min = 0;
713 max = 31;
714 for (;;) {
715 x = (min + max) / 2;
716 if (credit_table[x] == credits)
717 break;
718 if (credit_table[x] > credits)
719 max = x;
720 else if (min == x)
721 break;
722 else
723 min = x;
724 }
725 aeth |= x << IPATH_AETH_CREDIT_SHIFT;
726 }
727 return cpu_to_be32(aeth);
728}
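
The credit computation above is a ring-buffer occupancy count followed by a bounded binary search for the largest credit code whose table entry does not exceed the number of available receive WQEs. Below is a minimal, standalone sketch of those two steps; the table contents and size here are illustrative only (the driver's credit_table, referenced above but not reproduced here, maps every 5-bit code), and none of the names belong to the driver.

#include <stdio.h>
#include <stdint.h>

/* Illustrative, non-decreasing table mapping a credit code to an RWQE
 * count; the driver's table is larger but has the same shape. */
static const uint32_t credit_table[] = { 0, 1, 2, 4, 8, 16, 32, 64 };
#define TABLE_MAX (sizeof(credit_table) / sizeof(credit_table[0]) - 1)

/* Number of receive WQEs available in a ring of 'size' slots. */
static uint32_t ring_credits(uint32_t head, uint32_t tail, uint32_t size)
{
	uint32_t credits = head - tail;

	if ((int32_t)credits < 0)
		credits += size;
	return credits;
}

/* Largest code whose table entry does not exceed 'credits', using the
 * same bounded binary search as ipath_compute_aeth() above. */
static uint32_t credit_code(uint32_t credits)
{
	uint32_t min = 0, max = TABLE_MAX, x;

	for (;;) {
		x = (min + max) / 2;
		if (credit_table[x] == credits)
			break;
		if (credit_table[x] > credits)
			max = x;
		else if (min == x)
			break;
		else
			min = x;
	}
	return x;
}

int main(void)
{
	/* head has wrapped past tail: 5 - 30 is negative, so add the size. */
	uint32_t credits = ring_credits(5, 30, 32);	/* 7 */

	printf("credits=%u code=%u\n", credits, credit_code(credits));
	return 0;
}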
729
730/**
731 * ipath_create_qp - create a queue pair for a device
732 * @ibpd: the protection domain whose device we create the queue pair for
733 * @init_attr: the attributes of the queue pair
734 * @udata: unused by InfiniPath
735 *
736 * Returns the queue pair on success, otherwise returns an errno.
737 *
738 * Called by the ib_create_qp() core verbs function.
739 */
740struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
741 struct ib_qp_init_attr *init_attr,
742 struct ib_udata *udata)
743{
744 struct ipath_qp *qp;
745 int err;
746 struct ipath_swqe *swq = NULL;
747 struct ipath_ibdev *dev;
748 size_t sz;
749 size_t sg_list_sz;
750 struct ib_qp *ret;
751
752 if (init_attr->create_flags) {
753 ret = ERR_PTR(-EINVAL);
754 goto bail;
755 }
756
757 if (init_attr->cap.max_send_sge > ib_ipath_max_sges ||
758 init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs) {
759 ret = ERR_PTR(-EINVAL);
760 goto bail;
761 }
762
763 /* Check receive queue parameters if no SRQ is specified. */
764 if (!init_attr->srq) {
765 if (init_attr->cap.max_recv_sge > ib_ipath_max_sges ||
766 init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) {
767 ret = ERR_PTR(-EINVAL);
768 goto bail;
769 }
770 if (init_attr->cap.max_send_sge +
771 init_attr->cap.max_send_wr +
772 init_attr->cap.max_recv_sge +
773 init_attr->cap.max_recv_wr == 0) {
774 ret = ERR_PTR(-EINVAL);
775 goto bail;
776 }
777 }
778
779 switch (init_attr->qp_type) {
780 case IB_QPT_UC:
781 case IB_QPT_RC:
782 case IB_QPT_UD:
783 case IB_QPT_SMI:
784 case IB_QPT_GSI:
785 sz = sizeof(struct ipath_sge) *
786 init_attr->cap.max_send_sge +
787 sizeof(struct ipath_swqe);
788 swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
789 if (swq == NULL) {
790 ret = ERR_PTR(-ENOMEM);
791 goto bail;
792 }
793 sz = sizeof(*qp);
794 sg_list_sz = 0;
795 if (init_attr->srq) {
796 struct ipath_srq *srq = to_isrq(init_attr->srq);
797
798 if (srq->rq.max_sge > 1)
799 sg_list_sz = sizeof(*qp->r_sg_list) *
800 (srq->rq.max_sge - 1);
801 } else if (init_attr->cap.max_recv_sge > 1)
802 sg_list_sz = sizeof(*qp->r_sg_list) *
803 (init_attr->cap.max_recv_sge - 1);
804 qp = kmalloc(sz + sg_list_sz, GFP_KERNEL);
805 if (!qp) {
806 ret = ERR_PTR(-ENOMEM);
807 goto bail_swq;
808 }
809 if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD ||
810 init_attr->qp_type == IB_QPT_SMI ||
811 init_attr->qp_type == IB_QPT_GSI)) {
812 qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL);
813 if (!qp->r_ud_sg_list) {
814 ret = ERR_PTR(-ENOMEM);
815 goto bail_qp;
816 }
817 } else
818 qp->r_ud_sg_list = NULL;
819 if (init_attr->srq) {
820 sz = 0;
821 qp->r_rq.size = 0;
822 qp->r_rq.max_sge = 0;
823 qp->r_rq.wq = NULL;
824 init_attr->cap.max_recv_wr = 0;
825 init_attr->cap.max_recv_sge = 0;
826 } else {
827 qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
828 qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
829 sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
830 sizeof(struct ipath_rwqe);
831 qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
832 qp->r_rq.size * sz);
833 if (!qp->r_rq.wq) {
834 ret = ERR_PTR(-ENOMEM);
835 goto bail_sg_list;
836 }
837 }
838
839 /*
840 * ib_create_qp() will initialize qp->ibqp
841 * except for qp->ibqp.qp_num.
842 */
843 spin_lock_init(&qp->s_lock);
844 spin_lock_init(&qp->r_rq.lock);
845 atomic_set(&qp->refcount, 0);
846 init_waitqueue_head(&qp->wait);
847 init_waitqueue_head(&qp->wait_dma);
848 tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
849 INIT_LIST_HEAD(&qp->piowait);
850 INIT_LIST_HEAD(&qp->timerwait);
851 qp->state = IB_QPS_RESET;
852 qp->s_wq = swq;
853 qp->s_size = init_attr->cap.max_send_wr + 1;
854 qp->s_max_sge = init_attr->cap.max_send_sge;
855 if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
856 qp->s_flags = IPATH_S_SIGNAL_REQ_WR;
857 else
858 qp->s_flags = 0;
859 dev = to_idev(ibpd->device);
860 err = ipath_alloc_qpn(&dev->qp_table, qp,
861 init_attr->qp_type);
862 if (err) {
863 ret = ERR_PTR(err);
864 vfree(qp->r_rq.wq);
865 goto bail_sg_list;
866 }
867 qp->ip = NULL;
868 qp->s_tx = NULL;
869 ipath_reset_qp(qp, init_attr->qp_type);
870 break;
871
872 default:
873 /* Don't support raw QPs */
874 ret = ERR_PTR(-ENOSYS);
875 goto bail;
876 }
877
878 init_attr->cap.max_inline_data = 0;
879
880 /*
881 * Return the address of the RWQ as the offset to mmap.
882 * See ipath_mmap() for details.
883 */
884 if (udata && udata->outlen >= sizeof(__u64)) {
885 if (!qp->r_rq.wq) {
886 __u64 offset = 0;
887
888 err = ib_copy_to_udata(udata, &offset,
889 sizeof(offset));
890 if (err) {
891 ret = ERR_PTR(err);
892 goto bail_ip;
893 }
894 } else {
895 u32 s = sizeof(struct ipath_rwq) +
896 qp->r_rq.size * sz;
897
898 qp->ip =
899 ipath_create_mmap_info(dev, s,
900 ibpd->uobject->context,
901 qp->r_rq.wq);
902 if (!qp->ip) {
903 ret = ERR_PTR(-ENOMEM);
904 goto bail_ip;
905 }
906
907 err = ib_copy_to_udata(udata, &(qp->ip->offset),
908 sizeof(qp->ip->offset));
909 if (err) {
910 ret = ERR_PTR(err);
911 goto bail_ip;
912 }
913 }
914 }
915
916 spin_lock(&dev->n_qps_lock);
917 if (dev->n_qps_allocated == ib_ipath_max_qps) {
918 spin_unlock(&dev->n_qps_lock);
919 ret = ERR_PTR(-ENOMEM);
920 goto bail_ip;
921 }
922
923 dev->n_qps_allocated++;
924 spin_unlock(&dev->n_qps_lock);
925
926 if (qp->ip) {
927 spin_lock_irq(&dev->pending_lock);
928 list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
929 spin_unlock_irq(&dev->pending_lock);
930 }
931
932 ret = &qp->ibqp;
933 goto bail;
934
935bail_ip:
936 if (qp->ip)
937 kref_put(&qp->ip->ref, ipath_release_mmap_info);
938 else
939 vfree(qp->r_rq.wq);
940 ipath_free_qp(&dev->qp_table, qp);
941 free_qpn(&dev->qp_table, qp->ibqp.qp_num);
942bail_sg_list:
943 kfree(qp->r_ud_sg_list);
944bail_qp:
945 kfree(qp);
946bail_swq:
947 vfree(swq);
948bail:
949 return ret;
950}
951
952/**
953 * ipath_destroy_qp - destroy a queue pair
954 * @ibqp: the queue pair to destroy
955 *
956 * Returns 0 on success.
957 *
958 * Note that this can be called while the QP is actively sending or
959 * receiving!
960 */
961int ipath_destroy_qp(struct ib_qp *ibqp)
962{
963 struct ipath_qp *qp = to_iqp(ibqp);
964 struct ipath_ibdev *dev = to_idev(ibqp->device);
965
966 /* Make sure HW and driver activity is stopped. */
967 spin_lock_irq(&qp->s_lock);
968 if (qp->state != IB_QPS_RESET) {
969 qp->state = IB_QPS_RESET;
970 spin_lock(&dev->pending_lock);
971 if (!list_empty(&qp->timerwait))
972 list_del_init(&qp->timerwait);
973 if (!list_empty(&qp->piowait))
974 list_del_init(&qp->piowait);
975 spin_unlock(&dev->pending_lock);
976 qp->s_flags &= ~IPATH_S_ANY_WAIT;
977 spin_unlock_irq(&qp->s_lock);
978 /* Stop the sending tasklet */
979 tasklet_kill(&qp->s_task);
980 wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
981 } else
982 spin_unlock_irq(&qp->s_lock);
983
984 ipath_free_qp(&dev->qp_table, qp);
985
986 if (qp->s_tx) {
987 atomic_dec(&qp->refcount);
988 if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
989 kfree(qp->s_tx->txreq.map_addr);
990 spin_lock_irq(&dev->pending_lock);
991 list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
992 spin_unlock_irq(&dev->pending_lock);
993 qp->s_tx = NULL;
994 }
995
996 wait_event(qp->wait, !atomic_read(&qp->refcount));
997
998	/* all users cleaned up, mark it available */
999 free_qpn(&dev->qp_table, qp->ibqp.qp_num);
1000 spin_lock(&dev->n_qps_lock);
1001 dev->n_qps_allocated--;
1002 spin_unlock(&dev->n_qps_lock);
1003
1004 if (qp->ip)
1005 kref_put(&qp->ip->ref, ipath_release_mmap_info);
1006 else
1007 vfree(qp->r_rq.wq);
1008 kfree(qp->r_ud_sg_list);
1009 vfree(qp->s_wq);
1010 kfree(qp);
1011 return 0;
1012}
1013
1014/**
1015 * ipath_init_qp_table - initialize the QP table for a device
1016 * @idev: the device whose QP table we're initializing
1017 * @size: the size of the QP table
1018 *
1019 * Returns 0 on success, otherwise returns an errno.
1020 */
1021int ipath_init_qp_table(struct ipath_ibdev *idev, int size)
1022{
1023 int i;
1024 int ret;
1025
1026 idev->qp_table.last = 1; /* QPN 0 and 1 are special. */
1027 idev->qp_table.max = size;
1028 idev->qp_table.nmaps = 1;
1029 idev->qp_table.table = kcalloc(size, sizeof(*idev->qp_table.table),
1030 GFP_KERNEL);
1031 if (idev->qp_table.table == NULL) {
1032 ret = -ENOMEM;
1033 goto bail;
1034 }
1035
1036 for (i = 0; i < ARRAY_SIZE(idev->qp_table.map); i++) {
1037 atomic_set(&idev->qp_table.map[i].n_free, BITS_PER_PAGE);
1038 idev->qp_table.map[i].page = NULL;
1039 }
1040
1041 ret = 0;
1042
1043bail:
1044 return ret;
1045}
1046
1047/**
1048 * ipath_get_credit - process the credit field of an incoming AETH
1049 * @qp: the QP whose send state should be updated
1050 * @aeth: the Acknowledge Extended Transport Header
1051 *
1052 * The QP s_lock should be held.
1053 */
1054void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
1055{
1056 u32 credit = (aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK;
1057
1058 /*
1059 * If the credit is invalid, we can send
1060 * as many packets as we like. Otherwise, we have to
1061 * honor the credit field.
1062 */
1063 if (credit == IPATH_AETH_CREDIT_INVAL)
1064 qp->s_lsn = (u32) -1;
1065 else if (qp->s_lsn != (u32) -1) {
1066 /* Compute new LSN (i.e., MSN + credit) */
1067 credit = (aeth + credit_table[credit]) & IPATH_MSN_MASK;
1068 if (ipath_cmp24(credit, qp->s_lsn) > 0)
1069 qp->s_lsn = credit;
1070 }
1071
1072 /* Restart sending if it was blocked due to lack of credits. */
1073 if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
1074 qp->s_cur != qp->s_head &&
1075 (qp->s_lsn == (u32) -1 ||
1076 ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
1077 qp->s_lsn + 1) <= 0))
1078 ipath_schedule_send(qp);
1079}
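
As a companion to ipath_compute_aeth() and ipath_get_credit() above, the sketch below decodes the three AETH fields those routines manipulate: the 24-bit MSN, the 5-bit credit (or RNR/NAK) code, and the 3-bit ACK type that do_rc_ack() in ipath_rc.c below switches on as aeth >> 29. The mask and shift values are assumptions meant to mirror the driver's IPATH_MSN_MASK / IPATH_AETH_CREDIT_* constants; the helper itself is illustrative and not driver code.

#include <stdio.h>
#include <stdint.h>

/* Assumed layout: bits 0-23 MSN, bits 24-28 credit/RNR/NAK code,
 * bits 29-31 ACK type. */
#define MSN_MASK	0x00FFFFFFu
#define CREDIT_SHIFT	24
#define CREDIT_MASK	0x1Fu
#define CREDIT_INVAL	0x1Fu	/* assumed "no credit limit" marker */

static void decode_aeth(uint32_t aeth)
{
	uint32_t type = aeth >> 29;		/* 0=ACK, 1=RNR NAK, 3=NAK */
	uint32_t code = (aeth >> CREDIT_SHIFT) & CREDIT_MASK;
	uint32_t msn = aeth & MSN_MASK;

	/* For a plain ACK the 5-bit code indexes the credit table;
	 * for RNR/NAK responses it carries the timer or NAK reason. */
	printf("type=%u code=%u%s msn=%u\n", type, code,
	       (type == 0 && code == CREDIT_INVAL) ? " (unlimited credits)" : "",
	       msn);
}

int main(void)
{
	decode_aeth((0u << 29) | (3u << CREDIT_SHIFT) | 42u);	/* ACK, credit code 3 */
	decode_aeth((1u << 29) | (0u << CREDIT_SHIFT) | 42u);	/* RNR NAK */
	return 0;
}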
diff --git a/drivers/staging/rdma/ipath/ipath_rc.c b/drivers/staging/rdma/ipath/ipath_rc.c
deleted file mode 100644
index d4aa53574e57..000000000000
--- a/drivers/staging/rdma/ipath/ipath_rc.c
+++ /dev/null
@@ -1,1969 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/io.h>
35
36#include "ipath_verbs.h"
37#include "ipath_kernel.h"
38
39/* cut down ridiculously long IB macro names */
40#define OP(x) IB_OPCODE_RC_##x
41
42static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
43 u32 psn, u32 pmtu)
44{
45 u32 len;
46
47 len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
48 ss->sge = wqe->sg_list[0];
49 ss->sg_list = wqe->sg_list + 1;
50 ss->num_sge = wqe->wr.num_sge;
51 ipath_skip_sge(ss, len);
52 return wqe->length - len;
53}
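
restart_sge() above turns a PSN delta into a byte offset: every packet before the restart point carried one path-MTU worth of payload. A tiny standalone illustration of that arithmetic follows; the names and the 24-bit mask are assumptions for the sketch, not driver definitions.

#include <stdio.h>
#include <stdint.h>

#define PSN_MASK 0xFFFFFFu	/* assumed 24-bit PSN, as IPATH_PSN_MASK */

/* Bytes already consumed when resuming a request at 'psn', given the
 * PSN of the request's first packet and the path MTU. */
static uint32_t resume_offset(uint32_t first_psn, uint32_t psn, uint32_t pmtu)
{
	return ((psn - first_psn) & PSN_MASK) * pmtu;
}

int main(void)
{
	/* Resending from the 4th packet of a request that started at
	 * PSN 100 with a 2048-byte MTU skips three full payloads. */
	printf("%u\n", resume_offset(100, 103, 2048));	/* prints 6144 */
	return 0;
}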
54
55/**
56 * ipath_init_restart - initialize the qp->s_sge after a restart
57 * @qp: the QP whose SGE we're restarting
58 * @wqe: the work queue to initialize the QP's SGE from
59 *
60 * The QP s_lock should be held and interrupts disabled.
61 */
62static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
63{
64 struct ipath_ibdev *dev;
65
66 qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
67 ib_mtu_enum_to_int(qp->path_mtu));
68 dev = to_idev(qp->ibqp.device);
69 spin_lock(&dev->pending_lock);
70 if (list_empty(&qp->timerwait))
71 list_add_tail(&qp->timerwait,
72 &dev->pending[dev->pending_index]);
73 spin_unlock(&dev->pending_lock);
74}
75
76/**
77 * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
78 * @qp: a pointer to the QP
79 * @ohdr: a pointer to the IB header being constructed
80 * @pmtu: the path MTU
81 *
82 * Return 1 if constructed; otherwise, return 0.
83 * Note that we are in the responder's side of the QP context.
84 * Note the QP s_lock must be held.
85 */
86static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
87 struct ipath_other_headers *ohdr, u32 pmtu)
88{
89 struct ipath_ack_entry *e;
90 u32 hwords;
91 u32 len;
92 u32 bth0;
93 u32 bth2;
94
95 /* Don't send an ACK if we aren't supposed to. */
96 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
97 goto bail;
98
99 /* header size in 32-bit words LRH+BTH = (8+12)/4. */
100 hwords = 5;
101
102 switch (qp->s_ack_state) {
103 case OP(RDMA_READ_RESPONSE_LAST):
104 case OP(RDMA_READ_RESPONSE_ONLY):
105 case OP(ATOMIC_ACKNOWLEDGE):
106 /*
107 * We can increment the tail pointer now that the last
108 * response has been sent instead of only being
109 * constructed.
110 */
111 if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
112 qp->s_tail_ack_queue = 0;
113 /* FALLTHROUGH */
114 case OP(SEND_ONLY):
115 case OP(ACKNOWLEDGE):
116 /* Check for no next entry in the queue. */
117 if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
118 if (qp->s_flags & IPATH_S_ACK_PENDING)
119 goto normal;
120 qp->s_ack_state = OP(ACKNOWLEDGE);
121 goto bail;
122 }
123
124 e = &qp->s_ack_queue[qp->s_tail_ack_queue];
125 if (e->opcode == OP(RDMA_READ_REQUEST)) {
126 /* Copy SGE state in case we need to resend */
127 qp->s_ack_rdma_sge = e->rdma_sge;
128 qp->s_cur_sge = &qp->s_ack_rdma_sge;
129 len = e->rdma_sge.sge.sge_length;
130 if (len > pmtu) {
131 len = pmtu;
132 qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
133 } else {
134 qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
135 e->sent = 1;
136 }
137 ohdr->u.aeth = ipath_compute_aeth(qp);
138 hwords++;
139 qp->s_ack_rdma_psn = e->psn;
140 bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
141 } else {
142 /* COMPARE_SWAP or FETCH_ADD */
143 qp->s_cur_sge = NULL;
144 len = 0;
145 qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
146 ohdr->u.at.aeth = ipath_compute_aeth(qp);
147 ohdr->u.at.atomic_ack_eth[0] =
148 cpu_to_be32(e->atomic_data >> 32);
149 ohdr->u.at.atomic_ack_eth[1] =
150 cpu_to_be32(e->atomic_data);
151 hwords += sizeof(ohdr->u.at) / sizeof(u32);
152 bth2 = e->psn;
153 e->sent = 1;
154 }
155 bth0 = qp->s_ack_state << 24;
156 break;
157
158 case OP(RDMA_READ_RESPONSE_FIRST):
159 qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
160 /* FALLTHROUGH */
161 case OP(RDMA_READ_RESPONSE_MIDDLE):
162 len = qp->s_ack_rdma_sge.sge.sge_length;
163 if (len > pmtu)
164 len = pmtu;
165 else {
166 ohdr->u.aeth = ipath_compute_aeth(qp);
167 hwords++;
168 qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
169 qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
170 }
171 bth0 = qp->s_ack_state << 24;
172 bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
173 break;
174
175 default:
176 normal:
177 /*
178 * Send a regular ACK.
179 * Set the s_ack_state so we wait until after sending
180 * the ACK before setting s_ack_state to ACKNOWLEDGE
181 * (see above).
182 */
183 qp->s_ack_state = OP(SEND_ONLY);
184 qp->s_flags &= ~IPATH_S_ACK_PENDING;
185 qp->s_cur_sge = NULL;
186 if (qp->s_nak_state)
187 ohdr->u.aeth =
188 cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
189 (qp->s_nak_state <<
190 IPATH_AETH_CREDIT_SHIFT));
191 else
192 ohdr->u.aeth = ipath_compute_aeth(qp);
193 hwords++;
194 len = 0;
195 bth0 = OP(ACKNOWLEDGE) << 24;
196 bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
197 }
198 qp->s_hdrwords = hwords;
199 qp->s_cur_size = len;
200 ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2);
201 return 1;
202
203bail:
204 return 0;
205}
206
207/**
208 * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
209 * @qp: a pointer to the QP
210 *
211 * Return 1 if constructed; otherwise, return 0.
212 */
213int ipath_make_rc_req(struct ipath_qp *qp)
214{
215 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
216 struct ipath_other_headers *ohdr;
217 struct ipath_sge_state *ss;
218 struct ipath_swqe *wqe;
219 u32 hwords;
220 u32 len;
221 u32 bth0;
222 u32 bth2;
223 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
224 char newreq;
225 unsigned long flags;
226 int ret = 0;
227
228 ohdr = &qp->s_hdr.u.oth;
229 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
230 ohdr = &qp->s_hdr.u.l.oth;
231
232 /*
233 * The lock is needed to synchronize between the sending tasklet,
234 * the receive interrupt handler, and timeout resends.
235 */
236 spin_lock_irqsave(&qp->s_lock, flags);
237
238	/* Sending responses takes priority over sending requests. */
239 if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
240 (qp->s_flags & IPATH_S_ACK_PENDING) ||
241 qp->s_ack_state != OP(ACKNOWLEDGE)) &&
242 ipath_make_rc_ack(dev, qp, ohdr, pmtu))
243 goto done;
244
245 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
246 if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
247 goto bail;
248 /* We are in the error state, flush the work request. */
249 if (qp->s_last == qp->s_head)
250 goto bail;
251 /* If DMAs are in progress, we can't flush immediately. */
252 if (atomic_read(&qp->s_dma_busy)) {
253 qp->s_flags |= IPATH_S_WAIT_DMA;
254 goto bail;
255 }
256 wqe = get_swqe_ptr(qp, qp->s_last);
257 ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
258 goto done;
259 }
260
261 /* Leave BUSY set until RNR timeout. */
262 if (qp->s_rnr_timeout) {
263 qp->s_flags |= IPATH_S_WAITING;
264 goto bail;
265 }
266
267 /* header size in 32-bit words LRH+BTH = (8+12)/4. */
268 hwords = 5;
269 bth0 = 1 << 22; /* Set M bit */
270
271 /* Send a request. */
272 wqe = get_swqe_ptr(qp, qp->s_cur);
273 switch (qp->s_state) {
274 default:
275 if (!(ib_ipath_state_ops[qp->state] &
276 IPATH_PROCESS_NEXT_SEND_OK))
277 goto bail;
278 /*
279 * Resend an old request or start a new one.
280 *
281 * We keep track of the current SWQE so that
282 * we don't reset the "furthest progress" state
283 * if we need to back up.
284 */
285 newreq = 0;
286 if (qp->s_cur == qp->s_tail) {
287 /* Check if send work queue is empty. */
288 if (qp->s_tail == qp->s_head)
289 goto bail;
290 /*
291 * If a fence is requested, wait for previous
292 * RDMA read and atomic operations to finish.
293 */
294 if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
295 qp->s_num_rd_atomic) {
296 qp->s_flags |= IPATH_S_FENCE_PENDING;
297 goto bail;
298 }
299 wqe->psn = qp->s_next_psn;
300 newreq = 1;
301 }
302 /*
303 * Note that we have to be careful not to modify the
304 * original work request since we may need to resend
305 * it.
306 */
307 len = wqe->length;
308 ss = &qp->s_sge;
309 bth2 = 0;
310 switch (wqe->wr.opcode) {
311 case IB_WR_SEND:
312 case IB_WR_SEND_WITH_IMM:
313 /* If no credit, return. */
314 if (qp->s_lsn != (u32) -1 &&
315 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
316 qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
317 goto bail;
318 }
319 wqe->lpsn = wqe->psn;
320 if (len > pmtu) {
321 wqe->lpsn += (len - 1) / pmtu;
322 qp->s_state = OP(SEND_FIRST);
323 len = pmtu;
324 break;
325 }
326 if (wqe->wr.opcode == IB_WR_SEND)
327 qp->s_state = OP(SEND_ONLY);
328 else {
329 qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
330 /* Immediate data comes after the BTH */
331 ohdr->u.imm_data = wqe->wr.ex.imm_data;
332 hwords += 1;
333 }
334 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
335 bth0 |= 1 << 23;
336 bth2 = 1 << 31; /* Request ACK. */
337 if (++qp->s_cur == qp->s_size)
338 qp->s_cur = 0;
339 break;
340
341 case IB_WR_RDMA_WRITE:
342 if (newreq && qp->s_lsn != (u32) -1)
343 qp->s_lsn++;
344 /* FALLTHROUGH */
345 case IB_WR_RDMA_WRITE_WITH_IMM:
346 /* If no credit, return. */
347 if (qp->s_lsn != (u32) -1 &&
348 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
349 qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
350 goto bail;
351 }
352 ohdr->u.rc.reth.vaddr =
353 cpu_to_be64(wqe->rdma_wr.remote_addr);
354 ohdr->u.rc.reth.rkey =
355 cpu_to_be32(wqe->rdma_wr.rkey);
356 ohdr->u.rc.reth.length = cpu_to_be32(len);
357 hwords += sizeof(struct ib_reth) / sizeof(u32);
358 wqe->lpsn = wqe->psn;
359 if (len > pmtu) {
360 wqe->lpsn += (len - 1) / pmtu;
361 qp->s_state = OP(RDMA_WRITE_FIRST);
362 len = pmtu;
363 break;
364 }
365 if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
366 qp->s_state = OP(RDMA_WRITE_ONLY);
367 else {
368 qp->s_state =
369 OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
370 /* Immediate data comes after RETH */
371 ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
372 hwords += 1;
373 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
374 bth0 |= 1 << 23;
375 }
376 bth2 = 1 << 31; /* Request ACK. */
377 if (++qp->s_cur == qp->s_size)
378 qp->s_cur = 0;
379 break;
380
381 case IB_WR_RDMA_READ:
382 /*
383 * Don't allow more operations to be started
384 * than the QP limits allow.
385 */
386 if (newreq) {
387 if (qp->s_num_rd_atomic >=
388 qp->s_max_rd_atomic) {
389 qp->s_flags |= IPATH_S_RDMAR_PENDING;
390 goto bail;
391 }
392 qp->s_num_rd_atomic++;
393 if (qp->s_lsn != (u32) -1)
394 qp->s_lsn++;
395 /*
396 * Adjust s_next_psn to count the
397 * expected number of responses.
398 */
399 if (len > pmtu)
400 qp->s_next_psn += (len - 1) / pmtu;
401 wqe->lpsn = qp->s_next_psn++;
402 }
403 ohdr->u.rc.reth.vaddr =
404 cpu_to_be64(wqe->rdma_wr.remote_addr);
405 ohdr->u.rc.reth.rkey =
406 cpu_to_be32(wqe->rdma_wr.rkey);
407 ohdr->u.rc.reth.length = cpu_to_be32(len);
408 qp->s_state = OP(RDMA_READ_REQUEST);
409 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
410 ss = NULL;
411 len = 0;
412 if (++qp->s_cur == qp->s_size)
413 qp->s_cur = 0;
414 break;
415
416 case IB_WR_ATOMIC_CMP_AND_SWP:
417 case IB_WR_ATOMIC_FETCH_AND_ADD:
418 /*
419 * Don't allow more operations to be started
420 * than the QP limits allow.
421 */
422 if (newreq) {
423 if (qp->s_num_rd_atomic >=
424 qp->s_max_rd_atomic) {
425 qp->s_flags |= IPATH_S_RDMAR_PENDING;
426 goto bail;
427 }
428 qp->s_num_rd_atomic++;
429 if (qp->s_lsn != (u32) -1)
430 qp->s_lsn++;
431 wqe->lpsn = wqe->psn;
432 }
433 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
434 qp->s_state = OP(COMPARE_SWAP);
435 ohdr->u.atomic_eth.swap_data = cpu_to_be64(
436 wqe->atomic_wr.swap);
437 ohdr->u.atomic_eth.compare_data = cpu_to_be64(
438 wqe->atomic_wr.compare_add);
439 } else {
440 qp->s_state = OP(FETCH_ADD);
441 ohdr->u.atomic_eth.swap_data = cpu_to_be64(
442 wqe->atomic_wr.compare_add);
443 ohdr->u.atomic_eth.compare_data = 0;
444 }
445 ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
446 wqe->atomic_wr.remote_addr >> 32);
447 ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
448 wqe->atomic_wr.remote_addr);
449 ohdr->u.atomic_eth.rkey = cpu_to_be32(
450 wqe->atomic_wr.rkey);
451 hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
452 ss = NULL;
453 len = 0;
454 if (++qp->s_cur == qp->s_size)
455 qp->s_cur = 0;
456 break;
457
458 default:
459 goto bail;
460 }
461 qp->s_sge.sge = wqe->sg_list[0];
462 qp->s_sge.sg_list = wqe->sg_list + 1;
463 qp->s_sge.num_sge = wqe->wr.num_sge;
464 qp->s_len = wqe->length;
465 if (newreq) {
466 qp->s_tail++;
467 if (qp->s_tail >= qp->s_size)
468 qp->s_tail = 0;
469 }
470 bth2 |= qp->s_psn & IPATH_PSN_MASK;
471 if (wqe->wr.opcode == IB_WR_RDMA_READ)
472 qp->s_psn = wqe->lpsn + 1;
473 else {
474 qp->s_psn++;
475 if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
476 qp->s_next_psn = qp->s_psn;
477 }
478 /*
479 * Put the QP on the pending list so lost ACKs will cause
480 * a retry. More than one request can be pending so the
481 * QP may already be on the dev->pending list.
482 */
483 spin_lock(&dev->pending_lock);
484 if (list_empty(&qp->timerwait))
485 list_add_tail(&qp->timerwait,
486 &dev->pending[dev->pending_index]);
487 spin_unlock(&dev->pending_lock);
488 break;
489
490 case OP(RDMA_READ_RESPONSE_FIRST):
491 /*
492 * This case can only happen if a send is restarted.
493 * See ipath_restart_rc().
494 */
495 ipath_init_restart(qp, wqe);
496 /* FALLTHROUGH */
497 case OP(SEND_FIRST):
498 qp->s_state = OP(SEND_MIDDLE);
499 /* FALLTHROUGH */
500 case OP(SEND_MIDDLE):
501 bth2 = qp->s_psn++ & IPATH_PSN_MASK;
502 if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
503 qp->s_next_psn = qp->s_psn;
504 ss = &qp->s_sge;
505 len = qp->s_len;
506 if (len > pmtu) {
507 len = pmtu;
508 break;
509 }
510 if (wqe->wr.opcode == IB_WR_SEND)
511 qp->s_state = OP(SEND_LAST);
512 else {
513 qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
514 /* Immediate data comes after the BTH */
515 ohdr->u.imm_data = wqe->wr.ex.imm_data;
516 hwords += 1;
517 }
518 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
519 bth0 |= 1 << 23;
520 bth2 |= 1 << 31; /* Request ACK. */
521 qp->s_cur++;
522 if (qp->s_cur >= qp->s_size)
523 qp->s_cur = 0;
524 break;
525
526 case OP(RDMA_READ_RESPONSE_LAST):
527 /*
528 * This case can only happen if a RDMA write is restarted.
529 * See ipath_restart_rc().
530 */
531 ipath_init_restart(qp, wqe);
532 /* FALLTHROUGH */
533 case OP(RDMA_WRITE_FIRST):
534 qp->s_state = OP(RDMA_WRITE_MIDDLE);
535 /* FALLTHROUGH */
536 case OP(RDMA_WRITE_MIDDLE):
537 bth2 = qp->s_psn++ & IPATH_PSN_MASK;
538 if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
539 qp->s_next_psn = qp->s_psn;
540 ss = &qp->s_sge;
541 len = qp->s_len;
542 if (len > pmtu) {
543 len = pmtu;
544 break;
545 }
546 if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
547 qp->s_state = OP(RDMA_WRITE_LAST);
548 else {
549 qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
550 /* Immediate data comes after the BTH */
551 ohdr->u.imm_data = wqe->wr.ex.imm_data;
552 hwords += 1;
553 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
554 bth0 |= 1 << 23;
555 }
556 bth2 |= 1 << 31; /* Request ACK. */
557 qp->s_cur++;
558 if (qp->s_cur >= qp->s_size)
559 qp->s_cur = 0;
560 break;
561
562 case OP(RDMA_READ_RESPONSE_MIDDLE):
563 /*
564 * This case can only happen if a RDMA read is restarted.
565 * See ipath_restart_rc().
566 */
567 ipath_init_restart(qp, wqe);
568 len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
569 ohdr->u.rc.reth.vaddr =
570 cpu_to_be64(wqe->rdma_wr.remote_addr + len);
571 ohdr->u.rc.reth.rkey =
572 cpu_to_be32(wqe->rdma_wr.rkey);
573 ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
574 qp->s_state = OP(RDMA_READ_REQUEST);
575 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
576 bth2 = qp->s_psn & IPATH_PSN_MASK;
577 qp->s_psn = wqe->lpsn + 1;
578 ss = NULL;
579 len = 0;
580 qp->s_cur++;
581 if (qp->s_cur == qp->s_size)
582 qp->s_cur = 0;
583 break;
584 }
585 if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
586 bth2 |= 1 << 31; /* Request ACK. */
587 qp->s_len -= len;
588 qp->s_hdrwords = hwords;
589 qp->s_cur_sge = ss;
590 qp->s_cur_size = len;
591 ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
592done:
593 ret = 1;
594 goto unlock;
595
596bail:
597 qp->s_flags &= ~IPATH_S_BUSY;
598unlock:
599 spin_unlock_irqrestore(&qp->s_lock, flags);
600 return ret;
601}
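
For reference, the BTH bits manipulated in ipath_make_rc_req() above and send_rc_ack() below can be collected into a couple of small helpers. This is an illustrative sketch only: the example opcode value and default P_Key, and the placement of the P_Key in the low 16 bits of bth0, are assumptions drawn from the code shown here rather than a restatement of ipath_make_ruc_header().

#include <stdio.h>
#include <stdint.h>

#define PSN_MASK 0x00FFFFFFu	/* assumed 24-bit PSN, as IPATH_PSN_MASK */

/* bth0: opcode in the top byte, MigReq ("M bit") at bit 22, Solicited
 * Event at bit 23, P_Key in the low 16 bits. */
static uint32_t make_bth0(uint8_t opcode, uint16_t pkey, int solicited)
{
	return ((uint32_t)opcode << 24) | (1u << 22) |
	       (solicited ? (1u << 23) : 0) | pkey;
}

/* bth2: 24-bit PSN with bit 31 used as the "request ACK" flag. */
static uint32_t make_bth2(uint32_t psn, int request_ack)
{
	return (psn & PSN_MASK) | (request_ack ? (1u << 31) : 0);
}

int main(void)
{
	/* e.g. an RC SEND ONLY (opcode 0x04) with the default P_Key. */
	printf("bth0=%08x bth2=%08x\n",
	       (unsigned)make_bth0(0x04, 0xFFFF, 1),
	       (unsigned)make_bth2(0x123456, 1));
	return 0;
}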
602
603/**
604 * send_rc_ack - Construct an ACK packet and send it
605 * @qp: a pointer to the QP
606 *
607 * This is called from ipath_rc_rcv() and only uses the receive
608 * side QP state.
609 * Note that RDMA reads and atomics are handled in the
610 * send side QP state and tasklet.
611 */
612static void send_rc_ack(struct ipath_qp *qp)
613{
614 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
615 struct ipath_devdata *dd;
616 u16 lrh0;
617 u32 bth0;
618 u32 hwords;
619 u32 __iomem *piobuf;
620 struct ipath_ib_header hdr;
621 struct ipath_other_headers *ohdr;
622 unsigned long flags;
623
624 spin_lock_irqsave(&qp->s_lock, flags);
625
626 /* Don't send ACK or NAK if a RDMA read or atomic is pending. */
627 if (qp->r_head_ack_queue != qp->s_tail_ack_queue ||
628 (qp->s_flags & IPATH_S_ACK_PENDING) ||
629 qp->s_ack_state != OP(ACKNOWLEDGE))
630 goto queue_ack;
631
632 spin_unlock_irqrestore(&qp->s_lock, flags);
633
634 /* Don't try to send ACKs if the link isn't ACTIVE */
635 dd = dev->dd;
636 if (!(dd->ipath_flags & IPATH_LINKACTIVE))
637 goto done;
638
639 piobuf = ipath_getpiobuf(dd, 0, NULL);
640 if (!piobuf) {
641 /*
642 * We are out of PIO buffers at the moment.
643 * Pass responsibility for sending the ACK to the
644 * send tasklet so that when a PIO buffer becomes
645 * available, the ACK is sent ahead of other outgoing
646 * packets.
647 */
648 spin_lock_irqsave(&qp->s_lock, flags);
649 goto queue_ack;
650 }
651
652 /* Construct the header. */
653 ohdr = &hdr.u.oth;
654 lrh0 = IPATH_LRH_BTH;
655 /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
656 hwords = 6;
657 if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
658 hwords += ipath_make_grh(dev, &hdr.u.l.grh,
659 &qp->remote_ah_attr.grh,
660 hwords, 0);
661 ohdr = &hdr.u.l.oth;
662 lrh0 = IPATH_LRH_GRH;
663 }
664	/* read pkey_index w/o lock (it's atomic) */
665 bth0 = ipath_get_pkey(dd, qp->s_pkey_index) |
666 (OP(ACKNOWLEDGE) << 24) | (1 << 22);
667 if (qp->r_nak_state)
668 ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
669 (qp->r_nak_state <<
670 IPATH_AETH_CREDIT_SHIFT));
671 else
672 ohdr->u.aeth = ipath_compute_aeth(qp);
673 lrh0 |= qp->remote_ah_attr.sl << 4;
674 hdr.lrh[0] = cpu_to_be16(lrh0);
675 hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
676 hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
677 hdr.lrh[3] = cpu_to_be16(dd->ipath_lid |
678 qp->remote_ah_attr.src_path_bits);
679 ohdr->bth[0] = cpu_to_be32(bth0);
680 ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
681 ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
682
683 writeq(hwords + 1, piobuf);
684
685 if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
686 u32 *hdrp = (u32 *) &hdr;
687
688 ipath_flush_wc();
689 __iowrite32_copy(piobuf + 2, hdrp, hwords - 1);
690 ipath_flush_wc();
691 __raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
692 } else
693 __iowrite32_copy(piobuf + 2, (u32 *) &hdr, hwords);
694
695 ipath_flush_wc();
696
697 dev->n_unicast_xmit++;
698 goto done;
699
700queue_ack:
701 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
702 dev->n_rc_qacks++;
703 qp->s_flags |= IPATH_S_ACK_PENDING;
704 qp->s_nak_state = qp->r_nak_state;
705 qp->s_ack_psn = qp->r_ack_psn;
706
707 /* Schedule the send tasklet. */
708 ipath_schedule_send(qp);
709 }
710 spin_unlock_irqrestore(&qp->s_lock, flags);
711done:
712 return;
713}
714
715/**
716 * reset_psn - reset the QP state to send starting from PSN
717 * @qp: the QP
718 * @psn: the packet sequence number to restart at
719 *
720 * This is called from ipath_rc_rcv() to process an incoming RC ACK
721 * for the given QP.
722 * Called at interrupt level with the QP s_lock held.
723 */
724static void reset_psn(struct ipath_qp *qp, u32 psn)
725{
726 u32 n = qp->s_last;
727 struct ipath_swqe *wqe = get_swqe_ptr(qp, n);
728 u32 opcode;
729
730 qp->s_cur = n;
731
732 /*
733 * If we are starting the request from the beginning,
734 * let the normal send code handle initialization.
735 */
736 if (ipath_cmp24(psn, wqe->psn) <= 0) {
737 qp->s_state = OP(SEND_LAST);
738 goto done;
739 }
740
741 /* Find the work request opcode corresponding to the given PSN. */
742 opcode = wqe->wr.opcode;
743 for (;;) {
744 int diff;
745
746 if (++n == qp->s_size)
747 n = 0;
748 if (n == qp->s_tail)
749 break;
750 wqe = get_swqe_ptr(qp, n);
751 diff = ipath_cmp24(psn, wqe->psn);
752 if (diff < 0)
753 break;
754 qp->s_cur = n;
755 /*
756 * If we are starting the request from the beginning,
757 * let the normal send code handle initialization.
758 */
759 if (diff == 0) {
760 qp->s_state = OP(SEND_LAST);
761 goto done;
762 }
763 opcode = wqe->wr.opcode;
764 }
765
766 /*
767 * Set the state to restart in the middle of a request.
768 * Don't change the s_sge, s_cur_sge, or s_cur_size.
769 * See ipath_make_rc_req().
770 */
771 switch (opcode) {
772 case IB_WR_SEND:
773 case IB_WR_SEND_WITH_IMM:
774 qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
775 break;
776
777 case IB_WR_RDMA_WRITE:
778 case IB_WR_RDMA_WRITE_WITH_IMM:
779 qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
780 break;
781
782 case IB_WR_RDMA_READ:
783 qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
784 break;
785
786 default:
787 /*
788		 * This case shouldn't happen since there is only
789		 * one PSN per request.
790 */
791 qp->s_state = OP(SEND_LAST);
792 }
793done:
794 qp->s_psn = psn;
795}
796
797/**
798 * ipath_restart_rc - back up requester to resend the last un-ACKed request
799 * @qp: the QP to restart
800 * @psn: packet sequence number for the request
802 *
803 * The QP s_lock should be held and interrupts disabled.
804 */
805void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
806{
807 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
808 struct ipath_ibdev *dev;
809
810 if (qp->s_retry == 0) {
811 ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
812 ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
813 goto bail;
814 }
815 qp->s_retry--;
816
817 /*
818 * Remove the QP from the timeout queue.
819 * Note: it may already have been removed by ipath_ib_timer().
820 */
821 dev = to_idev(qp->ibqp.device);
822 spin_lock(&dev->pending_lock);
823 if (!list_empty(&qp->timerwait))
824 list_del_init(&qp->timerwait);
825 if (!list_empty(&qp->piowait))
826 list_del_init(&qp->piowait);
827 spin_unlock(&dev->pending_lock);
828
829 if (wqe->wr.opcode == IB_WR_RDMA_READ)
830 dev->n_rc_resends++;
831 else
832 dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
833
834 reset_psn(qp, psn);
835 ipath_schedule_send(qp);
836
837bail:
838 return;
839}
840
841static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
842{
843 qp->s_last_psn = psn;
844}
845
846/**
847 * do_rc_ack - process an incoming RC ACK
848 * @qp: the QP the ACK came in on
849 * @psn: the packet sequence number of the ACK
850 * @opcode: the opcode of the request that resulted in the ACK
851 *
852 * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
853 * for the given QP.
854 * Called at interrupt level with the QP s_lock held and interrupts disabled.
855 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
856 */
857static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
858 u64 val)
859{
860 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
861 struct ib_wc wc;
862 enum ib_wc_status status;
863 struct ipath_swqe *wqe;
864 int ret = 0;
865 u32 ack_psn;
866 int diff;
867
868 /*
869 * Remove the QP from the timeout queue (or RNR timeout queue).
870 * If ipath_ib_timer() has already removed it,
871 * it's OK since we hold the QP s_lock and ipath_restart_rc()
872 * just won't find anything to restart if we ACK everything.
873 */
874 spin_lock(&dev->pending_lock);
875 if (!list_empty(&qp->timerwait))
876 list_del_init(&qp->timerwait);
877 spin_unlock(&dev->pending_lock);
878
879 /*
880 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
881 * requests and implicitly NAK RDMA read and atomic requests issued
882 * before the NAK'ed request. The MSN won't include the NAK'ed
883	 * request but will include any ACK'ed requests.
884 */
885 ack_psn = psn;
886 if (aeth >> 29)
887 ack_psn--;
888 wqe = get_swqe_ptr(qp, qp->s_last);
889
890 /*
891 * The MSN might be for a later WQE than the PSN indicates so
892 * only complete WQEs that the PSN finishes.
893 */
894 while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {
895 /*
896 * RDMA_READ_RESPONSE_ONLY is a special case since
897 * we want to generate completion events for everything
898 * before the RDMA read, copy the data, then generate
899 * the completion for the read.
900 */
901 if (wqe->wr.opcode == IB_WR_RDMA_READ &&
902 opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
903 diff == 0) {
904 ret = 1;
905 goto bail;
906 }
907 /*
908 * If this request is a RDMA read or atomic, and the ACK is
909 * for a later operation, this ACK NAKs the RDMA read or
910 * atomic. In other words, only a RDMA_READ_LAST or ONLY
911 * can ACK a RDMA read and likewise for atomic ops. Note
912 * that the NAK case can only happen if relaxed ordering is
913 * used and requests are sent after an RDMA read or atomic
914 * is sent but before the response is received.
915 */
916 if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
917 (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
918 ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
919 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
920 (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
921 /*
922 * The last valid PSN seen is the previous
923 * request's.
924 */
925 update_last_psn(qp, wqe->psn - 1);
926 /* Retry this request. */
927 ipath_restart_rc(qp, wqe->psn);
928 /*
929 * No need to process the ACK/NAK since we are
930 * restarting an earlier request.
931 */
932 goto bail;
933 }
934 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
935 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
936 *(u64 *) wqe->sg_list[0].vaddr = val;
937 if (qp->s_num_rd_atomic &&
938 (wqe->wr.opcode == IB_WR_RDMA_READ ||
939 wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
940 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
941 qp->s_num_rd_atomic--;
942 /* Restart sending task if fence is complete */
943 if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
944 !qp->s_num_rd_atomic) ||
945 qp->s_flags & IPATH_S_RDMAR_PENDING)
946 ipath_schedule_send(qp);
947 }
948 /* Post a send completion queue entry if requested. */
949 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
950 (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
951 memset(&wc, 0, sizeof wc);
952 wc.wr_id = wqe->wr.wr_id;
953 wc.status = IB_WC_SUCCESS;
954 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
955 wc.byte_len = wqe->length;
956 wc.qp = &qp->ibqp;
957 wc.src_qp = qp->remote_qpn;
958 wc.slid = qp->remote_ah_attr.dlid;
959 wc.sl = qp->remote_ah_attr.sl;
960 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
961 }
962 qp->s_retry = qp->s_retry_cnt;
963 /*
964 * If we are completing a request which is in the process of
965 * being resent, we can stop resending it since we know the
966 * responder has already seen it.
967 */
968 if (qp->s_last == qp->s_cur) {
969 if (++qp->s_cur >= qp->s_size)
970 qp->s_cur = 0;
971 qp->s_last = qp->s_cur;
972 if (qp->s_last == qp->s_tail)
973 break;
974 wqe = get_swqe_ptr(qp, qp->s_cur);
975 qp->s_state = OP(SEND_LAST);
976 qp->s_psn = wqe->psn;
977 } else {
978 if (++qp->s_last >= qp->s_size)
979 qp->s_last = 0;
980 if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
981 qp->s_draining = 0;
982 if (qp->s_last == qp->s_tail)
983 break;
984 wqe = get_swqe_ptr(qp, qp->s_last);
985 }
986 }
987
988 switch (aeth >> 29) {
989 case 0: /* ACK */
990 dev->n_rc_acks++;
991 /* If this is a partial ACK, reset the retransmit timer. */
992 if (qp->s_last != qp->s_tail) {
993 spin_lock(&dev->pending_lock);
994 if (list_empty(&qp->timerwait))
995 list_add_tail(&qp->timerwait,
996 &dev->pending[dev->pending_index]);
997 spin_unlock(&dev->pending_lock);
998 /*
999 * If we get a partial ACK for a resent operation,
1000 * we can stop resending the earlier packets and
1001 * continue with the next packet the receiver wants.
1002 */
1003 if (ipath_cmp24(qp->s_psn, psn) <= 0) {
1004 reset_psn(qp, psn + 1);
1005 ipath_schedule_send(qp);
1006 }
1007 } else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
1008 qp->s_state = OP(SEND_LAST);
1009 qp->s_psn = psn + 1;
1010 }
1011 ipath_get_credit(qp, aeth);
1012 qp->s_rnr_retry = qp->s_rnr_retry_cnt;
1013 qp->s_retry = qp->s_retry_cnt;
1014 update_last_psn(qp, psn);
1015 ret = 1;
1016 goto bail;
1017
1018 case 1: /* RNR NAK */
1019 dev->n_rnr_naks++;
1020 if (qp->s_last == qp->s_tail)
1021 goto bail;
1022 if (qp->s_rnr_retry == 0) {
1023 status = IB_WC_RNR_RETRY_EXC_ERR;
1024 goto class_b;
1025 }
1026 if (qp->s_rnr_retry_cnt < 7)
1027 qp->s_rnr_retry--;
1028
1029 /* The last valid PSN is the previous PSN. */
1030 update_last_psn(qp, psn - 1);
1031
1032 if (wqe->wr.opcode == IB_WR_RDMA_READ)
1033 dev->n_rc_resends++;
1034 else
1035 dev->n_rc_resends +=
1036 (qp->s_psn - psn) & IPATH_PSN_MASK;
1037
1038 reset_psn(qp, psn);
1039
1040 qp->s_rnr_timeout =
1041 ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
1042 IPATH_AETH_CREDIT_MASK];
1043 ipath_insert_rnr_queue(qp);
1044 ipath_schedule_send(qp);
1045 goto bail;
1046
1047 case 3: /* NAK */
1048 if (qp->s_last == qp->s_tail)
1049 goto bail;
1050 /* The last valid PSN is the previous PSN. */
1051 update_last_psn(qp, psn - 1);
1052 switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
1053 IPATH_AETH_CREDIT_MASK) {
1054 case 0: /* PSN sequence error */
1055 dev->n_seq_naks++;
1056 /*
1057 * Back up to the responder's expected PSN.
1058 * Note that we might get a NAK in the middle of an
1059 * RDMA READ response which terminates the RDMA
1060 * READ.
1061 */
1062 ipath_restart_rc(qp, psn);
1063 break;
1064
1065 case 1: /* Invalid Request */
1066 status = IB_WC_REM_INV_REQ_ERR;
1067 dev->n_other_naks++;
1068 goto class_b;
1069
1070 case 2: /* Remote Access Error */
1071 status = IB_WC_REM_ACCESS_ERR;
1072 dev->n_other_naks++;
1073 goto class_b;
1074
1075 case 3: /* Remote Operation Error */
1076 status = IB_WC_REM_OP_ERR;
1077 dev->n_other_naks++;
1078 class_b:
1079 ipath_send_complete(qp, wqe, status);
1080 ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1081 break;
1082
1083 default:
1084 /* Ignore other reserved NAK error codes */
1085 goto reserved;
1086 }
1087 qp->s_rnr_retry = qp->s_rnr_retry_cnt;
1088 goto bail;
1089
1090 default: /* 2: reserved */
1091 reserved:
1092 /* Ignore reserved NAK codes. */
1093 goto bail;
1094 }
1095
1096bail:
1097 return ret;
1098}
1099
1100/**
1101 * ipath_rc_rcv_resp - process an incoming RC response packet
1102 * @dev: the device this packet came in on
1103 * @ohdr: the other headers for this packet
1104 * @data: the packet data
1105 * @tlen: the packet length
1106 * @qp: the QP for this packet
1107 * @opcode: the opcode for this packet
1108 * @psn: the packet sequence number for this packet
1109 * @hdrsize: the header length
1110 * @pmtu: the path MTU
1111 * @header_in_data: true if part of the header data is in the data buffer
1112 *
1113 * This is called from ipath_rc_rcv() to process an incoming RC response
1114 * packet for the given QP.
1115 * Called at interrupt level.
1116 */
1117static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1118 struct ipath_other_headers *ohdr,
1119 void *data, u32 tlen,
1120 struct ipath_qp *qp,
1121 u32 opcode,
1122 u32 psn, u32 hdrsize, u32 pmtu,
1123 int header_in_data)
1124{
1125 struct ipath_swqe *wqe;
1126 enum ib_wc_status status;
1127 unsigned long flags;
1128 int diff;
1129 u32 pad;
1130 u32 aeth;
1131 u64 val;
1132
1133 spin_lock_irqsave(&qp->s_lock, flags);
1134
1135 /* Double check we can process this now that we hold the s_lock. */
1136 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
1137 goto ack_done;
1138
1139 /* Ignore invalid responses. */
1140 if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
1141 goto ack_done;
1142
1143 /* Ignore duplicate responses. */
1144 diff = ipath_cmp24(psn, qp->s_last_psn);
1145 if (unlikely(diff <= 0)) {
1146 /* Update credits for "ghost" ACKs */
1147 if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
1148 if (!header_in_data)
1149 aeth = be32_to_cpu(ohdr->u.aeth);
1150 else {
1151 aeth = be32_to_cpu(((__be32 *) data)[0]);
1152 data += sizeof(__be32);
1153 }
1154 if ((aeth >> 29) == 0)
1155 ipath_get_credit(qp, aeth);
1156 }
1157 goto ack_done;
1158 }
1159
1160 if (unlikely(qp->s_last == qp->s_tail))
1161 goto ack_done;
1162 wqe = get_swqe_ptr(qp, qp->s_last);
1163 status = IB_WC_SUCCESS;
1164
1165 switch (opcode) {
1166 case OP(ACKNOWLEDGE):
1167 case OP(ATOMIC_ACKNOWLEDGE):
1168 case OP(RDMA_READ_RESPONSE_FIRST):
1169 if (!header_in_data)
1170 aeth = be32_to_cpu(ohdr->u.aeth);
1171 else {
1172 aeth = be32_to_cpu(((__be32 *) data)[0]);
1173 data += sizeof(__be32);
1174 }
1175 if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
1176 if (!header_in_data) {
1177 __be32 *p = ohdr->u.at.atomic_ack_eth;
1178
1179 val = ((u64) be32_to_cpu(p[0]) << 32) |
1180 be32_to_cpu(p[1]);
1181 } else
1182 val = be64_to_cpu(((__be64 *) data)[0]);
1183 } else
1184 val = 0;
1185 if (!do_rc_ack(qp, aeth, psn, opcode, val) ||
1186 opcode != OP(RDMA_READ_RESPONSE_FIRST))
1187 goto ack_done;
1188 hdrsize += 4;
1189 wqe = get_swqe_ptr(qp, qp->s_last);
1190 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1191 goto ack_op_err;
1192 qp->r_flags &= ~IPATH_R_RDMAR_SEQ;
1193 /*
1194 * If this is a response to a resent RDMA read, we
1195 * have to be careful to copy the data to the right
1196 * location.
1197 */
1198 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1199 wqe, psn, pmtu);
1200 goto read_middle;
1201
1202 case OP(RDMA_READ_RESPONSE_MIDDLE):
1203 /* no AETH, no ACK */
1204 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1205 dev->n_rdma_seq++;
1206 if (qp->r_flags & IPATH_R_RDMAR_SEQ)
1207 goto ack_done;
1208 qp->r_flags |= IPATH_R_RDMAR_SEQ;
1209 ipath_restart_rc(qp, qp->s_last_psn + 1);
1210 goto ack_done;
1211 }
1212 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1213 goto ack_op_err;
1214 read_middle:
1215 if (unlikely(tlen != (hdrsize + pmtu + 4)))
1216 goto ack_len_err;
1217 if (unlikely(pmtu >= qp->s_rdma_read_len))
1218 goto ack_len_err;
1219
1220 /* We got a response so update the timeout. */
1221 spin_lock(&dev->pending_lock);
1222 if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
1223 list_move_tail(&qp->timerwait,
1224 &dev->pending[dev->pending_index]);
1225 spin_unlock(&dev->pending_lock);
1226
1227 if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
1228 qp->s_retry = qp->s_retry_cnt;
1229
1230 /*
1231 * Update the RDMA receive state but do the copy w/o
1232 * holding the locks and blocking interrupts.
1233 */
1234 qp->s_rdma_read_len -= pmtu;
1235 update_last_psn(qp, psn);
1236 spin_unlock_irqrestore(&qp->s_lock, flags);
1237 ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
1238 goto bail;
1239
1240 case OP(RDMA_READ_RESPONSE_ONLY):
1241 if (!header_in_data)
1242 aeth = be32_to_cpu(ohdr->u.aeth);
1243 else
1244 aeth = be32_to_cpu(((__be32 *) data)[0]);
1245 if (!do_rc_ack(qp, aeth, psn, opcode, 0))
1246 goto ack_done;
1247 /* Get the number of bytes the message was padded by. */
1248 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1249 /*
1250 * Check that the data size is >= 0 && <= pmtu.
1251 * Remember to account for the AETH header (4) and
1252 * ICRC (4).
1253 */
1254 if (unlikely(tlen < (hdrsize + pad + 8)))
1255 goto ack_len_err;
1256 /*
1257 * If this is a response to a resent RDMA read, we
1258 * have to be careful to copy the data to the right
1259 * location.
1260 */
1261 wqe = get_swqe_ptr(qp, qp->s_last);
1262 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1263 wqe, psn, pmtu);
1264 goto read_last;
1265
1266 case OP(RDMA_READ_RESPONSE_LAST):
1267 /* ACKs READ req. */
1268 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1269 dev->n_rdma_seq++;
1270 if (qp->r_flags & IPATH_R_RDMAR_SEQ)
1271 goto ack_done;
1272 qp->r_flags |= IPATH_R_RDMAR_SEQ;
1273 ipath_restart_rc(qp, qp->s_last_psn + 1);
1274 goto ack_done;
1275 }
1276 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1277 goto ack_op_err;
1278 /* Get the number of bytes the message was padded by. */
1279 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1280 /*
1281 * Check that the data size is >= 1 && <= pmtu.
1282 * Remember to account for the AETH header (4) and
1283 * ICRC (4).
1284 */
1285 if (unlikely(tlen <= (hdrsize + pad + 8)))
1286 goto ack_len_err;
1287 read_last:
1288 tlen -= hdrsize + pad + 8;
1289 if (unlikely(tlen != qp->s_rdma_read_len))
1290 goto ack_len_err;
1291 if (!header_in_data)
1292 aeth = be32_to_cpu(ohdr->u.aeth);
1293 else {
1294 aeth = be32_to_cpu(((__be32 *) data)[0]);
1295 data += sizeof(__be32);
1296 }
1297 ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
1298 (void) do_rc_ack(qp, aeth, psn,
1299 OP(RDMA_READ_RESPONSE_LAST), 0);
1300 goto ack_done;
1301 }
1302
1303ack_op_err:
1304 status = IB_WC_LOC_QP_OP_ERR;
1305 goto ack_err;
1306
1307ack_len_err:
1308 status = IB_WC_LOC_LEN_ERR;
1309ack_err:
1310 ipath_send_complete(qp, wqe, status);
1311 ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1312ack_done:
1313 spin_unlock_irqrestore(&qp->s_lock, flags);
1314bail:
1315 return;
1316}
1317
1318/**
1319 * ipath_rc_rcv_error - process an incoming duplicate or error RC packet
1320 * @dev: the device this packet came in on
1321 * @ohdr: the other headers for this packet
1322 * @data: the packet data
1323 * @qp: the QP for this packet
1324 * @opcode: the opcode for this packet
1325 * @psn: the packet sequence number for this packet
1326 * @diff: the difference between the PSN and the expected PSN
1327 * @header_in_data: true if part of the header data is in the data buffer
1328 *
1329 * This is called from ipath_rc_rcv() to process an unexpected
1330 * incoming RC packet for the given QP.
1331 * Called at interrupt level.
1332 * Return 1 if no more processing is needed; otherwise return 0 to
1333 * schedule a response to be sent.
1334 */
1335static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1336 struct ipath_other_headers *ohdr,
1337 void *data,
1338 struct ipath_qp *qp,
1339 u32 opcode,
1340 u32 psn,
1341 int diff,
1342 int header_in_data)
1343{
1344 struct ipath_ack_entry *e;
1345 u8 i, prev;
1346 int old_req;
1347 unsigned long flags;
1348
1349 if (diff > 0) {
1350 /*
1351 * Packet sequence error.
1352 * A NAK will ACK earlier sends and RDMA writes.
1353 * Don't queue the NAK if we already sent one.
1354 */
1355 if (!qp->r_nak_state) {
1356 qp->r_nak_state = IB_NAK_PSN_ERROR;
1357 /* Use the expected PSN. */
1358 qp->r_ack_psn = qp->r_psn;
1359 goto send_ack;
1360 }
1361 goto done;
1362 }
1363
1364 /*
1365 * Handle a duplicate request. Don't re-execute SEND, RDMA
1366 * write or atomic op. Don't NAK errors, just silently drop
1367 * the duplicate request. Note that r_sge, r_len, and
1368 * r_rcv_len may be in use so don't modify them.
1369 *
1370 * We are supposed to ACK the earliest duplicate PSN but we
1371 * can coalesce an outstanding duplicate ACK. We have to
1372 * send the earliest so that RDMA reads can be restarted at
1373 * the requester's expected PSN.
1374 *
1375 * First, find where this duplicate PSN falls within the
1376 * ACKs previously sent.
1377 */
1378 psn &= IPATH_PSN_MASK;
1379 e = NULL;
1380 old_req = 1;
1381
1382 spin_lock_irqsave(&qp->s_lock, flags);
1383 /* Double check we can process this now that we hold the s_lock. */
1384 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
1385 goto unlock_done;
1386
1387 for (i = qp->r_head_ack_queue; ; i = prev) {
1388 if (i == qp->s_tail_ack_queue)
1389 old_req = 0;
1390 if (i)
1391 prev = i - 1;
1392 else
1393 prev = IPATH_MAX_RDMA_ATOMIC;
1394 if (prev == qp->r_head_ack_queue) {
1395 e = NULL;
1396 break;
1397 }
1398 e = &qp->s_ack_queue[prev];
1399 if (!e->opcode) {
1400 e = NULL;
1401 break;
1402 }
1403 if (ipath_cmp24(psn, e->psn) >= 0) {
1404 if (prev == qp->s_tail_ack_queue)
1405 old_req = 0;
1406 break;
1407 }
1408 }
1409 switch (opcode) {
1410 case OP(RDMA_READ_REQUEST): {
1411 struct ib_reth *reth;
1412 u32 offset;
1413 u32 len;
1414
1415 /*
1416 * If we didn't find the RDMA read request in the ack queue,
1417 * or the send tasklet is already backed up to send an
1418 * earlier entry, we can ignore this request.
1419 */
1420 if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
1421 goto unlock_done;
1422 /* RETH comes after BTH */
1423 if (!header_in_data)
1424 reth = &ohdr->u.rc.reth;
1425 else {
1426 reth = (struct ib_reth *)data;
1427 data += sizeof(*reth);
1428 }
1429 /*
1430 * Address range must be a subset of the original
1431 * request and start on pmtu boundaries.
1432 * We reuse the old ack_queue slot since the requester
1433 * should not back up and request an earlier PSN for the
1434 * same request.
1435 */
1436 offset = ((psn - e->psn) & IPATH_PSN_MASK) *
1437 ib_mtu_enum_to_int(qp->path_mtu);
1438 len = be32_to_cpu(reth->length);
1439 if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
1440 goto unlock_done;
1441 if (len != 0) {
1442 u32 rkey = be32_to_cpu(reth->rkey);
1443 u64 vaddr = be64_to_cpu(reth->vaddr);
1444 int ok;
1445
1446 ok = ipath_rkey_ok(qp, &e->rdma_sge,
1447 len, vaddr, rkey,
1448 IB_ACCESS_REMOTE_READ);
1449 if (unlikely(!ok))
1450 goto unlock_done;
1451 } else {
1452 e->rdma_sge.sg_list = NULL;
1453 e->rdma_sge.num_sge = 0;
1454 e->rdma_sge.sge.mr = NULL;
1455 e->rdma_sge.sge.vaddr = NULL;
1456 e->rdma_sge.sge.length = 0;
1457 e->rdma_sge.sge.sge_length = 0;
1458 }
1459 e->psn = psn;
1460 qp->s_ack_state = OP(ACKNOWLEDGE);
1461 qp->s_tail_ack_queue = prev;
1462 break;
1463 }
1464
1465 case OP(COMPARE_SWAP):
1466 case OP(FETCH_ADD): {
1467 /*
1468 * If we didn't find the atomic request in the ack queue
1469 * or the send tasklet is already backed up to send an
1470 * earlier entry, we can ignore this request.
1471 */
1472 if (!e || e->opcode != (u8) opcode || old_req)
1473 goto unlock_done;
1474 qp->s_ack_state = OP(ACKNOWLEDGE);
1475 qp->s_tail_ack_queue = prev;
1476 break;
1477 }
1478
1479 default:
1480 if (old_req)
1481 goto unlock_done;
1482 /*
1483 * Resend the most recent ACK if this request is
1484 * after all the previous RDMA reads and atomics.
1485 */
1486 if (i == qp->r_head_ack_queue) {
1487 spin_unlock_irqrestore(&qp->s_lock, flags);
1488 qp->r_nak_state = 0;
1489 qp->r_ack_psn = qp->r_psn - 1;
1490 goto send_ack;
1491 }
1492 /*
1493 * Try to send a simple ACK to work around a Mellanox bug
1494 * which doesn't accept an RDMA read response or atomic
1495 * response as an ACK for earlier SENDs or RDMA writes.
1496 */
1497 if (qp->r_head_ack_queue == qp->s_tail_ack_queue &&
1498 !(qp->s_flags & IPATH_S_ACK_PENDING) &&
1499 qp->s_ack_state == OP(ACKNOWLEDGE)) {
1500 spin_unlock_irqrestore(&qp->s_lock, flags);
1501 qp->r_nak_state = 0;
1502 qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
1503 goto send_ack;
1504 }
1505 /*
1506 * Resend the RDMA read or atomic op which
1507 * ACKs this duplicate request.
1508 */
1509 qp->s_ack_state = OP(ACKNOWLEDGE);
1510 qp->s_tail_ack_queue = i;
1511 break;
1512 }
1513 qp->r_nak_state = 0;
1514 ipath_schedule_send(qp);
1515
1516unlock_done:
1517 spin_unlock_irqrestore(&qp->s_lock, flags);
1518done:
1519 return 1;
1520
1521send_ack:
1522 return 0;
1523}
1524
1525void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
1526{
1527 unsigned long flags;
1528 int lastwqe;
1529
1530 spin_lock_irqsave(&qp->s_lock, flags);
1531 lastwqe = ipath_error_qp(qp, err);
1532 spin_unlock_irqrestore(&qp->s_lock, flags);
1533
1534 if (lastwqe) {
1535 struct ib_event ev;
1536
1537 ev.device = qp->ibqp.device;
1538 ev.element.qp = &qp->ibqp;
1539 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1540 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1541 }
1542}
1543
1544static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
1545{
1546 unsigned next;
1547
1548 next = n + 1;
1549 if (next > IPATH_MAX_RDMA_ATOMIC)
1550 next = 0;
1551 if (n == qp->s_tail_ack_queue) {
1552 qp->s_tail_ack_queue = next;
1553 qp->s_ack_state = OP(ACKNOWLEDGE);
1554 }
1555}
1556
1557/**
1558 * ipath_rc_rcv - process an incoming RC packet
1559 * @dev: the device this packet came in on
1560 * @hdr: the header of this packet
1561 * @has_grh: true if the header has a GRH
1562 * @data: the packet data
1563 * @tlen: the packet length
1564 * @qp: the QP for this packet
1565 *
1566 * This is called from ipath_qp_rcv() to process an incoming RC packet
1567 * for the given QP.
1568 * Called at interrupt level.
1569 */
1570void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1571 int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
1572{
1573 struct ipath_other_headers *ohdr;
1574 u32 opcode;
1575 u32 hdrsize;
1576 u32 psn;
1577 u32 pad;
1578 struct ib_wc wc;
1579 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
1580 int diff;
1581 struct ib_reth *reth;
1582 int header_in_data;
1583 unsigned long flags;
1584
1585 /* Validate the SLID. See Ch. 9.6.1.5 */
1586 if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
1587 goto done;
1588
1589 /* Check for GRH */
1590 if (!has_grh) {
1591 ohdr = &hdr->u.oth;
1592 hdrsize = 8 + 12; /* LRH + BTH */
1593 psn = be32_to_cpu(ohdr->bth[2]);
1594 header_in_data = 0;
1595 } else {
1596 ohdr = &hdr->u.l.oth;
1597 hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */
1598 /*
1599 * The header with GRH is 60 bytes and the core driver sets
1600 * the eager header buffer size to 56 bytes so the last 4
1601 * bytes of the BTH header (PSN) are in the data buffer.
1602 */
1603 header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
1604 if (header_in_data) {
1605 psn = be32_to_cpu(((__be32 *) data)[0]);
1606 data += sizeof(__be32);
1607 } else
1608 psn = be32_to_cpu(ohdr->bth[2]);
1609 }
1610
1611 /*
1612 * Process responses (ACKs) before anything else. Note that the
1613 * packet sequence number will be for something in the send work
1614 * queue rather than the expected receive packet sequence number.
1615 * In other words, this QP is the requester.
1616 */
1617 opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
1618 if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
1619 opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
1620 ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
1621 hdrsize, pmtu, header_in_data);
1622 goto done;
1623 }
1624
1625 /* Compute 24 bits worth of difference. */
1626 diff = ipath_cmp24(psn, qp->r_psn);
1627 if (unlikely(diff)) {
1628 if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
1629 psn, diff, header_in_data))
1630 goto done;
1631 goto send_ack;
1632 }
1633
1634 /* Check for opcode sequence errors. */
1635 switch (qp->r_state) {
1636 case OP(SEND_FIRST):
1637 case OP(SEND_MIDDLE):
1638 if (opcode == OP(SEND_MIDDLE) ||
1639 opcode == OP(SEND_LAST) ||
1640 opcode == OP(SEND_LAST_WITH_IMMEDIATE))
1641 break;
1642 goto nack_inv;
1643
1644 case OP(RDMA_WRITE_FIRST):
1645 case OP(RDMA_WRITE_MIDDLE):
1646 if (opcode == OP(RDMA_WRITE_MIDDLE) ||
1647 opcode == OP(RDMA_WRITE_LAST) ||
1648 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1649 break;
1650 goto nack_inv;
1651
1652 default:
1653 if (opcode == OP(SEND_MIDDLE) ||
1654 opcode == OP(SEND_LAST) ||
1655 opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
1656 opcode == OP(RDMA_WRITE_MIDDLE) ||
1657 opcode == OP(RDMA_WRITE_LAST) ||
1658 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1659 goto nack_inv;
1660 /*
1661 * Note that it is up to the requester to not send a new
1662 * RDMA read or atomic operation before receiving an ACK
1663 * for the previous operation.
1664 */
1665 break;
1666 }
1667
1668 memset(&wc, 0, sizeof wc);
1669
1670 /* OK, process the packet. */
1671 switch (opcode) {
1672 case OP(SEND_FIRST):
1673 if (!ipath_get_rwqe(qp, 0))
1674 goto rnr_nak;
1675 qp->r_rcv_len = 0;
1676 /* FALLTHROUGH */
1677 case OP(SEND_MIDDLE):
1678 case OP(RDMA_WRITE_MIDDLE):
1679 send_middle:
1680 /* Check for invalid length PMTU or posted rwqe len. */
1681 if (unlikely(tlen != (hdrsize + pmtu + 4)))
1682 goto nack_inv;
1683 qp->r_rcv_len += pmtu;
1684 if (unlikely(qp->r_rcv_len > qp->r_len))
1685 goto nack_inv;
1686 ipath_copy_sge(&qp->r_sge, data, pmtu);
1687 break;
1688
1689 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
1690 /* consume RWQE */
1691 if (!ipath_get_rwqe(qp, 1))
1692 goto rnr_nak;
1693 goto send_last_imm;
1694
1695 case OP(SEND_ONLY):
1696 case OP(SEND_ONLY_WITH_IMMEDIATE):
1697 if (!ipath_get_rwqe(qp, 0))
1698 goto rnr_nak;
1699 qp->r_rcv_len = 0;
1700 if (opcode == OP(SEND_ONLY))
1701 goto send_last;
1702 /* FALLTHROUGH */
1703 case OP(SEND_LAST_WITH_IMMEDIATE):
1704 send_last_imm:
1705 if (header_in_data) {
1706 wc.ex.imm_data = *(__be32 *) data;
1707 data += sizeof(__be32);
1708 } else {
1709 /* Immediate data comes after BTH */
1710 wc.ex.imm_data = ohdr->u.imm_data;
1711 }
1712 hdrsize += 4;
1713 wc.wc_flags = IB_WC_WITH_IMM;
1714 /* FALLTHROUGH */
1715 case OP(SEND_LAST):
1716 case OP(RDMA_WRITE_LAST):
1717 send_last:
1718 /* Get the number of bytes the message was padded by. */
1719 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1720 /* Check for invalid length. */
1721 /* XXX LAST len should be >= 1 */
1722 if (unlikely(tlen < (hdrsize + pad + 4)))
1723 goto nack_inv;
1724 /* Don't count the CRC. */
1725 tlen -= (hdrsize + pad + 4);
1726 wc.byte_len = tlen + qp->r_rcv_len;
1727 if (unlikely(wc.byte_len > qp->r_len))
1728 goto nack_inv;
1729 ipath_copy_sge(&qp->r_sge, data, tlen);
1730 qp->r_msn++;
1731 if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
1732 break;
1733 wc.wr_id = qp->r_wr_id;
1734 wc.status = IB_WC_SUCCESS;
1735 if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
1736 opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
1737 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
1738 else
1739 wc.opcode = IB_WC_RECV;
1740 wc.qp = &qp->ibqp;
1741 wc.src_qp = qp->remote_qpn;
1742 wc.slid = qp->remote_ah_attr.dlid;
1743 wc.sl = qp->remote_ah_attr.sl;
1744 /* Signal completion event if the solicited bit is set. */
1745 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
1746 (ohdr->bth[0] &
1747 cpu_to_be32(1 << 23)) != 0);
1748 break;
1749
1750 case OP(RDMA_WRITE_FIRST):
1751 case OP(RDMA_WRITE_ONLY):
1752 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
1753 if (unlikely(!(qp->qp_access_flags &
1754 IB_ACCESS_REMOTE_WRITE)))
1755 goto nack_inv;
1756 /* consume RWQE */
1757 /* RETH comes after BTH */
1758 if (!header_in_data)
1759 reth = &ohdr->u.rc.reth;
1760 else {
1761 reth = (struct ib_reth *)data;
1762 data += sizeof(*reth);
1763 }
1764 hdrsize += sizeof(*reth);
1765 qp->r_len = be32_to_cpu(reth->length);
1766 qp->r_rcv_len = 0;
1767 if (qp->r_len != 0) {
1768 u32 rkey = be32_to_cpu(reth->rkey);
1769 u64 vaddr = be64_to_cpu(reth->vaddr);
1770 int ok;
1771
1772 /* Check rkey & NAK */
1773 ok = ipath_rkey_ok(qp, &qp->r_sge,
1774 qp->r_len, vaddr, rkey,
1775 IB_ACCESS_REMOTE_WRITE);
1776 if (unlikely(!ok))
1777 goto nack_acc;
1778 } else {
1779 qp->r_sge.sg_list = NULL;
1780 qp->r_sge.sge.mr = NULL;
1781 qp->r_sge.sge.vaddr = NULL;
1782 qp->r_sge.sge.length = 0;
1783 qp->r_sge.sge.sge_length = 0;
1784 }
1785 if (opcode == OP(RDMA_WRITE_FIRST))
1786 goto send_middle;
1787 else if (opcode == OP(RDMA_WRITE_ONLY))
1788 goto send_last;
1789 if (!ipath_get_rwqe(qp, 1))
1790 goto rnr_nak;
1791 goto send_last_imm;
1792
1793 case OP(RDMA_READ_REQUEST): {
1794 struct ipath_ack_entry *e;
1795 u32 len;
1796 u8 next;
1797
1798 if (unlikely(!(qp->qp_access_flags &
1799 IB_ACCESS_REMOTE_READ)))
1800 goto nack_inv;
1801 next = qp->r_head_ack_queue + 1;
1802 if (next > IPATH_MAX_RDMA_ATOMIC)
1803 next = 0;
1804 spin_lock_irqsave(&qp->s_lock, flags);
1805 /* Double check we can process this while holding the s_lock. */
1806 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
1807 goto unlock;
1808 if (unlikely(next == qp->s_tail_ack_queue)) {
1809 if (!qp->s_ack_queue[next].sent)
1810 goto nack_inv_unlck;
1811 ipath_update_ack_queue(qp, next);
1812 }
1813 e = &qp->s_ack_queue[qp->r_head_ack_queue];
1814 /* RETH comes after BTH */
1815 if (!header_in_data)
1816 reth = &ohdr->u.rc.reth;
1817 else {
1818 reth = (struct ib_reth *)data;
1819 data += sizeof(*reth);
1820 }
1821 len = be32_to_cpu(reth->length);
1822 if (len) {
1823 u32 rkey = be32_to_cpu(reth->rkey);
1824 u64 vaddr = be64_to_cpu(reth->vaddr);
1825 int ok;
1826
1827 /* Check rkey & NAK */
1828 ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
1829 rkey, IB_ACCESS_REMOTE_READ);
1830 if (unlikely(!ok))
1831 goto nack_acc_unlck;
1832 /*
1833 * Update the next expected PSN. We add 1 later
1834 * below, so only add the remainder here.
1835 */
1836 if (len > pmtu)
1837 qp->r_psn += (len - 1) / pmtu;
1838 } else {
1839 e->rdma_sge.sg_list = NULL;
1840 e->rdma_sge.num_sge = 0;
1841 e->rdma_sge.sge.mr = NULL;
1842 e->rdma_sge.sge.vaddr = NULL;
1843 e->rdma_sge.sge.length = 0;
1844 e->rdma_sge.sge.sge_length = 0;
1845 }
1846 e->opcode = opcode;
1847 e->sent = 0;
1848 e->psn = psn;
1849 /*
1850 * We need to increment the MSN here instead of when we
1851 * finish sending the result since a duplicate request would
1852 * increment it more than once.
1853 */
1854 qp->r_msn++;
1855 qp->r_psn++;
1856 qp->r_state = opcode;
1857 qp->r_nak_state = 0;
1858 qp->r_head_ack_queue = next;
1859
1860 /* Schedule the send tasklet. */
1861 ipath_schedule_send(qp);
1862
1863 goto unlock;
1864 }
1865
1866 case OP(COMPARE_SWAP):
1867 case OP(FETCH_ADD): {
1868 struct ib_atomic_eth *ateth;
1869 struct ipath_ack_entry *e;
1870 u64 vaddr;
1871 atomic64_t *maddr;
1872 u64 sdata;
1873 u32 rkey;
1874 u8 next;
1875
1876 if (unlikely(!(qp->qp_access_flags &
1877 IB_ACCESS_REMOTE_ATOMIC)))
1878 goto nack_inv;
1879 next = qp->r_head_ack_queue + 1;
1880 if (next > IPATH_MAX_RDMA_ATOMIC)
1881 next = 0;
1882 spin_lock_irqsave(&qp->s_lock, flags);
1883 /* Double check we can process this while holding the s_lock. */
1884 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
1885 goto unlock;
1886 if (unlikely(next == qp->s_tail_ack_queue)) {
1887 if (!qp->s_ack_queue[next].sent)
1888 goto nack_inv_unlck;
1889 ipath_update_ack_queue(qp, next);
1890 }
1891 if (!header_in_data)
1892 ateth = &ohdr->u.atomic_eth;
1893 else
1894 ateth = (struct ib_atomic_eth *)data;
1895 vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
1896 be32_to_cpu(ateth->vaddr[1]);
1897 if (unlikely(vaddr & (sizeof(u64) - 1)))
1898 goto nack_inv_unlck;
1899 rkey = be32_to_cpu(ateth->rkey);
1900 /* Check rkey & NAK */
1901 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
1902 sizeof(u64), vaddr, rkey,
1903 IB_ACCESS_REMOTE_ATOMIC)))
1904 goto nack_acc_unlck;
1905 /* Perform atomic OP and save result. */
1906 maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
1907 sdata = be64_to_cpu(ateth->swap_data);
1908 e = &qp->s_ack_queue[qp->r_head_ack_queue];
1909 e->atomic_data = (opcode == OP(FETCH_ADD)) ?
1910 (u64) atomic64_add_return(sdata, maddr) - sdata :
1911 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
1912 be64_to_cpu(ateth->compare_data),
1913 sdata);
1914 e->opcode = opcode;
1915 e->sent = 0;
1916 e->psn = psn & IPATH_PSN_MASK;
1917 qp->r_msn++;
1918 qp->r_psn++;
1919 qp->r_state = opcode;
1920 qp->r_nak_state = 0;
1921 qp->r_head_ack_queue = next;
1922
1923 /* Schedule the send tasklet. */
1924 ipath_schedule_send(qp);
1925
1926 goto unlock;
1927 }
1928
1929 default:
1930 /* NAK unknown opcodes. */
1931 goto nack_inv;
1932 }
1933 qp->r_psn++;
1934 qp->r_state = opcode;
1935 qp->r_ack_psn = psn;
1936 qp->r_nak_state = 0;
1937 /* Send an ACK if requested or required. */
1938 if (psn & (1 << 31))
1939 goto send_ack;
1940 goto done;
1941
1942rnr_nak:
1943 qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
1944 qp->r_ack_psn = qp->r_psn;
1945 goto send_ack;
1946
1947nack_inv_unlck:
1948 spin_unlock_irqrestore(&qp->s_lock, flags);
1949nack_inv:
1950 ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
1951 qp->r_nak_state = IB_NAK_INVALID_REQUEST;
1952 qp->r_ack_psn = qp->r_psn;
1953 goto send_ack;
1954
1955nack_acc_unlck:
1956 spin_unlock_irqrestore(&qp->s_lock, flags);
1957nack_acc:
1958 ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
1959 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
1960 qp->r_ack_psn = qp->r_psn;
1961send_ack:
1962 send_rc_ack(qp);
1963 goto done;
1964
1965unlock:
1966 spin_unlock_irqrestore(&qp->s_lock, flags);
1967done:
1968 return;
1969}
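
The receive and duplicate/error paths above order packet sequence numbers with ipath_cmp24(), which works modulo 2^24; the helper itself is defined in ipath_verbs.h and is not part of this hunk. As a minimal sketch (helper name invented here), such a 24-bit circular compare is typically done by shifting the difference up so the sign bit reflects wrap-around:

static inline int psn_cmp24_sketch(u32 a, u32 b)
{
	/*
	 * Negative if a is logically before b, zero if equal, positive if
	 * after; the << 8 pushes the 24-bit difference into the sign bit
	 * (relying on two's-complement arithmetic, as kernel code does).
	 */
	return (((int) a) - ((int) b)) << 8;
}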
diff --git a/drivers/staging/rdma/ipath/ipath_registers.h b/drivers/staging/rdma/ipath/ipath_registers.h
deleted file mode 100644
index 8f44d0cf3833..000000000000
--- a/drivers/staging/rdma/ipath/ipath_registers.h
+++ /dev/null
@@ -1,512 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef _IPATH_REGISTERS_H
35#define _IPATH_REGISTERS_H
36
37/*
38 * This file should only be included by kernel source, and by the diags. It
39 * defines the registers, and their contents, for InfiniPath chips.
40 */
41
42/*
43 * These are the InfiniPath register and buffer bit definitions
44 * that are visible to software and needed only by the kernel
45 * and diag code. A few that are visible to protocol and user
46 * code are in ipath_common.h. Some bits are specific
47 * to a given chip implementation, and have been moved to the
48 * chip-specific source file.
49 */
50
51/* kr_revision bits */
52#define INFINIPATH_R_CHIPREVMINOR_MASK 0xFF
53#define INFINIPATH_R_CHIPREVMINOR_SHIFT 0
54#define INFINIPATH_R_CHIPREVMAJOR_MASK 0xFF
55#define INFINIPATH_R_CHIPREVMAJOR_SHIFT 8
56#define INFINIPATH_R_ARCH_MASK 0xFF
57#define INFINIPATH_R_ARCH_SHIFT 16
58#define INFINIPATH_R_SOFTWARE_MASK 0xFF
59#define INFINIPATH_R_SOFTWARE_SHIFT 24
60#define INFINIPATH_R_BOARDID_MASK 0xFF
61#define INFINIPATH_R_BOARDID_SHIFT 32
62
63/* kr_control bits */
64#define INFINIPATH_C_FREEZEMODE 0x00000002
65#define INFINIPATH_C_LINKENABLE 0x00000004
66
67/* kr_sendctrl bits */
68#define INFINIPATH_S_DISARMPIOBUF_SHIFT 16
69#define INFINIPATH_S_UPDTHRESH_SHIFT 24
70#define INFINIPATH_S_UPDTHRESH_MASK 0x1f
71
72#define IPATH_S_ABORT 0
73#define IPATH_S_PIOINTBUFAVAIL 1
74#define IPATH_S_PIOBUFAVAILUPD 2
75#define IPATH_S_PIOENABLE 3
76#define IPATH_S_SDMAINTENABLE 9
77#define IPATH_S_SDMASINGLEDESCRIPTOR 10
78#define IPATH_S_SDMAENABLE 11
79#define IPATH_S_SDMAHALT 12
80#define IPATH_S_DISARM 31
81
82#define INFINIPATH_S_ABORT (1U << IPATH_S_ABORT)
83#define INFINIPATH_S_PIOINTBUFAVAIL (1U << IPATH_S_PIOINTBUFAVAIL)
84#define INFINIPATH_S_PIOBUFAVAILUPD (1U << IPATH_S_PIOBUFAVAILUPD)
85#define INFINIPATH_S_PIOENABLE (1U << IPATH_S_PIOENABLE)
86#define INFINIPATH_S_SDMAINTENABLE (1U << IPATH_S_SDMAINTENABLE)
87#define INFINIPATH_S_SDMASINGLEDESCRIPTOR \
88 (1U << IPATH_S_SDMASINGLEDESCRIPTOR)
89#define INFINIPATH_S_SDMAENABLE (1U << IPATH_S_SDMAENABLE)
90#define INFINIPATH_S_SDMAHALT (1U << IPATH_S_SDMAHALT)
91#define INFINIPATH_S_DISARM (1U << IPATH_S_DISARM)
92
93/* kr_rcvctrl bits that are the same on multiple chips */
94#define INFINIPATH_R_PORTENABLE_SHIFT 0
95#define INFINIPATH_R_QPMAP_ENABLE (1ULL << 38)
96
97/* kr_intstatus, kr_intclear, kr_intmask bits */
98#define INFINIPATH_I_SDMAINT 0x8000000000000000ULL
99#define INFINIPATH_I_SDMADISABLED 0x4000000000000000ULL
100#define INFINIPATH_I_ERROR 0x0000000080000000ULL
101#define INFINIPATH_I_SPIOSENT 0x0000000040000000ULL
102#define INFINIPATH_I_SPIOBUFAVAIL 0x0000000020000000ULL
103#define INFINIPATH_I_GPIO 0x0000000010000000ULL
104#define INFINIPATH_I_JINT 0x0000000004000000ULL
105
106/* kr_errorstatus, kr_errorclear, kr_errormask bits */
107#define INFINIPATH_E_RFORMATERR 0x0000000000000001ULL
108#define INFINIPATH_E_RVCRC 0x0000000000000002ULL
109#define INFINIPATH_E_RICRC 0x0000000000000004ULL
110#define INFINIPATH_E_RMINPKTLEN 0x0000000000000008ULL
111#define INFINIPATH_E_RMAXPKTLEN 0x0000000000000010ULL
112#define INFINIPATH_E_RLONGPKTLEN 0x0000000000000020ULL
113#define INFINIPATH_E_RSHORTPKTLEN 0x0000000000000040ULL
114#define INFINIPATH_E_RUNEXPCHAR 0x0000000000000080ULL
115#define INFINIPATH_E_RUNSUPVL 0x0000000000000100ULL
116#define INFINIPATH_E_REBP 0x0000000000000200ULL
117#define INFINIPATH_E_RIBFLOW 0x0000000000000400ULL
118#define INFINIPATH_E_RBADVERSION 0x0000000000000800ULL
119#define INFINIPATH_E_RRCVEGRFULL 0x0000000000001000ULL
120#define INFINIPATH_E_RRCVHDRFULL 0x0000000000002000ULL
121#define INFINIPATH_E_RBADTID 0x0000000000004000ULL
122#define INFINIPATH_E_RHDRLEN 0x0000000000008000ULL
123#define INFINIPATH_E_RHDR 0x0000000000010000ULL
124#define INFINIPATH_E_RIBLOSTLINK 0x0000000000020000ULL
125#define INFINIPATH_E_SENDSPECIALTRIGGER 0x0000000008000000ULL
126#define INFINIPATH_E_SDMADISABLED 0x0000000010000000ULL
127#define INFINIPATH_E_SMINPKTLEN 0x0000000020000000ULL
128#define INFINIPATH_E_SMAXPKTLEN 0x0000000040000000ULL
129#define INFINIPATH_E_SUNDERRUN 0x0000000080000000ULL
130#define INFINIPATH_E_SPKTLEN 0x0000000100000000ULL
131#define INFINIPATH_E_SDROPPEDSMPPKT 0x0000000200000000ULL
132#define INFINIPATH_E_SDROPPEDDATAPKT 0x0000000400000000ULL
133#define INFINIPATH_E_SPIOARMLAUNCH 0x0000000800000000ULL
134#define INFINIPATH_E_SUNEXPERRPKTNUM 0x0000001000000000ULL
135#define INFINIPATH_E_SUNSUPVL 0x0000002000000000ULL
136#define INFINIPATH_E_SENDBUFMISUSE 0x0000004000000000ULL
137#define INFINIPATH_E_SDMAGENMISMATCH 0x0000008000000000ULL
138#define INFINIPATH_E_SDMAOUTOFBOUND 0x0000010000000000ULL
139#define INFINIPATH_E_SDMATAILOUTOFBOUND 0x0000020000000000ULL
140#define INFINIPATH_E_SDMABASE 0x0000040000000000ULL
141#define INFINIPATH_E_SDMA1STDESC 0x0000080000000000ULL
142#define INFINIPATH_E_SDMARPYTAG 0x0000100000000000ULL
143#define INFINIPATH_E_SDMADWEN 0x0000200000000000ULL
144#define INFINIPATH_E_SDMAMISSINGDW 0x0000400000000000ULL
145#define INFINIPATH_E_SDMAUNEXPDATA 0x0000800000000000ULL
146#define INFINIPATH_E_IBSTATUSCHANGED 0x0001000000000000ULL
147#define INFINIPATH_E_INVALIDADDR 0x0002000000000000ULL
148#define INFINIPATH_E_RESET 0x0004000000000000ULL
149#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL
150#define INFINIPATH_E_SDMADESCADDRMISALIGN 0x0010000000000000ULL
151#define INFINIPATH_E_INVALIDEEPCMD 0x0020000000000000ULL
152
153/*
154 * this is used to print "common" packet errors only when the
155 * __IPATH_ERRPKTDBG bit is set in ipath_debug.
156 */
157#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
158 | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
159 | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
160 | INFINIPATH_E_REBP )
161
162/* Convenience for decoding Send DMA errors */
163#define INFINIPATH_E_SDMAERRS ( \
164 INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND | \
165 INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE | \
166 INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG | \
167 INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW | \
168 INFINIPATH_E_SDMAUNEXPDATA | \
169 INFINIPATH_E_SDMADESCADDRMISALIGN | \
170 INFINIPATH_E_SDMADISABLED | \
171 INFINIPATH_E_SENDBUFMISUSE)
172
173/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
174/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
175 * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID
176 * bit 4: flag buffer, 5: datainfo, 6: header info */
177#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
178#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
179#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
180#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
181#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
182#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL
183/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */
184#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF 0x1ULL
185#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC 0x2ULL
186#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL
187/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
188#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL
189#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL
190#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x04ULL
191#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL
192#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL
193#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
194#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL
195/* waldo specific -- find the rest in ipath_6110.c */
196#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL
197/* 6120/7220 specific -- find the rest in ipath_6120.c and ipath_7220.c */
198#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL
199
200/* kr_hwdiagctrl bits */
201#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
202#define INFINIPATH_DC_FORCETXEMEMPARITYERR_SHIFT 40
203#define INFINIPATH_DC_FORCERXEMEMPARITYERR_MASK 0x7FULL
204#define INFINIPATH_DC_FORCERXEMEMPARITYERR_SHIFT 44
205#define INFINIPATH_DC_FORCERXDSYNCMEMPARITYERR 0x0000000400000000ULL
206#define INFINIPATH_DC_COUNTERDISABLE 0x1000000000000000ULL
207#define INFINIPATH_DC_COUNTERWREN 0x2000000000000000ULL
208#define INFINIPATH_DC_FORCEIBCBUSTOSPCPARITYERR 0x4000000000000000ULL
209#define INFINIPATH_DC_FORCEIBCBUSFRSPCPARITYERR 0x8000000000000000ULL
210
211/* kr_ibcctrl bits */
212#define INFINIPATH_IBCC_FLOWCTRLPERIOD_MASK 0xFFULL
213#define INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT 0
214#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_MASK 0xFFULL
215#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT 8
216#define INFINIPATH_IBCC_LINKINITCMD_MASK 0x3ULL
217#define INFINIPATH_IBCC_LINKINITCMD_DISABLE 1
218/* cycle through TS1/TS2 till OK */
219#define INFINIPATH_IBCC_LINKINITCMD_POLL 2
220/* wait for TS1, then go on */
221#define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3
222#define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16
223#define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL
224#define INFINIPATH_IBCC_LINKCMD_DOWN 1 /* move to 0x11 */
225#define INFINIPATH_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */
226#define INFINIPATH_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */
227#define INFINIPATH_IBCC_LINKCMD_SHIFT 18
228#define INFINIPATH_IBCC_MAXPKTLEN_MASK 0x7FFULL
229#define INFINIPATH_IBCC_MAXPKTLEN_SHIFT 20
230#define INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK 0xFULL
231#define INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT 32
232#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK 0xFULL
233#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT 36
234#define INFINIPATH_IBCC_CREDITSCALE_MASK 0x7ULL
235#define INFINIPATH_IBCC_CREDITSCALE_SHIFT 40
236#define INFINIPATH_IBCC_LOOPBACK 0x8000000000000000ULL
237#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL
238
239/* kr_ibcstatus bits */
240#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0
241#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7
242
243#define INFINIPATH_IBCS_TXREADY 0x40000000
244#define INFINIPATH_IBCS_TXCREDITOK 0x80000000
245/* link training states (shift by
246 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) */
247#define INFINIPATH_IBCS_LT_STATE_DISABLED 0x00
248#define INFINIPATH_IBCS_LT_STATE_LINKUP 0x01
249#define INFINIPATH_IBCS_LT_STATE_POLLACTIVE 0x02
250#define INFINIPATH_IBCS_LT_STATE_POLLQUIET 0x03
251#define INFINIPATH_IBCS_LT_STATE_SLEEPDELAY 0x04
252#define INFINIPATH_IBCS_LT_STATE_SLEEPQUIET 0x05
253#define INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE 0x08
254#define INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG 0x09
255#define INFINIPATH_IBCS_LT_STATE_CFGWAITRMT 0x0a
256#define INFINIPATH_IBCS_LT_STATE_CFGIDLE 0x0b
257#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN 0x0c
258#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT 0x0e
259#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE 0x0f
260/* link state machine states (shift by ibcs_ls_shift) */
261#define INFINIPATH_IBCS_L_STATE_DOWN 0x0
262#define INFINIPATH_IBCS_L_STATE_INIT 0x1
263#define INFINIPATH_IBCS_L_STATE_ARM 0x2
264#define INFINIPATH_IBCS_L_STATE_ACTIVE 0x3
265#define INFINIPATH_IBCS_L_STATE_ACT_DEFER 0x4
266
267
268/* kr_extstatus bits */
269#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1
270#define INFINIPATH_EXTS_GPIOIN_MASK 0xFFFFULL
271#define INFINIPATH_EXTS_GPIOIN_SHIFT 48
272
273/* kr_extctrl bits */
274#define INFINIPATH_EXTC_GPIOINVERT_MASK 0xFFFFULL
275#define INFINIPATH_EXTC_GPIOINVERT_SHIFT 32
276#define INFINIPATH_EXTC_GPIOOE_MASK 0xFFFFULL
277#define INFINIPATH_EXTC_GPIOOE_SHIFT 48
278#define INFINIPATH_EXTC_SERDESENABLE 0x80000000ULL
279#define INFINIPATH_EXTC_SERDESCONNECT 0x40000000ULL
280#define INFINIPATH_EXTC_SERDESENTRUNKING 0x20000000ULL
281#define INFINIPATH_EXTC_SERDESDISRXFIFO 0x10000000ULL
282#define INFINIPATH_EXTC_SERDESENPLPBK1 0x08000000ULL
283#define INFINIPATH_EXTC_SERDESENPLPBK2 0x04000000ULL
284#define INFINIPATH_EXTC_SERDESENENCDEC 0x02000000ULL
285#define INFINIPATH_EXTC_LED1SECPORT_ON 0x00000020ULL
286#define INFINIPATH_EXTC_LED2SECPORT_ON 0x00000010ULL
287#define INFINIPATH_EXTC_LED1PRIPORT_ON 0x00000008ULL
288#define INFINIPATH_EXTC_LED2PRIPORT_ON 0x00000004ULL
289#define INFINIPATH_EXTC_LEDGBLOK_ON 0x00000002ULL
290#define INFINIPATH_EXTC_LEDGBLERR_OFF 0x00000001ULL
291
292/* kr_partitionkey bits */
293#define INFINIPATH_PKEY_SIZE 16
294#define INFINIPATH_PKEY_MASK 0xFFFF
295#define INFINIPATH_PKEY_DEFAULT_PKEY 0xFFFF
296
297/* kr_serdesconfig0 bits */
298#define INFINIPATH_SERDC0_RESET_MASK 0xfULL /* overall reset bits */
299#define INFINIPATH_SERDC0_RESET_PLL 0x10000000ULL /* pll reset */
300/* tx idle enables (per lane) */
301#define INFINIPATH_SERDC0_TXIDLE 0xF000ULL
302/* rx detect enables (per lane) */
303#define INFINIPATH_SERDC0_RXDETECT_EN 0xF0000ULL
304/* L1 Power down; use with RXDETECT, otherwise not used on IB side */
305#define INFINIPATH_SERDC0_L1PWR_DN 0xF0ULL
306
307/* common kr_xgxsconfig bits (or safe in all, even if not implemented) */
308#define INFINIPATH_XGXS_RX_POL_SHIFT 19
309#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
310
311
312/*
313 * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
314 * PIO send buffers. This is well beyond anything currently
315 * defined in the InfiniBand spec.
316 */
317#define IPATH_PIO_MAXIBHDR 128
318
319typedef u64 ipath_err_t;
320
321/* The following change with the type of device, so they
322 * need to be part of the ipath_devdata struct, or
323 * we could have problems plugging in devices of
324 * different types (e.g. one HT, one PCIE)
325 * in one system, to be managed by one driver.
326 * On the other hand, this file may also be included
327 * by other code, so leave the declarations here
328 * temporarily. Minor footprint issue if common-model
329 * linker used, none if C89+ linker used.
330 */
331
332/* mask of defined bits for various registers */
333extern u64 infinipath_i_bitsextant;
334extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
335
336/* masks that are different in various chips, or only exist in some chips */
337extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
338
339/*
340 * These are the infinipath general register numbers (not offsets).
341 * The kernel registers are used directly, those beyond the kernel
342 * registers are calculated from one of the base registers. The use of
343 * an integer type doesn't allow type-checking as thorough as, say,
344 * an enum but allows for better hiding of chip differences.
345 */
346typedef const u16 ipath_kreg, /* infinipath general registers */
347 ipath_creg, /* infinipath counter registers */
348 ipath_sreg; /* kernel-only, infinipath send registers */
349
350/*
351 * These are the chip registers common to all infinipath chips, and
352 * used both by the kernel and the diagnostics or other user code.
353 * They are all implemented such that 64 bit accesses work.
354 * Some implement no more than 32 bits. Because 64 bit reads
355 * require 2 HT cmds on opteron, we access those with 32 bit
356 * reads for efficiency (they are written as 64 bits, since
357 * the extra 32 bits are nearly free on writes, and it slightly reduces
358 * complexity). The rest are all accessed as 64 bits.
359 */
360struct ipath_kregs {
361 /* These are the 32 bit group */
362 ipath_kreg kr_control;
363 ipath_kreg kr_counterregbase;
364 ipath_kreg kr_intmask;
365 ipath_kreg kr_intstatus;
366 ipath_kreg kr_pagealign;
367 ipath_kreg kr_portcnt;
368 ipath_kreg kr_rcvtidbase;
369 ipath_kreg kr_rcvtidcnt;
370 ipath_kreg kr_rcvegrbase;
371 ipath_kreg kr_rcvegrcnt;
372 ipath_kreg kr_scratch;
373 ipath_kreg kr_sendctrl;
374 ipath_kreg kr_sendpiobufbase;
375 ipath_kreg kr_sendpiobufcnt;
376 ipath_kreg kr_sendpiosize;
377 ipath_kreg kr_sendregbase;
378 ipath_kreg kr_userregbase;
379 /* These are the 64 bit group */
380 ipath_kreg kr_debugport;
381 ipath_kreg kr_debugportselect;
382 ipath_kreg kr_errorclear;
383 ipath_kreg kr_errormask;
384 ipath_kreg kr_errorstatus;
385 ipath_kreg kr_extctrl;
386 ipath_kreg kr_extstatus;
387 ipath_kreg kr_gpio_clear;
388 ipath_kreg kr_gpio_mask;
389 ipath_kreg kr_gpio_out;
390 ipath_kreg kr_gpio_status;
391 ipath_kreg kr_hwdiagctrl;
392 ipath_kreg kr_hwerrclear;
393 ipath_kreg kr_hwerrmask;
394 ipath_kreg kr_hwerrstatus;
395 ipath_kreg kr_ibcctrl;
396 ipath_kreg kr_ibcstatus;
397 ipath_kreg kr_intblocked;
398 ipath_kreg kr_intclear;
399 ipath_kreg kr_interruptconfig;
400 ipath_kreg kr_mdio;
401 ipath_kreg kr_partitionkey;
402 ipath_kreg kr_rcvbthqp;
403 ipath_kreg kr_rcvbufbase;
404 ipath_kreg kr_rcvbufsize;
405 ipath_kreg kr_rcvctrl;
406 ipath_kreg kr_rcvhdrcnt;
407 ipath_kreg kr_rcvhdrentsize;
408 ipath_kreg kr_rcvhdrsize;
409 ipath_kreg kr_rcvintmembase;
410 ipath_kreg kr_rcvintmemsize;
411 ipath_kreg kr_revision;
412 ipath_kreg kr_sendbuffererror;
413 ipath_kreg kr_sendpioavailaddr;
414 ipath_kreg kr_serdesconfig0;
415 ipath_kreg kr_serdesconfig1;
416 ipath_kreg kr_serdesstatus;
417 ipath_kreg kr_txintmembase;
418 ipath_kreg kr_txintmemsize;
419 ipath_kreg kr_xgxsconfig;
420 ipath_kreg kr_ibpllcfg;
421 /* use these two (and the following N ports) only with
422 * ipath_k*_kreg64_port(); not *kreg64() */
423 ipath_kreg kr_rcvhdraddr;
424 ipath_kreg kr_rcvhdrtailaddr;
425
426 /* remaining registers are not present on all types of infinipath
427 chips */
428 ipath_kreg kr_rcvpktledcnt;
429 ipath_kreg kr_pcierbuftestreg0;
430 ipath_kreg kr_pcierbuftestreg1;
431 ipath_kreg kr_pcieq0serdesconfig0;
432 ipath_kreg kr_pcieq0serdesconfig1;
433 ipath_kreg kr_pcieq0serdesstatus;
434 ipath_kreg kr_pcieq1serdesconfig0;
435 ipath_kreg kr_pcieq1serdesconfig1;
436 ipath_kreg kr_pcieq1serdesstatus;
437 ipath_kreg kr_hrtbt_guid;
438 ipath_kreg kr_ibcddrctrl;
439 ipath_kreg kr_ibcddrstatus;
440 ipath_kreg kr_jintreload;
441
442 /* send dma related regs */
443 ipath_kreg kr_senddmabase;
444 ipath_kreg kr_senddmalengen;
445 ipath_kreg kr_senddmatail;
446 ipath_kreg kr_senddmahead;
447 ipath_kreg kr_senddmaheadaddr;
448 ipath_kreg kr_senddmabufmask0;
449 ipath_kreg kr_senddmabufmask1;
450 ipath_kreg kr_senddmabufmask2;
451 ipath_kreg kr_senddmastatus;
452
453 /* SerDes related regs (IBA7220-only) */
454 ipath_kreg kr_ibserdesctrl;
455 ipath_kreg kr_ib_epbacc;
456 ipath_kreg kr_ib_epbtrans;
457 ipath_kreg kr_pcie_epbacc;
458 ipath_kreg kr_pcie_epbtrans;
459 ipath_kreg kr_ib_ddsrxeq;
460};
461
462struct ipath_cregs {
463 ipath_creg cr_badformatcnt;
464 ipath_creg cr_erricrccnt;
465 ipath_creg cr_errlinkcnt;
466 ipath_creg cr_errlpcrccnt;
467 ipath_creg cr_errpkey;
468 ipath_creg cr_errrcvflowctrlcnt;
469 ipath_creg cr_err_rlencnt;
470 ipath_creg cr_errslencnt;
471 ipath_creg cr_errtidfull;
472 ipath_creg cr_errtidvalid;
473 ipath_creg cr_errvcrccnt;
474 ipath_creg cr_ibstatuschange;
475 ipath_creg cr_intcnt;
476 ipath_creg cr_invalidrlencnt;
477 ipath_creg cr_invalidslencnt;
478 ipath_creg cr_lbflowstallcnt;
479 ipath_creg cr_iblinkdowncnt;
480 ipath_creg cr_iblinkerrrecovcnt;
481 ipath_creg cr_ibsymbolerrcnt;
482 ipath_creg cr_pktrcvcnt;
483 ipath_creg cr_pktrcvflowctrlcnt;
484 ipath_creg cr_pktsendcnt;
485 ipath_creg cr_pktsendflowcnt;
486 ipath_creg cr_portovflcnt;
487 ipath_creg cr_rcvebpcnt;
488 ipath_creg cr_rcvovflcnt;
489 ipath_creg cr_rxdroppktcnt;
490 ipath_creg cr_senddropped;
491 ipath_creg cr_sendstallcnt;
492 ipath_creg cr_sendunderruncnt;
493 ipath_creg cr_unsupvlcnt;
494 ipath_creg cr_wordrcvcnt;
495 ipath_creg cr_wordsendcnt;
496 ipath_creg cr_vl15droppedpktcnt;
497 ipath_creg cr_rxotherlocalphyerrcnt;
498 ipath_creg cr_excessbufferovflcnt;
499 ipath_creg cr_locallinkintegrityerrcnt;
500 ipath_creg cr_rxvlerrcnt;
501 ipath_creg cr_rxdlidfltrcnt;
502 ipath_creg cr_psstat;
503 ipath_creg cr_psstart;
504 ipath_creg cr_psinterval;
505 ipath_creg cr_psrcvdatacount;
506 ipath_creg cr_psrcvpktscount;
507 ipath_creg cr_psxmitdatacount;
508 ipath_creg cr_psxmitpktscount;
509 ipath_creg cr_psxmitwaitcount;
510};
511
512#endif /* _IPATH_REGISTERS_H */
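
The *_MASK/*_SHIFT pairs above are meant to be applied to the raw 64-bit value read back from the chip. A small usage sketch (function name made up for illustration) for decoding the major chip revision field from kr_revision:

static inline u32 sketch_chip_major_rev(u64 revision)
{
	/* Shift the field down, then mask off the neighbouring fields. */
	return (u32) ((revision >> INFINIPATH_R_CHIPREVMAJOR_SHIFT) &
		      INFINIPATH_R_CHIPREVMAJOR_MASK);
}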
diff --git a/drivers/staging/rdma/ipath/ipath_ruc.c b/drivers/staging/rdma/ipath/ipath_ruc.c
deleted file mode 100644
index e541a01f1f61..000000000000
--- a/drivers/staging/rdma/ipath/ipath_ruc.c
+++ /dev/null
@@ -1,733 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/spinlock.h>
35
36#include "ipath_verbs.h"
37#include "ipath_kernel.h"
38
39/*
40 * Convert the AETH RNR timeout code into the number of milliseconds.
41 */
42const u32 ib_ipath_rnr_table[32] = {
43 656, /* 0 */
44 1, /* 1 */
45 1, /* 2 */
46 1, /* 3 */
47 1, /* 4 */
48 1, /* 5 */
49 1, /* 6 */
50 1, /* 7 */
51 1, /* 8 */
52 1, /* 9 */
53 1, /* A */
54 1, /* B */
55 1, /* C */
56 1, /* D */
57 2, /* E */
58 2, /* F */
59 3, /* 10 */
60 4, /* 11 */
61 6, /* 12 */
62 8, /* 13 */
63 11, /* 14 */
64 16, /* 15 */
65 21, /* 16 */
66 31, /* 17 */
67 41, /* 18 */
68 62, /* 19 */
69 82, /* 1A */
70 123, /* 1B */
71 164, /* 1C */
72 246, /* 1D */
73 328, /* 1E */
74 492 /* 1F */
75};
76
77/**
78 * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
79 * @qp: the QP
80 *
81 * Called with the QP s_lock held and interrupts disabled.
82 * XXX Use a simple list for now. We might need a priority
83 * queue if we have lots of QPs waiting for RNR timeouts
84 * but that should be rare.
85 */
86void ipath_insert_rnr_queue(struct ipath_qp *qp)
87{
88 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
89
90 /* We already did a spin_lock_irqsave(), so just use spin_lock */
91 spin_lock(&dev->pending_lock);
92 if (list_empty(&dev->rnrwait))
93 list_add(&qp->timerwait, &dev->rnrwait);
94 else {
95 struct list_head *l = &dev->rnrwait;
96 struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp,
97 timerwait);
98
99 while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
100 qp->s_rnr_timeout -= nqp->s_rnr_timeout;
101 l = l->next;
102 if (l->next == &dev->rnrwait) {
103 nqp = NULL;
104 break;
105 }
106 nqp = list_entry(l->next, struct ipath_qp,
107 timerwait);
108 }
109 if (nqp)
110 nqp->s_rnr_timeout -= qp->s_rnr_timeout;
111 list_add(&qp->timerwait, l);
112 }
113 spin_unlock(&dev->pending_lock);
114}
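/*
 * Illustrative note (not part of the original source): rnrwait is kept
 * as a delta queue, i.e. each entry's s_rnr_timeout stores only the
 * time beyond the entry in front of it, presumably so the periodic
 * timer only has to decrement the head entry.  For example, absolute
 * timeouts of 3, 5 and 9 ms would be stored on the list as 3, 2 and 4.
 */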
115
116/**
117 * ipath_init_sge - Validate a RWQE and fill in the SGE state
118 * @qp: the QP
119 *
120 * Return 1 if OK.
121 */
122int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
123 u32 *lengthp, struct ipath_sge_state *ss)
124{
125 int i, j, ret;
126 struct ib_wc wc;
127
128 *lengthp = 0;
129 for (i = j = 0; i < wqe->num_sge; i++) {
130 if (wqe->sg_list[i].length == 0)
131 continue;
132 /* Check LKEY */
133 if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
134 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
135 goto bad_lkey;
136 *lengthp += wqe->sg_list[i].length;
137 j++;
138 }
139 ss->num_sge = j;
140 ret = 1;
141 goto bail;
142
143bad_lkey:
144 memset(&wc, 0, sizeof(wc));
145 wc.wr_id = wqe->wr_id;
146 wc.status = IB_WC_LOC_PROT_ERR;
147 wc.opcode = IB_WC_RECV;
148 wc.qp = &qp->ibqp;
149 /* Signal solicited completion event. */
150 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
151 ret = 0;
152bail:
153 return ret;
154}
155
156/**
157 * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
158 * @qp: the QP
159 * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
160 *
161 * Return 0 if no RWQE is available, otherwise return 1.
162 *
163 * Can be called from interrupt level.
164 */
165int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
166{
167 unsigned long flags;
168 struct ipath_rq *rq;
169 struct ipath_rwq *wq;
170 struct ipath_srq *srq;
171 struct ipath_rwqe *wqe;
172 void (*handler)(struct ib_event *, void *);
173 u32 tail;
174 int ret;
175
176 if (qp->ibqp.srq) {
177 srq = to_isrq(qp->ibqp.srq);
178 handler = srq->ibsrq.event_handler;
179 rq = &srq->rq;
180 } else {
181 srq = NULL;
182 handler = NULL;
183 rq = &qp->r_rq;
184 }
185
186 spin_lock_irqsave(&rq->lock, flags);
187 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
188 ret = 0;
189 goto unlock;
190 }
191
192 wq = rq->wq;
193 tail = wq->tail;
194 /* Validate tail before using it since it is user writable. */
195 if (tail >= rq->size)
196 tail = 0;
197 do {
198 if (unlikely(tail == wq->head)) {
199 ret = 0;
200 goto unlock;
201 }
202 /* Make sure entry is read after head index is read. */
203 smp_rmb();
204 wqe = get_rwqe_ptr(rq, tail);
205 if (++tail >= rq->size)
206 tail = 0;
207 if (wr_id_only)
208 break;
209 qp->r_sge.sg_list = qp->r_sg_list;
210 } while (!ipath_init_sge(qp, wqe, &qp->r_len, &qp->r_sge));
211 qp->r_wr_id = wqe->wr_id;
212 wq->tail = tail;
213
214 ret = 1;
215 set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
216 if (handler) {
217 u32 n;
218
219 /*
220 * validate head pointer value and compute
221 * the number of remaining WQEs.
222 */
223 n = wq->head;
224 if (n >= rq->size)
225 n = 0;
226 if (n < tail)
227 n += rq->size - tail;
228 else
229 n -= tail;
230 if (n < srq->limit) {
231 struct ib_event ev;
232
233 srq->limit = 0;
234 spin_unlock_irqrestore(&rq->lock, flags);
235 ev.device = qp->ibqp.device;
236 ev.element.srq = qp->ibqp.srq;
237 ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
238 handler(&ev, srq->ibsrq.srq_context);
239 goto bail;
240 }
241 }
242unlock:
243 spin_unlock_irqrestore(&rq->lock, flags);
244bail:
245 return ret;
246}
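/*
 * Worked example (not part of the original source) of the remaining-WQE
 * arithmetic used for the SRQ limit check above: with rq->size = 8,
 * tail = 6 and wq->head = 2 the ring has wrapped, so n = 2 + 8 - 6 = 4;
 * with head = 7 and tail = 6 there is no wrap and n = 7 - 6 = 1.
 */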
247
248/**
249 * ipath_ruc_loopback - handle UC and RC loopback requests
250 * @sqp: the sending QP
251 *
252 * This is called from ipath_do_send() to
253 * forward a WQE addressed to the same HCA.
254 * Note that although we are single threaded due to the tasklet, we still
255 * have to protect against post_send(). We don't have to worry about
256 * receive interrupts since this is a connected protocol and all packets
257 * will pass through here.
258 */
259static void ipath_ruc_loopback(struct ipath_qp *sqp)
260{
261 struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
262 struct ipath_qp *qp;
263 struct ipath_swqe *wqe;
264 struct ipath_sge *sge;
265 unsigned long flags;
266 struct ib_wc wc;
267 u64 sdata;
268 atomic64_t *maddr;
269 enum ib_wc_status send_status;
270
271 /*
272 * Note that we check the responder QP state after
273 * checking the requester's state.
274 */
275 qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
276
277 spin_lock_irqsave(&sqp->s_lock, flags);
278
279 /* Return if we are already busy processing a work request. */
280 if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
281 !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
282 goto unlock;
283
284 sqp->s_flags |= IPATH_S_BUSY;
285
286again:
287 if (sqp->s_last == sqp->s_head)
288 goto clr_busy;
289 wqe = get_swqe_ptr(sqp, sqp->s_last);
290
291	/* Return if it is not OK to start a new work request. */
292 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
293 if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
294 goto clr_busy;
295 /* We are in the error state, flush the work request. */
296 send_status = IB_WC_WR_FLUSH_ERR;
297 goto flush_send;
298 }
299
300 /*
301 * We can rely on the entry not changing without the s_lock
302 * being held until we update s_last.
303 * We increment s_cur to indicate s_last is in progress.
304 */
305 if (sqp->s_last == sqp->s_cur) {
306 if (++sqp->s_cur >= sqp->s_size)
307 sqp->s_cur = 0;
308 }
309 spin_unlock_irqrestore(&sqp->s_lock, flags);
310
311 if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
312 dev->n_pkt_drops++;
313 /*
314 * For RC, the requester would timeout and retry so
315 * shortcut the timeouts and just signal too many retries.
316 */
317 if (sqp->ibqp.qp_type == IB_QPT_RC)
318 send_status = IB_WC_RETRY_EXC_ERR;
319 else
320 send_status = IB_WC_SUCCESS;
321 goto serr;
322 }
323
324 memset(&wc, 0, sizeof wc);
325 send_status = IB_WC_SUCCESS;
326
327 sqp->s_sge.sge = wqe->sg_list[0];
328 sqp->s_sge.sg_list = wqe->sg_list + 1;
329 sqp->s_sge.num_sge = wqe->wr.num_sge;
330 sqp->s_len = wqe->length;
331 switch (wqe->wr.opcode) {
332 case IB_WR_SEND_WITH_IMM:
333 wc.wc_flags = IB_WC_WITH_IMM;
334 wc.ex.imm_data = wqe->wr.ex.imm_data;
335 /* FALLTHROUGH */
336 case IB_WR_SEND:
337 if (!ipath_get_rwqe(qp, 0))
338 goto rnr_nak;
339 break;
340
341 case IB_WR_RDMA_WRITE_WITH_IMM:
342 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
343 goto inv_err;
344 wc.wc_flags = IB_WC_WITH_IMM;
345 wc.ex.imm_data = wqe->wr.ex.imm_data;
346 if (!ipath_get_rwqe(qp, 1))
347 goto rnr_nak;
348 /* FALLTHROUGH */
349 case IB_WR_RDMA_WRITE:
350 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
351 goto inv_err;
352 if (wqe->length == 0)
353 break;
354 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
355 wqe->rdma_wr.remote_addr,
356 wqe->rdma_wr.rkey,
357 IB_ACCESS_REMOTE_WRITE)))
358 goto acc_err;
359 break;
360
361 case IB_WR_RDMA_READ:
362 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
363 goto inv_err;
364 if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
365 wqe->rdma_wr.remote_addr,
366 wqe->rdma_wr.rkey,
367 IB_ACCESS_REMOTE_READ)))
368 goto acc_err;
369 qp->r_sge.sge = wqe->sg_list[0];
370 qp->r_sge.sg_list = wqe->sg_list + 1;
371 qp->r_sge.num_sge = wqe->wr.num_sge;
372 break;
373
374 case IB_WR_ATOMIC_CMP_AND_SWP:
375 case IB_WR_ATOMIC_FETCH_AND_ADD:
376 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
377 goto inv_err;
378 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
379 wqe->atomic_wr.remote_addr,
380 wqe->atomic_wr.rkey,
381 IB_ACCESS_REMOTE_ATOMIC)))
382 goto acc_err;
383 /* Perform atomic OP and save result. */
384 maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
385 sdata = wqe->atomic_wr.compare_add;
386 *(u64 *) sqp->s_sge.sge.vaddr =
387 (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
388 (u64) atomic64_add_return(sdata, maddr) - sdata :
389 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
390 sdata, wqe->atomic_wr.swap);
391 goto send_comp;
392
393 default:
394 send_status = IB_WC_LOC_QP_OP_ERR;
395 goto serr;
396 }
397
398 sge = &sqp->s_sge.sge;
399 while (sqp->s_len) {
400 u32 len = sqp->s_len;
401
402 if (len > sge->length)
403 len = sge->length;
404 if (len > sge->sge_length)
405 len = sge->sge_length;
406 BUG_ON(len == 0);
407 ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
408 sge->vaddr += len;
409 sge->length -= len;
410 sge->sge_length -= len;
411 if (sge->sge_length == 0) {
412 if (--sqp->s_sge.num_sge)
413 *sge = *sqp->s_sge.sg_list++;
414 } else if (sge->length == 0 && sge->mr != NULL) {
415 if (++sge->n >= IPATH_SEGSZ) {
416 if (++sge->m >= sge->mr->mapsz)
417 break;
418 sge->n = 0;
419 }
420 sge->vaddr =
421 sge->mr->map[sge->m]->segs[sge->n].vaddr;
422 sge->length =
423 sge->mr->map[sge->m]->segs[sge->n].length;
424 }
425 sqp->s_len -= len;
426 }
427
428 if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
429 goto send_comp;
430
431 if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
432 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
433 else
434 wc.opcode = IB_WC_RECV;
435 wc.wr_id = qp->r_wr_id;
436 wc.status = IB_WC_SUCCESS;
437 wc.byte_len = wqe->length;
438 wc.qp = &qp->ibqp;
439 wc.src_qp = qp->remote_qpn;
440 wc.slid = qp->remote_ah_attr.dlid;
441 wc.sl = qp->remote_ah_attr.sl;
442 wc.port_num = 1;
443 /* Signal completion event if the solicited bit is set. */
444 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
445 wqe->wr.send_flags & IB_SEND_SOLICITED);
446
447send_comp:
448 spin_lock_irqsave(&sqp->s_lock, flags);
449flush_send:
450 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
451 ipath_send_complete(sqp, wqe, send_status);
452 goto again;
453
454rnr_nak:
455 /* Handle RNR NAK */
456 if (qp->ibqp.qp_type == IB_QPT_UC)
457 goto send_comp;
458 /*
459 * Note: we don't need the s_lock held since the BUSY flag
460 * makes this single threaded.
461 */
462 if (sqp->s_rnr_retry == 0) {
463 send_status = IB_WC_RNR_RETRY_EXC_ERR;
464 goto serr;
465 }
466 if (sqp->s_rnr_retry_cnt < 7)
467 sqp->s_rnr_retry--;
468 spin_lock_irqsave(&sqp->s_lock, flags);
469 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
470 goto clr_busy;
471 sqp->s_flags |= IPATH_S_WAITING;
472 dev->n_rnr_naks++;
473 sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
474 ipath_insert_rnr_queue(sqp);
475 goto clr_busy;
476
477inv_err:
478 send_status = IB_WC_REM_INV_REQ_ERR;
479 wc.status = IB_WC_LOC_QP_OP_ERR;
480 goto err;
481
482acc_err:
483 send_status = IB_WC_REM_ACCESS_ERR;
484 wc.status = IB_WC_LOC_PROT_ERR;
485err:
486 /* responder goes to error state */
487 ipath_rc_error(qp, wc.status);
488
489serr:
490 spin_lock_irqsave(&sqp->s_lock, flags);
491 ipath_send_complete(sqp, wqe, send_status);
492 if (sqp->ibqp.qp_type == IB_QPT_RC) {
493 int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
494
495 sqp->s_flags &= ~IPATH_S_BUSY;
496 spin_unlock_irqrestore(&sqp->s_lock, flags);
497 if (lastwqe) {
498 struct ib_event ev;
499
500 ev.device = sqp->ibqp.device;
501 ev.element.qp = &sqp->ibqp;
502 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
503 sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
504 }
505 goto done;
506 }
507clr_busy:
508 sqp->s_flags &= ~IPATH_S_BUSY;
509unlock:
510 spin_unlock_irqrestore(&sqp->s_lock, flags);
511done:
512 if (qp && atomic_dec_and_test(&qp->refcount))
513 wake_up(&qp->wait);
514}
515
516static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
517{
518 if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
519 qp->ibqp.qp_type == IB_QPT_SMI) {
520 unsigned long flags;
521
522 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
523 dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
524 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
525 dd->ipath_sendctrl);
526 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
527 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
528 }
529}
530
531/**
532 * ipath_no_bufs_available - tell the layer driver we need buffers
533 * @qp: the QP that caused the problem
534 * @dev: the device we ran out of buffers on
535 *
536 * Called when we run out of PIO buffers.
537 * If we are now in the error state, return zero to flush the
538 * send work request.
539 */
540static int ipath_no_bufs_available(struct ipath_qp *qp,
541 struct ipath_ibdev *dev)
542{
543 unsigned long flags;
544 int ret = 1;
545
546 /*
547 * Note that as soon as want_buffer() is called and
548 * possibly before it returns, ipath_ib_piobufavail()
549 * could be called. Therefore, put QP on the piowait list before
550 * enabling the PIO avail interrupt.
551 */
552 spin_lock_irqsave(&qp->s_lock, flags);
553 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
554 dev->n_piowait++;
555 qp->s_flags |= IPATH_S_WAITING;
556 qp->s_flags &= ~IPATH_S_BUSY;
557 spin_lock(&dev->pending_lock);
558 if (list_empty(&qp->piowait))
559 list_add_tail(&qp->piowait, &dev->piowait);
560 spin_unlock(&dev->pending_lock);
561 } else
562 ret = 0;
563 spin_unlock_irqrestore(&qp->s_lock, flags);
564 if (ret)
565 want_buffer(dev->dd, qp);
566 return ret;
567}
568
569/**
570 * ipath_make_grh - construct a GRH header
571 * @dev: a pointer to the ipath device
572 * @hdr: a pointer to the GRH header being constructed
573 * @grh: the global route address to send to
574 * @hwords: the number of 32 bit words of header being sent
575 * @nwords: the number of 32 bit words of data being sent
576 *
577 * Return the size of the header in 32 bit words.
578 */
579u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
580 struct ib_global_route *grh, u32 hwords, u32 nwords)
581{
582 hdr->version_tclass_flow =
583 cpu_to_be32((6 << 28) |
584 (grh->traffic_class << 20) |
585 grh->flow_label);
586 hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
587 /* next_hdr is defined by C8-7 in ch. 8.4.1 */
588 hdr->next_hdr = 0x1B;
589 hdr->hop_limit = grh->hop_limit;
590 /* The SGID is 32-bit aligned. */
591 hdr->sgid.global.subnet_prefix = dev->gid_prefix;
592 hdr->sgid.global.interface_id = dev->dd->ipath_guid;
593 hdr->dgid = grh->dgid;
594
595 /* GRH header size in 32-bit words. */
596 return sizeof(struct ib_grh) / sizeof(u32);
597}
598
599void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
600 struct ipath_other_headers *ohdr,
601 u32 bth0, u32 bth2)
602{
603 u16 lrh0;
604 u32 nwords;
605 u32 extra_bytes;
606
607 /* Construct the header. */
608 extra_bytes = -qp->s_cur_size & 3;
609 nwords = (qp->s_cur_size + extra_bytes) >> 2;
610 lrh0 = IPATH_LRH_BTH;
611 if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
612 qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
613 &qp->remote_ah_attr.grh,
614 qp->s_hdrwords, nwords);
615 lrh0 = IPATH_LRH_GRH;
616 }
617 lrh0 |= qp->remote_ah_attr.sl << 4;
618 qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
619 qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
620 qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
621 qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid |
622 qp->remote_ah_attr.src_path_bits);
623 bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
624 bth0 |= extra_bytes << 20;
625 ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22));
626 ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
627 ohdr->bth[2] = cpu_to_be32(bth2);
628}
629
630/**
631 * ipath_do_send - perform a send on a QP
632 * @data: contains a pointer to the QP
633 *
634 * Process entries in the send work queue until credit or queue is
635 * exhausted. Only allow one CPU to send a packet per QP (tasklet).
636 * Otherwise, two threads could send packets out of order.
637 */
638void ipath_do_send(unsigned long data)
639{
640 struct ipath_qp *qp = (struct ipath_qp *)data;
641 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
642 int (*make_req)(struct ipath_qp *qp);
643 unsigned long flags;
644
645 if ((qp->ibqp.qp_type == IB_QPT_RC ||
646 qp->ibqp.qp_type == IB_QPT_UC) &&
647 qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
648 ipath_ruc_loopback(qp);
649 goto bail;
650 }
651
652 if (qp->ibqp.qp_type == IB_QPT_RC)
653 make_req = ipath_make_rc_req;
654 else if (qp->ibqp.qp_type == IB_QPT_UC)
655 make_req = ipath_make_uc_req;
656 else
657 make_req = ipath_make_ud_req;
658
659 spin_lock_irqsave(&qp->s_lock, flags);
660
661 /* Return if we are already busy processing a work request. */
662 if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
663 !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
664 spin_unlock_irqrestore(&qp->s_lock, flags);
665 goto bail;
666 }
667
668 qp->s_flags |= IPATH_S_BUSY;
669
670 spin_unlock_irqrestore(&qp->s_lock, flags);
671
672again:
673 /* Check for a constructed packet to be sent. */
674 if (qp->s_hdrwords != 0) {
675 /*
676 * If no PIO bufs are available, return. An interrupt will
677 * call ipath_ib_piobufavail() when one is available.
678 */
679 if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
680 qp->s_cur_sge, qp->s_cur_size)) {
681 if (ipath_no_bufs_available(qp, dev))
682 goto bail;
683 }
684 dev->n_unicast_xmit++;
685 /* Record that we sent the packet and s_hdr is empty. */
686 qp->s_hdrwords = 0;
687 }
688
689 if (make_req(qp))
690 goto again;
691
692bail:;
693}
694
695/*
696 * This should be called with s_lock held.
697 */
698void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
699 enum ib_wc_status status)
700{
701 u32 old_last, last;
702
703 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
704 return;
705
706 /* See ch. 11.2.4.1 and 10.7.3.1 */
707 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
708 (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
709 status != IB_WC_SUCCESS) {
710 struct ib_wc wc;
711
712 memset(&wc, 0, sizeof wc);
713 wc.wr_id = wqe->wr.wr_id;
714 wc.status = status;
715 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
716 wc.qp = &qp->ibqp;
717 if (status == IB_WC_SUCCESS)
718 wc.byte_len = wqe->length;
719 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
720 status != IB_WC_SUCCESS);
721 }
722
723 old_last = last = qp->s_last;
724 if (++last >= qp->s_size)
725 last = 0;
726 qp->s_last = last;
727 if (qp->s_cur == old_last)
728 qp->s_cur = last;
729 if (qp->s_tail == old_last)
730 qp->s_tail = last;
731 if (qp->state == IB_QPS_SQD && last == qp->s_cur)
732 qp->s_draining = 0;
733}
diff --git a/drivers/staging/rdma/ipath/ipath_sdma.c b/drivers/staging/rdma/ipath/ipath_sdma.c
deleted file mode 100644
index 1ffc06abf9da..000000000000
--- a/drivers/staging/rdma/ipath/ipath_sdma.c
+++ /dev/null
@@ -1,818 +0,0 @@
1/*
2 * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/spinlock.h>
34#include <linux/gfp.h>
35
36#include "ipath_kernel.h"
37#include "ipath_verbs.h"
38#include "ipath_common.h"
39
40#define SDMA_DESCQ_SZ PAGE_SIZE /* 256 entries per 4KB page */
41
42static void vl15_watchdog_enq(struct ipath_devdata *dd)
43{
44 /* ipath_sdma_lock must already be held */
45 if (atomic_inc_return(&dd->ipath_sdma_vl15_count) == 1) {
46 unsigned long interval = (HZ + 19) / 20;
47 dd->ipath_sdma_vl15_timer.expires = jiffies + interval;
48 add_timer(&dd->ipath_sdma_vl15_timer);
49 }
50}
51
52static void vl15_watchdog_deq(struct ipath_devdata *dd)
53{
54 /* ipath_sdma_lock must already be held */
55 if (atomic_dec_return(&dd->ipath_sdma_vl15_count) != 0) {
56 unsigned long interval = (HZ + 19) / 20;
57 mod_timer(&dd->ipath_sdma_vl15_timer, jiffies + interval);
58 } else {
59 del_timer(&dd->ipath_sdma_vl15_timer);
60 }
61}
62
63static void vl15_watchdog_timeout(unsigned long opaque)
64{
65 struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
66
67 if (atomic_read(&dd->ipath_sdma_vl15_count) != 0) {
68 ipath_dbg("vl15 watchdog timeout - clearing\n");
69 ipath_cancel_sends(dd, 1);
70 ipath_hol_down(dd);
71 } else {
72 ipath_dbg("vl15 watchdog timeout - "
73 "condition already cleared\n");
74 }
75}
76
77static void unmap_desc(struct ipath_devdata *dd, unsigned head)
78{
79 __le64 *descqp = &dd->ipath_sdma_descq[head].qw[0];
80 u64 desc[2];
81 dma_addr_t addr;
82 size_t len;
83
84 desc[0] = le64_to_cpu(descqp[0]);
85 desc[1] = le64_to_cpu(descqp[1]);
86
87 addr = (desc[1] << 32) | (desc[0] >> 32);
88 len = (desc[0] >> 14) & (0x7ffULL << 2);
89 dma_unmap_single(&dd->pcidev->dev, addr, len, DMA_TO_DEVICE);
90}
91
92/*
93 * ipath_sdma_lock should be locked before calling this.
94 */
95int ipath_sdma_make_progress(struct ipath_devdata *dd)
96{
97 struct list_head *lp = NULL;
98 struct ipath_sdma_txreq *txp = NULL;
99 u16 dmahead;
100 u16 start_idx = 0;
101 int progress = 0;
102
103 if (!list_empty(&dd->ipath_sdma_activelist)) {
104 lp = dd->ipath_sdma_activelist.next;
105 txp = list_entry(lp, struct ipath_sdma_txreq, list);
106 start_idx = txp->start_idx;
107 }
108
109 /*
110 * Read the SDMA head register in order to know that the
111 * interrupt clear has been written to the chip.
112 * Otherwise, we may not get an interrupt for the last
113 * descriptor in the queue.
114 */
115 dmahead = (u16)ipath_read_kreg32(dd, dd->ipath_kregs->kr_senddmahead);
116 /* sanity check return value for error handling (chip reset, etc.) */
117 if (dmahead >= dd->ipath_sdma_descq_cnt)
118 goto done;
119
120 while (dd->ipath_sdma_descq_head != dmahead) {
121 if (txp && txp->flags & IPATH_SDMA_TXREQ_F_FREEDESC &&
122 dd->ipath_sdma_descq_head == start_idx) {
123 unmap_desc(dd, dd->ipath_sdma_descq_head);
124 start_idx++;
125 if (start_idx == dd->ipath_sdma_descq_cnt)
126 start_idx = 0;
127 }
128
129 /* increment free count and head */
130 dd->ipath_sdma_descq_removed++;
131 if (++dd->ipath_sdma_descq_head == dd->ipath_sdma_descq_cnt)
132 dd->ipath_sdma_descq_head = 0;
133
134 if (txp && txp->next_descq_idx == dd->ipath_sdma_descq_head) {
135 /* move to notify list */
136 if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
137 vl15_watchdog_deq(dd);
138 list_move_tail(lp, &dd->ipath_sdma_notifylist);
139 if (!list_empty(&dd->ipath_sdma_activelist)) {
140 lp = dd->ipath_sdma_activelist.next;
141 txp = list_entry(lp, struct ipath_sdma_txreq,
142 list);
143 start_idx = txp->start_idx;
144 } else {
145 lp = NULL;
146 txp = NULL;
147 }
148 }
149 progress = 1;
150 }
151
152 if (progress)
153 tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
154
155done:
156 return progress;
157}
158
159static void ipath_sdma_notify(struct ipath_devdata *dd, struct list_head *list)
160{
161 struct ipath_sdma_txreq *txp, *txp_next;
162
163 list_for_each_entry_safe(txp, txp_next, list, list) {
164 list_del_init(&txp->list);
165
166 if (txp->callback)
167 (*txp->callback)(txp->callback_cookie,
168 txp->callback_status);
169 }
170}
171
172static void sdma_notify_taskbody(struct ipath_devdata *dd)
173{
174 unsigned long flags;
175 struct list_head list;
176
177 INIT_LIST_HEAD(&list);
178
179 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
180
181 list_splice_init(&dd->ipath_sdma_notifylist, &list);
182
183 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
184
185 ipath_sdma_notify(dd, &list);
186
187 /*
188 * The IB verbs layer needs to see the callback before getting
189 * the call to ipath_ib_piobufavail() because the callback
190 * handles releasing resources the next send will need.
191 * Otherwise, we could do these calls in
192 * ipath_sdma_make_progress().
193 */
194 ipath_ib_piobufavail(dd->verbs_dev);
195}
196
197static void sdma_notify_task(unsigned long opaque)
198{
199 struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
200
201 if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
202 sdma_notify_taskbody(dd);
203}
204
205static void dump_sdma_state(struct ipath_devdata *dd)
206{
207 unsigned long reg;
208
209 reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus);
210 ipath_cdbg(VERBOSE, "kr_senddmastatus: 0x%016lx\n", reg);
211
212 reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendctrl);
213 ipath_cdbg(VERBOSE, "kr_sendctrl: 0x%016lx\n", reg);
214
215 reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask0);
216 ipath_cdbg(VERBOSE, "kr_senddmabufmask0: 0x%016lx\n", reg);
217
218 reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask1);
219 ipath_cdbg(VERBOSE, "kr_senddmabufmask1: 0x%016lx\n", reg);
220
221 reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask2);
222 ipath_cdbg(VERBOSE, "kr_senddmabufmask2: 0x%016lx\n", reg);
223
224 reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
225 ipath_cdbg(VERBOSE, "kr_senddmatail: 0x%016lx\n", reg);
226
227 reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
228 ipath_cdbg(VERBOSE, "kr_senddmahead: 0x%016lx\n", reg);
229}
230
231static void sdma_abort_task(unsigned long opaque)
232{
233 struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
234 u64 status;
235 unsigned long flags;
236
237 if (test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
238 return;
239
240 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
241
242 status = dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK;
243
244 /* nothing to do */
245 if (status == IPATH_SDMA_ABORT_NONE)
246 goto unlock;
247
248 /* ipath_sdma_abort() is done, waiting for interrupt */
249 if (status == IPATH_SDMA_ABORT_DISARMED) {
250 if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout))
251 goto resched_noprint;
252 /* give up, intr got lost somewhere */
253 ipath_dbg("give up waiting for SDMADISABLED intr\n");
254 __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
255 status = IPATH_SDMA_ABORT_ABORTED;
256 }
257
258 /* everything is stopped, time to clean up and restart */
259 if (status == IPATH_SDMA_ABORT_ABORTED) {
260 struct ipath_sdma_txreq *txp, *txpnext;
261 u64 hwstatus;
262 int notify = 0;
263
264 hwstatus = ipath_read_kreg64(dd,
265 dd->ipath_kregs->kr_senddmastatus);
266
267 if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG |
268 IPATH_SDMA_STATUS_ABORT_IN_PROG |
269 IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) ||
270 !(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) {
271 if (dd->ipath_sdma_reset_wait > 0) {
272 /* not done shutting down sdma */
273 --dd->ipath_sdma_reset_wait;
274 goto resched;
275 }
276 ipath_cdbg(VERBOSE, "gave up waiting for quiescent "
277 "status after SDMA reset, continuing\n");
278 dump_sdma_state(dd);
279 }
280
281 /* dequeue all "sent" requests */
282 list_for_each_entry_safe(txp, txpnext,
283 &dd->ipath_sdma_activelist, list) {
284 txp->callback_status = IPATH_SDMA_TXREQ_S_ABORTED;
285 if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
286 vl15_watchdog_deq(dd);
287 list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
288 notify = 1;
289 }
290 if (notify)
291 tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
292
293 /* reset our notion of head and tail */
294 dd->ipath_sdma_descq_tail = 0;
295 dd->ipath_sdma_descq_head = 0;
296 dd->ipath_sdma_head_dma[0] = 0;
297 dd->ipath_sdma_generation = 0;
298 dd->ipath_sdma_descq_removed = dd->ipath_sdma_descq_added;
299
300 /* Reset SendDmaLenGen */
301 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen,
302 (u64) dd->ipath_sdma_descq_cnt | (1ULL << 18));
303
304 /* done with sdma state for a bit */
305 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
306
307 /*
308 * Don't restart sdma here (with the exception
309 * below). Wait until link is up to ACTIVE. VL15 MADs
310 * used to bring the link up use PIO, and multiple link
311 * transitions otherwise cause the sdma engine to be
312 * stopped and started multiple times.
313 * The disable is done here, including the shadow,
314 * so the state is kept consistent.
315 * See ipath_restart_sdma() for the actual starting
316 * of sdma.
317 */
318 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
319 dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
320 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
321 dd->ipath_sendctrl);
322 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
323 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
324
325 /* make sure I see next message */
326 dd->ipath_sdma_abort_jiffies = 0;
327
328 /*
329 * Not everything that takes SDMA offline is a link
330 * status change. If the link was up, restart SDMA.
331 */
332 if (dd->ipath_flags & IPATH_LINKACTIVE)
333 ipath_restart_sdma(dd);
334
335 goto done;
336 }
337
338resched:
339 /*
340 * for now, keep spinning
341	 * JAG - it is bad for the default to be a loop without a
342	 * state change
343 */
344 if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) {
345 ipath_dbg("looping with status 0x%08lx\n",
346 dd->ipath_sdma_status);
347 dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;
348 }
349resched_noprint:
350 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
351 if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
352 tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
353 return;
354
355unlock:
356 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
357done:
358 return;
359}
360
361/*
362 * This is called from interrupt context.
363 */
364void ipath_sdma_intr(struct ipath_devdata *dd)
365{
366 unsigned long flags;
367
368 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
369
370 (void) ipath_sdma_make_progress(dd);
371
372 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
373}
374
375static int alloc_sdma(struct ipath_devdata *dd)
376{
377 int ret = 0;
378
379 /* Allocate memory for SendDMA descriptor FIFO */
380 dd->ipath_sdma_descq = dma_alloc_coherent(&dd->pcidev->dev,
381 SDMA_DESCQ_SZ, &dd->ipath_sdma_descq_phys, GFP_KERNEL);
382
383 if (!dd->ipath_sdma_descq) {
384 ipath_dev_err(dd, "failed to allocate SendDMA descriptor "
385 "FIFO memory\n");
386 ret = -ENOMEM;
387 goto done;
388 }
389
390 dd->ipath_sdma_descq_cnt =
391 SDMA_DESCQ_SZ / sizeof(struct ipath_sdma_desc);
392
393 /* Allocate memory for DMA of head register to memory */
394 dd->ipath_sdma_head_dma = dma_alloc_coherent(&dd->pcidev->dev,
395 PAGE_SIZE, &dd->ipath_sdma_head_phys, GFP_KERNEL);
396 if (!dd->ipath_sdma_head_dma) {
397 ipath_dev_err(dd, "failed to allocate SendDMA head memory\n");
398 ret = -ENOMEM;
399 goto cleanup_descq;
400 }
401 dd->ipath_sdma_head_dma[0] = 0;
402
403 setup_timer(&dd->ipath_sdma_vl15_timer, vl15_watchdog_timeout,
404 (unsigned long)dd);
405
406 atomic_set(&dd->ipath_sdma_vl15_count, 0);
407
408 goto done;
409
410cleanup_descq:
411 dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
412 (void *)dd->ipath_sdma_descq, dd->ipath_sdma_descq_phys);
413 dd->ipath_sdma_descq = NULL;
414 dd->ipath_sdma_descq_phys = 0;
415done:
416 return ret;
417}
418
419int setup_sdma(struct ipath_devdata *dd)
420{
421 int ret = 0;
422 unsigned i, n;
423 u64 tmp64;
424 u64 senddmabufmask[3] = { 0 };
425 unsigned long flags;
426
427 ret = alloc_sdma(dd);
428 if (ret)
429 goto done;
430
431 if (!dd->ipath_sdma_descq) {
432 ipath_dev_err(dd, "SendDMA memory not allocated\n");
433 goto done;
434 }
435
436 /*
437 * Set initial status as if we had been up, then gone down.
438 * This lets initial start on transition to ACTIVE be the
439 * same as restart after link flap.
440 */
441 dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
442 dd->ipath_sdma_abort_jiffies = 0;
443 dd->ipath_sdma_generation = 0;
444 dd->ipath_sdma_descq_tail = 0;
445 dd->ipath_sdma_descq_head = 0;
446 dd->ipath_sdma_descq_removed = 0;
447 dd->ipath_sdma_descq_added = 0;
448
449 /* Set SendDmaBase */
450 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase,
451 dd->ipath_sdma_descq_phys);
452 /* Set SendDmaLenGen */
453 tmp64 = dd->ipath_sdma_descq_cnt;
454 tmp64 |= 1<<18; /* enable generation checking */
455 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, tmp64);
456 /* Set SendDmaTail */
457 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail,
458 dd->ipath_sdma_descq_tail);
459 /* Set SendDmaHeadAddr */
460 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
461 dd->ipath_sdma_head_phys);
462
463 /*
464 * Reserve all the former "kernel" piobufs, using high number range
465 * so we get as many 4K buffers as possible
466 */
467 n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
468 i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
469 ipath_chg_pioavailkernel(dd, i, n - i , 0);
470 for (; i < n; ++i) {
471 unsigned word = i / 64;
472 unsigned bit = i & 63;
473 BUG_ON(word >= 3);
474 senddmabufmask[word] |= 1ULL << bit;
475 }
476 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
477 senddmabufmask[0]);
478 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,
479 senddmabufmask[1]);
480 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2,
481 senddmabufmask[2]);
482
483 INIT_LIST_HEAD(&dd->ipath_sdma_activelist);
484 INIT_LIST_HEAD(&dd->ipath_sdma_notifylist);
485
486 tasklet_init(&dd->ipath_sdma_notify_task, sdma_notify_task,
487 (unsigned long) dd);
488 tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task,
489 (unsigned long) dd);
490
491 /*
492	 * No use turning on SDMA here, as the link is probably not ACTIVE.
493	 * Just mark it RUNNING and enable the interrupt, and let
494	 * ipath_restart_sdma() on the link transition to ACTIVE actually
495	 * enable it.
496 */
497 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
498 dd->ipath_sendctrl |= INFINIPATH_S_SDMAINTENABLE;
499 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
500 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
501 __set_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
502 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
503
504done:
505 return ret;
506}
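The reservation loop above spreads PIO buffer indices across the three 64-bit senddmabufmask registers: buffer i lands in word i / 64, bit i & 63. A tiny standalone illustration of that mapping (not driver code):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t mask[3] = { 0 };
	unsigned i;

	/* Mark buffers 60..67 the same way setup_sdma() builds senddmabufmask[]. */
	for (i = 60; i < 68; i++)
		mask[i / 64] |= 1ULL << (i & 63);

	assert(mask[0] == 0xf000000000000000ULL);	/* buffers 60..63 */
	assert(mask[1] == 0x000000000000000fULL);	/* buffers 64..67 */
	assert(mask[2] == 0);
	return 0;
}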
507
508void teardown_sdma(struct ipath_devdata *dd)
509{
510 struct ipath_sdma_txreq *txp, *txpnext;
511 unsigned long flags;
512 dma_addr_t sdma_head_phys = 0;
513 dma_addr_t sdma_descq_phys = 0;
514 void *sdma_descq = NULL;
515 void *sdma_head_dma = NULL;
516
517 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
518 __clear_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
519 __set_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
520 __set_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status);
521 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
522
523 tasklet_kill(&dd->ipath_sdma_abort_task);
524 tasklet_kill(&dd->ipath_sdma_notify_task);
525
526 /* turn off sdma */
527 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
528 dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
529 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
530 dd->ipath_sendctrl);
531 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
532 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
533
534 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
535 /* dequeue all "sent" requests */
536 list_for_each_entry_safe(txp, txpnext, &dd->ipath_sdma_activelist,
537 list) {
538 txp->callback_status = IPATH_SDMA_TXREQ_S_SHUTDOWN;
539 if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
540 vl15_watchdog_deq(dd);
541 list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
542 }
543 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
544
545 sdma_notify_taskbody(dd);
546
547 del_timer_sync(&dd->ipath_sdma_vl15_timer);
548
549 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
550
551 dd->ipath_sdma_abort_jiffies = 0;
552
553 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase, 0);
554 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, 0);
555 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, 0);
556 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, 0);
557 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, 0);
558 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, 0);
559 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2, 0);
560
561 if (dd->ipath_sdma_head_dma) {
562 sdma_head_dma = (void *) dd->ipath_sdma_head_dma;
563 sdma_head_phys = dd->ipath_sdma_head_phys;
564 dd->ipath_sdma_head_dma = NULL;
565 dd->ipath_sdma_head_phys = 0;
566 }
567
568 if (dd->ipath_sdma_descq) {
569 sdma_descq = dd->ipath_sdma_descq;
570 sdma_descq_phys = dd->ipath_sdma_descq_phys;
571 dd->ipath_sdma_descq = NULL;
572 dd->ipath_sdma_descq_phys = 0;
573 }
574
575 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
576
577 if (sdma_head_dma)
578 dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
579 sdma_head_dma, sdma_head_phys);
580
581 if (sdma_descq)
582 dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
583 sdma_descq, sdma_descq_phys);
584}
585
586/*
587 * [Re]start SDMA, if we use it, and it's not already OK.
588 * This is called on transition to link ACTIVE, either the first or
589 * subsequent times.
590 */
591void ipath_restart_sdma(struct ipath_devdata *dd)
592{
593 unsigned long flags;
594 int needed = 1;
595
596 if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
597 goto bail;
598
599 /*
600	 * First, make sure we should restart at all, which is to say,
601	 * check that we are "RUNNING" (not in teardown)
602	 * and not "SHUTDOWN".
603 */
604 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
605 if (!test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)
606 || test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
607 needed = 0;
608 else {
609 __clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
610 __clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
611 __clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
612 }
613 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
614 if (!needed) {
615 ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n",
616 dd->ipath_sdma_status);
617 goto bail;
618 }
619 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
620 /*
621 * First clear, just to be safe. Enable is only done
622 * in chip on 0->1 transition
623 */
624 dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
625 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
626 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
627 dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE;
628 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
629 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
630 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
631
632 /* notify upper layers */
633 ipath_ib_piobufavail(dd->verbs_dev);
634
635bail:
636 return;
637}
638
639static inline void make_sdma_desc(struct ipath_devdata *dd,
640 u64 *sdmadesc, u64 addr, u64 dwlen, u64 dwoffset)
641{
642 WARN_ON(addr & 3);
643 /* SDmaPhyAddr[47:32] */
644 sdmadesc[1] = addr >> 32;
645 /* SDmaPhyAddr[31:0] */
646 sdmadesc[0] = (addr & 0xfffffffcULL) << 32;
647 /* SDmaGeneration[1:0] */
648 sdmadesc[0] |= (dd->ipath_sdma_generation & 3ULL) << 30;
649 /* SDmaDwordCount[10:0] */
650 sdmadesc[0] |= (dwlen & 0x7ffULL) << 16;
651 /* SDmaBufOffset[12:2] */
652 sdmadesc[0] |= dwoffset & 0x7ffULL;
653}
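make_sdma_desc() above and unmap_desc() earlier in this file are two views of the same 128-bit descriptor layout. A standalone round-trip sketch (illustration only, with the first/last/intreq bits omitted; field names follow the comments in make_sdma_desc()):

#include <assert.h>
#include <stdint.h>

/* Pack a descriptor the way make_sdma_desc() does. */
static void pack_desc(uint64_t d[2], uint64_t addr, uint64_t dwlen,
		      uint64_t dwoffset, uint64_t gen)
{
	d[1] = addr >> 32;			/* SDmaPhyAddr[47:32] */
	d[0] = (addr & 0xfffffffcULL) << 32;	/* SDmaPhyAddr[31:0] */
	d[0] |= (gen & 3ULL) << 30;		/* SDmaGeneration[1:0] */
	d[0] |= (dwlen & 0x7ffULL) << 16;	/* SDmaDwordCount[10:0] */
	d[0] |= dwoffset & 0x7ffULL;		/* SDmaBufOffset[12:2] */
}

int main(void)
{
	uint64_t d[2];

	pack_desc(d, 0x12345678, 10, 3, 2);
	/* Unpack as unmap_desc() does: byte length is the dword count << 2. */
	assert(((d[1] << 32) | (d[0] >> 32)) == 0x12345678);
	assert(((d[0] >> 14) & (0x7ffULL << 2)) == (10 << 2));
	return 0;
}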
654
655/*
656 * This function queues one IB packet onto the send DMA queue per call.
657 * The caller is responsible for checking:
658 * 1) The number of send DMA descriptor entries is less than the size of
659 * the descriptor queue.
660 * 2) The IB SGE addresses and lengths are 32-bit aligned
661 * (except possibly the last SGE's length)
662 * 3) The SGE addresses are suitable for passing to dma_map_single().
663 */
664int ipath_sdma_verbs_send(struct ipath_devdata *dd,
665 struct ipath_sge_state *ss, u32 dwords,
666 struct ipath_verbs_txreq *tx)
667{
668
669 unsigned long flags;
670 struct ipath_sge *sge;
671 int ret = 0;
672 u16 tail;
673 __le64 *descqp;
674 u64 sdmadesc[2];
675 u32 dwoffset;
676 dma_addr_t addr;
677
678 if ((tx->map_len + (dwords<<2)) > dd->ipath_ibmaxlen) {
679 ipath_dbg("packet size %X > ibmax %X, fail\n",
680 tx->map_len + (dwords<<2), dd->ipath_ibmaxlen);
681 ret = -EMSGSIZE;
682 goto fail;
683 }
684
685 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
686
687retry:
688 if (unlikely(test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status))) {
689 ret = -EBUSY;
690 goto unlock;
691 }
692
693 if (tx->txreq.sg_count > ipath_sdma_descq_freecnt(dd)) {
694 if (ipath_sdma_make_progress(dd))
695 goto retry;
696 ret = -ENOBUFS;
697 goto unlock;
698 }
699
700 addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr,
701 tx->map_len, DMA_TO_DEVICE);
702 if (dma_mapping_error(&dd->pcidev->dev, addr))
703 goto ioerr;
704
705 dwoffset = tx->map_len >> 2;
706 make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0);
707
708 /* SDmaFirstDesc */
709 sdmadesc[0] |= 1ULL << 12;
710 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
711 sdmadesc[0] |= 1ULL << 14; /* SDmaUseLargeBuf */
712
713 /* write to the descq */
714 tail = dd->ipath_sdma_descq_tail;
715 descqp = &dd->ipath_sdma_descq[tail].qw[0];
716 *descqp++ = cpu_to_le64(sdmadesc[0]);
717 *descqp++ = cpu_to_le64(sdmadesc[1]);
718
719 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEDESC)
720 tx->txreq.start_idx = tail;
721
722 /* increment the tail */
723 if (++tail == dd->ipath_sdma_descq_cnt) {
724 tail = 0;
725 descqp = &dd->ipath_sdma_descq[0].qw[0];
726 ++dd->ipath_sdma_generation;
727 }
728
729 sge = &ss->sge;
730 while (dwords) {
731 u32 dw;
732 u32 len;
733
734 len = dwords << 2;
735 if (len > sge->length)
736 len = sge->length;
737 if (len > sge->sge_length)
738 len = sge->sge_length;
739 BUG_ON(len == 0);
740 dw = (len + 3) >> 2;
741 addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2,
742 DMA_TO_DEVICE);
743 if (dma_mapping_error(&dd->pcidev->dev, addr))
744 goto unmap;
745 make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset);
746 /* SDmaUseLargeBuf has to be set in every descriptor */
747 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
748 sdmadesc[0] |= 1ULL << 14;
749 /* write to the descq */
750 *descqp++ = cpu_to_le64(sdmadesc[0]);
751 *descqp++ = cpu_to_le64(sdmadesc[1]);
752
753 /* increment the tail */
754 if (++tail == dd->ipath_sdma_descq_cnt) {
755 tail = 0;
756 descqp = &dd->ipath_sdma_descq[0].qw[0];
757 ++dd->ipath_sdma_generation;
758 }
759 sge->vaddr += len;
760 sge->length -= len;
761 sge->sge_length -= len;
762 if (sge->sge_length == 0) {
763 if (--ss->num_sge)
764 *sge = *ss->sg_list++;
765 } else if (sge->length == 0 && sge->mr != NULL) {
766 if (++sge->n >= IPATH_SEGSZ) {
767 if (++sge->m >= sge->mr->mapsz)
768 break;
769 sge->n = 0;
770 }
771 sge->vaddr =
772 sge->mr->map[sge->m]->segs[sge->n].vaddr;
773 sge->length =
774 sge->mr->map[sge->m]->segs[sge->n].length;
775 }
776
777 dwoffset += dw;
778 dwords -= dw;
779 }
780
781 if (!tail)
782 descqp = &dd->ipath_sdma_descq[dd->ipath_sdma_descq_cnt].qw[0];
783 descqp -= 2;
784 /* SDmaLastDesc */
785 descqp[0] |= cpu_to_le64(1ULL << 11);
786 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_INTREQ) {
787 /* SDmaIntReq */
788 descqp[0] |= cpu_to_le64(1ULL << 15);
789 }
790
791 /* Commit writes to memory and advance the tail on the chip */
792 wmb();
793 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
794
795 tx->txreq.next_descq_idx = tail;
796 tx->txreq.callback_status = IPATH_SDMA_TXREQ_S_OK;
797 dd->ipath_sdma_descq_tail = tail;
798 dd->ipath_sdma_descq_added += tx->txreq.sg_count;
799 list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist);
800 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15)
801 vl15_watchdog_enq(dd);
802 goto unlock;
803
804unmap:
805 while (tail != dd->ipath_sdma_descq_tail) {
806 if (!tail)
807 tail = dd->ipath_sdma_descq_cnt - 1;
808 else
809 tail--;
810 unmap_desc(dd, tail);
811 }
812ioerr:
813 ret = -EIO;
814unlock:
815 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
816fail:
817 return ret;
818}
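ipath_sdma_verbs_send() checks ipath_sdma_descq_freecnt(), which is defined elsewhere in the driver rather than in this file. Conceptually it reports how many descriptor slots are still usable; a rough paraphrase under that assumption (not the removed header's exact code):

#include <assert.h>

/* Simplified model of the descriptor-queue accounting fields. */
struct sdma_counts {
	unsigned cnt;		/* total descriptors in the ring */
	unsigned added;		/* descriptors ever queued */
	unsigned removed;	/* descriptors ever completed */
};

/* Roughly what ipath_sdma_descq_freecnt() computes: free slots, keeping one
 * descriptor unused so a full ring is distinguishable from an empty one. */
static unsigned freecnt(const struct sdma_counts *s)
{
	return s->cnt - (s->added - s->removed) - 1;
}

int main(void)
{
	struct sdma_counts s = { .cnt = 256, .added = 300, .removed = 200 };

	assert(freecnt(&s) == 155);
	return 0;
}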
diff --git a/drivers/staging/rdma/ipath/ipath_srq.c b/drivers/staging/rdma/ipath/ipath_srq.c
deleted file mode 100644
index 26271984b717..000000000000
--- a/drivers/staging/rdma/ipath/ipath_srq.c
+++ /dev/null
@@ -1,380 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/err.h>
35#include <linux/slab.h>
36#include <linux/vmalloc.h>
37
38#include "ipath_verbs.h"
39
40/**
41 * ipath_post_srq_receive - post a receive on a shared receive queue
42 * @ibsrq: the SRQ to post the receive on
43 * @wr: the list of work requests to post
44 * @bad_wr: the first WR to cause a problem is put here
45 *
46 * This may be called from interrupt context.
47 */
48int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
49 struct ib_recv_wr **bad_wr)
50{
51 struct ipath_srq *srq = to_isrq(ibsrq);
52 struct ipath_rwq *wq;
53 unsigned long flags;
54 int ret;
55
56 for (; wr; wr = wr->next) {
57 struct ipath_rwqe *wqe;
58 u32 next;
59 int i;
60
61 if ((unsigned) wr->num_sge > srq->rq.max_sge) {
62 *bad_wr = wr;
63 ret = -EINVAL;
64 goto bail;
65 }
66
67 spin_lock_irqsave(&srq->rq.lock, flags);
68 wq = srq->rq.wq;
69 next = wq->head + 1;
70 if (next >= srq->rq.size)
71 next = 0;
72 if (next == wq->tail) {
73 spin_unlock_irqrestore(&srq->rq.lock, flags);
74 *bad_wr = wr;
75 ret = -ENOMEM;
76 goto bail;
77 }
78
79 wqe = get_rwqe_ptr(&srq->rq, wq->head);
80 wqe->wr_id = wr->wr_id;
81 wqe->num_sge = wr->num_sge;
82 for (i = 0; i < wr->num_sge; i++)
83 wqe->sg_list[i] = wr->sg_list[i];
84 /* Make sure queue entry is written before the head index. */
85 smp_wmb();
86 wq->head = next;
87 spin_unlock_irqrestore(&srq->rq.lock, flags);
88 }
89 ret = 0;
90
91bail:
92 return ret;
93}
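The head/tail test in ipath_post_srq_receive() is the classic one-slot-open circular queue: if advancing head would land on tail, the queue is treated as full, so a ring of size N holds at most N - 1 entries. A minimal sketch of the same check (hypothetical names, not driver code):

#include <assert.h>

/* Nonzero if a ring of `size` slots with the given head/tail is full,
 * using the same "next == tail" convention as ipath_post_srq_receive(). */
static int ring_full(unsigned head, unsigned tail, unsigned size)
{
	unsigned next = head + 1;

	if (next >= size)
		next = 0;
	return next == tail;
}

int main(void)
{
	assert(!ring_full(0, 0, 4));	/* empty */
	assert(ring_full(3, 0, 4));	/* three entries queued: full at size 4 */
	return 0;
}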
94
95/**
96 * ipath_create_srq - create a shared receive queue
97 * @ibpd: the protection domain of the SRQ to create
98 * @srq_init_attr: the attributes of the SRQ
99 * @udata: data from libipathverbs when creating a user SRQ
100 */
101struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
102 struct ib_srq_init_attr *srq_init_attr,
103 struct ib_udata *udata)
104{
105 struct ipath_ibdev *dev = to_idev(ibpd->device);
106 struct ipath_srq *srq;
107 u32 sz;
108 struct ib_srq *ret;
109
110 if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
111 ret = ERR_PTR(-ENOSYS);
112 goto done;
113 }
114
115 if (srq_init_attr->attr.max_wr == 0) {
116 ret = ERR_PTR(-EINVAL);
117 goto done;
118 }
119
120 if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
121 (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
122 ret = ERR_PTR(-EINVAL);
123 goto done;
124 }
125
126 srq = kmalloc(sizeof(*srq), GFP_KERNEL);
127 if (!srq) {
128 ret = ERR_PTR(-ENOMEM);
129 goto done;
130 }
131
132 /*
133 * Need to use vmalloc() if we want to support large #s of entries.
134 */
135 srq->rq.size = srq_init_attr->attr.max_wr + 1;
136 srq->rq.max_sge = srq_init_attr->attr.max_sge;
137 sz = sizeof(struct ib_sge) * srq->rq.max_sge +
138 sizeof(struct ipath_rwqe);
139 srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
140 if (!srq->rq.wq) {
141 ret = ERR_PTR(-ENOMEM);
142 goto bail_srq;
143 }
144
145 /*
146 * Return the address of the RWQ as the offset to mmap.
147 * See ipath_mmap() for details.
148 */
149 if (udata && udata->outlen >= sizeof(__u64)) {
150 int err;
151 u32 s = sizeof(struct ipath_rwq) + srq->rq.size * sz;
152
153 srq->ip =
154 ipath_create_mmap_info(dev, s,
155 ibpd->uobject->context,
156 srq->rq.wq);
157 if (!srq->ip) {
158 ret = ERR_PTR(-ENOMEM);
159 goto bail_wq;
160 }
161
162 err = ib_copy_to_udata(udata, &srq->ip->offset,
163 sizeof(srq->ip->offset));
164 if (err) {
165 ret = ERR_PTR(err);
166 goto bail_ip;
167 }
168 } else
169 srq->ip = NULL;
170
171 /*
172 * ib_create_srq() will initialize srq->ibsrq.
173 */
174 spin_lock_init(&srq->rq.lock);
175 srq->rq.wq->head = 0;
176 srq->rq.wq->tail = 0;
177 srq->limit = srq_init_attr->attr.srq_limit;
178
179 spin_lock(&dev->n_srqs_lock);
180 if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
181 spin_unlock(&dev->n_srqs_lock);
182 ret = ERR_PTR(-ENOMEM);
183 goto bail_ip;
184 }
185
186 dev->n_srqs_allocated++;
187 spin_unlock(&dev->n_srqs_lock);
188
189 if (srq->ip) {
190 spin_lock_irq(&dev->pending_lock);
191 list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
192 spin_unlock_irq(&dev->pending_lock);
193 }
194
195 ret = &srq->ibsrq;
196 goto done;
197
198bail_ip:
199 kfree(srq->ip);
200bail_wq:
201 vfree(srq->rq.wq);
202bail_srq:
203 kfree(srq);
204done:
205 return ret;
206}
207
208/**
209 * ipath_modify_srq - modify a shared receive queue
210 * @ibsrq: the SRQ to modify
211 * @attr: the new attributes of the SRQ
212 * @attr_mask: indicates which attributes to modify
213 * @udata: user data for ipathverbs.so
214 */
215int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
216 enum ib_srq_attr_mask attr_mask,
217 struct ib_udata *udata)
218{
219 struct ipath_srq *srq = to_isrq(ibsrq);
220 struct ipath_rwq *wq;
221 int ret = 0;
222
223 if (attr_mask & IB_SRQ_MAX_WR) {
224 struct ipath_rwq *owq;
225 struct ipath_rwqe *p;
226 u32 sz, size, n, head, tail;
227
228 /* Check that the requested sizes are below the limits. */
229 if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
230 ((attr_mask & IB_SRQ_LIMIT) ?
231 attr->srq_limit : srq->limit) > attr->max_wr) {
232 ret = -EINVAL;
233 goto bail;
234 }
235
236 sz = sizeof(struct ipath_rwqe) +
237 srq->rq.max_sge * sizeof(struct ib_sge);
238 size = attr->max_wr + 1;
239 wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
240 if (!wq) {
241 ret = -ENOMEM;
242 goto bail;
243 }
244
245 /* Check that we can write the offset to mmap. */
246 if (udata && udata->inlen >= sizeof(__u64)) {
247 __u64 offset_addr;
248 __u64 offset = 0;
249
250 ret = ib_copy_from_udata(&offset_addr, udata,
251 sizeof(offset_addr));
252 if (ret)
253 goto bail_free;
254 udata->outbuf =
255 (void __user *) (unsigned long) offset_addr;
256 ret = ib_copy_to_udata(udata, &offset,
257 sizeof(offset));
258 if (ret)
259 goto bail_free;
260 }
261
262 spin_lock_irq(&srq->rq.lock);
263 /*
264 * validate head pointer value and compute
265 * the number of remaining WQEs.
266 */
267 owq = srq->rq.wq;
268 head = owq->head;
269 if (head >= srq->rq.size)
270 head = 0;
271 tail = owq->tail;
272 if (tail >= srq->rq.size)
273 tail = 0;
274 n = head;
275 if (n < tail)
276 n += srq->rq.size - tail;
277 else
278 n -= tail;
279 if (size <= n) {
280 ret = -EINVAL;
281 goto bail_unlock;
282 }
283 n = 0;
284 p = wq->wq;
285 while (tail != head) {
286 struct ipath_rwqe *wqe;
287 int i;
288
289 wqe = get_rwqe_ptr(&srq->rq, tail);
290 p->wr_id = wqe->wr_id;
291 p->num_sge = wqe->num_sge;
292 for (i = 0; i < wqe->num_sge; i++)
293 p->sg_list[i] = wqe->sg_list[i];
294 n++;
295 p = (struct ipath_rwqe *)((char *) p + sz);
296 if (++tail >= srq->rq.size)
297 tail = 0;
298 }
299 srq->rq.wq = wq;
300 srq->rq.size = size;
301 wq->head = n;
302 wq->tail = 0;
303 if (attr_mask & IB_SRQ_LIMIT)
304 srq->limit = attr->srq_limit;
305 spin_unlock_irq(&srq->rq.lock);
306
307 vfree(owq);
308
309 if (srq->ip) {
310 struct ipath_mmap_info *ip = srq->ip;
311 struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
312 u32 s = sizeof(struct ipath_rwq) + size * sz;
313
314 ipath_update_mmap_info(dev, ip, s, wq);
315
316 /*
317 * Return the offset to mmap.
318 * See ipath_mmap() for details.
319 */
320 if (udata && udata->inlen >= sizeof(__u64)) {
321 ret = ib_copy_to_udata(udata, &ip->offset,
322 sizeof(ip->offset));
323 if (ret)
324 goto bail;
325 }
326
327 spin_lock_irq(&dev->pending_lock);
328 if (list_empty(&ip->pending_mmaps))
329 list_add(&ip->pending_mmaps,
330 &dev->pending_mmaps);
331 spin_unlock_irq(&dev->pending_lock);
332 }
333 } else if (attr_mask & IB_SRQ_LIMIT) {
334 spin_lock_irq(&srq->rq.lock);
335 if (attr->srq_limit >= srq->rq.size)
336 ret = -EINVAL;
337 else
338 srq->limit = attr->srq_limit;
339 spin_unlock_irq(&srq->rq.lock);
340 }
341 goto bail;
342
343bail_unlock:
344 spin_unlock_irq(&srq->rq.lock);
345bail_free:
346 vfree(wq);
347bail:
348 return ret;
349}
350
351int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
352{
353 struct ipath_srq *srq = to_isrq(ibsrq);
354
355 attr->max_wr = srq->rq.size - 1;
356 attr->max_sge = srq->rq.max_sge;
357 attr->srq_limit = srq->limit;
358 return 0;
359}
360
361/**
362 * ipath_destroy_srq - destroy a shared receive queue
363 * @ibsrq: the SRQ to destroy
364 */
365int ipath_destroy_srq(struct ib_srq *ibsrq)
366{
367 struct ipath_srq *srq = to_isrq(ibsrq);
368 struct ipath_ibdev *dev = to_idev(ibsrq->device);
369
370 spin_lock(&dev->n_srqs_lock);
371 dev->n_srqs_allocated--;
372 spin_unlock(&dev->n_srqs_lock);
373 if (srq->ip)
374 kref_put(&srq->ip->ref, ipath_release_mmap_info);
375 else
376 vfree(srq->rq.wq);
377 kfree(srq);
378
379 return 0;
380}
diff --git a/drivers/staging/rdma/ipath/ipath_stats.c b/drivers/staging/rdma/ipath/ipath_stats.c
deleted file mode 100644
index f63e143e3292..000000000000
--- a/drivers/staging/rdma/ipath/ipath_stats.c
+++ /dev/null
@@ -1,347 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include "ipath_kernel.h"
35
36struct infinipath_stats ipath_stats;
37
38/**
39 * ipath_snap_cntr - snapshot a chip counter
40 * @dd: the infinipath device
41 * @creg: the counter to snapshot
42 *
43 * called from add_timer and user counter read calls, to deal with
44 * counters that wrap in "human time". The words sent and received, and
45 * the packets sent and received are all that we worry about. For now,
46 * at least, we don't worry about error counters, because if they wrap
47 * that quickly, we probably don't care. We may eventually just make this
48 * handle all the counters. Word counters can wrap in about 20 seconds
49 * of full-bandwidth traffic, packet counters in a few hours.
50 */
51
52u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
53{
54 u32 val, reg64 = 0;
55 u64 val64;
56 unsigned long t0, t1;
57 u64 ret;
58
59 t0 = jiffies;
60	/* If fast-increment counters are only 32 bits, snapshot them
61	 * and maintain them as 64-bit values in the driver. */
62 if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) &&
63 (creg == dd->ipath_cregs->cr_wordsendcnt ||
64 creg == dd->ipath_cregs->cr_wordrcvcnt ||
65 creg == dd->ipath_cregs->cr_pktsendcnt ||
66 creg == dd->ipath_cregs->cr_pktrcvcnt)) {
67 val64 = ipath_read_creg(dd, creg);
68 val = val64 == ~0ULL ? ~0U : 0;
69 reg64 = 1;
70 } else /* val64 just to keep gcc quiet... */
71 val64 = val = ipath_read_creg32(dd, creg);
72 /*
73 * See if a second has passed. This is just a way to detect things
74 * that are quite broken. Normally this should take just a few
75	 * cycles (the check is for long enough that we don't care if we get
76	 * pre-empted). An Opteron HT O read timeout is 4 seconds with
77	 * normal NB values.
78 */
79 t1 = jiffies;
80 if (time_before(t0 + HZ, t1) && val == -1) {
81 ipath_dev_err(dd, "Error! Read counter 0x%x timed out\n",
82 creg);
83 ret = 0ULL;
84 goto bail;
85 }
86 if (reg64) {
87 ret = val64;
88 goto bail;
89 }
90
91 if (creg == dd->ipath_cregs->cr_wordsendcnt) {
92 if (val != dd->ipath_lastsword) {
93 dd->ipath_sword += val - dd->ipath_lastsword;
94 dd->ipath_lastsword = val;
95 }
96 val64 = dd->ipath_sword;
97 } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
98 if (val != dd->ipath_lastrword) {
99 dd->ipath_rword += val - dd->ipath_lastrword;
100 dd->ipath_lastrword = val;
101 }
102 val64 = dd->ipath_rword;
103 } else if (creg == dd->ipath_cregs->cr_pktsendcnt) {
104 if (val != dd->ipath_lastspkts) {
105 dd->ipath_spkts += val - dd->ipath_lastspkts;
106 dd->ipath_lastspkts = val;
107 }
108 val64 = dd->ipath_spkts;
109 } else if (creg == dd->ipath_cregs->cr_pktrcvcnt) {
110 if (val != dd->ipath_lastrpkts) {
111 dd->ipath_rpkts += val - dd->ipath_lastrpkts;
112 dd->ipath_lastrpkts = val;
113 }
114 val64 = dd->ipath_rpkts;
115 } else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) {
116 if (dd->ibdeltainprog)
117 val64 -= val64 - dd->ibsymsnap;
118 val64 -= dd->ibsymdelta;
119 } else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) {
120 if (dd->ibdeltainprog)
121 val64 -= val64 - dd->iblnkerrsnap;
122 val64 -= dd->iblnkerrdelta;
123 } else
124 val64 = (u64) val;
125
126 ret = val64;
127
128bail:
129 return ret;
130}
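For the 32-bit fast counters, the accumulation branches above rely on unsigned wrap-around: the delta (new - last) is still correct when the hardware counter has wrapped once since the previous snapshot. A small standalone illustration (hypothetical helper, assuming at most one wrap between snapshots):

#include <assert.h>
#include <stdint.h>

/* Fold a 32-bit hardware counter reading into a 64-bit software total,
 * as ipath_snap_cntr() does for the word and packet counters. */
static void accumulate(uint64_t *total, uint32_t *last, uint32_t now)
{
	*total += (uint32_t)(now - *last);	/* wrap-safe unsigned delta */
	*last = now;
}

int main(void)
{
	uint64_t total = 0;
	uint32_t last = 0xfffffff0u;

	accumulate(&total, &last, 0x10);	/* counter wrapped past 2^32 */
	assert(total == 0x20);
	assert(last == 0x10);
	return 0;
}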
131
132/**
133 * ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports
134 * @dd: the infinipath device
135 *
136 * Print the delta of egrfull/hdrqfull errors for kernel ports no more than
137 * every 5 seconds. User processes are printed at close, but the kernel doesn't
138 * close, so... Separate routine so it may be called from other places someday,
139 * and so the function name is meaningful when printed by _IPATH_INFO.
140 */
141static void ipath_qcheck(struct ipath_devdata *dd)
142{
143 static u64 last_tot_hdrqfull;
144 struct ipath_portdata *pd = dd->ipath_pd[0];
145 size_t blen = 0;
146 char buf[128];
147 u32 hdrqtail;
148
149 *buf = 0;
150 if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) {
151 blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u",
152 pd->port_hdrqfull -
153 dd->ipath_p0_hdrqfull);
154 dd->ipath_p0_hdrqfull = pd->port_hdrqfull;
155 }
156 if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) {
157 blen += snprintf(buf + blen, sizeof buf - blen,
158 "%srcvegrfull %llu",
159 blen ? ", " : "",
160 (unsigned long long)
161 (ipath_stats.sps_etidfull -
162 dd->ipath_last_tidfull));
163 dd->ipath_last_tidfull = ipath_stats.sps_etidfull;
164 }
165
166 /*
167 * this is actually the number of hdrq full interrupts, not actual
168 * events, but at the moment that's mostly what I'm interested in.
169 * Actual count, etc. is in the counters, if needed. For production
170 * users this won't ordinarily be printed.
171 */
172
173 if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) &&
174 ipath_stats.sps_hdrqfull != last_tot_hdrqfull) {
175 blen += snprintf(buf + blen, sizeof buf - blen,
176 "%shdrqfull %llu (all ports)",
177 blen ? ", " : "",
178 (unsigned long long)
179 (ipath_stats.sps_hdrqfull -
180 last_tot_hdrqfull));
181 last_tot_hdrqfull = ipath_stats.sps_hdrqfull;
182 }
183 if (blen)
184 ipath_dbg("%s\n", buf);
185
186 hdrqtail = ipath_get_hdrqtail(pd);
187 if (pd->port_head != hdrqtail) {
188 if (dd->ipath_lastport0rcv_cnt ==
189 ipath_stats.sps_port0pkts) {
190 ipath_cdbg(PKT, "missing rcv interrupts? "
191 "port0 hd=%x tl=%x; port0pkts %llx; write"
192 " hd (w/intr)\n",
193 pd->port_head, hdrqtail,
194 (unsigned long long)
195 ipath_stats.sps_port0pkts);
196 ipath_write_ureg(dd, ur_rcvhdrhead, hdrqtail |
197 dd->ipath_rhdrhead_intr_off, pd->port_port);
198 }
199 dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts;
200 }
201}
202
203static void ipath_chk_errormask(struct ipath_devdata *dd)
204{
205 static u32 fixed;
206 u32 ctrl;
207 unsigned long errormask;
208 unsigned long hwerrs;
209
210 if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
211 return;
212
213 errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
214
215 if (errormask == dd->ipath_errormask)
216 return;
217 fixed++;
218
219 hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
220 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
221
222 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
223 dd->ipath_errormask);
224
225 if ((hwerrs & dd->ipath_hwerrmask) ||
226 (ctrl & INFINIPATH_C_FREEZEMODE)) {
227 /* force re-interrupt of pending events, just in case */
228 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
229 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
230 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
231 dev_info(&dd->pcidev->dev,
232 "errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n",
233 fixed, errormask, (unsigned long)dd->ipath_errormask,
234 ctrl, hwerrs);
235 } else
236 ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n",
237 fixed, errormask,
238 (unsigned long)dd->ipath_errormask);
239}
240
241
242/**
243 * ipath_get_faststats - get word counters from chip before they overflow
244 * @opaque - contains a pointer to the infinipath device ipath_devdata
245 *
246 * called from add_timer
247 */
248void ipath_get_faststats(unsigned long opaque)
249{
250 struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
251 int i;
252 static unsigned cnt;
253 unsigned long flags;
254 u64 traffic_wds;
255
256 /*
257 * don't access the chip while running diags, or memory diags can
258 * fail
259 */
260 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
261 ipath_diag_inuse)
262		/* but re-arm the timer for the diags case; it won't hurt otherwise */
263 goto done;
264
265 /*
266	 * We now try to maintain an "active timer", based on traffic
267 * exceeding a threshold, so we need to check the word-counts
268 * even if they are 64-bit.
269 */
270 traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) +
271 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
272 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
273 traffic_wds -= dd->ipath_traffic_wds;
274 dd->ipath_traffic_wds += traffic_wds;
275 if (traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
276 atomic_add(5, &dd->ipath_active_time); /* S/B #define */
277 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
278
279 if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
280 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
281 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
282 }
283
284 ipath_qcheck(dd);
285
286 /*
287 * deal with repeat error suppression. Doesn't really matter if
288 * last error was almost a full interval ago, or just a few usecs
289 * ago; still won't get more than 2 per interval. We may want
290 * longer intervals for this eventually, could do with mod, counter
291 * or separate timer. Also see code in ipath_handle_errors() and
292 * ipath_handle_hwerrors().
293 */
294
295 if (dd->ipath_lasterror)
296 dd->ipath_lasterror = 0;
297 if (dd->ipath_lasthwerror)
298 dd->ipath_lasthwerror = 0;
299 if (dd->ipath_maskederrs
300 && time_after(jiffies, dd->ipath_unmasktime)) {
301 char ebuf[256];
302 int iserr;
303 iserr = ipath_decode_err(dd, ebuf, sizeof ebuf,
304 dd->ipath_maskederrs);
305 if (dd->ipath_maskederrs &
306 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
307 INFINIPATH_E_PKTERRS))
308 ipath_dev_err(dd, "Re-enabling masked errors "
309 "(%s)\n", ebuf);
310 else {
311 /*
312 * rcvegrfull and rcvhdrqfull are "normal", for some
313 * types of processes (mostly benchmarks) that send
314 * huge numbers of messages, while not processing
315 * them. So only complain about these at debug
316 * level.
317 */
318 if (iserr)
319 ipath_dbg(
320 "Re-enabling queue full errors (%s)\n",
321 ebuf);
322 else
323 ipath_cdbg(ERRPKT, "Re-enabling packet"
324 " problem interrupt (%s)\n", ebuf);
325 }
326
327 /* re-enable masked errors */
328 dd->ipath_errormask |= dd->ipath_maskederrs;
329 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
330 dd->ipath_errormask);
331 dd->ipath_maskederrs = 0;
332 }
333
334 /* limit qfull messages to ~one per minute per port */
335 if ((++cnt & 0x10)) {
336 for (i = (int) dd->ipath_cfgports; --i >= 0; ) {
337 struct ipath_portdata *pd = dd->ipath_pd[i];
338
339 if (pd && pd->port_lastrcvhdrqtail != -1)
340 pd->port_lastrcvhdrqtail = -1;
341 }
342 }
343
344 ipath_chk_errormask(dd);
345done:
346 mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
347}
diff --git a/drivers/staging/rdma/ipath/ipath_sysfs.c b/drivers/staging/rdma/ipath/ipath_sysfs.c
deleted file mode 100644
index b12b1f6caf59..000000000000
--- a/drivers/staging/rdma/ipath/ipath_sysfs.c
+++ /dev/null
@@ -1,1237 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/ctype.h>
35#include <linux/stat.h>
36
37#include "ipath_kernel.h"
38#include "ipath_verbs.h"
39#include "ipath_common.h"
40
41/**
42 * ipath_parse_ushort - parse an unsigned short value in an arbitrary base
43 * @str: the string containing the number
44 * @valp: where to put the result
45 *
46 * returns the number of bytes consumed, or negative value on error
47 */
48int ipath_parse_ushort(const char *str, unsigned short *valp)
49{
50 unsigned long val;
51 char *end;
52 int ret;
53
54 if (!isdigit(str[0])) {
55 ret = -EINVAL;
56 goto bail;
57 }
58
59 val = simple_strtoul(str, &end, 0);
60
61 if (val > 0xffff) {
62 ret = -EINVAL;
63 goto bail;
64 }
65
66 *valp = val;
67
68 ret = end + 1 - str;
69 if (ret == 0)
70 ret = -EINVAL;
71
72bail:
73 return ret;
74}
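On success ipath_parse_ushort() returns the number of bytes consumed, which the store_* handlers below hand straight back to sysfs; on failure it returns a negative errno. A short usage sketch (hypothetical caller, not part of the original file):

/* Hypothetical caller: parse a LID written as e.g. "0x1f\n", falling back
 * to a default on any parse error. */
static u16 parse_lid_or_default(const char *buf, u16 dflt)
{
	u16 lid;

	if (ipath_parse_ushort(buf, &lid) < 0)
		return dflt;
	return lid;
}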
75
76static ssize_t show_version(struct device_driver *dev, char *buf)
77{
78 /* The string printed here is already newline-terminated. */
79 return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version);
80}
81
82static ssize_t show_num_units(struct device_driver *dev, char *buf)
83{
84 return scnprintf(buf, PAGE_SIZE, "%d\n",
85 ipath_count_units(NULL, NULL, NULL));
86}
87
88static ssize_t show_status(struct device *dev,
89 struct device_attribute *attr,
90 char *buf)
91{
92 struct ipath_devdata *dd = dev_get_drvdata(dev);
93 ssize_t ret;
94
95 if (!dd->ipath_statusp) {
96 ret = -EINVAL;
97 goto bail;
98 }
99
100 ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n",
101 (unsigned long long) *(dd->ipath_statusp));
102
103bail:
104 return ret;
105}
106
107static const char *ipath_status_str[] = {
108 "Initted",
109 "Disabled",
110 "Admin_Disabled",
111 "", /* This used to be the old "OIB_SMA" status. */
112 "", /* This used to be the old "SMA" status. */
113 "Present",
114 "IB_link_up",
115 "IB_configured",
116 "NoIBcable",
117 "Fatal_Hardware_Error",
118 NULL,
119};
120
121static ssize_t show_status_str(struct device *dev,
122 struct device_attribute *attr,
123 char *buf)
124{
125 struct ipath_devdata *dd = dev_get_drvdata(dev);
126 int i, any;
127 u64 s;
128 ssize_t ret;
129
130 if (!dd->ipath_statusp) {
131 ret = -EINVAL;
132 goto bail;
133 }
134
135 s = *(dd->ipath_statusp);
136 *buf = '\0';
137 for (any = i = 0; s && ipath_status_str[i]; i++) {
138 if (s & 1) {
139 if (any && strlcat(buf, " ", PAGE_SIZE) >=
140 PAGE_SIZE)
141 /* overflow */
142 break;
143 if (strlcat(buf, ipath_status_str[i],
144 PAGE_SIZE) >= PAGE_SIZE)
145 break;
146 any = 1;
147 }
148 s >>= 1;
149 }
150 if (any)
151 strlcat(buf, "\n", PAGE_SIZE);
152
153 ret = strlen(buf);
154
155bail:
156 return ret;
157}
158
159static ssize_t show_boardversion(struct device *dev,
160 struct device_attribute *attr,
161 char *buf)
162{
163 struct ipath_devdata *dd = dev_get_drvdata(dev);
164 /* The string printed here is already newline-terminated. */
165 return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion);
166}
167
168static ssize_t show_localbus_info(struct device *dev,
169 struct device_attribute *attr,
170 char *buf)
171{
172 struct ipath_devdata *dd = dev_get_drvdata(dev);
173 /* The string printed here is already newline-terminated. */
174 return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_lbus_info);
175}
176
177static ssize_t show_lmc(struct device *dev,
178 struct device_attribute *attr,
179 char *buf)
180{
181 struct ipath_devdata *dd = dev_get_drvdata(dev);
182
183 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_lmc);
184}
185
186static ssize_t store_lmc(struct device *dev,
187 struct device_attribute *attr,
188 const char *buf,
189 size_t count)
190{
191 struct ipath_devdata *dd = dev_get_drvdata(dev);
192 u16 lmc = 0;
193 int ret;
194
195 ret = ipath_parse_ushort(buf, &lmc);
196 if (ret < 0)
197 goto invalid;
198
199 if (lmc > 7) {
200 ret = -EINVAL;
201 goto invalid;
202 }
203
204 ipath_set_lid(dd, dd->ipath_lid, lmc);
205
206 goto bail;
207invalid:
208 ipath_dev_err(dd, "attempt to set invalid LMC %u\n", lmc);
209bail:
210 return ret;
211}
212
213static ssize_t show_lid(struct device *dev,
214 struct device_attribute *attr,
215 char *buf)
216{
217 struct ipath_devdata *dd = dev_get_drvdata(dev);
218
219 return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_lid);
220}
221
222static ssize_t store_lid(struct device *dev,
223 struct device_attribute *attr,
224 const char *buf,
225 size_t count)
226{
227 struct ipath_devdata *dd = dev_get_drvdata(dev);
228 u16 lid = 0;
229 int ret;
230
231 ret = ipath_parse_ushort(buf, &lid);
232 if (ret < 0)
233 goto invalid;
234
235 if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) {
236 ret = -EINVAL;
237 goto invalid;
238 }
239
240 ipath_set_lid(dd, lid, dd->ipath_lmc);
241
242 goto bail;
243invalid:
244 ipath_dev_err(dd, "attempt to set invalid LID 0x%x\n", lid);
245bail:
246 return ret;
247}
248
249static ssize_t show_mlid(struct device *dev,
250 struct device_attribute *attr,
251 char *buf)
252{
253 struct ipath_devdata *dd = dev_get_drvdata(dev);
254
255 return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_mlid);
256}
257
258static ssize_t store_mlid(struct device *dev,
259 struct device_attribute *attr,
260 const char *buf,
261 size_t count)
262{
263 struct ipath_devdata *dd = dev_get_drvdata(dev);
264 u16 mlid;
265 int ret;
266
267 ret = ipath_parse_ushort(buf, &mlid);
268 if (ret < 0 || mlid < IPATH_MULTICAST_LID_BASE)
269 goto invalid;
270
271 dd->ipath_mlid = mlid;
272
273 goto bail;
274invalid:
275 ipath_dev_err(dd, "attempt to set invalid MLID\n");
276bail:
277 return ret;
278}
279
280static ssize_t show_guid(struct device *dev,
281 struct device_attribute *attr,
282 char *buf)
283{
284 struct ipath_devdata *dd = dev_get_drvdata(dev);
285 u8 *guid;
286
287	guid = (u8 *)&dd->ipath_guid;
288
289 return scnprintf(buf, PAGE_SIZE,
290 "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
291 guid[0], guid[1], guid[2], guid[3],
292 guid[4], guid[5], guid[6], guid[7]);
293}
294
295static ssize_t store_guid(struct device *dev,
296 struct device_attribute *attr,
297 const char *buf,
298 size_t count)
299{
300 struct ipath_devdata *dd = dev_get_drvdata(dev);
301 ssize_t ret;
302 unsigned short guid[8];
303 __be64 new_guid;
304 u8 *ng;
305 int i;
306
307 if (sscanf(buf, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx",
308 &guid[0], &guid[1], &guid[2], &guid[3],
309 &guid[4], &guid[5], &guid[6], &guid[7]) != 8)
310 goto invalid;
311
312 ng = (u8 *) &new_guid;
313
314 for (i = 0; i < 8; i++) {
315 if (guid[i] > 0xff)
316 goto invalid;
317 ng[i] = guid[i];
318 }
319
320 if (new_guid == 0)
321 goto invalid;
322
323 dd->ipath_guid = new_guid;
324 dd->ipath_nguid = 1;
325 if (dd->verbs_dev)
326 dd->verbs_dev->ibdev.node_guid = new_guid;
327
328 ret = strlen(buf);
329 goto bail;
330
331invalid:
332 ipath_dev_err(dd, "attempt to set invalid GUID\n");
333 ret = -EINVAL;
334
335bail:
336 return ret;
337}
338
339static ssize_t show_nguid(struct device *dev,
340 struct device_attribute *attr,
341 char *buf)
342{
343 struct ipath_devdata *dd = dev_get_drvdata(dev);
344
345 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid);
346}
347
348static ssize_t show_nports(struct device *dev,
349 struct device_attribute *attr,
350 char *buf)
351{
352 struct ipath_devdata *dd = dev_get_drvdata(dev);
353
354 /* Return the number of user ports available. */
355 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1);
356}
357
358static ssize_t show_serial(struct device *dev,
359 struct device_attribute *attr,
360 char *buf)
361{
362 struct ipath_devdata *dd = dev_get_drvdata(dev);
363
364 buf[sizeof dd->ipath_serial] = '\0';
365 memcpy(buf, dd->ipath_serial, sizeof dd->ipath_serial);
366 strcat(buf, "\n");
367 return strlen(buf);
368}
369
370static ssize_t show_unit(struct device *dev,
371 struct device_attribute *attr,
372 char *buf)
373{
374 struct ipath_devdata *dd = dev_get_drvdata(dev);
375
376 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit);
377}
378
379static ssize_t show_jint_max_packets(struct device *dev,
380 struct device_attribute *attr,
381 char *buf)
382{
383 struct ipath_devdata *dd = dev_get_drvdata(dev);
384
385 return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_max_packets);
386}
387
388static ssize_t store_jint_max_packets(struct device *dev,
389 struct device_attribute *attr,
390 const char *buf,
391 size_t count)
392{
393 struct ipath_devdata *dd = dev_get_drvdata(dev);
394 u16 v = 0;
395 int ret;
396
397 ret = ipath_parse_ushort(buf, &v);
398 if (ret < 0)
399 ipath_dev_err(dd, "invalid jint_max_packets.\n");
400 else
401 dd->ipath_f_config_jint(dd, dd->ipath_jint_idle_ticks, v);
402
403 return ret;
404}
405
406static ssize_t show_jint_idle_ticks(struct device *dev,
407 struct device_attribute *attr,
408 char *buf)
409{
410 struct ipath_devdata *dd = dev_get_drvdata(dev);
411
412 return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_idle_ticks);
413}
414
415static ssize_t store_jint_idle_ticks(struct device *dev,
416 struct device_attribute *attr,
417 const char *buf,
418 size_t count)
419{
420 struct ipath_devdata *dd = dev_get_drvdata(dev);
421 u16 v = 0;
422 int ret;
423
424 ret = ipath_parse_ushort(buf, &v);
425 if (ret < 0)
426 ipath_dev_err(dd, "invalid jint_idle_ticks.\n");
427 else
428 dd->ipath_f_config_jint(dd, v, dd->ipath_jint_max_packets);
429
430 return ret;
431}
432
433#define DEVICE_COUNTER(name, attr) \
434 static ssize_t show_counter_##name(struct device *dev, \
435 struct device_attribute *attr, \
436 char *buf) \
437 { \
438 struct ipath_devdata *dd = dev_get_drvdata(dev); \
439 return scnprintf(\
440 buf, PAGE_SIZE, "%llu\n", (unsigned long long) \
441 ipath_snap_cntr( \
442 dd, offsetof(struct infinipath_counters, \
443 attr) / sizeof(u64))); \
444 } \
445 static DEVICE_ATTR(name, S_IRUGO, show_counter_##name, NULL);
446
447DEVICE_COUNTER(ib_link_downeds, IBLinkDownedCnt);
448DEVICE_COUNTER(ib_link_err_recoveries, IBLinkErrRecoveryCnt);
449DEVICE_COUNTER(ib_status_changes, IBStatusChangeCnt);
450DEVICE_COUNTER(ib_symbol_errs, IBSymbolErrCnt);
451DEVICE_COUNTER(lb_flow_stalls, LBFlowStallCnt);
452DEVICE_COUNTER(lb_ints, LBIntCnt);
453DEVICE_COUNTER(rx_bad_formats, RxBadFormatCnt);
454DEVICE_COUNTER(rx_buf_ovfls, RxBufOvflCnt);
455DEVICE_COUNTER(rx_data_pkts, RxDataPktCnt);
456DEVICE_COUNTER(rx_dropped_pkts, RxDroppedPktCnt);
457DEVICE_COUNTER(rx_dwords, RxDwordCnt);
458DEVICE_COUNTER(rx_ebps, RxEBPCnt);
459DEVICE_COUNTER(rx_flow_ctrl_errs, RxFlowCtrlErrCnt);
460DEVICE_COUNTER(rx_flow_pkts, RxFlowPktCnt);
461DEVICE_COUNTER(rx_icrc_errs, RxICRCErrCnt);
462DEVICE_COUNTER(rx_len_errs, RxLenErrCnt);
463DEVICE_COUNTER(rx_link_problems, RxLinkProblemCnt);
464DEVICE_COUNTER(rx_lpcrc_errs, RxLPCRCErrCnt);
465DEVICE_COUNTER(rx_max_min_len_errs, RxMaxMinLenErrCnt);
466DEVICE_COUNTER(rx_p0_hdr_egr_ovfls, RxP0HdrEgrOvflCnt);
467DEVICE_COUNTER(rx_p1_hdr_egr_ovfls, RxP1HdrEgrOvflCnt);
468DEVICE_COUNTER(rx_p2_hdr_egr_ovfls, RxP2HdrEgrOvflCnt);
469DEVICE_COUNTER(rx_p3_hdr_egr_ovfls, RxP3HdrEgrOvflCnt);
470DEVICE_COUNTER(rx_p4_hdr_egr_ovfls, RxP4HdrEgrOvflCnt);
471DEVICE_COUNTER(rx_p5_hdr_egr_ovfls, RxP5HdrEgrOvflCnt);
472DEVICE_COUNTER(rx_p6_hdr_egr_ovfls, RxP6HdrEgrOvflCnt);
473DEVICE_COUNTER(rx_p7_hdr_egr_ovfls, RxP7HdrEgrOvflCnt);
474DEVICE_COUNTER(rx_p8_hdr_egr_ovfls, RxP8HdrEgrOvflCnt);
475DEVICE_COUNTER(rx_pkey_mismatches, RxPKeyMismatchCnt);
476DEVICE_COUNTER(rx_tid_full_errs, RxTIDFullErrCnt);
477DEVICE_COUNTER(rx_tid_valid_errs, RxTIDValidErrCnt);
478DEVICE_COUNTER(rx_vcrc_errs, RxVCRCErrCnt);
479DEVICE_COUNTER(tx_data_pkts, TxDataPktCnt);
480DEVICE_COUNTER(tx_dropped_pkts, TxDroppedPktCnt);
481DEVICE_COUNTER(tx_dwords, TxDwordCnt);
482DEVICE_COUNTER(tx_flow_pkts, TxFlowPktCnt);
483DEVICE_COUNTER(tx_flow_stalls, TxFlowStallCnt);
484DEVICE_COUNTER(tx_len_errs, TxLenErrCnt);
485DEVICE_COUNTER(tx_max_min_len_errs, TxMaxMinLenErrCnt);
486DEVICE_COUNTER(tx_underruns, TxUnderrunCnt);
487DEVICE_COUNTER(tx_unsup_vl_errs, TxUnsupVLErrCnt);
488
489static struct attribute *dev_counter_attributes[] = {
490 &dev_attr_ib_link_downeds.attr,
491 &dev_attr_ib_link_err_recoveries.attr,
492 &dev_attr_ib_status_changes.attr,
493 &dev_attr_ib_symbol_errs.attr,
494 &dev_attr_lb_flow_stalls.attr,
495 &dev_attr_lb_ints.attr,
496 &dev_attr_rx_bad_formats.attr,
497 &dev_attr_rx_buf_ovfls.attr,
498 &dev_attr_rx_data_pkts.attr,
499 &dev_attr_rx_dropped_pkts.attr,
500 &dev_attr_rx_dwords.attr,
501 &dev_attr_rx_ebps.attr,
502 &dev_attr_rx_flow_ctrl_errs.attr,
503 &dev_attr_rx_flow_pkts.attr,
504 &dev_attr_rx_icrc_errs.attr,
505 &dev_attr_rx_len_errs.attr,
506 &dev_attr_rx_link_problems.attr,
507 &dev_attr_rx_lpcrc_errs.attr,
508 &dev_attr_rx_max_min_len_errs.attr,
509 &dev_attr_rx_p0_hdr_egr_ovfls.attr,
510 &dev_attr_rx_p1_hdr_egr_ovfls.attr,
511 &dev_attr_rx_p2_hdr_egr_ovfls.attr,
512 &dev_attr_rx_p3_hdr_egr_ovfls.attr,
513 &dev_attr_rx_p4_hdr_egr_ovfls.attr,
514 &dev_attr_rx_p5_hdr_egr_ovfls.attr,
515 &dev_attr_rx_p6_hdr_egr_ovfls.attr,
516 &dev_attr_rx_p7_hdr_egr_ovfls.attr,
517 &dev_attr_rx_p8_hdr_egr_ovfls.attr,
518 &dev_attr_rx_pkey_mismatches.attr,
519 &dev_attr_rx_tid_full_errs.attr,
520 &dev_attr_rx_tid_valid_errs.attr,
521 &dev_attr_rx_vcrc_errs.attr,
522 &dev_attr_tx_data_pkts.attr,
523 &dev_attr_tx_dropped_pkts.attr,
524 &dev_attr_tx_dwords.attr,
525 &dev_attr_tx_flow_pkts.attr,
526 &dev_attr_tx_flow_stalls.attr,
527 &dev_attr_tx_len_errs.attr,
528 &dev_attr_tx_max_min_len_errs.attr,
529 &dev_attr_tx_underruns.attr,
530 &dev_attr_tx_unsup_vl_errs.attr,
531 NULL
532};
533
534static struct attribute_group dev_counter_attr_group = {
535 .name = "counters",
536 .attrs = dev_counter_attributes
537};
538
539static ssize_t store_reset(struct device *dev,
540 struct device_attribute *attr,
541 const char *buf,
542 size_t count)
543{
544 struct ipath_devdata *dd = dev_get_drvdata(dev);
545 int ret;
546
547 if (count < 5 || memcmp(buf, "reset", 5)) {
548 ret = -EINVAL;
549 goto bail;
550 }
551
552 if (dd->ipath_flags & IPATH_DISABLED) {
553		/*
554		 * Post-reset init would re-enable interrupts, etc.,
555		 * so don't allow a reset on disabled devices. -EINVAL is
556		 * not a perfect error code, but it is about the best choice.
557		 */
558		dev_info(dev, "Unit %d is disabled, can't reset\n",
559			 dd->ipath_unit);
560 ret = -EINVAL;
561 goto bail;
562 }
563 ret = ipath_reset_device(dd->ipath_unit);
564bail:
565	return ret < 0 ? ret : count;
566}
567
568static ssize_t store_link_state(struct device *dev,
569 struct device_attribute *attr,
570 const char *buf,
571 size_t count)
572{
573 struct ipath_devdata *dd = dev_get_drvdata(dev);
574 int ret, r;
575 u16 state;
576
577 ret = ipath_parse_ushort(buf, &state);
578 if (ret < 0)
579 goto invalid;
580
581 r = ipath_set_linkstate(dd, state);
582 if (r < 0) {
583 ret = r;
584 goto bail;
585 }
586
587 goto bail;
588invalid:
589 ipath_dev_err(dd, "attempt to set invalid link state\n");
590bail:
591 return ret;
592}
593
594static ssize_t show_mtu(struct device *dev,
595 struct device_attribute *attr,
596 char *buf)
597{
598 struct ipath_devdata *dd = dev_get_drvdata(dev);
599 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_ibmtu);
600}
601
602static ssize_t store_mtu(struct device *dev,
603 struct device_attribute *attr,
604 const char *buf,
605 size_t count)
606{
607 struct ipath_devdata *dd = dev_get_drvdata(dev);
608 ssize_t ret;
609 u16 mtu = 0;
610 int r;
611
612 ret = ipath_parse_ushort(buf, &mtu);
613 if (ret < 0)
614 goto invalid;
615
616 r = ipath_set_mtu(dd, mtu);
617 if (r < 0)
618 ret = r;
619
620 goto bail;
621invalid:
622 ipath_dev_err(dd, "attempt to set invalid MTU\n");
623bail:
624 return ret;
625}
626
627static ssize_t show_enabled(struct device *dev,
628 struct device_attribute *attr,
629 char *buf)
630{
631 struct ipath_devdata *dd = dev_get_drvdata(dev);
632 return scnprintf(buf, PAGE_SIZE, "%u\n",
633 (dd->ipath_flags & IPATH_DISABLED) ? 0 : 1);
634}
635
636static ssize_t store_enabled(struct device *dev,
637 struct device_attribute *attr,
638 const char *buf,
639 size_t count)
640{
641 struct ipath_devdata *dd = dev_get_drvdata(dev);
642 ssize_t ret;
643 u16 enable = 0;
644
645 ret = ipath_parse_ushort(buf, &enable);
646 if (ret < 0) {
647 ipath_dev_err(dd, "attempt to use non-numeric on enable\n");
648 goto bail;
649 }
650
651 if (enable) {
652 if (!(dd->ipath_flags & IPATH_DISABLED))
653 goto bail;
654
655 dev_info(dev, "Enabling unit %d\n", dd->ipath_unit);
656 /* same as post-reset */
657 ret = ipath_init_chip(dd, 1);
658 if (ret)
659 ipath_dev_err(dd, "Failed to enable unit %d\n",
660 dd->ipath_unit);
661 else {
662 dd->ipath_flags &= ~IPATH_DISABLED;
663 *dd->ipath_statusp &= ~IPATH_STATUS_ADMIN_DISABLED;
664 }
665 } else if (!(dd->ipath_flags & IPATH_DISABLED)) {
666 dev_info(dev, "Disabling unit %d\n", dd->ipath_unit);
667 ipath_shutdown_device(dd);
668 dd->ipath_flags |= IPATH_DISABLED;
669 *dd->ipath_statusp |= IPATH_STATUS_ADMIN_DISABLED;
670 }
671
672bail:
673 return ret;
674}
675
676static ssize_t store_rx_pol_inv(struct device *dev,
677 struct device_attribute *attr,
678 const char *buf,
679 size_t count)
680{
681 struct ipath_devdata *dd = dev_get_drvdata(dev);
682 int ret, r;
683 u16 val;
684
685 ret = ipath_parse_ushort(buf, &val);
686 if (ret < 0)
687 goto invalid;
688
689 r = ipath_set_rx_pol_inv(dd, val);
690 if (r < 0) {
691 ret = r;
692 goto bail;
693 }
694
695 goto bail;
696invalid:
697 ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n");
698bail:
699 return ret;
700}
701
702static ssize_t store_led_override(struct device *dev,
703 struct device_attribute *attr,
704 const char *buf,
705 size_t count)
706{
707 struct ipath_devdata *dd = dev_get_drvdata(dev);
708 int ret;
709 u16 val;
710
711 ret = ipath_parse_ushort(buf, &val);
712 if (ret > 0)
713 ipath_set_led_override(dd, val);
714 else
715 ipath_dev_err(dd, "attempt to set invalid LED override\n");
716 return ret;
717}
718
719static ssize_t show_logged_errs(struct device *dev,
720 struct device_attribute *attr,
721 char *buf)
722{
723 struct ipath_devdata *dd = dev_get_drvdata(dev);
724 int idx, count;
725
726 /* force consistency with actual EEPROM */
727 if (ipath_update_eeprom_log(dd) != 0)
728 return -ENXIO;
729
730 count = 0;
731 for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
732 count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
733 dd->ipath_eep_st_errs[idx],
734 idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' ');
735 }
736
737 return count;
738}
739
740/*
741 * New sysfs entries to control various IB config. These all turn into
742 * accesses via ipath_f_get/set_ib_cfg.
743 *
744 * Get/Set heartbeat enable. OR of 1=enabled, 2=auto.
745 */
746static ssize_t show_hrtbt_enb(struct device *dev,
747 struct device_attribute *attr,
748 char *buf)
749{
750 struct ipath_devdata *dd = dev_get_drvdata(dev);
751 int ret;
752
753 ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_HRTBT);
754 if (ret >= 0)
755 ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
756 return ret;
757}
758
759static ssize_t store_hrtbt_enb(struct device *dev,
760 struct device_attribute *attr,
761 const char *buf,
762 size_t count)
763{
764 struct ipath_devdata *dd = dev_get_drvdata(dev);
765 int ret, r;
766 u16 val;
767
768 ret = ipath_parse_ushort(buf, &val);
769 if (ret >= 0 && val > 3)
770 ret = -EINVAL;
771 if (ret < 0) {
772 ipath_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
773 goto bail;
774 }
775
776 /*
777 * Set the "intentional" heartbeat enable per either of
778 * "Enable" and "Auto", as these are normally set together.
779 * This bit is consulted when leaving loopback mode,
780 * because entering loopback mode overrides it and automatically
781 * disables heartbeat.
782 */
783 r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, val);
784 if (r < 0)
785 ret = r;
786 else if (val == IPATH_IB_HRTBT_OFF)
787 dd->ipath_flags |= IPATH_NO_HRTBT;
788 else
789 dd->ipath_flags &= ~IPATH_NO_HRTBT;
790
791bail:
792 return ret;
793}
794
795/*
796 * Get/Set Link-widths enabled. OR of 1=1x, 2=4x (this is human/IB-centric,
797 * _not_ the particular encoding of any given chip)
798 */
799static ssize_t show_lwid_enb(struct device *dev,
800 struct device_attribute *attr,
801 char *buf)
802{
803 struct ipath_devdata *dd = dev_get_drvdata(dev);
804 int ret;
805
806 ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB);
807 if (ret >= 0)
808 ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
809 return ret;
810}
811
812static ssize_t store_lwid_enb(struct device *dev,
813 struct device_attribute *attr,
814 const char *buf,
815 size_t count)
816{
817 struct ipath_devdata *dd = dev_get_drvdata(dev);
818 int ret, r;
819 u16 val;
820
821 ret = ipath_parse_ushort(buf, &val);
822 if (ret >= 0 && (val == 0 || val > 3))
823 ret = -EINVAL;
824 if (ret < 0) {
825 ipath_dev_err(dd,
826 "attempt to set invalid Link Width (enable)\n");
827 goto bail;
828 }
829
830 r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, val);
831 if (r < 0)
832 ret = r;
833
834bail:
835 return ret;
836}
837
838/* Get current link width */
839static ssize_t show_lwid(struct device *dev,
840 struct device_attribute *attr,
841 char *buf)
842
843{
844 struct ipath_devdata *dd = dev_get_drvdata(dev);
845 int ret;
846
847 ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID);
848 if (ret >= 0)
849 ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
850 return ret;
851}
852
853/*
854 * Get/Set Link-speeds enabled. OR of 1=SDR, 2=DDR.
855 */
856static ssize_t show_spd_enb(struct device *dev,
857 struct device_attribute *attr,
858 char *buf)
859{
860 struct ipath_devdata *dd = dev_get_drvdata(dev);
861 int ret;
862
863 ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB);
864 if (ret >= 0)
865 ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
866 return ret;
867}
868
869static ssize_t store_spd_enb(struct device *dev,
870 struct device_attribute *attr,
871 const char *buf,
872 size_t count)
873{
874 struct ipath_devdata *dd = dev_get_drvdata(dev);
875 int ret, r;
876 u16 val;
877
878 ret = ipath_parse_ushort(buf, &val);
879 if (ret >= 0 && (val == 0 || val > (IPATH_IB_SDR | IPATH_IB_DDR)))
880 ret = -EINVAL;
881 if (ret < 0) {
882 ipath_dev_err(dd,
883 "attempt to set invalid Link Speed (enable)\n");
884 goto bail;
885 }
886
887 r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, val);
888 if (r < 0)
889 ret = r;
890
891bail:
892 return ret;
893}
894
895/* Get current link speed */
896static ssize_t show_spd(struct device *dev,
897 struct device_attribute *attr,
898 char *buf)
899{
900 struct ipath_devdata *dd = dev_get_drvdata(dev);
901 int ret;
902
903 ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD);
904 if (ret >= 0)
905 ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
906 return ret;
907}
908
909/*
910 * Get/Set RX polarity-invert enable. 0=no, 1=yes.
911 */
912static ssize_t show_rx_polinv_enb(struct device *dev,
913 struct device_attribute *attr,
914 char *buf)
915{
916 struct ipath_devdata *dd = dev_get_drvdata(dev);
917 int ret;
918
919 ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB);
920 if (ret >= 0)
921 ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
922 return ret;
923}
924
925static ssize_t store_rx_polinv_enb(struct device *dev,
926 struct device_attribute *attr,
927 const char *buf,
928 size_t count)
929{
930 struct ipath_devdata *dd = dev_get_drvdata(dev);
931 int ret, r;
932 u16 val;
933
934 ret = ipath_parse_ushort(buf, &val);
935 if (ret >= 0 && val > 1) {
936 ipath_dev_err(dd,
937 "attempt to set invalid Rx Polarity (enable)\n");
938 ret = -EINVAL;
939 goto bail;
940 }
941
942 r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB, val);
943 if (r < 0)
944 ret = r;
945
946bail:
947 return ret;
948}
949
950/*
951 * Get/Set RX lane-reversal enable. 0=no, 1=yes.
952 */
953static ssize_t show_lanerev_enb(struct device *dev,
954 struct device_attribute *attr,
955 char *buf)
956{
957 struct ipath_devdata *dd = dev_get_drvdata(dev);
958 int ret;
959
960 ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB);
961 if (ret >= 0)
962 ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
963 return ret;
964}
965
966static ssize_t store_lanerev_enb(struct device *dev,
967 struct device_attribute *attr,
968 const char *buf,
969 size_t count)
970{
971 struct ipath_devdata *dd = dev_get_drvdata(dev);
972 int ret, r;
973 u16 val;
974
975 ret = ipath_parse_ushort(buf, &val);
976 if (ret >= 0 && val > 1) {
977 ret = -EINVAL;
978 ipath_dev_err(dd,
979 "attempt to set invalid Lane reversal (enable)\n");
980 goto bail;
981 }
982
983 r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB, val);
984 if (r < 0)
985 ret = r;
986
987bail:
988 return ret;
989}
990
991static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
992static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
993
994static struct attribute *driver_attributes[] = {
995 &driver_attr_num_units.attr,
996 &driver_attr_version.attr,
997 NULL
998};
999
1000static struct attribute_group driver_attr_group = {
1001 .attrs = driver_attributes
1002};
1003
1004static ssize_t store_tempsense(struct device *dev,
1005 struct device_attribute *attr,
1006 const char *buf,
1007 size_t count)
1008{
1009 struct ipath_devdata *dd = dev_get_drvdata(dev);
1010 int ret, stat;
1011 u16 val;
1012
1013 ret = ipath_parse_ushort(buf, &val);
1014 if (ret <= 0) {
1015 ipath_dev_err(dd, "attempt to set invalid tempsense config\n");
1016 goto bail;
1017 }
1018 /* If anything but the highest limit, enable T_CRIT_A "interrupt" */
1019 stat = ipath_tempsense_write(dd, 9, (val == 0x7f7f) ? 0x80 : 0);
1020 if (stat) {
1021 ipath_dev_err(dd, "Unable to set tempsense config\n");
1022 ret = -1;
1023 goto bail;
1024 }
1025 stat = ipath_tempsense_write(dd, 0xB, (u8) (val & 0xFF));
1026 if (stat) {
1027 ipath_dev_err(dd, "Unable to set local Tcrit\n");
1028 ret = -1;
1029 goto bail;
1030 }
1031 stat = ipath_tempsense_write(dd, 0xD, (u8) (val >> 8));
1032 if (stat) {
1033 ipath_dev_err(dd, "Unable to set remote Tcrit\n");
1034 ret = -1;
1035 goto bail;
1036 }
1037
1038bail:
1039 return ret;
1040}
1041
1042/*
1043 * Dump the tempsense registers in decimal, to ease shell scripting.
1044 */
1045static ssize_t show_tempsense(struct device *dev,
1046 struct device_attribute *attr,
1047 char *buf)
1048{
1049 struct ipath_devdata *dd = dev_get_drvdata(dev);
1050 int ret;
1051 int idx;
1052 u8 regvals[8];
1053
1054 ret = -ENXIO;
1055 for (idx = 0; idx < 8; ++idx) {
1056 if (idx == 6)
1057 continue;
1058 ret = ipath_tempsense_read(dd, idx);
1059 if (ret < 0)
1060 break;
1061 regvals[idx] = ret;
1062 }
1063 if (idx == 8)
1064 ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n",
1065 *(signed char *)(regvals),
1066 *(signed char *)(regvals + 1),
1067 regvals[2], regvals[3],
1068 *(signed char *)(regvals + 5),
1069 *(signed char *)(regvals + 7));
1070 return ret;
1071}
1072
1073const struct attribute_group *ipath_driver_attr_groups[] = {
1074 &driver_attr_group,
1075 NULL,
1076};
1077
1078static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid);
1079static DEVICE_ATTR(lmc, S_IWUSR | S_IRUGO, show_lmc, store_lmc);
1080static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid);
1081static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state);
1082static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
1083static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu);
1084static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled);
1085static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL);
1086static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL);
1087static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset);
1088static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
1089static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
1090static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
1091static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
1092static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
1093static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
1094static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
1095static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
1096static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL);
1097static DEVICE_ATTR(jint_max_packets, S_IWUSR | S_IRUGO,
1098 show_jint_max_packets, store_jint_max_packets);
1099static DEVICE_ATTR(jint_idle_ticks, S_IWUSR | S_IRUGO,
1100 show_jint_idle_ticks, store_jint_idle_ticks);
1101static DEVICE_ATTR(tempsense, S_IWUSR | S_IRUGO,
1102 show_tempsense, store_tempsense);
1103
1104static struct attribute *dev_attributes[] = {
1105 &dev_attr_guid.attr,
1106 &dev_attr_lmc.attr,
1107 &dev_attr_lid.attr,
1108 &dev_attr_link_state.attr,
1109 &dev_attr_mlid.attr,
1110 &dev_attr_mtu.attr,
1111 &dev_attr_nguid.attr,
1112 &dev_attr_nports.attr,
1113 &dev_attr_serial.attr,
1114 &dev_attr_status.attr,
1115 &dev_attr_status_str.attr,
1116 &dev_attr_boardversion.attr,
1117 &dev_attr_unit.attr,
1118 &dev_attr_enabled.attr,
1119 &dev_attr_rx_pol_inv.attr,
1120 &dev_attr_led_override.attr,
1121 &dev_attr_logged_errors.attr,
1122 &dev_attr_tempsense.attr,
1123 &dev_attr_localbus_info.attr,
1124 NULL
1125};
1126
1127static struct attribute_group dev_attr_group = {
1128 .attrs = dev_attributes
1129};
1130
1131static DEVICE_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb,
1132 store_hrtbt_enb);
1133static DEVICE_ATTR(link_width_enable, S_IWUSR | S_IRUGO, show_lwid_enb,
1134 store_lwid_enb);
1135static DEVICE_ATTR(link_width, S_IRUGO, show_lwid, NULL);
1136static DEVICE_ATTR(link_speed_enable, S_IWUSR | S_IRUGO, show_spd_enb,
1137 store_spd_enb);
1138static DEVICE_ATTR(link_speed, S_IRUGO, show_spd, NULL);
1139static DEVICE_ATTR(rx_pol_inv_enable, S_IWUSR | S_IRUGO, show_rx_polinv_enb,
1140 store_rx_polinv_enb);
1141static DEVICE_ATTR(rx_lane_rev_enable, S_IWUSR | S_IRUGO, show_lanerev_enb,
1142 store_lanerev_enb);
1143
1144static struct attribute *dev_ibcfg_attributes[] = {
1145 &dev_attr_hrtbt_enable.attr,
1146 &dev_attr_link_width_enable.attr,
1147 &dev_attr_link_width.attr,
1148 &dev_attr_link_speed_enable.attr,
1149 &dev_attr_link_speed.attr,
1150 &dev_attr_rx_pol_inv_enable.attr,
1151 &dev_attr_rx_lane_rev_enable.attr,
1152 NULL
1153};
1154
1155static struct attribute_group dev_ibcfg_attr_group = {
1156 .attrs = dev_ibcfg_attributes
1157};
1158
1159/**
1160 * ipath_expose_reset - create a device reset file
1161 * @dev: the device structure
1162 *
1163 * Only expose a file that lets us reset the device after someone
1164 * enters diag mode. A device reset is quite likely to crash the
1165 * machine entirely, so we don't normally want to make it
1166 * available.
1167 *
1168 * Called with ipath_mutex held.
1169 */
1170int ipath_expose_reset(struct device *dev)
1171{
1172 static int exposed;
1173 int ret;
1174
1175 if (!exposed) {
1176 ret = device_create_file(dev, &dev_attr_reset);
1177 exposed = 1;
1178 } else {
1179 ret = 0;
1180 }
1181
1182 return ret;
1183}
1184
1185int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd)
1186{
1187 int ret;
1188
1189 ret = sysfs_create_group(&dev->kobj, &dev_attr_group);
1190 if (ret)
1191 goto bail;
1192
1193 ret = sysfs_create_group(&dev->kobj, &dev_counter_attr_group);
1194 if (ret)
1195 goto bail_attrs;
1196
1197 if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
1198 ret = device_create_file(dev, &dev_attr_jint_idle_ticks);
1199 if (ret)
1200 goto bail_counter;
1201 ret = device_create_file(dev, &dev_attr_jint_max_packets);
1202 if (ret)
1203 goto bail_idle;
1204
1205 ret = sysfs_create_group(&dev->kobj, &dev_ibcfg_attr_group);
1206 if (ret)
1207 goto bail_max;
1208 }
1209
1210 return 0;
1211
1212bail_max:
1213 device_remove_file(dev, &dev_attr_jint_max_packets);
1214bail_idle:
1215 device_remove_file(dev, &dev_attr_jint_idle_ticks);
1216bail_counter:
1217 sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
1218bail_attrs:
1219 sysfs_remove_group(&dev->kobj, &dev_attr_group);
1220bail:
1221 return ret;
1222}
1223
1224void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd)
1225{
1226 sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
1227
1228 if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
1229 sysfs_remove_group(&dev->kobj, &dev_ibcfg_attr_group);
1230 device_remove_file(dev, &dev_attr_jint_idle_ticks);
1231 device_remove_file(dev, &dev_attr_jint_max_packets);
1232 }
1233
1234 sysfs_remove_group(&dev->kobj, &dev_attr_group);
1235
1236 device_remove_file(dev, &dev_attr_reset);
1237}
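
As a side note on the pattern used throughout the sysfs file above, here is a minimal sketch of a show/store attribute pair wired up the same way. The attribute name, its backing variable, and the use of kstrtoint() are illustrative choices, not part of the ipath driver.

#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/stat.h>

/* Hypothetical backing state for the example attribute (not ipath code). */
static int example_value;

static ssize_t show_example(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	/* sysfs show callbacks emit at most PAGE_SIZE bytes of text. */
	return scnprintf(buf, PAGE_SIZE, "%d\n", example_value);
}

static ssize_t store_example(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	int ret = kstrtoint(buf, 0, &example_value);

	/* Return bytes consumed on success, -errno on a parse failure. */
	return ret ? ret : count;
}

/* Declares dev_attr_example, ready for an attribute list / sysfs group. */
static DEVICE_ATTR(example, S_IWUSR | S_IRUGO, show_example, store_example);

Placing dev_attr_example.attr in an attribute_group and registering it with sysfs_create_group(), as ipath_device_create_group() does above, is what makes the file appear under the device's sysfs directory.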
diff --git a/drivers/staging/rdma/ipath/ipath_uc.c b/drivers/staging/rdma/ipath/ipath_uc.c
deleted file mode 100644
index 0246b30280b9..000000000000
--- a/drivers/staging/rdma/ipath/ipath_uc.c
+++ /dev/null
@@ -1,547 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include "ipath_verbs.h"
35#include "ipath_kernel.h"
36
37/* cut down ridiculously long IB macro names */
38#define OP(x) IB_OPCODE_UC_##x
39
40/**
41 * ipath_make_uc_req - construct a request packet (SEND, RDMA write)
42 * @qp: a pointer to the QP
43 *
44 * Return 1 if constructed; otherwise, return 0.
45 */
46int ipath_make_uc_req(struct ipath_qp *qp)
47{
48 struct ipath_other_headers *ohdr;
49 struct ipath_swqe *wqe;
50 unsigned long flags;
51 u32 hwords;
52 u32 bth0;
53 u32 len;
54 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
55 int ret = 0;
56
57 spin_lock_irqsave(&qp->s_lock, flags);
58
59 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
60 if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
61 goto bail;
62 /* We are in the error state, flush the work request. */
63 if (qp->s_last == qp->s_head)
64 goto bail;
65 /* If DMAs are in progress, we can't flush immediately. */
66 if (atomic_read(&qp->s_dma_busy)) {
67 qp->s_flags |= IPATH_S_WAIT_DMA;
68 goto bail;
69 }
70 wqe = get_swqe_ptr(qp, qp->s_last);
71 ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
72 goto done;
73 }
74
75 ohdr = &qp->s_hdr.u.oth;
76 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
77 ohdr = &qp->s_hdr.u.l.oth;
78
79 /* header size in 32-bit words LRH+BTH = (8+12)/4. */
80 hwords = 5;
81 bth0 = 1 << 22; /* Set M bit */
82
83 /* Get the next send request. */
84 wqe = get_swqe_ptr(qp, qp->s_cur);
85 qp->s_wqe = NULL;
86 switch (qp->s_state) {
87 default:
88 if (!(ib_ipath_state_ops[qp->state] &
89 IPATH_PROCESS_NEXT_SEND_OK))
90 goto bail;
91 /* Check if send work queue is empty. */
92 if (qp->s_cur == qp->s_head)
93 goto bail;
94 /*
95 * Start a new request.
96 */
97 qp->s_psn = wqe->psn = qp->s_next_psn;
98 qp->s_sge.sge = wqe->sg_list[0];
99 qp->s_sge.sg_list = wqe->sg_list + 1;
100 qp->s_sge.num_sge = wqe->wr.num_sge;
101 qp->s_len = len = wqe->length;
102 switch (wqe->wr.opcode) {
103 case IB_WR_SEND:
104 case IB_WR_SEND_WITH_IMM:
105 if (len > pmtu) {
106 qp->s_state = OP(SEND_FIRST);
107 len = pmtu;
108 break;
109 }
110 if (wqe->wr.opcode == IB_WR_SEND)
111 qp->s_state = OP(SEND_ONLY);
112 else {
113 qp->s_state =
114 OP(SEND_ONLY_WITH_IMMEDIATE);
115 /* Immediate data comes after the BTH */
116 ohdr->u.imm_data = wqe->wr.ex.imm_data;
117 hwords += 1;
118 }
119 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
120 bth0 |= 1 << 23;
121 qp->s_wqe = wqe;
122 if (++qp->s_cur >= qp->s_size)
123 qp->s_cur = 0;
124 break;
125
126 case IB_WR_RDMA_WRITE:
127 case IB_WR_RDMA_WRITE_WITH_IMM:
128 ohdr->u.rc.reth.vaddr =
129 cpu_to_be64(wqe->rdma_wr.remote_addr);
130 ohdr->u.rc.reth.rkey =
131 cpu_to_be32(wqe->rdma_wr.rkey);
132 ohdr->u.rc.reth.length = cpu_to_be32(len);
133 hwords += sizeof(struct ib_reth) / 4;
134 if (len > pmtu) {
135 qp->s_state = OP(RDMA_WRITE_FIRST);
136 len = pmtu;
137 break;
138 }
139 if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
140 qp->s_state = OP(RDMA_WRITE_ONLY);
141 else {
142 qp->s_state =
143 OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
144 /* Immediate data comes after the RETH */
145 ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
146 hwords += 1;
147 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
148 bth0 |= 1 << 23;
149 }
150 qp->s_wqe = wqe;
151 if (++qp->s_cur >= qp->s_size)
152 qp->s_cur = 0;
153 break;
154
155 default:
156 goto bail;
157 }
158 break;
159
160 case OP(SEND_FIRST):
161 qp->s_state = OP(SEND_MIDDLE);
162 /* FALLTHROUGH */
163 case OP(SEND_MIDDLE):
164 len = qp->s_len;
165 if (len > pmtu) {
166 len = pmtu;
167 break;
168 }
169 if (wqe->wr.opcode == IB_WR_SEND)
170 qp->s_state = OP(SEND_LAST);
171 else {
172 qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
173 /* Immediate data comes after the BTH */
174 ohdr->u.imm_data = wqe->wr.ex.imm_data;
175 hwords += 1;
176 }
177 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
178 bth0 |= 1 << 23;
179 qp->s_wqe = wqe;
180 if (++qp->s_cur >= qp->s_size)
181 qp->s_cur = 0;
182 break;
183
184 case OP(RDMA_WRITE_FIRST):
185 qp->s_state = OP(RDMA_WRITE_MIDDLE);
186 /* FALLTHROUGH */
187 case OP(RDMA_WRITE_MIDDLE):
188 len = qp->s_len;
189 if (len > pmtu) {
190 len = pmtu;
191 break;
192 }
193 if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
194 qp->s_state = OP(RDMA_WRITE_LAST);
195 else {
196 qp->s_state =
197 OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
198 /* Immediate data comes after the BTH */
199 ohdr->u.imm_data = wqe->wr.ex.imm_data;
200 hwords += 1;
201 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
202 bth0 |= 1 << 23;
203 }
204 qp->s_wqe = wqe;
205 if (++qp->s_cur >= qp->s_size)
206 qp->s_cur = 0;
207 break;
208 }
209 qp->s_len -= len;
210 qp->s_hdrwords = hwords;
211 qp->s_cur_sge = &qp->s_sge;
212 qp->s_cur_size = len;
213 ipath_make_ruc_header(to_idev(qp->ibqp.device),
214 qp, ohdr, bth0 | (qp->s_state << 24),
215 qp->s_next_psn++ & IPATH_PSN_MASK);
216done:
217 ret = 1;
218 goto unlock;
219
220bail:
221 qp->s_flags &= ~IPATH_S_BUSY;
222unlock:
223 spin_unlock_irqrestore(&qp->s_lock, flags);
224 return ret;
225}
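
For orientation, a sketch of how the bth0 word assembled above ends up laid out. The helper and its parameters are hypothetical; the bit positions are simply those implied by the shifts and masks used in this driver (opcode/state in the top byte, solicited event at bit 23, the "M" bit at bit 22, pad count at bits 21:20).

#include <linux/types.h>

/* Illustrative only -- not driver code. */
static inline u32 example_build_bth0(u8 opcode, int solicited, u8 pad_bytes)
{
	u32 bth0 = (u32)opcode << 24;		/* opcode in the top byte */

	bth0 |= 1 << 22;			/* the "M" bit, set above */
	if (solicited)
		bth0 |= 1 << 23;		/* IB_SEND_SOLICITED -> bit 23 */
	bth0 |= (u32)(pad_bytes & 3) << 20;	/* pad count, bits 21:20 */
	return bth0;				/* caller byte-swaps to __be32 */
}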
226
227/**
228 * ipath_uc_rcv - handle an incoming UC packet
229 * @dev: the device the packet came in on
230 * @hdr: the header of the packet
231 * @has_grh: true if the packet has a GRH
232 * @data: the packet data
233 * @tlen: the length of the packet
234 * @qp: the QP for this packet.
235 *
236 * This is called from ipath_qp_rcv() to process an incoming UC packet
237 * for the given QP.
238 * Called at interrupt level.
239 */
240void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
241 int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
242{
243 struct ipath_other_headers *ohdr;
244 int opcode;
245 u32 hdrsize;
246 u32 psn;
247 u32 pad;
248 struct ib_wc wc;
249 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
250 struct ib_reth *reth;
251 int header_in_data;
252
253 /* Validate the SLID. See Ch. 9.6.1.5 */
254 if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
255 goto done;
256
257 /* Check for GRH */
258 if (!has_grh) {
259 ohdr = &hdr->u.oth;
260 hdrsize = 8 + 12; /* LRH + BTH */
261 psn = be32_to_cpu(ohdr->bth[2]);
262 header_in_data = 0;
263 } else {
264 ohdr = &hdr->u.l.oth;
265 hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */
266 /*
267 * The header with GRH is 60 bytes and the
268 * core driver sets the eager header buffer
269		 * size to 56 bytes, so the last 4 bytes of
270		 * the BTH header (the PSN) end up in the data buffer.
271 */
272 header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
273 if (header_in_data) {
274 psn = be32_to_cpu(((__be32 *) data)[0]);
275 data += sizeof(__be32);
276 } else
277 psn = be32_to_cpu(ohdr->bth[2]);
278 }
279 /*
280	 * The opcode is in the low byte when it's in network order
281 * (top byte when in host order).
282 */
283 opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
284
285 memset(&wc, 0, sizeof wc);
286
287	/* Compare the PSN versus the expected PSN. */
288 if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
289 /*
290 * Handle a sequence error.
291 * Silently drop any current message.
292 */
293 qp->r_psn = psn;
294 inv:
295 qp->r_state = OP(SEND_LAST);
296 switch (opcode) {
297 case OP(SEND_FIRST):
298 case OP(SEND_ONLY):
299 case OP(SEND_ONLY_WITH_IMMEDIATE):
300 goto send_first;
301
302 case OP(RDMA_WRITE_FIRST):
303 case OP(RDMA_WRITE_ONLY):
304 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
305 goto rdma_first;
306
307 default:
308 dev->n_pkt_drops++;
309 goto done;
310 }
311 }
312
313 /* Check for opcode sequence errors. */
314 switch (qp->r_state) {
315 case OP(SEND_FIRST):
316 case OP(SEND_MIDDLE):
317 if (opcode == OP(SEND_MIDDLE) ||
318 opcode == OP(SEND_LAST) ||
319 opcode == OP(SEND_LAST_WITH_IMMEDIATE))
320 break;
321 goto inv;
322
323 case OP(RDMA_WRITE_FIRST):
324 case OP(RDMA_WRITE_MIDDLE):
325 if (opcode == OP(RDMA_WRITE_MIDDLE) ||
326 opcode == OP(RDMA_WRITE_LAST) ||
327 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
328 break;
329 goto inv;
330
331 default:
332 if (opcode == OP(SEND_FIRST) ||
333 opcode == OP(SEND_ONLY) ||
334 opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
335 opcode == OP(RDMA_WRITE_FIRST) ||
336 opcode == OP(RDMA_WRITE_ONLY) ||
337 opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
338 break;
339 goto inv;
340 }
341
342 /* OK, process the packet. */
343 switch (opcode) {
344 case OP(SEND_FIRST):
345 case OP(SEND_ONLY):
346 case OP(SEND_ONLY_WITH_IMMEDIATE):
347 send_first:
348 if (qp->r_flags & IPATH_R_REUSE_SGE) {
349 qp->r_flags &= ~IPATH_R_REUSE_SGE;
350 qp->r_sge = qp->s_rdma_read_sge;
351 } else if (!ipath_get_rwqe(qp, 0)) {
352 dev->n_pkt_drops++;
353 goto done;
354 }
355 /* Save the WQE so we can reuse it in case of an error. */
356 qp->s_rdma_read_sge = qp->r_sge;
357 qp->r_rcv_len = 0;
358 if (opcode == OP(SEND_ONLY))
359 goto send_last;
360 else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
361 goto send_last_imm;
362 /* FALLTHROUGH */
363 case OP(SEND_MIDDLE):
364 /* Check for invalid length PMTU or posted rwqe len. */
365 if (unlikely(tlen != (hdrsize + pmtu + 4))) {
366 qp->r_flags |= IPATH_R_REUSE_SGE;
367 dev->n_pkt_drops++;
368 goto done;
369 }
370 qp->r_rcv_len += pmtu;
371 if (unlikely(qp->r_rcv_len > qp->r_len)) {
372 qp->r_flags |= IPATH_R_REUSE_SGE;
373 dev->n_pkt_drops++;
374 goto done;
375 }
376 ipath_copy_sge(&qp->r_sge, data, pmtu);
377 break;
378
379 case OP(SEND_LAST_WITH_IMMEDIATE):
380 send_last_imm:
381 if (header_in_data) {
382 wc.ex.imm_data = *(__be32 *) data;
383 data += sizeof(__be32);
384 } else {
385 /* Immediate data comes after BTH */
386 wc.ex.imm_data = ohdr->u.imm_data;
387 }
388 hdrsize += 4;
389 wc.wc_flags = IB_WC_WITH_IMM;
390 /* FALLTHROUGH */
391 case OP(SEND_LAST):
392 send_last:
393 /* Get the number of bytes the message was padded by. */
394 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
395 /* Check for invalid length. */
396 /* XXX LAST len should be >= 1 */
397 if (unlikely(tlen < (hdrsize + pad + 4))) {
398 qp->r_flags |= IPATH_R_REUSE_SGE;
399 dev->n_pkt_drops++;
400 goto done;
401 }
402 /* Don't count the CRC. */
403 tlen -= (hdrsize + pad + 4);
404 wc.byte_len = tlen + qp->r_rcv_len;
405 if (unlikely(wc.byte_len > qp->r_len)) {
406 qp->r_flags |= IPATH_R_REUSE_SGE;
407 dev->n_pkt_drops++;
408 goto done;
409 }
410 wc.opcode = IB_WC_RECV;
411 last_imm:
412 ipath_copy_sge(&qp->r_sge, data, tlen);
413 wc.wr_id = qp->r_wr_id;
414 wc.status = IB_WC_SUCCESS;
415 wc.qp = &qp->ibqp;
416 wc.src_qp = qp->remote_qpn;
417 wc.slid = qp->remote_ah_attr.dlid;
418 wc.sl = qp->remote_ah_attr.sl;
419 /* Signal completion event if the solicited bit is set. */
420 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
421 (ohdr->bth[0] &
422 cpu_to_be32(1 << 23)) != 0);
423 break;
424
425 case OP(RDMA_WRITE_FIRST):
426 case OP(RDMA_WRITE_ONLY):
427 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
428 rdma_first:
429 /* RETH comes after BTH */
430 if (!header_in_data)
431 reth = &ohdr->u.rc.reth;
432 else {
433 reth = (struct ib_reth *)data;
434 data += sizeof(*reth);
435 }
436 hdrsize += sizeof(*reth);
437 qp->r_len = be32_to_cpu(reth->length);
438 qp->r_rcv_len = 0;
439 if (qp->r_len != 0) {
440 u32 rkey = be32_to_cpu(reth->rkey);
441 u64 vaddr = be64_to_cpu(reth->vaddr);
442 int ok;
443
444 /* Check rkey */
445 ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
446 vaddr, rkey,
447 IB_ACCESS_REMOTE_WRITE);
448 if (unlikely(!ok)) {
449 dev->n_pkt_drops++;
450 goto done;
451 }
452 } else {
453 qp->r_sge.sg_list = NULL;
454 qp->r_sge.sge.mr = NULL;
455 qp->r_sge.sge.vaddr = NULL;
456 qp->r_sge.sge.length = 0;
457 qp->r_sge.sge.sge_length = 0;
458 }
459 if (unlikely(!(qp->qp_access_flags &
460 IB_ACCESS_REMOTE_WRITE))) {
461 dev->n_pkt_drops++;
462 goto done;
463 }
464 if (opcode == OP(RDMA_WRITE_ONLY))
465 goto rdma_last;
466 else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
467 goto rdma_last_imm;
468 /* FALLTHROUGH */
469 case OP(RDMA_WRITE_MIDDLE):
470 /* Check for invalid length PMTU or posted rwqe len. */
471 if (unlikely(tlen != (hdrsize + pmtu + 4))) {
472 dev->n_pkt_drops++;
473 goto done;
474 }
475 qp->r_rcv_len += pmtu;
476 if (unlikely(qp->r_rcv_len > qp->r_len)) {
477 dev->n_pkt_drops++;
478 goto done;
479 }
480 ipath_copy_sge(&qp->r_sge, data, pmtu);
481 break;
482
483 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
484 rdma_last_imm:
485 if (header_in_data) {
486 wc.ex.imm_data = *(__be32 *) data;
487 data += sizeof(__be32);
488 } else {
489 /* Immediate data comes after BTH */
490 wc.ex.imm_data = ohdr->u.imm_data;
491 }
492 hdrsize += 4;
493 wc.wc_flags = IB_WC_WITH_IMM;
494
495 /* Get the number of bytes the message was padded by. */
496 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
497 /* Check for invalid length. */
498 /* XXX LAST len should be >= 1 */
499 if (unlikely(tlen < (hdrsize + pad + 4))) {
500 dev->n_pkt_drops++;
501 goto done;
502 }
503 /* Don't count the CRC. */
504 tlen -= (hdrsize + pad + 4);
505 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
506 dev->n_pkt_drops++;
507 goto done;
508 }
509 if (qp->r_flags & IPATH_R_REUSE_SGE)
510 qp->r_flags &= ~IPATH_R_REUSE_SGE;
511 else if (!ipath_get_rwqe(qp, 1)) {
512 dev->n_pkt_drops++;
513 goto done;
514 }
515 wc.byte_len = qp->r_len;
516 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
517 goto last_imm;
518
519 case OP(RDMA_WRITE_LAST):
520 rdma_last:
521 /* Get the number of bytes the message was padded by. */
522 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
523 /* Check for invalid length. */
524 /* XXX LAST len should be >= 1 */
525 if (unlikely(tlen < (hdrsize + pad + 4))) {
526 dev->n_pkt_drops++;
527 goto done;
528 }
529 /* Don't count the CRC. */
530 tlen -= (hdrsize + pad + 4);
531 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
532 dev->n_pkt_drops++;
533 goto done;
534 }
535 ipath_copy_sge(&qp->r_sge, data, tlen);
536 break;
537
538 default:
539 /* Drop packet for unknown opcodes. */
540 dev->n_pkt_drops++;
541 goto done;
542 }
543 qp->r_psn++;
544 qp->r_state = opcode;
545done:
546 return;
547}
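
One detail shared by the UC code above and the UD code below: packet sequence numbers are kept to 24 bits with IPATH_PSN_MASK (assumed here to be 0xFFFFFF), so incrementing wraps at 2^24. A standalone illustration with a stand-in mask:

#include <stdio.h>

#define EXAMPLE_PSN_MASK 0xFFFFFF	/* stand-in for IPATH_PSN_MASK */

int main(void)
{
	unsigned int psn = 0xFFFFFE;
	int i;

	/* Prints 0xFFFFFE, 0xFFFFFF, 0x000000, 0x000001: the PSN wraps. */
	for (i = 0; i < 4; i++) {
		printf("0x%06X\n", psn);
		psn = (psn + 1) & EXAMPLE_PSN_MASK;
	}
	return 0;
}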
diff --git a/drivers/staging/rdma/ipath/ipath_ud.c b/drivers/staging/rdma/ipath/ipath_ud.c
deleted file mode 100644
index 385d9410a51e..000000000000
--- a/drivers/staging/rdma/ipath/ipath_ud.c
+++ /dev/null
@@ -1,579 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <rdma/ib_smi.h>
35
36#include "ipath_verbs.h"
37#include "ipath_kernel.h"
38
39/**
40 * ipath_ud_loopback - handle send on loopback QPs
41 * @sqp: the sending QP
42 * @swqe: the send work request
43 *
44 * This is called from ipath_make_ud_req() to forward a WQE addressed
45 * to the same HCA.
46 * Note that the receive interrupt handler may be calling ipath_ud_rcv()
47 * while this is being called.
48 */
49static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
50{
51 struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
52 struct ipath_qp *qp;
53 struct ib_ah_attr *ah_attr;
54 unsigned long flags;
55 struct ipath_rq *rq;
56 struct ipath_srq *srq;
57 struct ipath_sge_state rsge;
58 struct ipath_sge *sge;
59 struct ipath_rwq *wq;
60 struct ipath_rwqe *wqe;
61 void (*handler)(struct ib_event *, void *);
62 struct ib_wc wc;
63 u32 tail;
64 u32 rlen;
65 u32 length;
66
67 qp = ipath_lookup_qpn(&dev->qp_table, swqe->ud_wr.remote_qpn);
68 if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
69 dev->n_pkt_drops++;
70 goto done;
71 }
72
73 /*
74 * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
75 * Qkeys with the high order bit set mean use the
76 * qkey from the QP context instead of the WR (see 10.2.5).
77 */
78 if (unlikely(qp->ibqp.qp_num &&
79 ((int) swqe->ud_wr.remote_qkey < 0 ?
80 sqp->qkey : swqe->ud_wr.remote_qkey) != qp->qkey)) {
81 /* XXX OK to lose a count once in a while. */
82 dev->qkey_violations++;
83 dev->n_pkt_drops++;
84 goto drop;
85 }
86
87 /*
88 * A GRH is expected to precede the data even if not
89 * present on the wire.
90 */
91 length = swqe->length;
92 memset(&wc, 0, sizeof wc);
93 wc.byte_len = length + sizeof(struct ib_grh);
94
95 if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
96 wc.wc_flags = IB_WC_WITH_IMM;
97 wc.ex.imm_data = swqe->wr.ex.imm_data;
98 }
99
100 /*
101 * This would be a lot simpler if we could call ipath_get_rwqe()
102 * but that uses state that the receive interrupt handler uses
103 * so we would need to lock out receive interrupts while doing
104 * local loopback.
105 */
106 if (qp->ibqp.srq) {
107 srq = to_isrq(qp->ibqp.srq);
108 handler = srq->ibsrq.event_handler;
109 rq = &srq->rq;
110 } else {
111 srq = NULL;
112 handler = NULL;
113 rq = &qp->r_rq;
114 }
115
116 /*
117 * Get the next work request entry to find where to put the data.
118 * Note that it is safe to drop the lock after changing rq->tail
119 * since ipath_post_receive() won't fill the empty slot.
120 */
121 spin_lock_irqsave(&rq->lock, flags);
122 wq = rq->wq;
123 tail = wq->tail;
124 /* Validate tail before using it since it is user writable. */
125 if (tail >= rq->size)
126 tail = 0;
127 if (unlikely(tail == wq->head)) {
128 spin_unlock_irqrestore(&rq->lock, flags);
129 dev->n_pkt_drops++;
130 goto drop;
131 }
132 wqe = get_rwqe_ptr(rq, tail);
133 rsge.sg_list = qp->r_ud_sg_list;
134 if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
135 spin_unlock_irqrestore(&rq->lock, flags);
136 dev->n_pkt_drops++;
137 goto drop;
138 }
139 /* Silently drop packets which are too big. */
140 if (wc.byte_len > rlen) {
141 spin_unlock_irqrestore(&rq->lock, flags);
142 dev->n_pkt_drops++;
143 goto drop;
144 }
145 if (++tail >= rq->size)
146 tail = 0;
147 wq->tail = tail;
148 wc.wr_id = wqe->wr_id;
149 if (handler) {
150 u32 n;
151
152 /*
153 * validate head pointer value and compute
154 * the number of remaining WQEs.
155 */
156 n = wq->head;
157 if (n >= rq->size)
158 n = 0;
159 if (n < tail)
160 n += rq->size - tail;
161 else
162 n -= tail;
163 if (n < srq->limit) {
164 struct ib_event ev;
165
166 srq->limit = 0;
167 spin_unlock_irqrestore(&rq->lock, flags);
168 ev.device = qp->ibqp.device;
169 ev.element.srq = qp->ibqp.srq;
170 ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
171 handler(&ev, srq->ibsrq.srq_context);
172 } else
173 spin_unlock_irqrestore(&rq->lock, flags);
174 } else
175 spin_unlock_irqrestore(&rq->lock, flags);
176
177 ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
178 if (ah_attr->ah_flags & IB_AH_GRH) {
179 ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
180 wc.wc_flags |= IB_WC_GRH;
181 } else
182 ipath_skip_sge(&rsge, sizeof(struct ib_grh));
183 sge = swqe->sg_list;
184 while (length) {
185 u32 len = sge->length;
186
187 if (len > length)
188 len = length;
189 if (len > sge->sge_length)
190 len = sge->sge_length;
191 BUG_ON(len == 0);
192 ipath_copy_sge(&rsge, sge->vaddr, len);
193 sge->vaddr += len;
194 sge->length -= len;
195 sge->sge_length -= len;
196 if (sge->sge_length == 0) {
197 if (--swqe->wr.num_sge)
198 sge++;
199 } else if (sge->length == 0 && sge->mr != NULL) {
200 if (++sge->n >= IPATH_SEGSZ) {
201 if (++sge->m >= sge->mr->mapsz)
202 break;
203 sge->n = 0;
204 }
205 sge->vaddr =
206 sge->mr->map[sge->m]->segs[sge->n].vaddr;
207 sge->length =
208 sge->mr->map[sge->m]->segs[sge->n].length;
209 }
210 length -= len;
211 }
212 wc.status = IB_WC_SUCCESS;
213 wc.opcode = IB_WC_RECV;
214 wc.qp = &qp->ibqp;
215 wc.src_qp = sqp->ibqp.qp_num;
216 /* XXX do we know which pkey matched? Only needed for GSI. */
217 wc.pkey_index = 0;
218 wc.slid = dev->dd->ipath_lid |
219 (ah_attr->src_path_bits &
220 ((1 << dev->dd->ipath_lmc) - 1));
221 wc.sl = ah_attr->sl;
222 wc.dlid_path_bits =
223 ah_attr->dlid & ((1 << dev->dd->ipath_lmc) - 1);
224 wc.port_num = 1;
225 /* Signal completion event if the solicited bit is set. */
226 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
227 swqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED);
228drop:
229 if (atomic_dec_and_test(&qp->refcount))
230 wake_up(&qp->wait);
231done:;
232}
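
A note on the qkey test in ipath_ud_loopback() above: casting the WR's qkey to int and checking for a negative value is simply a test of the high-order bit, per the "use the qkey from the QP context" convention in the comment. A small, self-contained illustration (names are mine):

#include <stdio.h>

static unsigned int effective_qkey(unsigned int wr_qkey, unsigned int qp_qkey)
{
	/* High bit set in the WR qkey => fall back to the QP's own qkey. */
	return (int)wr_qkey < 0 ? qp_qkey : wr_qkey;
}

int main(void)
{
	printf("0x%x\n", effective_qkey(0x80000000u, 0x1234));	/* 0x1234 */
	printf("0x%x\n", effective_qkey(0x00001111u, 0x1234));	/* 0x1111 */
	return 0;
}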
233
234/**
235 * ipath_make_ud_req - construct a UD request packet
236 * @qp: the QP
237 *
238 * Return 1 if constructed; otherwise, return 0.
239 */
240int ipath_make_ud_req(struct ipath_qp *qp)
241{
242 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
243 struct ipath_other_headers *ohdr;
244 struct ib_ah_attr *ah_attr;
245 struct ipath_swqe *wqe;
246 unsigned long flags;
247 u32 nwords;
248 u32 extra_bytes;
249 u32 bth0;
250 u16 lrh0;
251 u16 lid;
252 int ret = 0;
253 int next_cur;
254
255 spin_lock_irqsave(&qp->s_lock, flags);
256
257 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
258 if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
259 goto bail;
260 /* We are in the error state, flush the work request. */
261 if (qp->s_last == qp->s_head)
262 goto bail;
263 /* If DMAs are in progress, we can't flush immediately. */
264 if (atomic_read(&qp->s_dma_busy)) {
265 qp->s_flags |= IPATH_S_WAIT_DMA;
266 goto bail;
267 }
268 wqe = get_swqe_ptr(qp, qp->s_last);
269 ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
270 goto done;
271 }
272
273 if (qp->s_cur == qp->s_head)
274 goto bail;
275
276 wqe = get_swqe_ptr(qp, qp->s_cur);
277 next_cur = qp->s_cur + 1;
278 if (next_cur >= qp->s_size)
279 next_cur = 0;
280
281 /* Construct the header. */
282 ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
283 if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) {
284 if (ah_attr->dlid != IPATH_PERMISSIVE_LID)
285 dev->n_multicast_xmit++;
286 else
287 dev->n_unicast_xmit++;
288 } else {
289 dev->n_unicast_xmit++;
290 lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);
291 if (unlikely(lid == dev->dd->ipath_lid)) {
292 /*
293 * If DMAs are in progress, we can't generate
294 * a completion for the loopback packet since
295 * it would be out of order.
296 * XXX Instead of waiting, we could queue a
297 * zero length descriptor so we get a callback.
298 */
299 if (atomic_read(&qp->s_dma_busy)) {
300 qp->s_flags |= IPATH_S_WAIT_DMA;
301 goto bail;
302 }
303 qp->s_cur = next_cur;
304 spin_unlock_irqrestore(&qp->s_lock, flags);
305 ipath_ud_loopback(qp, wqe);
306 spin_lock_irqsave(&qp->s_lock, flags);
307 ipath_send_complete(qp, wqe, IB_WC_SUCCESS);
308 goto done;
309 }
310 }
311
312 qp->s_cur = next_cur;
313 extra_bytes = -wqe->length & 3;
314 nwords = (wqe->length + extra_bytes) >> 2;
315
316 /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
317 qp->s_hdrwords = 7;
318 qp->s_cur_size = wqe->length;
319 qp->s_cur_sge = &qp->s_sge;
320 qp->s_dmult = ah_attr->static_rate;
321 qp->s_wqe = wqe;
322 qp->s_sge.sge = wqe->sg_list[0];
323 qp->s_sge.sg_list = wqe->sg_list + 1;
324 qp->s_sge.num_sge = wqe->ud_wr.wr.num_sge;
325
326 if (ah_attr->ah_flags & IB_AH_GRH) {
327 /* Header size in 32-bit words. */
328 qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
329 &ah_attr->grh,
330 qp->s_hdrwords, nwords);
331 lrh0 = IPATH_LRH_GRH;
332 ohdr = &qp->s_hdr.u.l.oth;
333 /*
334 * Don't worry about sending to locally attached multicast
335 * QPs. It is unspecified by the spec. what happens.
336 */
337 } else {
338 /* Header size in 32-bit words. */
339 lrh0 = IPATH_LRH_BTH;
340 ohdr = &qp->s_hdr.u.oth;
341 }
342 if (wqe->ud_wr.wr.opcode == IB_WR_SEND_WITH_IMM) {
343 qp->s_hdrwords++;
344 ohdr->u.ud.imm_data = wqe->ud_wr.wr.ex.imm_data;
345 bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
346 } else
347 bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
348 lrh0 |= ah_attr->sl << 4;
349 if (qp->ibqp.qp_type == IB_QPT_SMI)
350 lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
351 qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
352 qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
353 qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
354 SIZE_OF_CRC);
355 lid = dev->dd->ipath_lid;
356 if (lid) {
357 lid |= ah_attr->src_path_bits &
358 ((1 << dev->dd->ipath_lmc) - 1);
359 qp->s_hdr.lrh[3] = cpu_to_be16(lid);
360 } else
361 qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
362 if (wqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED)
363 bth0 |= 1 << 23;
364 bth0 |= extra_bytes << 20;
365 bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
366 ipath_get_pkey(dev->dd, qp->s_pkey_index);
367 ohdr->bth[0] = cpu_to_be32(bth0);
368 /*
369 * Use the multicast QP if the destination LID is a multicast LID.
370 */
371 ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
372 ah_attr->dlid != IPATH_PERMISSIVE_LID ?
373 cpu_to_be32(IPATH_MULTICAST_QPN) :
374 cpu_to_be32(wqe->ud_wr.remote_qpn);
375 ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
376 /*
377 * Qkeys with the high order bit set mean use the
378 * qkey from the QP context instead of the WR (see 10.2.5).
379 */
380 ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
381 qp->qkey : wqe->ud_wr.remote_qkey);
382 ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
383
384done:
385 ret = 1;
386 goto unlock;
387
388bail:
389 qp->s_flags &= ~IPATH_S_BUSY;
390unlock:
391 spin_unlock_irqrestore(&qp->s_lock, flags);
392 return ret;
393}
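
The extra_bytes/nwords arithmetic in ipath_make_ud_req() above rounds the payload up to a 32-bit boundary: -len & 3 yields the 0-3 pad bytes needed. A quick standalone check of that identity, purely illustrative:

#include <stdio.h>

int main(void)
{
	unsigned int len;

	for (len = 5; len <= 8; len++) {
		unsigned int extra = -len & 3;		  /* pad: 3, 2, 1, 0 */
		unsigned int nwords = (len + extra) >> 2; /* always 2 here */
		printf("len=%u extra=%u nwords=%u\n", len, extra, nwords);
	}
	return 0;
}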
394
395/**
396 * ipath_ud_rcv - receive an incoming UD packet
397 * @dev: the device the packet came in on
398 * @hdr: the packet header
399 * @has_grh: true if the packet has a GRH
400 * @data: the packet data
401 * @tlen: the packet length
402 * @qp: the QP the packet came on
403 *
404 * This is called from ipath_qp_rcv() to process an incoming UD packet
405 * for the given QP.
406 * Called at interrupt level.
407 */
408void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
409 int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
410{
411 struct ipath_other_headers *ohdr;
412 int opcode;
413 u32 hdrsize;
414 u32 pad;
415 struct ib_wc wc;
416 u32 qkey;
417 u32 src_qp;
418 u16 dlid;
419 int header_in_data;
420
421 /* Check for GRH */
422 if (!has_grh) {
423 ohdr = &hdr->u.oth;
424 hdrsize = 8 + 12 + 8; /* LRH + BTH + DETH */
425 qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
426 src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
427 header_in_data = 0;
428 } else {
429 ohdr = &hdr->u.l.oth;
430 hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
431 /*
432 * The header with GRH is 68 bytes and the core driver sets
433		 * the eager header buffer size to 56 bytes, so the last 12
434		 * bytes of the IB header are in the data buffer.
435 */
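		/*
		 * In byte terms: 8 (LRH) + 40 (GRH) + 12 (BTH) + 8 (DETH)
		 * = 68, and 68 - 56 = 12, i.e. bth[2] and both DETH words
		 * land at the start of the data buffer.  That is why qkey
		 * and src_qp are read from data[1] and data[2] below when
		 * header_in_data is set.
		 */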
436 header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
437 if (header_in_data) {
438 qkey = be32_to_cpu(((__be32 *) data)[1]);
439 src_qp = be32_to_cpu(((__be32 *) data)[2]);
440 data += 12;
441 } else {
442 qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
443 src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
444 }
445 }
446 src_qp &= IPATH_QPN_MASK;
447
448 /*
449 * Check that the permissive LID is only used on QP0
450 * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
451 */
452 if (qp->ibqp.qp_num) {
453 if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
454 hdr->lrh[3] == IB_LID_PERMISSIVE)) {
455 dev->n_pkt_drops++;
456 goto bail;
457 }
458 if (unlikely(qkey != qp->qkey)) {
459 /* XXX OK to lose a count once in a while. */
460 dev->qkey_violations++;
461 dev->n_pkt_drops++;
462 goto bail;
463 }
464 } else if (hdr->lrh[1] == IB_LID_PERMISSIVE ||
465 hdr->lrh[3] == IB_LID_PERMISSIVE) {
466 struct ib_smp *smp = (struct ib_smp *) data;
467
468 if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
469 dev->n_pkt_drops++;
470 goto bail;
471 }
472 }
473
474 /*
475	 * The opcode is in the low byte when it's in network order
476 * (top byte when in host order).
477 */
478 opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
479 if (qp->ibqp.qp_num > 1 &&
480 opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
481 if (header_in_data) {
482 wc.ex.imm_data = *(__be32 *) data;
483 data += sizeof(__be32);
484 } else
485 wc.ex.imm_data = ohdr->u.ud.imm_data;
486 wc.wc_flags = IB_WC_WITH_IMM;
487 hdrsize += sizeof(u32);
488 } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
489 wc.ex.imm_data = 0;
490 wc.wc_flags = 0;
491 } else {
492 dev->n_pkt_drops++;
493 goto bail;
494 }
495
496 /* Get the number of bytes the message was padded by. */
497 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
498 if (unlikely(tlen < (hdrsize + pad + 4))) {
499 /* Drop incomplete packets. */
500 dev->n_pkt_drops++;
501 goto bail;
502 }
503 tlen -= hdrsize + pad + 4;
504
505 /* Drop invalid MAD packets (see 13.5.3.1). */
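	/*
	 * At this point tlen is the MAD payload length: QP0 (SMI) traffic
	 * must be exactly 256 bytes and arrive on VL15 (lrh[0] bits 15:12),
	 * while QP1 (GSI) traffic must be 256 bytes on any VL other than 15.
	 */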
506 if (unlikely((qp->ibqp.qp_num == 0 &&
507 (tlen != 256 ||
508 (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)) ||
509 (qp->ibqp.qp_num == 1 &&
510 (tlen != 256 ||
511 (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))) {
512 dev->n_pkt_drops++;
513 goto bail;
514 }
515
516 /*
517 * A GRH is expected to precede the data even if not
518 * present on the wire.
519 */
520 wc.byte_len = tlen + sizeof(struct ib_grh);
521
522 /*
523 * Get the next work request entry to find where to put the data.
524 */
525 if (qp->r_flags & IPATH_R_REUSE_SGE)
526 qp->r_flags &= ~IPATH_R_REUSE_SGE;
527 else if (!ipath_get_rwqe(qp, 0)) {
528 /*
529 * Count VL15 packets dropped due to no receive buffer.
530		 * Otherwise, count them as buffer overruns, since the HW
531		 * can usually receive packets even if there are no QPs
532		 * with posted receive buffers.
533 */
534 if (qp->ibqp.qp_num == 0)
535 dev->n_vl15_dropped++;
536 else
537 dev->rcv_errors++;
538 goto bail;
539 }
540 /* Silently drop packets which are too big. */
541 if (wc.byte_len > qp->r_len) {
542 qp->r_flags |= IPATH_R_REUSE_SGE;
543 dev->n_pkt_drops++;
544 goto bail;
545 }
546 if (has_grh) {
547 ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
548 sizeof(struct ib_grh));
549 wc.wc_flags |= IB_WC_GRH;
550 } else
551 ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
552 ipath_copy_sge(&qp->r_sge, data,
553 wc.byte_len - sizeof(struct ib_grh));
554 if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
555 goto bail;
556 wc.wr_id = qp->r_wr_id;
557 wc.status = IB_WC_SUCCESS;
558 wc.opcode = IB_WC_RECV;
559 wc.vendor_err = 0;
560 wc.qp = &qp->ibqp;
561 wc.src_qp = src_qp;
562 /* XXX do we know which pkey matched? Only needed for GSI. */
563 wc.pkey_index = 0;
564 wc.slid = be16_to_cpu(hdr->lrh[3]);
565 wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF;
566 dlid = be16_to_cpu(hdr->lrh[1]);
567 /*
568 * Save the LMC lower bits if the destination LID is a unicast LID.
569 */
570 wc.dlid_path_bits = dlid >= IPATH_MULTICAST_LID_BASE ? 0 :
571 dlid & ((1 << dev->dd->ipath_lmc) - 1);
572 wc.port_num = 1;
573 /* Signal completion event if the solicited bit is set. */
574 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
575 (ohdr->bth[0] &
576 cpu_to_be32(1 << 23)) != 0);
577
578bail:;
579}
diff --git a/drivers/staging/rdma/ipath/ipath_user_pages.c b/drivers/staging/rdma/ipath/ipath_user_pages.c
deleted file mode 100644
index d29b4daf61f8..000000000000
--- a/drivers/staging/rdma/ipath/ipath_user_pages.c
+++ /dev/null
@@ -1,228 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/mm.h>
35#include <linux/device.h>
36#include <linux/slab.h>
37
38#include "ipath_kernel.h"
39
40static void __ipath_release_user_pages(struct page **p, size_t num_pages,
41 int dirty)
42{
43 size_t i;
44
45 for (i = 0; i < num_pages; i++) {
46 ipath_cdbg(MM, "%lu/%lu put_page %p\n", (unsigned long) i,
47 (unsigned long) num_pages, p[i]);
48 if (dirty)
49 set_page_dirty_lock(p[i]);
50 put_page(p[i]);
51 }
52}
53
54/* call with current->mm->mmap_sem held */
55static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages,
56 struct page **p)
57{
58 unsigned long lock_limit;
59 size_t got;
60 int ret;
61
62 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
63
64 if (num_pages > lock_limit) {
65 ret = -ENOMEM;
66 goto bail;
67 }
68
69 ipath_cdbg(VERBOSE, "pin %lx pages from vaddr %lx\n",
70 (unsigned long) num_pages, start_page);
71
72 for (got = 0; got < num_pages; got += ret) {
73 ret = get_user_pages(current, current->mm,
74 start_page + got * PAGE_SIZE,
75 num_pages - got, 1, 1,
76 p + got, NULL);
77 if (ret < 0)
78 goto bail_release;
79 }
80
81 current->mm->pinned_vm += num_pages;
82
83 ret = 0;
84 goto bail;
85
86bail_release:
87 __ipath_release_user_pages(p, got, 0);
88bail:
89 return ret;
90}
91
92/**
93 * ipath_map_page - a safety wrapper around pci_map_page()
94 *
95 * A dma_addr of all 0's is interpreted by the chip as "disabled".
96 * Unfortunately, it can also be a valid dma_addr returned on some
97 * architectures.
98 *
99 * The powerpc iommu assigns dma_addrs in ascending order, so we don't
100 * have to bother with retries or mapping a dummy page to ensure we
101 * don't just get the same mapping again.
102 *
103 * I'm sure we won't be so lucky with other IOMMUs, so FIXME.
104 */
105dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page,
106 unsigned long offset, size_t size, int direction)
107{
108 dma_addr_t phys;
109
110 phys = pci_map_page(hwdev, page, offset, size, direction);
111
112 if (phys == 0) {
113 pci_unmap_page(hwdev, phys, size, direction);
114 phys = pci_map_page(hwdev, page, offset, size, direction);
115 /*
116 * FIXME: If we get 0 again, we should keep this page,
117 * map another, then free the 0 page.
118 */
119 }
120
121 return phys;
122}
123
124/**
125 * ipath_map_single - a safety wrapper around pci_map_single()
126 *
127 * Same idea as ipath_map_page().
128 */
129dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
130 int direction)
131{
132 dma_addr_t phys;
133
134 phys = pci_map_single(hwdev, ptr, size, direction);
135
136 if (phys == 0) {
137 pci_unmap_single(hwdev, phys, size, direction);
138 phys = pci_map_single(hwdev, ptr, size, direction);
139 /*
140 * FIXME: If we get 0 again, we should keep this page,
141 * map another, then free the 0 page.
142 */
143 }
144
145 return phys;
146}
147
148/**
149 * ipath_get_user_pages - lock user pages into memory
150 * @start_page: the start page
151 * @num_pages: the number of pages
152 * @p: the output page structures
153 *
154 * This function takes a given start page (page aligned user virtual
155 * address) and pins it and the following specified number of pages. For
156 * now, num_pages is always 1, but that will probably change at some point
157 * (because the caller is doing expected sends on a single virtually contiguous
158 * buffer, so we can do all pages at once).
159 */
160int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
161 struct page **p)
162{
163 int ret;
164
165 down_write(&current->mm->mmap_sem);
166
167 ret = __ipath_get_user_pages(start_page, num_pages, p);
168
169 up_write(&current->mm->mmap_sem);
170
171 return ret;
172}
173
174void ipath_release_user_pages(struct page **p, size_t num_pages)
175{
176 down_write(&current->mm->mmap_sem);
177
178 __ipath_release_user_pages(p, num_pages, 1);
179
180 current->mm->pinned_vm -= num_pages;
181
182 up_write(&current->mm->mmap_sem);
183}
184
185struct ipath_user_pages_work {
186 struct work_struct work;
187 struct mm_struct *mm;
188 unsigned long num_pages;
189};
190
191static void user_pages_account(struct work_struct *_work)
192{
193 struct ipath_user_pages_work *work =
194 container_of(_work, struct ipath_user_pages_work, work);
195
196 down_write(&work->mm->mmap_sem);
197 work->mm->pinned_vm -= work->num_pages;
198 up_write(&work->mm->mmap_sem);
199 mmput(work->mm);
200 kfree(work);
201}
202
203void ipath_release_user_pages_on_close(struct page **p, size_t num_pages)
204{
205 struct ipath_user_pages_work *work;
206 struct mm_struct *mm;
207
208 __ipath_release_user_pages(p, num_pages, 1);
209
210 mm = get_task_mm(current);
211 if (!mm)
212 return;
213
214 work = kmalloc(sizeof(*work), GFP_KERNEL);
215 if (!work)
216 goto bail_mm;
217
218 INIT_WORK(&work->work, user_pages_account);
219 work->mm = mm;
220 work->num_pages = num_pages;
221
222 queue_work(ib_wq, &work->work);
223 return;
224
225bail_mm:
226 mmput(mm);
227 return;
228}
diff --git a/drivers/staging/rdma/ipath/ipath_user_sdma.c b/drivers/staging/rdma/ipath/ipath_user_sdma.c
deleted file mode 100644
index 8c12e3cccc58..000000000000
--- a/drivers/staging/rdma/ipath/ipath_user_sdma.c
+++ /dev/null
@@ -1,874 +0,0 @@
1/*
2 * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#include <linux/mm.h>
33#include <linux/types.h>
34#include <linux/device.h>
35#include <linux/dmapool.h>
36#include <linux/slab.h>
37#include <linux/list.h>
38#include <linux/highmem.h>
39#include <linux/io.h>
40#include <linux/uio.h>
41#include <linux/rbtree.h>
42#include <linux/spinlock.h>
43#include <linux/delay.h>
44
45#include "ipath_kernel.h"
46#include "ipath_user_sdma.h"
47
48/* minimum size of header */
49#define IPATH_USER_SDMA_MIN_HEADER_LENGTH 64
50/* expected size of headers (for dma_pool) */
51#define IPATH_USER_SDMA_EXP_HEADER_LENGTH 64
52/* length mask in PBC (lower 11 bits) */
53#define IPATH_PBC_LENGTH_MASK ((1 << 11) - 1)
54
55struct ipath_user_sdma_pkt {
56 u8 naddr; /* dimension of addr (1..3) ... */
57 u32 counter; /* sdma pkts queued counter for this entry */
58 u64 added; /* global descq number of entries */
59
60 struct {
61 u32 offset; /* offset for kvaddr, addr */
62 u32 length; /* length in page */
63 u8 put_page; /* should we put_page? */
64 u8 dma_mapped; /* is page dma_mapped? */
65 struct page *page; /* may be NULL (coherent mem) */
66 void *kvaddr; /* FIXME: only for pio hack */
67 dma_addr_t addr;
68 } addr[4]; /* max pages, any more and we coalesce */
69 struct list_head list; /* list element */
70};
71
72struct ipath_user_sdma_queue {
73 /*
74 * pkts sent to dma engine are queued on this
75 * list head. the type of the elements of this
76 * list are struct ipath_user_sdma_pkt...
77 */
78 struct list_head sent;
79
80 /* headers with expected length are allocated from here... */
81 char header_cache_name[64];
82 struct dma_pool *header_cache;
83
84 /* packets are allocated from the slab cache... */
85 char pkt_slab_name[64];
86 struct kmem_cache *pkt_slab;
87
88 /* as packets go on the queued queue, they are counted... */
89 u32 counter;
90 u32 sent_counter;
91
92 /* dma page table */
93 struct rb_root dma_pages_root;
94
95 /* protect everything above... */
96 struct mutex lock;
97};
98
99struct ipath_user_sdma_queue *
100ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport)
101{
102 struct ipath_user_sdma_queue *pq =
103 kmalloc(sizeof(struct ipath_user_sdma_queue), GFP_KERNEL);
104
105 if (!pq)
106 goto done;
107
108 pq->counter = 0;
109 pq->sent_counter = 0;
110 INIT_LIST_HEAD(&pq->sent);
111
112 mutex_init(&pq->lock);
113
114 snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
115 "ipath-user-sdma-pkts-%u-%02u.%02u", unit, port, sport);
116 pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
117 sizeof(struct ipath_user_sdma_pkt),
118 0, 0, NULL);
119
120 if (!pq->pkt_slab)
121 goto err_kfree;
122
123 snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
124 "ipath-user-sdma-headers-%u-%02u.%02u", unit, port, sport);
125 pq->header_cache = dma_pool_create(pq->header_cache_name,
126 dev,
127 IPATH_USER_SDMA_EXP_HEADER_LENGTH,
128 4, 0);
129 if (!pq->header_cache)
130 goto err_slab;
131
132 pq->dma_pages_root = RB_ROOT;
133
134 goto done;
135
136err_slab:
137 kmem_cache_destroy(pq->pkt_slab);
138err_kfree:
139 kfree(pq);
140 pq = NULL;
141
142done:
143 return pq;
144}
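/*
 * A queue created here is fed by ipath_user_sdma_writev() below; on close,
 * ipath_user_sdma_queue_drain() and then ipath_user_sdma_queue_destroy()
 * tear it down.
 */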
145
146static void ipath_user_sdma_init_frag(struct ipath_user_sdma_pkt *pkt,
147 int i, size_t offset, size_t len,
148 int put_page, int dma_mapped,
149 struct page *page,
150 void *kvaddr, dma_addr_t dma_addr)
151{
152 pkt->addr[i].offset = offset;
153 pkt->addr[i].length = len;
154 pkt->addr[i].put_page = put_page;
155 pkt->addr[i].dma_mapped = dma_mapped;
156 pkt->addr[i].page = page;
157 pkt->addr[i].kvaddr = kvaddr;
158 pkt->addr[i].addr = dma_addr;
159}
160
161static void ipath_user_sdma_init_header(struct ipath_user_sdma_pkt *pkt,
162 u32 counter, size_t offset,
163 size_t len, int dma_mapped,
164 struct page *page,
165 void *kvaddr, dma_addr_t dma_addr)
166{
167 pkt->naddr = 1;
168 pkt->counter = counter;
169 ipath_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
170 kvaddr, dma_addr);
171}
172
173/* we have too many pages in the iovec; coalesce into a single page */
174static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd,
175 struct ipath_user_sdma_pkt *pkt,
176 const struct iovec *iov,
177 unsigned long niov) {
178 int ret = 0;
179 struct page *page = alloc_page(GFP_KERNEL);
180 void *mpage_save;
181 char *mpage;
182 int i;
183 int len = 0;
184 dma_addr_t dma_addr;
185
186 if (!page) {
187 ret = -ENOMEM;
188 goto done;
189 }
190
191 mpage = kmap(page);
192 mpage_save = mpage;
193 for (i = 0; i < niov; i++) {
194 int cfur;
195
196 cfur = copy_from_user(mpage,
197 iov[i].iov_base, iov[i].iov_len);
198 if (cfur) {
199 ret = -EFAULT;
200 goto free_unmap;
201 }
202
203 mpage += iov[i].iov_len;
204 len += iov[i].iov_len;
205 }
206
207 dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
208 DMA_TO_DEVICE);
209 if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
210 ret = -ENOMEM;
211 goto free_unmap;
212 }
213
214 ipath_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
215 dma_addr);
216 pkt->naddr = 2;
217
218 goto done;
219
220free_unmap:
221 kunmap(page);
222 __free_page(page);
223done:
224 return ret;
225}
226
227/* how many pages in this iovec element? */
228static int ipath_user_sdma_num_pages(const struct iovec *iov)
229{
230 const unsigned long addr = (unsigned long) iov->iov_base;
231 const unsigned long len = iov->iov_len;
232 const unsigned long spage = addr & PAGE_MASK;
233 const unsigned long epage = (addr + len - 1) & PAGE_MASK;
234
235 return 1 + ((epage - spage) >> PAGE_SHIFT);
236}
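/*
 * Worked example (assuming 4 KiB pages): iov_base = 0x1ff0 with
 * iov_len = 0x20 gives spage = 0x1000 and epage = 0x2000, so the element
 * spans 2 pages even though it is only 32 bytes long.
 */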
237
238/* truncate length to page boundary */
239static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len)
240{
241 const unsigned long offset = offset_in_page(addr);
242
243 return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
244}
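/*
 * E.g. (again assuming 4 KiB pages) a page offset of 0xff0 with len 0x100
 * returns 0x10: only the bytes up to the page boundary are counted, and the
 * caller advances addr/len and comes back for the rest.
 */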
245
246static void ipath_user_sdma_free_pkt_frag(struct device *dev,
247 struct ipath_user_sdma_queue *pq,
248 struct ipath_user_sdma_pkt *pkt,
249 int frag)
250{
251 const int i = frag;
252
253 if (pkt->addr[i].page) {
254 if (pkt->addr[i].dma_mapped)
255 dma_unmap_page(dev,
256 pkt->addr[i].addr,
257 pkt->addr[i].length,
258 DMA_TO_DEVICE);
259
260 if (pkt->addr[i].kvaddr)
261 kunmap(pkt->addr[i].page);
262
263 if (pkt->addr[i].put_page)
264 put_page(pkt->addr[i].page);
265 else
266 __free_page(pkt->addr[i].page);
267 } else if (pkt->addr[i].kvaddr)
268 /* free coherent mem from cache... */
269 dma_pool_free(pq->header_cache,
270 pkt->addr[i].kvaddr, pkt->addr[i].addr);
271}
272
273/* return number of pages pinned... */
274static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
275 struct ipath_user_sdma_pkt *pkt,
276 unsigned long addr, int tlen, int npages)
277{
278 struct page *pages[2];
279 int j;
280 int ret;
281
282 ret = get_user_pages_fast(addr, npages, 0, pages);
283 if (ret != npages) {
284 int i;
285
286 for (i = 0; i < ret; i++)
287 put_page(pages[i]);
288
289 ret = -ENOMEM;
290 goto done;
291 }
292
293 for (j = 0; j < npages; j++) {
294 /* map the pages... */
295 const int flen =
296 ipath_user_sdma_page_length(addr, tlen);
297 dma_addr_t dma_addr =
298 dma_map_page(&dd->pcidev->dev,
299 pages[j], 0, flen, DMA_TO_DEVICE);
300 unsigned long fofs = offset_in_page(addr);
301
302 if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
303 ret = -ENOMEM;
304 goto done;
305 }
306
307 ipath_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
308 pages[j], kmap(pages[j]),
309 dma_addr);
310
311 pkt->naddr++;
312 addr += flen;
313 tlen -= flen;
314 }
315
316done:
317 return ret;
318}
319
320static int ipath_user_sdma_pin_pkt(const struct ipath_devdata *dd,
321 struct ipath_user_sdma_queue *pq,
322 struct ipath_user_sdma_pkt *pkt,
323 const struct iovec *iov,
324 unsigned long niov)
325{
326 int ret = 0;
327 unsigned long idx;
328
329 for (idx = 0; idx < niov; idx++) {
330 const int npages = ipath_user_sdma_num_pages(iov + idx);
331 const unsigned long addr = (unsigned long) iov[idx].iov_base;
332
333 ret = ipath_user_sdma_pin_pages(dd, pkt,
334 addr, iov[idx].iov_len,
335 npages);
336 if (ret < 0)
337 goto free_pkt;
338 }
339
340 goto done;
341
342free_pkt:
343 for (idx = 0; idx < pkt->naddr; idx++)
344 ipath_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
345
346done:
347 return ret;
348}
349
350static int ipath_user_sdma_init_payload(const struct ipath_devdata *dd,
351 struct ipath_user_sdma_queue *pq,
352 struct ipath_user_sdma_pkt *pkt,
353 const struct iovec *iov,
354 unsigned long niov, int npages)
355{
356 int ret = 0;
357
358 if (npages >= ARRAY_SIZE(pkt->addr))
359 ret = ipath_user_sdma_coalesce(dd, pkt, iov, niov);
360 else
361 ret = ipath_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
362
363 return ret;
364}
365
366/* free all of the packets on a list */
367static void ipath_user_sdma_free_pkt_list(struct device *dev,
368 struct ipath_user_sdma_queue *pq,
369 struct list_head *list)
370{
371 struct ipath_user_sdma_pkt *pkt, *pkt_next;
372
373 list_for_each_entry_safe(pkt, pkt_next, list, list) {
374 int i;
375
376 for (i = 0; i < pkt->naddr; i++)
377 ipath_user_sdma_free_pkt_frag(dev, pq, pkt, i);
378
379 kmem_cache_free(pq->pkt_slab, pkt);
380 }
381}
382
383/*
384 * copy headers, coalesce etc -- pq->lock must be held
385 *
386 * we queue all the packets to list, returning the
387 * number of iovec entries consumed. list must be empty
388 * initially, since we clean it out on error...
389 */
390static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd,
391 struct ipath_user_sdma_queue *pq,
392 struct list_head *list,
393 const struct iovec *iov,
394 unsigned long niov,
395 int maxpkts)
396{
397 unsigned long idx = 0;
398 int ret = 0;
399 int npkts = 0;
400 struct page *page = NULL;
401 __le32 *pbc;
402 dma_addr_t dma_addr;
403 struct ipath_user_sdma_pkt *pkt = NULL;
404 size_t len;
405 size_t nw;
406 u32 counter = pq->counter;
407 int dma_mapped = 0;
408
409 while (idx < niov && npkts < maxpkts) {
410 const unsigned long addr = (unsigned long) iov[idx].iov_base;
411 const unsigned long idx_save = idx;
412 unsigned pktnw;
413 unsigned pktnwc;
414 int nfrags = 0;
415 int npages = 0;
416 int cfur;
417
418 dma_mapped = 0;
419 len = iov[idx].iov_len;
420 nw = len >> 2;
421 page = NULL;
422
423 pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
424 if (!pkt) {
425 ret = -ENOMEM;
426 goto free_list;
427 }
428
429 if (len < IPATH_USER_SDMA_MIN_HEADER_LENGTH ||
430 len > PAGE_SIZE || len & 3 || addr & 3) {
431 ret = -EINVAL;
432 goto free_pkt;
433 }
434
435 if (len == IPATH_USER_SDMA_EXP_HEADER_LENGTH)
436 pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
437 &dma_addr);
438 else
439 pbc = NULL;
440
441 if (!pbc) {
442 page = alloc_page(GFP_KERNEL);
443 if (!page) {
444 ret = -ENOMEM;
445 goto free_pkt;
446 }
447 pbc = kmap(page);
448 }
449
450 cfur = copy_from_user(pbc, iov[idx].iov_base, len);
451 if (cfur) {
452 ret = -EFAULT;
453 goto free_pbc;
454 }
455
456 /*
457		 * this assignment is a bit strange. it's because
458		 * the pbc counts the number of 32 bit words in the full
459 * packet _except_ the first word of the pbc itself...
460 */
461 pktnwc = nw - 1;
462
463 /*
464 * pktnw computation yields the number of 32 bit words
465 * that the caller has indicated in the PBC. note that
466 * this is one less than the total number of words that
467		 * go to the send DMA engine, as the first 32 bit word
468 * of the PBC itself is not counted. Armed with this count,
469 * we can verify that the packet is consistent with the
470 * iovec lengths.
471 */
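		/*
		 * Example: a 64 byte header iovec gives nw = 16 and
		 * pktnwc = 15.  If the PBC's low 11 bits claim 21 dwords,
		 * the payload iovecs that follow must supply exactly
		 * 6 more dwords (24 bytes) for the pktnwc == pktnw check
		 * below to pass.
		 */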
472 pktnw = le32_to_cpu(*pbc) & IPATH_PBC_LENGTH_MASK;
473 if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
474 ret = -EINVAL;
475 goto free_pbc;
476 }
477
478
479 idx++;
480 while (pktnwc < pktnw && idx < niov) {
481 const size_t slen = iov[idx].iov_len;
482 const unsigned long faddr =
483 (unsigned long) iov[idx].iov_base;
484
485 if (slen & 3 || faddr & 3 || !slen ||
486 slen > PAGE_SIZE) {
487 ret = -EINVAL;
488 goto free_pbc;
489 }
490
491 npages++;
492 if ((faddr & PAGE_MASK) !=
493 ((faddr + slen - 1) & PAGE_MASK))
494 npages++;
495
496 pktnwc += slen >> 2;
497 idx++;
498 nfrags++;
499 }
500
501 if (pktnwc != pktnw) {
502 ret = -EINVAL;
503 goto free_pbc;
504 }
505
506 if (page) {
507 dma_addr = dma_map_page(&dd->pcidev->dev,
508 page, 0, len, DMA_TO_DEVICE);
509 if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
510 ret = -ENOMEM;
511 goto free_pbc;
512 }
513
514 dma_mapped = 1;
515 }
516
517 ipath_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
518 page, pbc, dma_addr);
519
520 if (nfrags) {
521 ret = ipath_user_sdma_init_payload(dd, pq, pkt,
522 iov + idx_save + 1,
523 nfrags, npages);
524 if (ret < 0)
525 goto free_pbc_dma;
526 }
527
528 counter++;
529 npkts++;
530
531 list_add_tail(&pkt->list, list);
532 }
533
534 ret = idx;
535 goto done;
536
537free_pbc_dma:
538 if (dma_mapped)
539 dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
540free_pbc:
541 if (page) {
542 kunmap(page);
543 __free_page(page);
544 } else
545 dma_pool_free(pq->header_cache, pbc, dma_addr);
546free_pkt:
547 kmem_cache_free(pq->pkt_slab, pkt);
548free_list:
549 ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
550done:
551 return ret;
552}
553
554static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq,
555 u32 c)
556{
557 pq->sent_counter = c;
558}
559
560/* try to clean out queue -- needs pq->lock */
561static int ipath_user_sdma_queue_clean(const struct ipath_devdata *dd,
562 struct ipath_user_sdma_queue *pq)
563{
564 struct list_head free_list;
565 struct ipath_user_sdma_pkt *pkt;
566 struct ipath_user_sdma_pkt *pkt_prev;
567 int ret = 0;
568
569 INIT_LIST_HEAD(&free_list);
570
571 list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
572 s64 descd = dd->ipath_sdma_descq_removed - pkt->added;
573
574 if (descd < 0)
575 break;
576
577 list_move_tail(&pkt->list, &free_list);
578
579 /* one more packet cleaned */
580 ret++;
581 }
582
583 if (!list_empty(&free_list)) {
584 u32 counter;
585
586 pkt = list_entry(free_list.prev,
587 struct ipath_user_sdma_pkt, list);
588 counter = pkt->counter;
589
590 ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
591 ipath_user_sdma_set_complete_counter(pq, counter);
592 }
593
594 return ret;
595}
596
597void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq)
598{
599 if (!pq)
600 return;
601
602 kmem_cache_destroy(pq->pkt_slab);
603 dma_pool_destroy(pq->header_cache);
604 kfree(pq);
605}
606
607/* clean the descriptor queue, returns > 0 if some elements were cleaned */
608static int ipath_user_sdma_hwqueue_clean(struct ipath_devdata *dd)
609{
610 int ret;
611 unsigned long flags;
612
613 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
614 ret = ipath_sdma_make_progress(dd);
615 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
616
617 return ret;
618}
619
620/* we're in close, drain packets so that we can clean up successfully... */
621void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
622 struct ipath_user_sdma_queue *pq)
623{
624 int i;
625
626 if (!pq)
627 return;
628
629 for (i = 0; i < 100; i++) {
630 mutex_lock(&pq->lock);
631 if (list_empty(&pq->sent)) {
632 mutex_unlock(&pq->lock);
633 break;
634 }
635 ipath_user_sdma_hwqueue_clean(dd);
636 ipath_user_sdma_queue_clean(dd, pq);
637 mutex_unlock(&pq->lock);
638 msleep(10);
639 }
640
641 if (!list_empty(&pq->sent)) {
642 struct list_head free_list;
643
644 printk(KERN_INFO "drain: lists not empty: forcing!\n");
645 INIT_LIST_HEAD(&free_list);
646 mutex_lock(&pq->lock);
647 list_splice_init(&pq->sent, &free_list);
648 ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
649 mutex_unlock(&pq->lock);
650 }
651}
652
653static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd,
654 u64 addr, u64 dwlen, u64 dwoffset)
655{
656 return cpu_to_le64(/* SDmaPhyAddr[31:0] */
657 ((addr & 0xfffffffcULL) << 32) |
658 /* SDmaGeneration[1:0] */
659 ((dd->ipath_sdma_generation & 3ULL) << 30) |
660 /* SDmaDwordCount[10:0] */
661 ((dwlen & 0x7ffULL) << 16) |
662 /* SDmaBufOffset[12:2] */
663 (dwoffset & 0x7ffULL));
664}
665
666static inline __le64 ipath_sdma_make_first_desc0(__le64 descq)
667{
668 return descq | cpu_to_le64(1ULL << 12);
669}
670
671static inline __le64 ipath_sdma_make_last_desc0(__le64 descq)
672{
673 /* last */ /* dma head */
674 return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
675}
676
677static inline __le64 ipath_sdma_make_desc1(u64 addr)
678{
679 /* SDmaPhyAddr[47:32] */
680 return cpu_to_le64(addr >> 32);
681}
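/*
 * Taken together, the helpers above build each 16 byte descriptor as:
 * qw[0] holds SDmaPhyAddr[31:0] in bits 63:32, the 2 bit generation in
 * bits 31:30, the dword count in bits 26:16, the first/last/"dma head"
 * flags in bits 12, 11 and 13, and the dword buffer offset in the low
 * bits; qw[1] holds SDmaPhyAddr[47:32].  ipath_user_sdma_send_frag()
 * below writes the pair into dd->ipath_sdma_descq[tail].
 */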
682
683static void ipath_user_sdma_send_frag(struct ipath_devdata *dd,
684 struct ipath_user_sdma_pkt *pkt, int idx,
685 unsigned ofs, u16 tail)
686{
687 const u64 addr = (u64) pkt->addr[idx].addr +
688 (u64) pkt->addr[idx].offset;
689 const u64 dwlen = (u64) pkt->addr[idx].length / 4;
690 __le64 *descqp;
691 __le64 descq0;
692
693 descqp = &dd->ipath_sdma_descq[tail].qw[0];
694
695 descq0 = ipath_sdma_make_desc0(dd, addr, dwlen, ofs);
696 if (idx == 0)
697 descq0 = ipath_sdma_make_first_desc0(descq0);
698 if (idx == pkt->naddr - 1)
699 descq0 = ipath_sdma_make_last_desc0(descq0);
700
701 descqp[0] = descq0;
702 descqp[1] = ipath_sdma_make_desc1(addr);
703}
704
705/* pq->lock must be held, get packets on the wire... */
706static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd,
707 struct ipath_user_sdma_queue *pq,
708 struct list_head *pktlist)
709{
710 int ret = 0;
711 unsigned long flags;
712 u16 tail;
713
714 if (list_empty(pktlist))
715 return 0;
716
717 if (unlikely(!(dd->ipath_flags & IPATH_LINKACTIVE)))
718 return -ECOMM;
719
720 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
721
722 if (unlikely(dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK)) {
723 ret = -ECOMM;
724 goto unlock;
725 }
726
727 tail = dd->ipath_sdma_descq_tail;
728 while (!list_empty(pktlist)) {
729 struct ipath_user_sdma_pkt *pkt =
730 list_entry(pktlist->next, struct ipath_user_sdma_pkt,
731 list);
732 int i;
733 unsigned ofs = 0;
734 u16 dtail = tail;
735
736 if (pkt->naddr > ipath_sdma_descq_freecnt(dd))
737 goto unlock_check_tail;
738
739 for (i = 0; i < pkt->naddr; i++) {
740 ipath_user_sdma_send_frag(dd, pkt, i, ofs, tail);
741 ofs += pkt->addr[i].length >> 2;
742
743 if (++tail == dd->ipath_sdma_descq_cnt) {
744 tail = 0;
745 ++dd->ipath_sdma_generation;
746 }
747 }
748
749 if ((ofs<<2) > dd->ipath_ibmaxlen) {
750 ipath_dbg("packet size %X > ibmax %X, fail\n",
751 ofs<<2, dd->ipath_ibmaxlen);
752 ret = -EMSGSIZE;
753 goto unlock;
754 }
755
756 /*
757 * if the packet is >= 2KB mtu equivalent, we have to use
758 * the large buffers, and have to mark each descriptor as
759 * part of a large buffer packet.
760 */
761 if (ofs >= IPATH_SMALLBUF_DWORDS) {
762 for (i = 0; i < pkt->naddr; i++) {
763 dd->ipath_sdma_descq[dtail].qw[0] |=
764 cpu_to_le64(1ULL << 14);
765 if (++dtail == dd->ipath_sdma_descq_cnt)
766 dtail = 0;
767 }
768 }
769
770 dd->ipath_sdma_descq_added += pkt->naddr;
771 pkt->added = dd->ipath_sdma_descq_added;
772 list_move_tail(&pkt->list, &pq->sent);
773 ret++;
774 }
775
776unlock_check_tail:
777 /* advance the tail on the chip if necessary */
778 if (dd->ipath_sdma_descq_tail != tail) {
779 wmb();
780 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
781 dd->ipath_sdma_descq_tail = tail;
782 }
783
784unlock:
785 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
786
787 return ret;
788}
789
790int ipath_user_sdma_writev(struct ipath_devdata *dd,
791 struct ipath_user_sdma_queue *pq,
792 const struct iovec *iov,
793 unsigned long dim)
794{
795 int ret = 0;
796 struct list_head list;
797 int npkts = 0;
798
799 INIT_LIST_HEAD(&list);
800
801 mutex_lock(&pq->lock);
802
803 if (dd->ipath_sdma_descq_added != dd->ipath_sdma_descq_removed) {
804 ipath_user_sdma_hwqueue_clean(dd);
805 ipath_user_sdma_queue_clean(dd, pq);
806 }
807
808 while (dim) {
809 const int mxp = 8;
810
811 ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
812 if (ret <= 0)
813 goto done_unlock;
814 else {
815 dim -= ret;
816 iov += ret;
817 }
818
819 /* force packets onto the sdma hw queue... */
820 if (!list_empty(&list)) {
821 /*
822			 * lazily clean hw queue. the 4 is a rough guess of
823			 * how many sdma descriptors a packet will take (it
824 * doesn't have to be perfect).
825 */
826 if (ipath_sdma_descq_freecnt(dd) < ret * 4) {
827 ipath_user_sdma_hwqueue_clean(dd);
828 ipath_user_sdma_queue_clean(dd, pq);
829 }
830
831 ret = ipath_user_sdma_push_pkts(dd, pq, &list);
832 if (ret < 0)
833 goto done_unlock;
834 else {
835 npkts += ret;
836 pq->counter += ret;
837
838 if (!list_empty(&list))
839 goto done_unlock;
840 }
841 }
842 }
843
844done_unlock:
845 if (!list_empty(&list))
846 ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
847 mutex_unlock(&pq->lock);
848
849 return (ret < 0) ? ret : npkts;
850}
851
852int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
853 struct ipath_user_sdma_queue *pq)
854{
855 int ret = 0;
856
857 mutex_lock(&pq->lock);
858 ipath_user_sdma_hwqueue_clean(dd);
859 ret = ipath_user_sdma_queue_clean(dd, pq);
860 mutex_unlock(&pq->lock);
861
862 return ret;
863}
864
865u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq)
866{
867 return pq->sent_counter;
868}
869
870u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq)
871{
872 return pq->counter;
873}
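/*
 * The two counters above pair up: pq->counter advances as packets are
 * queued in ipath_user_sdma_writev(), while pq->sent_counter only advances
 * in ipath_user_sdma_queue_clean() once the hardware has consumed the
 * descriptors, so the gap between them tracks how many packets are still
 * outstanding.
 */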
874
diff --git a/drivers/staging/rdma/ipath/ipath_user_sdma.h b/drivers/staging/rdma/ipath/ipath_user_sdma.h
deleted file mode 100644
index fc76316c4a58..000000000000
--- a/drivers/staging/rdma/ipath/ipath_user_sdma.h
+++ /dev/null
@@ -1,52 +0,0 @@
1/*
2 * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#include <linux/device.h>
33
34struct ipath_user_sdma_queue;
35
36struct ipath_user_sdma_queue *
37ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
38void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq);
39
40int ipath_user_sdma_writev(struct ipath_devdata *dd,
41 struct ipath_user_sdma_queue *pq,
42 const struct iovec *iov,
43 unsigned long dim);
44
45int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
46 struct ipath_user_sdma_queue *pq);
47
48void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
49 struct ipath_user_sdma_queue *pq);
50
51u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq);
52u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq);
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.c b/drivers/staging/rdma/ipath/ipath_verbs.c
deleted file mode 100644
index 53f9dcab180d..000000000000
--- a/drivers/staging/rdma/ipath/ipath_verbs.c
+++ /dev/null
@@ -1,2376 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <rdma/ib_mad.h>
35#include <rdma/ib_user_verbs.h>
36#include <linux/io.h>
37#include <linux/slab.h>
38#include <linux/module.h>
39#include <linux/utsname.h>
40#include <linux/rculist.h>
41
42#include "ipath_kernel.h"
43#include "ipath_verbs.h"
44#include "ipath_common.h"
45
46static unsigned int ib_ipath_qp_table_size = 251;
47module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
48MODULE_PARM_DESC(qp_table_size, "QP table size");
49
50unsigned int ib_ipath_lkey_table_size = 12;
51module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
52 S_IRUGO);
53MODULE_PARM_DESC(lkey_table_size,
54 "LKEY table size in bits (2^n, 1 <= n <= 23)");
55
56static unsigned int ib_ipath_max_pds = 0xFFFF;
57module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
58MODULE_PARM_DESC(max_pds,
59 "Maximum number of protection domains to support");
60
61static unsigned int ib_ipath_max_ahs = 0xFFFF;
62module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
63MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
64
65unsigned int ib_ipath_max_cqes = 0x2FFFF;
66module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
67MODULE_PARM_DESC(max_cqes,
68 "Maximum number of completion queue entries to support");
69
70unsigned int ib_ipath_max_cqs = 0x1FFFF;
71module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
72MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
73
74unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
75module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
76 S_IWUSR | S_IRUGO);
77MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
78
79unsigned int ib_ipath_max_qps = 16384;
80module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
81MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
82
83unsigned int ib_ipath_max_sges = 0x60;
84module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
85MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
86
87unsigned int ib_ipath_max_mcast_grps = 16384;
88module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
89 S_IWUSR | S_IRUGO);
90MODULE_PARM_DESC(max_mcast_grps,
91 "Maximum number of multicast groups to support");
92
93unsigned int ib_ipath_max_mcast_qp_attached = 16;
94module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
95 uint, S_IWUSR | S_IRUGO);
96MODULE_PARM_DESC(max_mcast_qp_attached,
97 "Maximum number of attached QPs to support");
98
99unsigned int ib_ipath_max_srqs = 1024;
100module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
101MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
102
103unsigned int ib_ipath_max_srq_sges = 128;
104module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
105 uint, S_IWUSR | S_IRUGO);
106MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
107
108unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
109module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
110 uint, S_IWUSR | S_IRUGO);
111MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");
112
113static unsigned int ib_ipath_disable_sma;
114module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
115MODULE_PARM_DESC(disable_sma, "Disable the SMA");
116
117/*
118 * Note that it is OK to post send work requests in the SQE and ERR
119 * states; ipath_do_send() will process them and generate error
120 * completions as per IB 1.2 C10-96.
121 */
122const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
123 [IB_QPS_RESET] = 0,
124 [IB_QPS_INIT] = IPATH_POST_RECV_OK,
125 [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
126 [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
127 IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
128 IPATH_PROCESS_NEXT_SEND_OK,
129 [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
130 IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
131 [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
132 IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
133 [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
134 IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
135};
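/*
 * This table is consulted before touching either work queue: for example,
 * ipath_post_one_send() requires IPATH_POST_SEND_OK, ipath_post_receive()
 * requires IPATH_POST_RECV_OK, and ipath_qp_rcv() drops packets unless
 * IPATH_PROCESS_RECV_OK is set for the current QP state.
 */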
136
137struct ipath_ucontext {
138 struct ib_ucontext ibucontext;
139};
140
141static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
142 *ibucontext)
143{
144 return container_of(ibucontext, struct ipath_ucontext, ibucontext);
145}
146
147/*
148 * Translate ib_wr_opcode into ib_wc_opcode.
149 */
150const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
151 [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
152 [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
153 [IB_WR_SEND] = IB_WC_SEND,
154 [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
155 [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
156 [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
157 [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
158};
159
160/*
161 * System image GUID.
162 */
163static __be64 sys_image_guid;
164
165/**
166 * ipath_copy_sge - copy data to SGE memory
167 * @ss: the SGE state
168 * @data: the data to copy
169 * @length: the length of the data
170 */
171void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
172{
173 struct ipath_sge *sge = &ss->sge;
174
175 while (length) {
176 u32 len = sge->length;
177
178 if (len > length)
179 len = length;
180 if (len > sge->sge_length)
181 len = sge->sge_length;
182 BUG_ON(len == 0);
183 memcpy(sge->vaddr, data, len);
184 sge->vaddr += len;
185 sge->length -= len;
186 sge->sge_length -= len;
187 if (sge->sge_length == 0) {
188 if (--ss->num_sge)
189 *sge = *ss->sg_list++;
190 } else if (sge->length == 0 && sge->mr != NULL) {
191 if (++sge->n >= IPATH_SEGSZ) {
192 if (++sge->m >= sge->mr->mapsz)
193 break;
194 sge->n = 0;
195 }
196 sge->vaddr =
197 sge->mr->map[sge->m]->segs[sge->n].vaddr;
198 sge->length =
199 sge->mr->map[sge->m]->segs[sge->n].length;
200 }
201 data += len;
202 length -= len;
203 }
204}
205
206/**
207 * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
208 * @ss: the SGE state
209 * @length: the number of bytes to skip
210 */
211void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
212{
213 struct ipath_sge *sge = &ss->sge;
214
215 while (length) {
216 u32 len = sge->length;
217
218 if (len > length)
219 len = length;
220 if (len > sge->sge_length)
221 len = sge->sge_length;
222 BUG_ON(len == 0);
223 sge->vaddr += len;
224 sge->length -= len;
225 sge->sge_length -= len;
226 if (sge->sge_length == 0) {
227 if (--ss->num_sge)
228 *sge = *ss->sg_list++;
229 } else if (sge->length == 0 && sge->mr != NULL) {
230 if (++sge->n >= IPATH_SEGSZ) {
231 if (++sge->m >= sge->mr->mapsz)
232 break;
233 sge->n = 0;
234 }
235 sge->vaddr =
236 sge->mr->map[sge->m]->segs[sge->n].vaddr;
237 sge->length =
238 sge->mr->map[sge->m]->segs[sge->n].length;
239 }
240 length -= len;
241 }
242}
243
244/*
245 * Count the number of DMA descriptors needed to send length bytes of data.
246 * Don't modify the ipath_sge_state to get the count.
247 * Return zero if any of the segments is not aligned.
248 */
249static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
250{
251 struct ipath_sge *sg_list = ss->sg_list;
252 struct ipath_sge sge = ss->sge;
253 u8 num_sge = ss->num_sge;
254 u32 ndesc = 1; /* count the header */
255
256 while (length) {
257 u32 len = sge.length;
258
259 if (len > length)
260 len = length;
261 if (len > sge.sge_length)
262 len = sge.sge_length;
263 BUG_ON(len == 0);
264 if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
265 (len != length && (len & (sizeof(u32) - 1)))) {
266 ndesc = 0;
267 break;
268 }
269 ndesc++;
270 sge.vaddr += len;
271 sge.length -= len;
272 sge.sge_length -= len;
273 if (sge.sge_length == 0) {
274 if (--num_sge)
275 sge = *sg_list++;
276 } else if (sge.length == 0 && sge.mr != NULL) {
277 if (++sge.n >= IPATH_SEGSZ) {
278 if (++sge.m >= sge.mr->mapsz)
279 break;
280 sge.n = 0;
281 }
282 sge.vaddr =
283 sge.mr->map[sge.m]->segs[sge.n].vaddr;
284 sge.length =
285 sge.mr->map[sge.m]->segs[sge.n].length;
286 }
287 length -= len;
288 }
289 return ndesc;
290}
291
292/*
293 * Copy from the SGEs to the data buffer.
294 */
295static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
296 u32 length)
297{
298 struct ipath_sge *sge = &ss->sge;
299
300 while (length) {
301 u32 len = sge->length;
302
303 if (len > length)
304 len = length;
305 if (len > sge->sge_length)
306 len = sge->sge_length;
307 BUG_ON(len == 0);
308 memcpy(data, sge->vaddr, len);
309 sge->vaddr += len;
310 sge->length -= len;
311 sge->sge_length -= len;
312 if (sge->sge_length == 0) {
313 if (--ss->num_sge)
314 *sge = *ss->sg_list++;
315 } else if (sge->length == 0 && sge->mr != NULL) {
316 if (++sge->n >= IPATH_SEGSZ) {
317 if (++sge->m >= sge->mr->mapsz)
318 break;
319 sge->n = 0;
320 }
321 sge->vaddr =
322 sge->mr->map[sge->m]->segs[sge->n].vaddr;
323 sge->length =
324 sge->mr->map[sge->m]->segs[sge->n].length;
325 }
326 data += len;
327 length -= len;
328 }
329}
330
331/**
332 * ipath_post_one_send - post one RC, UC, or UD send work request
333 * @qp: the QP to post on
334 * @wr: the work request to send
335 */
336static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
337{
338 struct ipath_swqe *wqe;
339 u32 next;
340 int i;
341 int j;
342 int acc;
343 int ret;
344 unsigned long flags;
345 struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
346
347 spin_lock_irqsave(&qp->s_lock, flags);
348
349 if (qp->ibqp.qp_type != IB_QPT_SMI &&
350 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
351 ret = -ENETDOWN;
352 goto bail;
353 }
354
355 /* Check that state is OK to post send. */
356 if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
357 goto bail_inval;
358
359 /* IB spec says that num_sge == 0 is OK. */
360 if (wr->num_sge > qp->s_max_sge)
361 goto bail_inval;
362
363 /*
364	 * Don't allow RDMA reads or atomic operations on UC QPs,
365	 * and reject undefined opcodes.
366	 * Make sure the buffer is large enough to hold the result for atomics.
367 */
368 if (qp->ibqp.qp_type == IB_QPT_UC) {
369 if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
370 goto bail_inval;
371 } else if (qp->ibqp.qp_type == IB_QPT_UD) {
372 /* Check UD opcode */
373 if (wr->opcode != IB_WR_SEND &&
374 wr->opcode != IB_WR_SEND_WITH_IMM)
375 goto bail_inval;
376 /* Check UD destination address PD */
377 if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
378 goto bail_inval;
379 } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
380 goto bail_inval;
381 else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
382 (wr->num_sge == 0 ||
383 wr->sg_list[0].length < sizeof(u64) ||
384 wr->sg_list[0].addr & (sizeof(u64) - 1)))
385 goto bail_inval;
386 else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
387 goto bail_inval;
388
389 next = qp->s_head + 1;
390 if (next >= qp->s_size)
391 next = 0;
392 if (next == qp->s_last) {
393 ret = -ENOMEM;
394 goto bail;
395 }
396
397 wqe = get_swqe_ptr(qp, qp->s_head);
398
399 if (qp->ibqp.qp_type != IB_QPT_UC &&
400 qp->ibqp.qp_type != IB_QPT_RC)
401 memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
402 else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
403 wr->opcode == IB_WR_RDMA_WRITE ||
404 wr->opcode == IB_WR_RDMA_READ)
405 memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
406 else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
407 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
408 memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
409 else
410 memcpy(&wqe->wr, wr, sizeof(wqe->wr));
411
412 wqe->length = 0;
413 if (wr->num_sge) {
414 acc = wr->opcode >= IB_WR_RDMA_READ ?
415 IB_ACCESS_LOCAL_WRITE : 0;
416 for (i = 0, j = 0; i < wr->num_sge; i++) {
417 u32 length = wr->sg_list[i].length;
418 int ok;
419
420 if (length == 0)
421 continue;
422 ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
423 &wr->sg_list[i], acc);
424 if (!ok)
425 goto bail_inval;
426 wqe->length += length;
427 j++;
428 }
429 wqe->wr.num_sge = j;
430 }
431 if (qp->ibqp.qp_type == IB_QPT_UC ||
432 qp->ibqp.qp_type == IB_QPT_RC) {
433 if (wqe->length > 0x80000000U)
434 goto bail_inval;
435 } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
436 goto bail_inval;
437 wqe->ssn = qp->s_ssn++;
438 qp->s_head = next;
439
440 ret = 0;
441 goto bail;
442
443bail_inval:
444 ret = -EINVAL;
445bail:
446 spin_unlock_irqrestore(&qp->s_lock, flags);
447 return ret;
448}
449
450/**
451 * ipath_post_send - post a send on a QP
452 * @ibqp: the QP to post the send on
453 * @wr: the list of work requests to post
454 * @bad_wr: the first bad WR is put here
455 *
456 * This may be called from interrupt context.
457 */
458static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
459 struct ib_send_wr **bad_wr)
460{
461 struct ipath_qp *qp = to_iqp(ibqp);
462 int err = 0;
463
464 for (; wr; wr = wr->next) {
465 err = ipath_post_one_send(qp, wr);
466 if (err) {
467 *bad_wr = wr;
468 goto bail;
469 }
470 }
471
472 /* Try to do the send work in the caller's context. */
473 ipath_do_send((unsigned long) qp);
474
475bail:
476 return err;
477}
478
479/**
480 * ipath_post_receive - post a receive on a QP
481 * @ibqp: the QP to post the receive on
482 * @wr: the WR to post
483 * @bad_wr: the first bad WR is put here
484 *
485 * This may be called from interrupt context.
486 */
487static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
488 struct ib_recv_wr **bad_wr)
489{
490 struct ipath_qp *qp = to_iqp(ibqp);
491 struct ipath_rwq *wq = qp->r_rq.wq;
492 unsigned long flags;
493 int ret;
494
495 /* Check that state is OK to post receive. */
496 if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
497 *bad_wr = wr;
498 ret = -EINVAL;
499 goto bail;
500 }
501
502 for (; wr; wr = wr->next) {
503 struct ipath_rwqe *wqe;
504 u32 next;
505 int i;
506
507 if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
508 *bad_wr = wr;
509 ret = -EINVAL;
510 goto bail;
511 }
512
513 spin_lock_irqsave(&qp->r_rq.lock, flags);
514 next = wq->head + 1;
515 if (next >= qp->r_rq.size)
516 next = 0;
517 if (next == wq->tail) {
518 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
519 *bad_wr = wr;
520 ret = -ENOMEM;
521 goto bail;
522 }
523
524 wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
525 wqe->wr_id = wr->wr_id;
526 wqe->num_sge = wr->num_sge;
527 for (i = 0; i < wr->num_sge; i++)
528 wqe->sg_list[i] = wr->sg_list[i];
529 /* Make sure queue entry is written before the head index. */
530 smp_wmb();
531 wq->head = next;
532 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
533 }
534 ret = 0;
535
536bail:
537 return ret;
538}
539
540/**
541 * ipath_qp_rcv - processing an incoming packet on a QP
542 * @dev: the device the packet came on
543 * @hdr: the packet header
544 * @has_grh: true if the packet has a GRH
545 * @data: the packet data
546 * @tlen: the packet length
547 * @qp: the QP the packet came on
548 *
549 * This is called from ipath_ib_rcv() to process an incoming packet
550 * for the given QP.
551 * Called at interrupt level.
552 */
553static void ipath_qp_rcv(struct ipath_ibdev *dev,
554 struct ipath_ib_header *hdr, int has_grh,
555 void *data, u32 tlen, struct ipath_qp *qp)
556{
557 /* Check for valid receive state. */
558 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
559 dev->n_pkt_drops++;
560 return;
561 }
562
563 switch (qp->ibqp.qp_type) {
564 case IB_QPT_SMI:
565 case IB_QPT_GSI:
566 if (ib_ipath_disable_sma)
567 break;
568 /* FALLTHROUGH */
569 case IB_QPT_UD:
570 ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
571 break;
572
573 case IB_QPT_RC:
574 ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
575 break;
576
577 case IB_QPT_UC:
578 ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
579 break;
580
581 default:
582 break;
583 }
584}
585
586/**
587 * ipath_ib_rcv - process an incoming packet
588 * @arg: the device pointer
589 * @rhdr: the header of the packet
590 * @data: the packet data
591 * @tlen: the packet length
592 *
593 * This is called from ipath_kreceive() to process an incoming packet at
594 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
595 */
596void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
597 u32 tlen)
598{
599 struct ipath_ib_header *hdr = rhdr;
600 struct ipath_other_headers *ohdr;
601 struct ipath_qp *qp;
602 u32 qp_num;
603 int lnh;
604 u8 opcode;
605 u16 lid;
606
607 if (unlikely(dev == NULL))
608 goto bail;
609
610 if (unlikely(tlen < 24)) { /* LRH+BTH+CRC */
611 dev->rcv_errors++;
612 goto bail;
613 }
614
615 /* Check for a valid destination LID (see ch. 7.11.1). */
616 lid = be16_to_cpu(hdr->lrh[1]);
617 if (lid < IPATH_MULTICAST_LID_BASE) {
618 lid &= ~((1 << dev->dd->ipath_lmc) - 1);
619 if (unlikely(lid != dev->dd->ipath_lid)) {
620 dev->rcv_errors++;
621 goto bail;
622 }
623 }
624
625 /* Check for GRH */
626 lnh = be16_to_cpu(hdr->lrh[0]) & 3;
627 if (lnh == IPATH_LRH_BTH)
628 ohdr = &hdr->u.oth;
629 else if (lnh == IPATH_LRH_GRH)
630 ohdr = &hdr->u.l.oth;
631 else {
632 dev->rcv_errors++;
633 goto bail;
634 }
635
636 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
637 dev->opstats[opcode].n_bytes += tlen;
638 dev->opstats[opcode].n_packets++;
639
640 /* Get the destination QP number. */
641 qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
642 if (qp_num == IPATH_MULTICAST_QPN) {
643 struct ipath_mcast *mcast;
644 struct ipath_mcast_qp *p;
645
646 if (lnh != IPATH_LRH_GRH) {
647 dev->n_pkt_drops++;
648 goto bail;
649 }
650 mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
651 if (mcast == NULL) {
652 dev->n_pkt_drops++;
653 goto bail;
654 }
655 dev->n_multicast_rcv++;
656 list_for_each_entry_rcu(p, &mcast->qp_list, list)
657 ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
658 /*
659 * Notify ipath_multicast_detach() if it is waiting for us
660 * to finish.
661 */
662 if (atomic_dec_return(&mcast->refcount) <= 1)
663 wake_up(&mcast->wait);
664 } else {
665 qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
666 if (qp) {
667 dev->n_unicast_rcv++;
668 ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
669 tlen, qp);
670 /*
671 * Notify ipath_destroy_qp() if it is waiting
672 * for us to finish.
673 */
674 if (atomic_dec_and_test(&qp->refcount))
675 wake_up(&qp->wait);
676 } else
677 dev->n_pkt_drops++;
678 }
679
680bail:;
681}
682
683/**
684 * ipath_ib_timer - verbs timer
685 * @arg: the device pointer
686 *
687 * This is called from ipath_do_rcv_timer() at interrupt level to check for
688 * QPs which need retransmits and to collect performance numbers.
689 */
690static void ipath_ib_timer(struct ipath_ibdev *dev)
691{
692 struct ipath_qp *resend = NULL;
693 struct ipath_qp *rnr = NULL;
694 struct list_head *last;
695 struct ipath_qp *qp;
696 unsigned long flags;
697
698 if (dev == NULL)
699 return;
700
701 spin_lock_irqsave(&dev->pending_lock, flags);
702 /* Start filling the next pending queue. */
703 if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
704 dev->pending_index = 0;
705 /* Save any requests still in the new queue, they have timed out. */
706 last = &dev->pending[dev->pending_index];
707 while (!list_empty(last)) {
708 qp = list_entry(last->next, struct ipath_qp, timerwait);
709 list_del_init(&qp->timerwait);
710 qp->timer_next = resend;
711 resend = qp;
712 atomic_inc(&qp->refcount);
713 }
714 last = &dev->rnrwait;
715 if (!list_empty(last)) {
716 qp = list_entry(last->next, struct ipath_qp, timerwait);
717 if (--qp->s_rnr_timeout == 0) {
718 do {
719 list_del_init(&qp->timerwait);
720 qp->timer_next = rnr;
721 rnr = qp;
722 atomic_inc(&qp->refcount);
723 if (list_empty(last))
724 break;
725 qp = list_entry(last->next, struct ipath_qp,
726 timerwait);
727 } while (qp->s_rnr_timeout == 0);
728 }
729 }
730 /*
731 * We should only be in the started state if pma_sample_start != 0
732 */
733 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
734 --dev->pma_sample_start == 0) {
735 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
736 ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
737 &dev->ipath_rword,
738 &dev->ipath_spkts,
739 &dev->ipath_rpkts,
740 &dev->ipath_xmit_wait);
741 }
742 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
743 if (dev->pma_sample_interval == 0) {
744 u64 ta, tb, tc, td, te;
745
746 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
747 ipath_snapshot_counters(dev->dd, &ta, &tb,
748 &tc, &td, &te);
749
750 dev->ipath_sword = ta - dev->ipath_sword;
751 dev->ipath_rword = tb - dev->ipath_rword;
752 dev->ipath_spkts = tc - dev->ipath_spkts;
753 dev->ipath_rpkts = td - dev->ipath_rpkts;
754 dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
755 } else {
756 dev->pma_sample_interval--;
757 }
758 }
759 spin_unlock_irqrestore(&dev->pending_lock, flags);
760
761 /* XXX What if timer fires again while this is running? */
762 while (resend != NULL) {
763 qp = resend;
764 resend = qp->timer_next;
765
766 spin_lock_irqsave(&qp->s_lock, flags);
767 if (qp->s_last != qp->s_tail &&
768 ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
769 dev->n_timeouts++;
770 ipath_restart_rc(qp, qp->s_last_psn + 1);
771 }
772 spin_unlock_irqrestore(&qp->s_lock, flags);
773
774 /* Notify ipath_destroy_qp() if it is waiting. */
775 if (atomic_dec_and_test(&qp->refcount))
776 wake_up(&qp->wait);
777 }
778 while (rnr != NULL) {
779 qp = rnr;
780 rnr = qp->timer_next;
781
782 spin_lock_irqsave(&qp->s_lock, flags);
783 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
784 ipath_schedule_send(qp);
785 spin_unlock_irqrestore(&qp->s_lock, flags);
786
787 /* Notify ipath_destroy_qp() if it is waiting. */
788 if (atomic_dec_and_test(&qp->refcount))
789 wake_up(&qp->wait);
790 }
791}
792
793static void update_sge(struct ipath_sge_state *ss, u32 length)
794{
795 struct ipath_sge *sge = &ss->sge;
796
797 sge->vaddr += length;
798 sge->length -= length;
799 sge->sge_length -= length;
800 if (sge->sge_length == 0) {
801 if (--ss->num_sge)
802 *sge = *ss->sg_list++;
803 } else if (sge->length == 0 && sge->mr != NULL) {
804 if (++sge->n >= IPATH_SEGSZ) {
805 if (++sge->m >= sge->mr->mapsz)
806 return;
807 sge->n = 0;
808 }
809 sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
810 sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
811 }
812}
813
814#ifdef __LITTLE_ENDIAN
815static inline u32 get_upper_bits(u32 data, u32 shift)
816{
817 return data >> shift;
818}
819
820static inline u32 set_upper_bits(u32 data, u32 shift)
821{
822 return data << shift;
823}
824
825static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
826{
827 data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
828 data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
829 return data;
830}
831#else
832static inline u32 get_upper_bits(u32 data, u32 shift)
833{
834 return data << shift;
835}
836
837static inline u32 set_upper_bits(u32 data, u32 shift)
838{
839 return data >> shift;
840}
841
842static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
843{
844 data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
845 data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
846 return data;
847}
848#endif
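
As a quick illustration of the byte-packing helpers above (a standalone sketch of the little-endian variants, using stdint types in place of the kernel's u32; the values are hypothetical): clear_upper_bytes(data, n, off) keeps only the n low-order bytes of data and positions them at byte offset off within the word, which is how copy_io() below accumulates partial words before writing them to the PIO buffer.

	/* Standalone check of the little-endian clear_upper_bytes() shown above. */
	#include <assert.h>
	#include <stdint.h>

	#define BITS_PER_BYTE 8

	static uint32_t le_clear_upper_bytes(uint32_t data, uint32_t n, uint32_t off)
	{
		data <<= ((sizeof(uint32_t) - n) * BITS_PER_BYTE);
		data >>= ((sizeof(uint32_t) - n - off) * BITS_PER_BYTE);
		return data;
	}

	int main(void)
	{
		/* Keep the two low-order bytes of the word, placed at byte offset 1. */
		assert(le_clear_upper_bytes(0xAABBCCDDu, 2, 1) == 0x00CCDD00u);
		/* Keep a single byte at offset 0. */
		assert(le_clear_upper_bytes(0xAABBCCDDu, 1, 0) == 0x000000DDu);
		return 0;
	}
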
849
850static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
851 u32 length, unsigned flush_wc)
852{
853 u32 extra = 0;
854 u32 data = 0;
855 u32 last;
856
857 while (1) {
858 u32 len = ss->sge.length;
859 u32 off;
860
861 if (len > length)
862 len = length;
863 if (len > ss->sge.sge_length)
864 len = ss->sge.sge_length;
865 BUG_ON(len == 0);
866 /* If the source address is not aligned, try to align it. */
867 off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
868 if (off) {
869 u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
870 ~(sizeof(u32) - 1));
871 u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
872 u32 y;
873
874 y = sizeof(u32) - off;
875 if (len > y)
876 len = y;
877 if (len + extra >= sizeof(u32)) {
878 data |= set_upper_bits(v, extra *
879 BITS_PER_BYTE);
880 len = sizeof(u32) - extra;
881 if (len == length) {
882 last = data;
883 break;
884 }
885 __raw_writel(data, piobuf);
886 piobuf++;
887 extra = 0;
888 data = 0;
889 } else {
890 /* Clear unused upper bytes */
891 data |= clear_upper_bytes(v, len, extra);
892 if (len == length) {
893 last = data;
894 break;
895 }
896 extra += len;
897 }
898 } else if (extra) {
899 /* Source address is aligned. */
900 u32 *addr = (u32 *) ss->sge.vaddr;
901 int shift = extra * BITS_PER_BYTE;
902 int ushift = 32 - shift;
903 u32 l = len;
904
905 while (l >= sizeof(u32)) {
906 u32 v = *addr;
907
908 data |= set_upper_bits(v, shift);
909 __raw_writel(data, piobuf);
910 data = get_upper_bits(v, ushift);
911 piobuf++;
912 addr++;
913 l -= sizeof(u32);
914 }
915 /*
916			 * Fewer than sizeof(u32) bytes of this segment remain in 'l'.
917 */
918 if (l) {
919 u32 v = *addr;
920
921 if (l + extra >= sizeof(u32)) {
922 data |= set_upper_bits(v, shift);
923 len -= l + extra - sizeof(u32);
924 if (len == length) {
925 last = data;
926 break;
927 }
928 __raw_writel(data, piobuf);
929 piobuf++;
930 extra = 0;
931 data = 0;
932 } else {
933 /* Clear unused upper bytes */
934 data |= clear_upper_bytes(v, l,
935 extra);
936 if (len == length) {
937 last = data;
938 break;
939 }
940 extra += l;
941 }
942 } else if (len == length) {
943 last = data;
944 break;
945 }
946 } else if (len == length) {
947 u32 w;
948
949 /*
950 * Need to round up for the last dword in the
951 * packet.
952 */
953 w = (len + 3) >> 2;
954 __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
955 piobuf += w - 1;
956 last = ((u32 *) ss->sge.vaddr)[w - 1];
957 break;
958 } else {
959 u32 w = len >> 2;
960
961 __iowrite32_copy(piobuf, ss->sge.vaddr, w);
962 piobuf += w;
963
964 extra = len & (sizeof(u32) - 1);
965 if (extra) {
966 u32 v = ((u32 *) ss->sge.vaddr)[w];
967
968 /* Clear unused upper bytes */
969 data = clear_upper_bytes(v, extra, 0);
970 }
971 }
972 update_sge(ss, len);
973 length -= len;
974 }
975 /* Update address before sending packet. */
976 update_sge(ss, length);
977 if (flush_wc) {
978 /* must flush early everything before trigger word */
979 ipath_flush_wc();
980 __raw_writel(last, piobuf);
981 /* be sure trigger word is written */
982 ipath_flush_wc();
983 } else
984 __raw_writel(last, piobuf);
985}
986
987/*
988 * Convert IB rate to delay multiplier.
989 */
990unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
991{
992 switch (rate) {
993 case IB_RATE_2_5_GBPS: return 8;
994 case IB_RATE_5_GBPS: return 4;
995 case IB_RATE_10_GBPS: return 2;
996 case IB_RATE_20_GBPS: return 1;
997 default: return 0;
998 }
999}
1000
1001/*
1002 * Convert delay multiplier to IB rate
1003 */
1004static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
1005{
1006 switch (mult) {
1007 case 8: return IB_RATE_2_5_GBPS;
1008 case 4: return IB_RATE_5_GBPS;
1009 case 2: return IB_RATE_10_GBPS;
1010 case 1: return IB_RATE_20_GBPS;
1011 default: return IB_RATE_PORT_CURRENT;
1012 }
1013}
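
The two tables above are inverses of each other for the four rates the hardware can pace: the multiplier is simply the fastest supported rate (20 Gb/s, i.e. 4X DDR) divided by the static rate. A minimal round-trip check, as a sketch only (ipath_mult_to_ib_rate() is static, so this would only compile inside this file; check_rate_tables() is a hypothetical helper):

	/* Sketch: every supported static rate should survive a round trip. */
	static void check_rate_tables(void)
	{
		static const enum ib_rate rates[] = {
			IB_RATE_2_5_GBPS, IB_RATE_5_GBPS,
			IB_RATE_10_GBPS, IB_RATE_20_GBPS,
		};
		unsigned i;

		for (i = 0; i < ARRAY_SIZE(rates); i++)
			WARN_ON(ipath_mult_to_ib_rate(ipath_ib_rate_to_mult(rates[i])) !=
				rates[i]);
	}
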
1014
1015static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
1016{
1017 struct ipath_verbs_txreq *tx = NULL;
1018 unsigned long flags;
1019
1020 spin_lock_irqsave(&dev->pending_lock, flags);
1021 if (!list_empty(&dev->txreq_free)) {
1022 struct list_head *l = dev->txreq_free.next;
1023
1024 list_del(l);
1025 tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
1026 }
1027 spin_unlock_irqrestore(&dev->pending_lock, flags);
1028 return tx;
1029}
1030
1031static inline void put_txreq(struct ipath_ibdev *dev,
1032 struct ipath_verbs_txreq *tx)
1033{
1034 unsigned long flags;
1035
1036 spin_lock_irqsave(&dev->pending_lock, flags);
1037 list_add(&tx->txreq.list, &dev->txreq_free);
1038 spin_unlock_irqrestore(&dev->pending_lock, flags);
1039}
1040
1041static void sdma_complete(void *cookie, int status)
1042{
1043 struct ipath_verbs_txreq *tx = cookie;
1044 struct ipath_qp *qp = tx->qp;
1045 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1046 unsigned long flags;
1047 enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
1048 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
1049
1050 if (atomic_dec_and_test(&qp->s_dma_busy)) {
1051 spin_lock_irqsave(&qp->s_lock, flags);
1052 if (tx->wqe)
1053 ipath_send_complete(qp, tx->wqe, ibs);
1054 if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1055 qp->s_last != qp->s_head) ||
1056 (qp->s_flags & IPATH_S_WAIT_DMA))
1057 ipath_schedule_send(qp);
1058 spin_unlock_irqrestore(&qp->s_lock, flags);
1059 wake_up(&qp->wait_dma);
1060 } else if (tx->wqe) {
1061 spin_lock_irqsave(&qp->s_lock, flags);
1062 ipath_send_complete(qp, tx->wqe, ibs);
1063 spin_unlock_irqrestore(&qp->s_lock, flags);
1064 }
1065
1066 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
1067 kfree(tx->txreq.map_addr);
1068 put_txreq(dev, tx);
1069
1070 if (atomic_dec_and_test(&qp->refcount))
1071 wake_up(&qp->wait);
1072}
1073
1074static void decrement_dma_busy(struct ipath_qp *qp)
1075{
1076 unsigned long flags;
1077
1078 if (atomic_dec_and_test(&qp->s_dma_busy)) {
1079 spin_lock_irqsave(&qp->s_lock, flags);
1080 if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1081 qp->s_last != qp->s_head) ||
1082 (qp->s_flags & IPATH_S_WAIT_DMA))
1083 ipath_schedule_send(qp);
1084 spin_unlock_irqrestore(&qp->s_lock, flags);
1085 wake_up(&qp->wait_dma);
1086 }
1087}
1088
1089/*
1090 * Compute the number of clock cycles of delay before sending the next packet.
1091 * The multipliers reflect the number of clocks for the fastest rate so
1092 * one tick at 4xDDR is 8 ticks at 1xSDR.
1093 * If the destination port will take longer to receive a packet than
1094 * the outgoing link can send it, we need to delay sending the next packet
1095 * by the difference in time it takes the receiver to receive and the sender
1096 * to send this packet.
1097 * Note that this delay is always correct for UC and RC but not always
1098 * optimal for UD. For UD, the destination HCA can be different for each
1099 * packet, in which case, we could send packets to a different destination
1100 * while "waiting" for the delay. The overhead for doing this without
1101 * HW support is more than just paying the cost of delaying some packets
1102 * unnecessarily.
1103 */
1104static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
1105{
1106 return (rcv_mult > snd_mult) ?
1107 (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
1108}
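
As a worked instance of the formula above (illustrative numbers, not taken from the driver): a roughly 2 KB packet is about plen = 512 dwords; sending from a 4X DDR port (snd_mult = 1) to a 1X SDR destination (rcv_mult = 8) gives (512 * (8 - 1) + 1) >> 1 = 1792 delay ticks, whereas any destination at least as fast as the sender (rcv_mult <= snd_mult) gives zero delay.
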
1109
1110static int ipath_verbs_send_dma(struct ipath_qp *qp,
1111 struct ipath_ib_header *hdr, u32 hdrwords,
1112 struct ipath_sge_state *ss, u32 len,
1113 u32 plen, u32 dwords)
1114{
1115 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1116 struct ipath_devdata *dd = dev->dd;
1117 struct ipath_verbs_txreq *tx;
1118 u32 *piobuf;
1119 u32 control;
1120 u32 ndesc;
1121 int ret;
1122
1123 tx = qp->s_tx;
1124 if (tx) {
1125 qp->s_tx = NULL;
1126 /* resend previously constructed packet */
1127 atomic_inc(&qp->s_dma_busy);
1128 ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
1129 if (ret) {
1130 qp->s_tx = tx;
1131 decrement_dma_busy(qp);
1132 }
1133 goto bail;
1134 }
1135
1136 tx = get_txreq(dev);
1137 if (!tx) {
1138 ret = -EBUSY;
1139 goto bail;
1140 }
1141
1142 /*
1143 * Get the saved delay count we computed for the previous packet
1144 * and save the delay count for this packet to be used next time
1145 * we get here.
1146 */
1147 control = qp->s_pkt_delay;
1148 qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
1149
1150 tx->qp = qp;
1151 atomic_inc(&qp->refcount);
1152 tx->wqe = qp->s_wqe;
1153 tx->txreq.callback = sdma_complete;
1154 tx->txreq.callback_cookie = tx;
1155 tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
1156 IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
1157 if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
1158 tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
1159
1160 /* VL15 packets bypass credit check */
1161 if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
1162 control |= 1ULL << 31;
1163 tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
1164 }
1165
1166 if (len) {
1167 /*
1168 * Don't try to DMA if it takes more descriptors than
1169 * the queue holds.
1170 */
1171 ndesc = ipath_count_sge(ss, len);
1172 if (ndesc >= dd->ipath_sdma_descq_cnt)
1173 ndesc = 0;
1174 } else
1175 ndesc = 1;
1176 if (ndesc) {
1177 tx->hdr.pbc[0] = cpu_to_le32(plen);
1178 tx->hdr.pbc[1] = cpu_to_le32(control);
1179 memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
1180 tx->txreq.sg_count = ndesc;
1181 tx->map_len = (hdrwords + 2) << 2;
1182 tx->txreq.map_addr = &tx->hdr;
1183 atomic_inc(&qp->s_dma_busy);
1184 ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
1185 if (ret) {
1186 /* save ss and length in dwords */
1187 tx->ss = ss;
1188 tx->len = dwords;
1189 qp->s_tx = tx;
1190 decrement_dma_busy(qp);
1191 }
1192 goto bail;
1193 }
1194
1195 /* Allocate a buffer and copy the header and payload to it. */
1196 tx->map_len = (plen + 1) << 2;
1197 piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
1198 if (unlikely(piobuf == NULL)) {
1199 ret = -EBUSY;
1200 goto err_tx;
1201 }
1202 tx->txreq.map_addr = piobuf;
1203 tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
1204 tx->txreq.sg_count = 1;
1205
1206 *piobuf++ = (__force u32) cpu_to_le32(plen);
1207 *piobuf++ = (__force u32) cpu_to_le32(control);
1208 memcpy(piobuf, hdr, hdrwords << 2);
1209 ipath_copy_from_sge(piobuf + hdrwords, ss, len);
1210
1211 atomic_inc(&qp->s_dma_busy);
1212 ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
1213 /*
1214 * If we couldn't queue the DMA request, save the info
1215 * and try again later rather than destroying the
1216 * buffer and undoing the side effects of the copy.
1217 */
1218 if (ret) {
1219 tx->ss = NULL;
1220 tx->len = 0;
1221 qp->s_tx = tx;
1222 decrement_dma_busy(qp);
1223 }
1224 dev->n_unaligned++;
1225 goto bail;
1226
1227err_tx:
1228 if (atomic_dec_and_test(&qp->refcount))
1229 wake_up(&qp->wait);
1230 put_txreq(dev, tx);
1231bail:
1232 return ret;
1233}
1234
1235static int ipath_verbs_send_pio(struct ipath_qp *qp,
1236 struct ipath_ib_header *ibhdr, u32 hdrwords,
1237 struct ipath_sge_state *ss, u32 len,
1238 u32 plen, u32 dwords)
1239{
1240 struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
1241 u32 *hdr = (u32 *) ibhdr;
1242 u32 __iomem *piobuf;
1243 unsigned flush_wc;
1244 u32 control;
1245 int ret;
1246 unsigned long flags;
1247
1248 piobuf = ipath_getpiobuf(dd, plen, NULL);
1249 if (unlikely(piobuf == NULL)) {
1250 ret = -EBUSY;
1251 goto bail;
1252 }
1253
1254 /*
1255 * Get the saved delay count we computed for the previous packet
1256 * and save the delay count for this packet to be used next time
1257 * we get here.
1258 */
1259 control = qp->s_pkt_delay;
1260 qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
1261
1262 /* VL15 packets bypass credit check */
1263 if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
1264 control |= 1ULL << 31;
1265
1266 /*
1267 * Write the length to the control qword plus any needed flags.
1268 * We have to flush after the PBC for correctness on some cpus
1269	 * We have to flush after the PBC for correctness on some CPUs,
1270	 * or the WC buffer can be written out of order.
1271 writeq(((u64) control << 32) | plen, piobuf);
1272 piobuf += 2;
1273
1274 flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
1275 if (len == 0) {
1276 /*
1277		 * If the packet is header-only, we must flush before writing
1278		 * the last header word (the trigger word) for correctness,
1279		 * and again after it.
1280 */
1281 if (flush_wc) {
1282 ipath_flush_wc();
1283 __iowrite32_copy(piobuf, hdr, hdrwords - 1);
1284 ipath_flush_wc();
1285 __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
1286 ipath_flush_wc();
1287 } else
1288 __iowrite32_copy(piobuf, hdr, hdrwords);
1289 goto done;
1290 }
1291
1292 if (flush_wc)
1293 ipath_flush_wc();
1294 __iowrite32_copy(piobuf, hdr, hdrwords);
1295 piobuf += hdrwords;
1296
1297 /* The common case is aligned and contained in one segment. */
1298 if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
1299 !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
1300 u32 *addr = (u32 *) ss->sge.vaddr;
1301
1302 /* Update address before sending packet. */
1303 update_sge(ss, len);
1304 if (flush_wc) {
1305 __iowrite32_copy(piobuf, addr, dwords - 1);
1306			/* flush all earlier writes before the trigger word */
1307 ipath_flush_wc();
1308 __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
1309 /* be sure trigger word is written */
1310 ipath_flush_wc();
1311 } else
1312 __iowrite32_copy(piobuf, addr, dwords);
1313 goto done;
1314 }
1315 copy_io(piobuf, ss, len, flush_wc);
1316done:
1317 if (qp->s_wqe) {
1318 spin_lock_irqsave(&qp->s_lock, flags);
1319 ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
1320 spin_unlock_irqrestore(&qp->s_lock, flags);
1321 }
1322 ret = 0;
1323bail:
1324 return ret;
1325}
1326
1327/**
1328 * ipath_verbs_send - send a packet
1329 * @qp: the QP to send on
1330 * @hdr: the packet header
1331 * @hdrwords: the number of 32-bit words in the header
1332 * @ss: the SGE to send
1333 * @len: the length of the packet in bytes
1334 */
1335int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
1336 u32 hdrwords, struct ipath_sge_state *ss, u32 len)
1337{
1338 struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
1339 u32 plen;
1340 int ret;
1341 u32 dwords = (len + 3) >> 2;
1342
1343 /*
1344 * Calculate the send buffer trigger address.
1345 * The +1 counts for the pbc control dword following the pbc length.
1346 */
1347 plen = hdrwords + dwords + 1;
1348
1349 /*
1350 * VL15 packets (IB_QPT_SMI) will always use PIO, so we
1351 * can defer SDMA restart until link goes ACTIVE without
1352 * worrying about just how we got there.
1353 */
1354 if (qp->ibqp.qp_type == IB_QPT_SMI ||
1355 !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
1356 ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
1357 plen, dwords);
1358 else
1359 ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
1360 plen, dwords);
1361
1362 return ret;
1363}
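
To make the plen arithmetic concrete (an illustrative example, not from the source): a UD packet without a GRH carries an 8-byte LRH, a 12-byte BTH and an 8-byte DETH, i.e. hdrwords = 7; a 256-byte payload gives dwords = (256 + 3) >> 2 = 64, so plen = 7 + 64 + 1 = 72, where the extra dword is the PBC control word noted in the comment above. On an SMI QP, or on hardware without IPATH_HAS_SEND_DMA, such a packet goes out through ipath_verbs_send_pio(); otherwise through ipath_verbs_send_dma().
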
1364
1365int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
1366 u64 *rwords, u64 *spkts, u64 *rpkts,
1367 u64 *xmit_wait)
1368{
1369 int ret;
1370
1371 if (!(dd->ipath_flags & IPATH_INITTED)) {
1372 /* no hardware, freeze, etc. */
1373 ret = -EINVAL;
1374 goto bail;
1375 }
1376 *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
1377 *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
1378 *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
1379 *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
1380 *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
1381
1382 ret = 0;
1383
1384bail:
1385 return ret;
1386}
1387
1388/**
1389 * ipath_get_counters - get various chip counters
1390 * @dd: the infinipath device
1391 * @cntrs: counters are placed here
1392 *
1393 * Return the counters needed by recv_pma_get_portcounters().
1394 */
1395int ipath_get_counters(struct ipath_devdata *dd,
1396 struct ipath_verbs_counters *cntrs)
1397{
1398 struct ipath_cregs const *crp = dd->ipath_cregs;
1399 int ret;
1400
1401 if (!(dd->ipath_flags & IPATH_INITTED)) {
1402 /* no hardware, freeze, etc. */
1403 ret = -EINVAL;
1404 goto bail;
1405 }
1406 cntrs->symbol_error_counter =
1407 ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
1408 cntrs->link_error_recovery_counter =
1409 ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
1410 /*
1411 * The link downed counter counts when the other side downs the
1412 * connection. We add in the number of times we downed the link
1413 * due to local link integrity errors to compensate.
1414 */
1415 cntrs->link_downed_counter =
1416 ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
1417 cntrs->port_rcv_errors =
1418 ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
1419 ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
1420 ipath_snap_cntr(dd, crp->cr_portovflcnt) +
1421 ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
1422 ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
1423 ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
1424 ipath_snap_cntr(dd, crp->cr_erricrccnt) +
1425 ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
1426 ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
1427 ipath_snap_cntr(dd, crp->cr_badformatcnt) +
1428 dd->ipath_rxfc_unsupvl_errs;
1429 if (crp->cr_rxotherlocalphyerrcnt)
1430 cntrs->port_rcv_errors +=
1431 ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
1432 if (crp->cr_rxvlerrcnt)
1433 cntrs->port_rcv_errors +=
1434 ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
1435 cntrs->port_rcv_remphys_errors =
1436 ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
1437 cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
1438 cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
1439 cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
1440 cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
1441 cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
1442 cntrs->local_link_integrity_errors =
1443 crp->cr_locallinkintegrityerrcnt ?
1444 ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
1445 ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
1446 dd->ipath_lli_errs : dd->ipath_lli_errors);
1447 cntrs->excessive_buffer_overrun_errors =
1448 crp->cr_excessbufferovflcnt ?
1449 ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
1450 dd->ipath_overrun_thresh_errs;
1451 cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
1452 ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;
1453
1454 ret = 0;
1455
1456bail:
1457 return ret;
1458}
1459
1460/**
1461 * ipath_ib_piobufavail - callback when a PIO buffer is available
1462 * @dev: the device pointer
1463 *
1464 * This is called from ipath_intr() at interrupt level when a PIO buffer
1465 * becomes available after ipath_verbs_send() failed to get one. It is meant
1466 * to return 1 if all PIO buffers were consumed and QPs are still waiting for
1467 * buffers; for now it simply reschedules the waiting QPs and always returns
1468 * zero.
1469 */
1470int ipath_ib_piobufavail(struct ipath_ibdev *dev)
1471{
1472 struct list_head *list;
1473 struct ipath_qp *qplist;
1474 struct ipath_qp *qp;
1475 unsigned long flags;
1476
1477 if (dev == NULL)
1478 goto bail;
1479
1480 list = &dev->piowait;
1481 qplist = NULL;
1482
1483 spin_lock_irqsave(&dev->pending_lock, flags);
1484 while (!list_empty(list)) {
1485 qp = list_entry(list->next, struct ipath_qp, piowait);
1486 list_del_init(&qp->piowait);
1487 qp->pio_next = qplist;
1488 qplist = qp;
1489 atomic_inc(&qp->refcount);
1490 }
1491 spin_unlock_irqrestore(&dev->pending_lock, flags);
1492
1493 while (qplist != NULL) {
1494 qp = qplist;
1495 qplist = qp->pio_next;
1496
1497 spin_lock_irqsave(&qp->s_lock, flags);
1498 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
1499 ipath_schedule_send(qp);
1500 spin_unlock_irqrestore(&qp->s_lock, flags);
1501
1502 /* Notify ipath_destroy_qp() if it is waiting. */
1503 if (atomic_dec_and_test(&qp->refcount))
1504 wake_up(&qp->wait);
1505 }
1506
1507bail:
1508 return 0;
1509}
1510
1511static int ipath_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
1512 struct ib_udata *uhw)
1513{
1514 struct ipath_ibdev *dev = to_idev(ibdev);
1515
1516 if (uhw->inlen || uhw->outlen)
1517 return -EINVAL;
1518
1519 memset(props, 0, sizeof(*props));
1520
1521 props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
1522 IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
1523 IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
1524 IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
1525 props->page_size_cap = PAGE_SIZE;
1526 props->vendor_id =
1527 IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
1528 props->vendor_part_id = dev->dd->ipath_deviceid;
1529 props->hw_ver = dev->dd->ipath_pcirev;
1530
1531 props->sys_image_guid = dev->sys_image_guid;
1532
1533 props->max_mr_size = ~0ull;
1534 props->max_qp = ib_ipath_max_qps;
1535 props->max_qp_wr = ib_ipath_max_qp_wrs;
1536 props->max_sge = ib_ipath_max_sges;
1537 props->max_sge_rd = ib_ipath_max_sges;
1538 props->max_cq = ib_ipath_max_cqs;
1539 props->max_ah = ib_ipath_max_ahs;
1540 props->max_cqe = ib_ipath_max_cqes;
1541 props->max_mr = dev->lk_table.max;
1542 props->max_fmr = dev->lk_table.max;
1543 props->max_map_per_fmr = 32767;
1544 props->max_pd = ib_ipath_max_pds;
1545 props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
1546 props->max_qp_init_rd_atom = 255;
1547 /* props->max_res_rd_atom */
1548 props->max_srq = ib_ipath_max_srqs;
1549 props->max_srq_wr = ib_ipath_max_srq_wrs;
1550 props->max_srq_sge = ib_ipath_max_srq_sges;
1551 /* props->local_ca_ack_delay */
1552 props->atomic_cap = IB_ATOMIC_GLOB;
1553 props->max_pkeys = ipath_get_npkeys(dev->dd);
1554 props->max_mcast_grp = ib_ipath_max_mcast_grps;
1555 props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
1556 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
1557 props->max_mcast_grp;
1558
1559 return 0;
1560}
1561
1562const u8 ipath_cvt_physportstate[32] = {
1563 [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
1564 [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
1565 [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
1566 [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
1567 [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
1568 [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
1569 [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
1570 IB_PHYSPORTSTATE_CFG_TRAIN,
1571 [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
1572 IB_PHYSPORTSTATE_CFG_TRAIN,
1573 [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
1574 IB_PHYSPORTSTATE_CFG_TRAIN,
1575 [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
1576 [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
1577 IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1578 [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
1579 IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1580 [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
1581 IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1582 [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
1583 [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
1584 [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
1585 [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
1586 [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
1587 [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
1588 [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
1589 [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
1590};
1591
1592u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
1593{
1594 return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
1595}
1596
1597static int ipath_query_port(struct ib_device *ibdev,
1598 u8 port, struct ib_port_attr *props)
1599{
1600 struct ipath_ibdev *dev = to_idev(ibdev);
1601 struct ipath_devdata *dd = dev->dd;
1602 enum ib_mtu mtu;
1603 u16 lid = dd->ipath_lid;
1604 u64 ibcstat;
1605
1606 memset(props, 0, sizeof(*props));
1607 props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
1608 props->lmc = dd->ipath_lmc;
1609 props->sm_lid = dev->sm_lid;
1610 props->sm_sl = dev->sm_sl;
1611 ibcstat = dd->ipath_lastibcstat;
1612 /* map LinkState to IB portinfo values. */
1613 props->state = ipath_ib_linkstate(dd, ibcstat) + 1;
1614
1615 /* See phys_state_show() */
1616 props->phys_state = /* MEA: assumes shift == 0 */
1617 ipath_cvt_physportstate[dd->ipath_lastibcstat &
1618 dd->ibcs_lts_mask];
1619 props->port_cap_flags = dev->port_cap_flags;
1620 props->gid_tbl_len = 1;
1621 props->max_msg_sz = 0x80000000;
1622 props->pkey_tbl_len = ipath_get_npkeys(dd);
1623 props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
1624 dev->z_pkey_violations;
1625 props->qkey_viol_cntr = dev->qkey_violations;
1626 props->active_width = dd->ipath_link_width_active;
1627 /* See rate_show() */
1628 props->active_speed = dd->ipath_link_speed_active;
1629 props->max_vl_num = 1; /* VLCap = VL0 */
1630 props->init_type_reply = 0;
1631
1632 props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
1633 switch (dd->ipath_ibmtu) {
1634 case 4096:
1635 mtu = IB_MTU_4096;
1636 break;
1637 case 2048:
1638 mtu = IB_MTU_2048;
1639 break;
1640 case 1024:
1641 mtu = IB_MTU_1024;
1642 break;
1643 case 512:
1644 mtu = IB_MTU_512;
1645 break;
1646 case 256:
1647 mtu = IB_MTU_256;
1648 break;
1649 default:
1650 mtu = IB_MTU_2048;
1651 }
1652 props->active_mtu = mtu;
1653 props->subnet_timeout = dev->subnet_timeout;
1654
1655 return 0;
1656}
1657
1658static int ipath_modify_device(struct ib_device *device,
1659 int device_modify_mask,
1660 struct ib_device_modify *device_modify)
1661{
1662 int ret;
1663
1664 if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
1665 IB_DEVICE_MODIFY_NODE_DESC)) {
1666 ret = -EOPNOTSUPP;
1667 goto bail;
1668 }
1669
1670 if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
1671 memcpy(device->node_desc, device_modify->node_desc, 64);
1672
1673 if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
1674 to_idev(device)->sys_image_guid =
1675 cpu_to_be64(device_modify->sys_image_guid);
1676
1677 ret = 0;
1678
1679bail:
1680 return ret;
1681}
1682
1683static int ipath_modify_port(struct ib_device *ibdev,
1684 u8 port, int port_modify_mask,
1685 struct ib_port_modify *props)
1686{
1687 struct ipath_ibdev *dev = to_idev(ibdev);
1688
1689 dev->port_cap_flags |= props->set_port_cap_mask;
1690 dev->port_cap_flags &= ~props->clr_port_cap_mask;
1691 if (port_modify_mask & IB_PORT_SHUTDOWN)
1692 ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
1693 if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
1694 dev->qkey_violations = 0;
1695 return 0;
1696}
1697
1698static int ipath_query_gid(struct ib_device *ibdev, u8 port,
1699 int index, union ib_gid *gid)
1700{
1701 struct ipath_ibdev *dev = to_idev(ibdev);
1702 int ret;
1703
1704 if (index >= 1) {
1705 ret = -EINVAL;
1706 goto bail;
1707 }
1708 gid->global.subnet_prefix = dev->gid_prefix;
1709 gid->global.interface_id = dev->dd->ipath_guid;
1710
1711 ret = 0;
1712
1713bail:
1714 return ret;
1715}
1716
1717static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
1718 struct ib_ucontext *context,
1719 struct ib_udata *udata)
1720{
1721 struct ipath_ibdev *dev = to_idev(ibdev);
1722 struct ipath_pd *pd;
1723 struct ib_pd *ret;
1724
1725 /*
1726 * This is actually totally arbitrary. Some correctness tests
1727 * assume there's a maximum number of PDs that can be allocated.
1728 * We don't actually have this limit, but we fail the test if
1729 * we allow allocations of more than we report for this value.
1730 */
1731
1732 pd = kmalloc(sizeof *pd, GFP_KERNEL);
1733 if (!pd) {
1734 ret = ERR_PTR(-ENOMEM);
1735 goto bail;
1736 }
1737
1738 spin_lock(&dev->n_pds_lock);
1739 if (dev->n_pds_allocated == ib_ipath_max_pds) {
1740 spin_unlock(&dev->n_pds_lock);
1741 kfree(pd);
1742 ret = ERR_PTR(-ENOMEM);
1743 goto bail;
1744 }
1745
1746 dev->n_pds_allocated++;
1747 spin_unlock(&dev->n_pds_lock);
1748
1749 /* ib_alloc_pd() will initialize pd->ibpd. */
1750 pd->user = udata != NULL;
1751
1752 ret = &pd->ibpd;
1753
1754bail:
1755 return ret;
1756}
1757
1758static int ipath_dealloc_pd(struct ib_pd *ibpd)
1759{
1760 struct ipath_pd *pd = to_ipd(ibpd);
1761 struct ipath_ibdev *dev = to_idev(ibpd->device);
1762
1763 spin_lock(&dev->n_pds_lock);
1764 dev->n_pds_allocated--;
1765 spin_unlock(&dev->n_pds_lock);
1766
1767 kfree(pd);
1768
1769 return 0;
1770}
1771
1772/**
1773 * ipath_create_ah - create an address handle
1774 * @pd: the protection domain
1775 * @ah_attr: the attributes of the AH
1776 *
1777 * This may be called from interrupt context.
1778 */
1779static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
1780 struct ib_ah_attr *ah_attr)
1781{
1782 struct ipath_ah *ah;
1783 struct ib_ah *ret;
1784 struct ipath_ibdev *dev = to_idev(pd->device);
1785 unsigned long flags;
1786
1787 /* A multicast address requires a GRH (see ch. 8.4.1). */
1788 if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
1789 ah_attr->dlid != IPATH_PERMISSIVE_LID &&
1790 !(ah_attr->ah_flags & IB_AH_GRH)) {
1791 ret = ERR_PTR(-EINVAL);
1792 goto bail;
1793 }
1794
1795 if (ah_attr->dlid == 0) {
1796 ret = ERR_PTR(-EINVAL);
1797 goto bail;
1798 }
1799
1800 if (ah_attr->port_num < 1 ||
1801 ah_attr->port_num > pd->device->phys_port_cnt) {
1802 ret = ERR_PTR(-EINVAL);
1803 goto bail;
1804 }
1805
1806 ah = kmalloc(sizeof *ah, GFP_ATOMIC);
1807 if (!ah) {
1808 ret = ERR_PTR(-ENOMEM);
1809 goto bail;
1810 }
1811
1812 spin_lock_irqsave(&dev->n_ahs_lock, flags);
1813 if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
1814 spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1815 kfree(ah);
1816 ret = ERR_PTR(-ENOMEM);
1817 goto bail;
1818 }
1819
1820 dev->n_ahs_allocated++;
1821 spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1822
1823 /* ib_create_ah() will initialize ah->ibah. */
1824 ah->attr = *ah_attr;
1825 ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);
1826
1827 ret = &ah->ibah;
1828
1829bail:
1830 return ret;
1831}
1832
1833/**
1834 * ipath_destroy_ah - destroy an address handle
1835 * @ibah: the AH to destroy
1836 *
1837 * This may be called from interrupt context.
1838 */
1839static int ipath_destroy_ah(struct ib_ah *ibah)
1840{
1841 struct ipath_ibdev *dev = to_idev(ibah->device);
1842 struct ipath_ah *ah = to_iah(ibah);
1843 unsigned long flags;
1844
1845 spin_lock_irqsave(&dev->n_ahs_lock, flags);
1846 dev->n_ahs_allocated--;
1847 spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1848
1849 kfree(ah);
1850
1851 return 0;
1852}
1853
1854static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
1855{
1856 struct ipath_ah *ah = to_iah(ibah);
1857
1858 *ah_attr = ah->attr;
1859 ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
1860
1861 return 0;
1862}
1863
1864/**
1865 * ipath_get_npkeys - return the size of the PKEY table for port 0
1866 * @dd: the infinipath device
1867 */
1868unsigned ipath_get_npkeys(struct ipath_devdata *dd)
1869{
1870 return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
1871}
1872
1873/**
1874 * ipath_get_pkey - return the indexed PKEY from the port PKEY table
1875 * @dd: the infinipath device
1876 * @index: the PKEY index
1877 */
1878unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
1879{
1880 unsigned ret;
1881
1882 /* always a kernel port, no locking needed */
1883 if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
1884 ret = 0;
1885 else
1886 ret = dd->ipath_pd[0]->port_pkeys[index];
1887
1888 return ret;
1889}
1890
1891static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
1892 u16 *pkey)
1893{
1894 struct ipath_ibdev *dev = to_idev(ibdev);
1895 int ret;
1896
1897 if (index >= ipath_get_npkeys(dev->dd)) {
1898 ret = -EINVAL;
1899 goto bail;
1900 }
1901
1902 *pkey = ipath_get_pkey(dev->dd, index);
1903 ret = 0;
1904
1905bail:
1906 return ret;
1907}
1908
1909/**
1910 * ipath_alloc_ucontext - allocate a ucontext
1911 * @ibdev: the infiniband device
1912 * @udata: not used by the InfiniPath driver
1913 */
1914
1915static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
1916 struct ib_udata *udata)
1917{
1918 struct ipath_ucontext *context;
1919 struct ib_ucontext *ret;
1920
1921 context = kmalloc(sizeof *context, GFP_KERNEL);
1922 if (!context) {
1923 ret = ERR_PTR(-ENOMEM);
1924 goto bail;
1925 }
1926
1927 ret = &context->ibucontext;
1928
1929bail:
1930 return ret;
1931}
1932
1933static int ipath_dealloc_ucontext(struct ib_ucontext *context)
1934{
1935 kfree(to_iucontext(context));
1936 return 0;
1937}
1938
1939static int ipath_verbs_register_sysfs(struct ib_device *dev);
1940
1941static void __verbs_timer(unsigned long arg)
1942{
1943 struct ipath_devdata *dd = (struct ipath_devdata *) arg;
1944
1945 /* Handle verbs layer timeouts. */
1946 ipath_ib_timer(dd->verbs_dev);
1947
1948 mod_timer(&dd->verbs_timer, jiffies + 1);
1949}
1950
1951static int enable_timer(struct ipath_devdata *dd)
1952{
1953 /*
1954 * Early chips had a design flaw where the chip and kernel idea
1955 * of the tail register don't always agree, and therefore we won't
1956 * get an interrupt on the next packet received.
1957 * If the board supports per packet receive interrupts, use it.
1958 * Otherwise, the timer function periodically checks for packets
1959 * to cover this case.
1960 * Either way, the timer is needed for verbs layer related
1961 * processing.
1962 */
1963 if (dd->ipath_flags & IPATH_GPIO_INTR) {
1964 ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
1965 0x2074076542310ULL);
1966 /* Enable GPIO bit 2 interrupt */
1967 dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
1968 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1969 dd->ipath_gpio_mask);
1970 }
1971
1972 setup_timer(&dd->verbs_timer, __verbs_timer, (unsigned long)dd);
1973
1974 dd->verbs_timer.expires = jiffies + 1;
1975 add_timer(&dd->verbs_timer);
1976
1977 return 0;
1978}
1979
1980static int disable_timer(struct ipath_devdata *dd)
1981{
1982	/* Shut off the GPIO receive interrupt (if enabled) and stop the verbs timer. */
1983 if (dd->ipath_flags & IPATH_GPIO_INTR) {
1984 /* Disable GPIO bit 2 interrupt */
1985 dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
1986 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1987 dd->ipath_gpio_mask);
1988 /*
1989 * We might want to undo changes to debugportselect,
1990 * but how?
1991 */
1992 }
1993
1994 del_timer_sync(&dd->verbs_timer);
1995
1996 return 0;
1997}
1998
1999static int ipath_port_immutable(struct ib_device *ibdev, u8 port_num,
2000 struct ib_port_immutable *immutable)
2001{
2002 struct ib_port_attr attr;
2003 int err;
2004
2005 err = ipath_query_port(ibdev, port_num, &attr);
2006 if (err)
2007 return err;
2008
2009 immutable->pkey_tbl_len = attr.pkey_tbl_len;
2010 immutable->gid_tbl_len = attr.gid_tbl_len;
2011 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
2012 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2013
2014 return 0;
2015}
2016
2017/**
2018 * ipath_register_ib_device - register our device with the infiniband core
2019 * @dd: the device data structure
2021 * Return zero on success or a negative errno; on success the new ipath_ibdev is stored in dd->verbs_dev.
2021 */
2022int ipath_register_ib_device(struct ipath_devdata *dd)
2023{
2024 struct ipath_verbs_counters cntrs;
2025 struct ipath_ibdev *idev;
2026 struct ib_device *dev;
2027 struct ipath_verbs_txreq *tx;
2028 unsigned i;
2029 int ret;
2030
2031 idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
2032 if (idev == NULL) {
2033 ret = -ENOMEM;
2034 goto bail;
2035 }
2036
2037 dev = &idev->ibdev;
2038
2039 if (dd->ipath_sdma_descq_cnt) {
2040 tx = kmalloc_array(dd->ipath_sdma_descq_cnt, sizeof *tx,
2041 GFP_KERNEL);
2042 if (tx == NULL) {
2043 ret = -ENOMEM;
2044 goto err_tx;
2045 }
2046 } else
2047 tx = NULL;
2048 idev->txreq_bufs = tx;
2049
2050 /* Only need to initialize non-zero fields. */
2051 spin_lock_init(&idev->n_pds_lock);
2052 spin_lock_init(&idev->n_ahs_lock);
2053 spin_lock_init(&idev->n_cqs_lock);
2054 spin_lock_init(&idev->n_qps_lock);
2055 spin_lock_init(&idev->n_srqs_lock);
2056 spin_lock_init(&idev->n_mcast_grps_lock);
2057
2058 spin_lock_init(&idev->qp_table.lock);
2059 spin_lock_init(&idev->lk_table.lock);
2060 idev->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
2061 /* Set the prefix to the default value (see ch. 4.1.1) */
2062 idev->gid_prefix = cpu_to_be64(0xfe80000000000000ULL);
2063
2064 ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
2065 if (ret)
2066 goto err_qp;
2067
2068 /*
2069 * The top ib_ipath_lkey_table_size bits are used to index the
2070 * table. The lower 8 bits can be owned by the user (copied from
2071 * the LKEY). The remaining bits act as a generation number or tag.
2072 */
2073 idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
2074 idev->lk_table.table = kcalloc(idev->lk_table.max,
2075 sizeof(*idev->lk_table.table),
2076 GFP_KERNEL);
2077 if (idev->lk_table.table == NULL) {
2078 ret = -ENOMEM;
2079 goto err_lk;
2080 }
2081 INIT_LIST_HEAD(&idev->pending_mmaps);
2082 spin_lock_init(&idev->pending_lock);
2083 idev->mmap_offset = PAGE_SIZE;
2084 spin_lock_init(&idev->mmap_offset_lock);
2085 INIT_LIST_HEAD(&idev->pending[0]);
2086 INIT_LIST_HEAD(&idev->pending[1]);
2087 INIT_LIST_HEAD(&idev->pending[2]);
2088 INIT_LIST_HEAD(&idev->piowait);
2089 INIT_LIST_HEAD(&idev->rnrwait);
2090 INIT_LIST_HEAD(&idev->txreq_free);
2091 idev->pending_index = 0;
2092 idev->port_cap_flags =
2093 IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
2094 if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
2095 idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
2096 idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
2097 idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
2098 idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
2099 idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
2100 idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
2101
2102 /* Snapshot current HW counters to "clear" them. */
2103 ipath_get_counters(dd, &cntrs);
2104 idev->z_symbol_error_counter = cntrs.symbol_error_counter;
2105 idev->z_link_error_recovery_counter =
2106 cntrs.link_error_recovery_counter;
2107 idev->z_link_downed_counter = cntrs.link_downed_counter;
2108 idev->z_port_rcv_errors = cntrs.port_rcv_errors;
2109 idev->z_port_rcv_remphys_errors =
2110 cntrs.port_rcv_remphys_errors;
2111 idev->z_port_xmit_discards = cntrs.port_xmit_discards;
2112 idev->z_port_xmit_data = cntrs.port_xmit_data;
2113 idev->z_port_rcv_data = cntrs.port_rcv_data;
2114 idev->z_port_xmit_packets = cntrs.port_xmit_packets;
2115 idev->z_port_rcv_packets = cntrs.port_rcv_packets;
2116 idev->z_local_link_integrity_errors =
2117 cntrs.local_link_integrity_errors;
2118 idev->z_excessive_buffer_overrun_errors =
2119 cntrs.excessive_buffer_overrun_errors;
2120 idev->z_vl15_dropped = cntrs.vl15_dropped;
2121
2122 for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
2123 list_add(&tx->txreq.list, &idev->txreq_free);
2124
2125 /*
2126 * The system image GUID is supposed to be the same for all
2127 * IB HCAs in a single system but since there can be other
2128 * device types in the system, we can't be sure this is unique.
2129 */
2130 if (!sys_image_guid)
2131 sys_image_guid = dd->ipath_guid;
2132 idev->sys_image_guid = sys_image_guid;
2133 idev->ib_unit = dd->ipath_unit;
2134 idev->dd = dd;
2135
2136 strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
2137 dev->owner = THIS_MODULE;
2138 dev->node_guid = dd->ipath_guid;
2139 dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
2140 dev->uverbs_cmd_mask =
2141 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
2142 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
2143 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
2144 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
2145 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
2146 (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
2147 (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
2148 (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
2149 (1ull << IB_USER_VERBS_CMD_REG_MR) |
2150 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
2151 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2152 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
2153 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
2154 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
2155 (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
2156 (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
2157 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
2158 (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
2159 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
2160 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
2161 (1ull << IB_USER_VERBS_CMD_POST_SEND) |
2162 (1ull << IB_USER_VERBS_CMD_POST_RECV) |
2163 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
2164 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
2165 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
2166 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
2167 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
2168 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
2169 (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
2170 dev->node_type = RDMA_NODE_IB_CA;
2171 dev->phys_port_cnt = 1;
2172 dev->num_comp_vectors = 1;
2173 dev->dma_device = &dd->pcidev->dev;
2174 dev->query_device = ipath_query_device;
2175 dev->modify_device = ipath_modify_device;
2176 dev->query_port = ipath_query_port;
2177 dev->modify_port = ipath_modify_port;
2178 dev->query_pkey = ipath_query_pkey;
2179 dev->query_gid = ipath_query_gid;
2180 dev->alloc_ucontext = ipath_alloc_ucontext;
2181 dev->dealloc_ucontext = ipath_dealloc_ucontext;
2182 dev->alloc_pd = ipath_alloc_pd;
2183 dev->dealloc_pd = ipath_dealloc_pd;
2184 dev->create_ah = ipath_create_ah;
2185 dev->destroy_ah = ipath_destroy_ah;
2186 dev->query_ah = ipath_query_ah;
2187 dev->create_srq = ipath_create_srq;
2188 dev->modify_srq = ipath_modify_srq;
2189 dev->query_srq = ipath_query_srq;
2190 dev->destroy_srq = ipath_destroy_srq;
2191 dev->create_qp = ipath_create_qp;
2192 dev->modify_qp = ipath_modify_qp;
2193 dev->query_qp = ipath_query_qp;
2194 dev->destroy_qp = ipath_destroy_qp;
2195 dev->post_send = ipath_post_send;
2196 dev->post_recv = ipath_post_receive;
2197 dev->post_srq_recv = ipath_post_srq_receive;
2198 dev->create_cq = ipath_create_cq;
2199 dev->destroy_cq = ipath_destroy_cq;
2200 dev->resize_cq = ipath_resize_cq;
2201 dev->poll_cq = ipath_poll_cq;
2202 dev->req_notify_cq = ipath_req_notify_cq;
2203 dev->get_dma_mr = ipath_get_dma_mr;
2204 dev->reg_user_mr = ipath_reg_user_mr;
2205 dev->dereg_mr = ipath_dereg_mr;
2206 dev->alloc_fmr = ipath_alloc_fmr;
2207 dev->map_phys_fmr = ipath_map_phys_fmr;
2208 dev->unmap_fmr = ipath_unmap_fmr;
2209 dev->dealloc_fmr = ipath_dealloc_fmr;
2210 dev->attach_mcast = ipath_multicast_attach;
2211 dev->detach_mcast = ipath_multicast_detach;
2212 dev->process_mad = ipath_process_mad;
2213 dev->mmap = ipath_mmap;
2214 dev->dma_ops = &ipath_dma_mapping_ops;
2215 dev->get_port_immutable = ipath_port_immutable;
2216
2217 snprintf(dev->node_desc, sizeof(dev->node_desc),
2218 IPATH_IDSTR " %s", init_utsname()->nodename);
2219
2220 ret = ib_register_device(dev, NULL);
2221 if (ret)
2222 goto err_reg;
2223
2224 ret = ipath_verbs_register_sysfs(dev);
2225 if (ret)
2226 goto err_class;
2227
2228 enable_timer(dd);
2229
2230 goto bail;
2231
2232err_class:
2233 ib_unregister_device(dev);
2234err_reg:
2235 kfree(idev->lk_table.table);
2236err_lk:
2237 kfree(idev->qp_table.table);
2238err_qp:
2239 kfree(idev->txreq_bufs);
2240err_tx:
2241 ib_dealloc_device(dev);
2242 ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
2243 idev = NULL;
2244
2245bail:
2246 dd->verbs_dev = idev;
2247 return ret;
2248}
2249
2250void ipath_unregister_ib_device(struct ipath_ibdev *dev)
2251{
2252 struct ib_device *ibdev = &dev->ibdev;
2253 u32 qps_inuse;
2254
2255 ib_unregister_device(ibdev);
2256
2257 disable_timer(dev->dd);
2258
2259 if (!list_empty(&dev->pending[0]) ||
2260 !list_empty(&dev->pending[1]) ||
2261 !list_empty(&dev->pending[2]))
2262 ipath_dev_err(dev->dd, "pending list not empty!\n");
2263 if (!list_empty(&dev->piowait))
2264 ipath_dev_err(dev->dd, "piowait list not empty!\n");
2265 if (!list_empty(&dev->rnrwait))
2266 ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
2267 if (!ipath_mcast_tree_empty())
2268 ipath_dev_err(dev->dd, "multicast table memory leak!\n");
2269 /*
2270 * Note that ipath_unregister_ib_device() can be called before all
2271 * the QPs are destroyed!
2272 */
2273 qps_inuse = ipath_free_all_qps(&dev->qp_table);
2274 if (qps_inuse)
2275 ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
2276 qps_inuse);
2277 kfree(dev->qp_table.table);
2278 kfree(dev->lk_table.table);
2279 kfree(dev->txreq_bufs);
2280 ib_dealloc_device(ibdev);
2281}
2282
2283static ssize_t show_rev(struct device *device, struct device_attribute *attr,
2284 char *buf)
2285{
2286 struct ipath_ibdev *dev =
2287 container_of(device, struct ipath_ibdev, ibdev.dev);
2288
2289 return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
2290}
2291
2292static ssize_t show_hca(struct device *device, struct device_attribute *attr,
2293 char *buf)
2294{
2295 struct ipath_ibdev *dev =
2296 container_of(device, struct ipath_ibdev, ibdev.dev);
2297 int ret;
2298
2299 ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
2300 if (ret < 0)
2301 goto bail;
2302 strcat(buf, "\n");
2303 ret = strlen(buf);
2304
2305bail:
2306 return ret;
2307}
2308
2309static ssize_t show_stats(struct device *device, struct device_attribute *attr,
2310 char *buf)
2311{
2312 struct ipath_ibdev *dev =
2313 container_of(device, struct ipath_ibdev, ibdev.dev);
2314 int i;
2315 int len;
2316
2317 len = sprintf(buf,
2318 "RC resends %d\n"
2319 "RC no QACK %d\n"
2320 "RC ACKs %d\n"
2321 "RC SEQ NAKs %d\n"
2322 "RC RDMA seq %d\n"
2323 "RC RNR NAKs %d\n"
2324 "RC OTH NAKs %d\n"
2325 "RC timeouts %d\n"
2326 "RC RDMA dup %d\n"
2327 "piobuf wait %d\n"
2328 "unaligned %d\n"
2329 "PKT drops %d\n"
2330 "WQE errs %d\n",
2331 dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
2332 dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
2333 dev->n_other_naks, dev->n_timeouts,
2334 dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
2335 dev->n_pkt_drops, dev->n_wqe_errs);
2336 for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
2337 const struct ipath_opcode_stats *si = &dev->opstats[i];
2338
2339 if (!si->n_packets && !si->n_bytes)
2340 continue;
2341 len += sprintf(buf + len, "%02x %llu/%llu\n", i,
2342 (unsigned long long) si->n_packets,
2343 (unsigned long long) si->n_bytes);
2344 }
2345 return len;
2346}
2347
2348static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
2349static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
2350static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
2351static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
2352
2353static struct device_attribute *ipath_class_attributes[] = {
2354 &dev_attr_hw_rev,
2355 &dev_attr_hca_type,
2356 &dev_attr_board_id,
2357 &dev_attr_stats
2358};
2359
2360static int ipath_verbs_register_sysfs(struct ib_device *dev)
2361{
2362 int i;
2363 int ret;
2364
2365 for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) {
2366 ret = device_create_file(&dev->dev,
2367 ipath_class_attributes[i]);
2368 if (ret)
2369 goto bail;
2370 }
2371 return 0;
2372bail:
2373 for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
2374 device_remove_file(&dev->dev, ipath_class_attributes[i]);
2375 return ret;
2376}
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.h b/drivers/staging/rdma/ipath/ipath_verbs.h
deleted file mode 100644
index 6c70a89667a9..000000000000
--- a/drivers/staging/rdma/ipath/ipath_verbs.h
+++ /dev/null
@@ -1,941 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef IPATH_VERBS_H
35#define IPATH_VERBS_H
36
37#include <linux/types.h>
38#include <linux/spinlock.h>
39#include <linux/kernel.h>
40#include <linux/interrupt.h>
41#include <linux/kref.h>
42#include <rdma/ib_pack.h>
43#include <rdma/ib_user_verbs.h>
44
45#include "ipath_kernel.h"
46
47#define IPATH_MAX_RDMA_ATOMIC 4
48
49#define QPN_MAX (1 << 24)
50#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
51
52/*
53 * Increment this value if any changes that break userspace ABI
54 * compatibility are made.
55 */
56#define IPATH_UVERBS_ABI_VERSION 2
57
58/*
59 * Define an ib_cq_notify value that is not valid so we know when CQ
60 * notifications are armed.
61 */
62#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1)
63
64/* AETH NAK opcode values */
65#define IB_RNR_NAK 0x20
66#define IB_NAK_PSN_ERROR 0x60
67#define IB_NAK_INVALID_REQUEST 0x61
68#define IB_NAK_REMOTE_ACCESS_ERROR 0x62
69#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
70#define IB_NAK_INVALID_RD_REQUEST 0x64
71
72/* Flags for checking QP state (see ib_ipath_state_ops[]) */
73#define IPATH_POST_SEND_OK 0x01
74#define IPATH_POST_RECV_OK 0x02
75#define IPATH_PROCESS_RECV_OK 0x04
76#define IPATH_PROCESS_SEND_OK 0x08
77#define IPATH_PROCESS_NEXT_SEND_OK 0x10
78#define IPATH_FLUSH_SEND 0x20
79#define IPATH_FLUSH_RECV 0x40
80#define IPATH_PROCESS_OR_FLUSH_SEND \
81 (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)
82
83/* IB Performance Manager status values */
84#define IB_PMA_SAMPLE_STATUS_DONE 0x00
85#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
86#define IB_PMA_SAMPLE_STATUS_RUNNING 0x02
87
88/* Mandatory IB performance counter select values. */
89#define IB_PMA_PORT_XMIT_DATA cpu_to_be16(0x0001)
90#define IB_PMA_PORT_RCV_DATA cpu_to_be16(0x0002)
91#define IB_PMA_PORT_XMIT_PKTS cpu_to_be16(0x0003)
92#define IB_PMA_PORT_RCV_PKTS cpu_to_be16(0x0004)
93#define IB_PMA_PORT_XMIT_WAIT cpu_to_be16(0x0005)
94
95struct ib_reth {
96 __be64 vaddr;
97 __be32 rkey;
98 __be32 length;
99} __attribute__ ((packed));
100
101struct ib_atomic_eth {
102 __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
103 __be32 rkey;
104 __be64 swap_data;
105 __be64 compare_data;
106} __attribute__ ((packed));
107
108struct ipath_other_headers {
109 __be32 bth[3];
110 union {
111 struct {
112 __be32 deth[2];
113 __be32 imm_data;
114 } ud;
115 struct {
116 struct ib_reth reth;
117 __be32 imm_data;
118 } rc;
119 struct {
120 __be32 aeth;
121 __be32 atomic_ack_eth[2];
122 } at;
123 __be32 imm_data;
124 __be32 aeth;
125 struct ib_atomic_eth atomic_eth;
126 } u;
127} __attribute__ ((packed));
128
129/*
130 * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
131 * long (72 w/ imm_data). Only the first 56 bytes of the IB header
132 * will be in the eager header buffer. The remaining 12 or 16 bytes
133 * are in the data buffer.
134 */
135struct ipath_ib_header {
136 __be16 lrh[4];
137 union {
138 struct {
139 struct ib_grh grh;
140 struct ipath_other_headers oth;
141 } l;
142 struct ipath_other_headers oth;
143 } u;
144} __attribute__ ((packed));
145
146struct ipath_pio_header {
147 __le32 pbc[2];
148 struct ipath_ib_header hdr;
149} __attribute__ ((packed));
150
151/*
152 * There is one struct ipath_mcast for each multicast GID.
153 * All attached QPs are then stored as a list of
154 * struct ipath_mcast_qp.
155 */
156struct ipath_mcast_qp {
157 struct list_head list;
158 struct ipath_qp *qp;
159};
160
161struct ipath_mcast {
162 struct rb_node rb_node;
163 union ib_gid mgid;
164 struct list_head qp_list;
165 wait_queue_head_t wait;
166 atomic_t refcount;
167 int n_attached;
168};
169
170/* Protection domain */
171struct ipath_pd {
172 struct ib_pd ibpd;
173 int user; /* non-zero if created from user space */
174};
175
176/* Address Handle */
177struct ipath_ah {
178 struct ib_ah ibah;
179 struct ib_ah_attr attr;
180};
181
182/*
183 * This structure is used by ipath_mmap() to validate an offset
184 * when an mmap() request is made. The vm_area_struct then uses
185 * this as its vm_private_data.
186 */
187struct ipath_mmap_info {
188 struct list_head pending_mmaps;
189 struct ib_ucontext *context;
190 void *obj;
191 __u64 offset;
192 struct kref ref;
193 unsigned size;
194};
195
196/*
197 * This structure is used to contain the head pointer, tail pointer,
198 * and completion queue entries as a single memory allocation so
199 * it can be mmap'ed into user space.
200 */
201struct ipath_cq_wc {
202 u32 head; /* index of next entry to fill */
203 u32 tail; /* index of next ib_poll_cq() entry */
204 union {
205 /* these are actually size ibcq.cqe + 1 */
206 struct ib_uverbs_wc uqueue[0];
207 struct ib_wc kqueue[0];
208 };
209};
210
211/*
212 * The completion queue structure.
213 */
214struct ipath_cq {
215 struct ib_cq ibcq;
216 struct tasklet_struct comptask;
217 spinlock_t lock;
218 u8 notify;
219 u8 triggered;
220 struct ipath_cq_wc *queue;
221 struct ipath_mmap_info *ip;
222};
223
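A sketch of how a kernel-side producer appends to the ring described above (kqueue[] only, locking via cq->lock omitted); example_cq_append is not the driver's function, just an illustration of the head/tail convention:

static void example_cq_append(struct ipath_cq *cq, const struct ib_wc *entry)
{
	struct ipath_cq_wc *wc = cq->queue;
	u32 head = wc->head;
	u32 next = (head == cq->ibcq.cqe) ? 0 : head + 1;	/* ring holds cqe + 1 slots */

	if (next == wc->tail)
		return;			/* full; the real driver raises an overflow error */
	wc->kqueue[head] = *entry;	/* user CQs would fill uqueue[] instead */
	wc->head = next;
}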
224/*
225 * A segment is a linear region of low physical memory.
226 * XXX Maybe we should use phys addr here and kmap()/kunmap().
227 * Used by the verbs layer.
228 */
229struct ipath_seg {
230 void *vaddr;
231 size_t length;
232};
233
234/* The number of ipath_segs that fit in a page. */
235#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg))
236
237struct ipath_segarray {
238 struct ipath_seg segs[IPATH_SEGSZ];
239};
240
241struct ipath_mregion {
242 struct ib_pd *pd; /* shares refcnt of ibmr.pd */
243 u64 user_base; /* User's address for this region */
244 u64 iova; /* IB start address of this region */
245 size_t length;
246 u32 lkey;
247 u32 offset; /* offset (bytes) to start of region */
248 int access_flags;
249 u32 max_segs; /* number of ipath_segs in all the arrays */
250 u32 mapsz; /* size of the map array */
251 struct ipath_segarray *map[0]; /* the segments */
252};
253
254/*
255 * These keep track of the copy progress within a memory region.
256 * Used by the verbs layer.
257 */
258struct ipath_sge {
259 struct ipath_mregion *mr;
260 void *vaddr; /* kernel virtual address of segment */
261 u32 sge_length; /* length of the SGE */
262 u32 length; /* remaining length of the segment */
263 u16 m; /* current index: mr->map[m] */
264 u16 n; /* current index: mr->map[m]->segs[n] */
265};
266
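The m/n cursors above walk the segment arrays of the owning ipath_mregion; a simplified sketch of the bookkeeping (close in spirit to what the copy/skip helpers do, but not the driver's exact code):

static void example_sge_consume(struct ipath_sge *sge, u32 len)
{
	sge->vaddr += len;
	sge->length -= len;
	sge->sge_length -= len;
	if (sge->sge_length == 0)
		return;				/* caller moves on to the next SGE */
	if (sge->length == 0) {
		/* current ipath_seg exhausted: step through mr->map[m]->segs[n] */
		if (++sge->n >= IPATH_SEGSZ) {
			sge->m++;
			sge->n = 0;
		}
		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
	}
}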
267/* Memory region */
268struct ipath_mr {
269 struct ib_mr ibmr;
270 struct ib_umem *umem;
271 struct ipath_mregion mr; /* must be last */
272};
273
274/*
275 * Send work request queue entry.
276 * The size of the sg_list is determined when the QP is created and stored
277 * in qp->s_max_sge.
278 */
279struct ipath_swqe {
280 union {
281 struct ib_send_wr wr; /* don't use wr.sg_list */
282 struct ib_ud_wr ud_wr;
283 struct ib_rdma_wr rdma_wr;
284 struct ib_atomic_wr atomic_wr;
285 };
286
287 u32 psn; /* first packet sequence number */
288 u32 lpsn; /* last packet sequence number */
289 u32 ssn; /* send sequence number */
290 u32 length; /* total length of data in sg_list */
291 struct ipath_sge sg_list[0];
292};
293
294/*
295 * Receive work request queue entry.
296 * The size of the sg_list is determined when the QP (or SRQ) is created
297 * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
298 */
299struct ipath_rwqe {
300 u64 wr_id;
301 u8 num_sge;
302 struct ib_sge sg_list[0];
303};
304
305/*
306 * This structure is used to contain the head pointer, tail pointer,
307 * and receive work queue entries as a single memory allocation so
308 * it can be mmap'ed into user space.
309 * Note that the wq array elements are variable size so you can't
310 * just index into the array to get the N'th element;
311 * use get_rwqe_ptr() instead.
312 */
313struct ipath_rwq {
314 u32 head; /* new work requests posted to the head */
315 u32 tail; /* receives pull requests from here. */
316 struct ipath_rwqe wq[0];
317};
318
319struct ipath_rq {
320 struct ipath_rwq *wq;
321 spinlock_t lock;
322 u32 size; /* size of RWQE array */
323 u8 max_sge;
324};
325
326struct ipath_srq {
327 struct ib_srq ibsrq;
328 struct ipath_rq rq;
329 struct ipath_mmap_info *ip;
330 /* send signal when number of RWQEs < limit */
331 u32 limit;
332};
333
334struct ipath_sge_state {
335 struct ipath_sge *sg_list; /* next SGE to be used if any */
336 struct ipath_sge sge; /* progress state for the current SGE */
337 u8 num_sge;
338 u8 static_rate;
339};
340
341/*
342 * This structure holds the information that the send tasklet needs
343 * to send a RDMA read response or atomic operation.
344 */
345struct ipath_ack_entry {
346 u8 opcode;
347 u8 sent;
348 u32 psn;
349 union {
350 struct ipath_sge_state rdma_sge;
351 u64 atomic_data;
352 };
353};
354
355/*
356 * Variables prefixed with s_ are for the requester (sender).
357 * Variables prefixed with r_ are for the responder (receiver).
358 * Variables prefixed with ack_ are for responder replies.
359 *
360 * Common variables are protected by both r_rq.lock and s_lock, in that order,
361 * which only happens in modify_qp() or when changing the QP 'state'.
362 */
363struct ipath_qp {
364 struct ib_qp ibqp;
365 struct ipath_qp *next; /* link list for QPN hash table */
366 struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */
367 struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */
368 struct list_head piowait; /* link for wait PIO buf */
369 struct list_head timerwait; /* link for waiting for timeouts */
370 struct ib_ah_attr remote_ah_attr;
371 struct ipath_ib_header s_hdr; /* next packet header to send */
372 atomic_t refcount;
373 wait_queue_head_t wait;
374 wait_queue_head_t wait_dma;
375 struct tasklet_struct s_task;
376 struct ipath_mmap_info *ip;
377 struct ipath_sge_state *s_cur_sge;
378 struct ipath_verbs_txreq *s_tx;
379 struct ipath_sge_state s_sge; /* current send request data */
380 struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
381 struct ipath_sge_state s_ack_rdma_sge;
382 struct ipath_sge_state s_rdma_read_sge;
383 struct ipath_sge_state r_sge; /* current receive data */
384 spinlock_t s_lock;
385 atomic_t s_dma_busy;
386 u16 s_pkt_delay;
387 u16 s_hdrwords; /* size of s_hdr in 32 bit words */
388 u32 s_cur_size; /* size of send packet in bytes */
389 u32 s_len; /* total length of s_sge */
390 u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
391 u32 s_next_psn; /* PSN for next request */
392 u32 s_last_psn; /* last response PSN processed */
393 u32 s_psn; /* current packet sequence number */
394 u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
395 u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
396 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
397 u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
398 u64 r_wr_id; /* ID for current receive WQE */
399 unsigned long r_aflags;
400 u32 r_len; /* total length of r_sge */
401 u32 r_rcv_len; /* receive data len processed */
402 u32 r_psn; /* expected rcv packet sequence number */
403 u32 r_msn; /* message sequence number */
404 u8 state; /* QP state */
405 u8 s_state; /* opcode of last packet sent */
406 u8 s_ack_state; /* opcode of packet to ACK */
407 u8 s_nak_state; /* non-zero if NAK is pending */
408 u8 r_state; /* opcode of last packet received */
409 u8 r_nak_state; /* non-zero if NAK is pending */
410 u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
411 u8 r_flags;
412 u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
413 u8 r_head_ack_queue; /* index into s_ack_queue[] */
414 u8 qp_access_flags;
415 u8 s_max_sge; /* size of s_wq->sg_list */
416 u8 s_retry_cnt; /* number of times to retry */
417 u8 s_rnr_retry_cnt;
418 u8 s_retry; /* requester retry counter */
419 u8 s_rnr_retry; /* requester RNR retry counter */
420 u8 s_pkey_index; /* PKEY index to use */
421 u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
422 u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
423 u8 s_tail_ack_queue; /* index into s_ack_queue[] */
424 u8 s_flags;
425 u8 s_dmult;
426 u8 s_draining;
427 u8 timeout; /* Timeout for this QP */
428 enum ib_mtu path_mtu;
429 u32 remote_qpn;
430 u32 qkey; /* QKEY for this QP (for UD or RD) */
431 u32 s_size; /* send work queue size */
432 u32 s_head; /* new entries added here */
433 u32 s_tail; /* next entry to process */
434 u32 s_cur; /* current work queue entry */
435 u32 s_last; /* last un-ACK'ed entry */
436 u32 s_ssn; /* SSN of tail entry */
437 u32 s_lsn; /* limit sequence number (credit) */
438 struct ipath_swqe *s_wq; /* send work queue */
439 struct ipath_swqe *s_wqe;
440 struct ipath_sge *r_ud_sg_list;
441 struct ipath_rq r_rq; /* receive work queue */
442 struct ipath_sge r_sg_list[0]; /* verified SGEs */
443};
444
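An illustrative fragment of the locking order documented above the struct (r_rq.lock taken before s_lock when both are needed); the function name is hypothetical:

static void example_modify_common_state(struct ipath_qp *qp)
{
	spin_lock_irq(&qp->r_rq.lock);		/* documented order: r_rq.lock first */
	spin_lock(&qp->s_lock);
	/* ... update fields shared by requester and responder ... */
	spin_unlock(&qp->s_lock);
	spin_unlock_irq(&qp->r_rq.lock);
}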
445/*
446 * Atomic bit definitions for r_aflags.
447 */
448#define IPATH_R_WRID_VALID 0
449
450/*
451 * Bit definitions for r_flags.
452 */
453#define IPATH_R_REUSE_SGE 0x01
454#define IPATH_R_RDMAR_SEQ 0x02
455
456/*
457 * Bit definitions for s_flags.
458 *
459 * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs
460 * before processing the next SWQE
461 * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs
462 * before processing the next SWQE
463 * IPATH_S_WAITING - waiting for RNR timeout or send buffer available.
464 * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
465 * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating
466 * next send completion entry not via send DMA.
467 */
468#define IPATH_S_SIGNAL_REQ_WR 0x01
469#define IPATH_S_FENCE_PENDING 0x02
470#define IPATH_S_RDMAR_PENDING 0x04
471#define IPATH_S_ACK_PENDING 0x08
472#define IPATH_S_BUSY 0x10
473#define IPATH_S_WAITING 0x20
474#define IPATH_S_WAIT_SSN_CREDIT 0x40
475#define IPATH_S_WAIT_DMA 0x80
476
477#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \
478 IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)
479
480#define IPATH_PSN_CREDIT 512
481
482/*
483 * Since struct ipath_swqe is not a fixed size, we can't simply index into
484 * struct ipath_qp.s_wq. This function does the array index computation.
485 */
486static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
487 unsigned n)
488{
489 return (struct ipath_swqe *)((char *)qp->s_wq +
490 (sizeof(struct ipath_swqe) +
491 qp->s_max_sge *
492 sizeof(struct ipath_sge)) * n);
493}
494
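The same stride implies the allocation size for the whole send work queue; a hypothetical helper, shown only to make the layout explicit:

static inline size_t example_swq_bytes(u32 size, u8 max_sge)
{
	/* one slot per entry, each entry followed by max_sge struct ipath_sge */
	return (size_t)size *
		(sizeof(struct ipath_swqe) + max_sge * sizeof(struct ipath_sge));
}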
495/*
496 * Since struct ipath_rwqe is not a fixed size, we can't simply index into
497 * struct ipath_rwq.wq. This function does the array index computation.
498 */
499static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
500 unsigned n)
501{
502 return (struct ipath_rwqe *)
503 ((char *) rq->wq->wq +
504 (sizeof(struct ipath_rwqe) +
505 rq->max_sge * sizeof(struct ib_sge)) * n);
506}
507
508/*
509 * QPN-map pages start out as NULL; they get allocated upon
510 * first use and are never deallocated. This way,
511 * large bitmaps are not allocated unless large numbers of QPs are used.
512 */
513struct qpn_map {
514 atomic_t n_free;
515 void *page;
516};
517
518struct ipath_qp_table {
519 spinlock_t lock;
520 u32 last; /* last QP number allocated */
521 u32 max; /* size of the hash table */
522 u32 nmaps; /* size of the map table */
523 struct ipath_qp **table;
524 /* bit map of free numbers */
525 struct qpn_map map[QPNMAP_ENTRIES];
526};
527
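A sketch of the lazy allocation described above: the page backing a qpn_map is created only when a QPN in its range is first needed, and a race with another CPU is resolved under qpt->lock. This is an illustration, not the driver's exact helper:

static void example_get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map)
{
	unsigned long page = get_zeroed_page(GFP_KERNEL);
	unsigned long flags;

	spin_lock_irqsave(&qpt->lock, flags);
	if (map->page)
		free_page(page);	/* lost the race; free_page(0) is a no-op */
	else
		map->page = (void *)page;
	spin_unlock_irqrestore(&qpt->lock, flags);
}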
528struct ipath_lkey_table {
529 spinlock_t lock;
530 u32 next; /* next unused index (speeds search) */
531 u32 gen; /* generation count */
532 u32 max; /* size of the table */
533 struct ipath_mregion **table;
534};
535
536struct ipath_opcode_stats {
537 u64 n_packets; /* number of packets */
538 u64 n_bytes; /* total number of bytes */
539};
540
541struct ipath_ibdev {
542 struct ib_device ibdev;
543 struct ipath_devdata *dd;
544 struct list_head pending_mmaps;
545 spinlock_t mmap_offset_lock;
546 u32 mmap_offset;
547 int ib_unit; /* This is the device number */
548 u16 sm_lid; /* in host order */
549 u8 sm_sl;
550 u8 mkeyprot;
551 /* non-zero when timer is set */
552 unsigned long mkey_lease_timeout;
553
554 /* The following fields are really per port. */
555 struct ipath_qp_table qp_table;
556 struct ipath_lkey_table lk_table;
557 struct list_head pending[3]; /* FIFO of QPs waiting for ACKs */
558 struct list_head piowait; /* list for wait PIO buf */
559 struct list_head txreq_free;
560 void *txreq_bufs;
561 /* list of QPs waiting for RNR timer */
562 struct list_head rnrwait;
563 spinlock_t pending_lock;
564 __be64 sys_image_guid; /* in network order */
565 __be64 gid_prefix; /* in network order */
566 __be64 mkey;
567
568 u32 n_pds_allocated; /* number of PDs allocated for device */
569 spinlock_t n_pds_lock;
570 u32 n_ahs_allocated; /* number of AHs allocated for device */
571 spinlock_t n_ahs_lock;
572 u32 n_cqs_allocated; /* number of CQs allocated for device */
573 spinlock_t n_cqs_lock;
574 u32 n_qps_allocated; /* number of QPs allocated for device */
575 spinlock_t n_qps_lock;
576 u32 n_srqs_allocated; /* number of SRQs allocated for device */
577 spinlock_t n_srqs_lock;
578 u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
579 spinlock_t n_mcast_grps_lock;
580
581 u64 ipath_sword; /* total dwords sent (sample result) */
582 u64 ipath_rword; /* total dwords received (sample result) */
583 u64 ipath_spkts; /* total packets sent (sample result) */
584 u64 ipath_rpkts; /* total packets received (sample result) */
585 /* # of ticks no data sent (sample result) */
586 u64 ipath_xmit_wait;
587 u64 rcv_errors; /* # of packets with SW detected rcv errs */
588 u64 n_unicast_xmit; /* total unicast packets sent */
589 u64 n_unicast_rcv; /* total unicast packets received */
590 u64 n_multicast_xmit; /* total multicast packets sent */
591 u64 n_multicast_rcv; /* total multicast packets received */
592 u64 z_symbol_error_counter; /* starting count for PMA */
593 u64 z_link_error_recovery_counter; /* starting count for PMA */
594 u64 z_link_downed_counter; /* starting count for PMA */
595 u64 z_port_rcv_errors; /* starting count for PMA */
596 u64 z_port_rcv_remphys_errors; /* starting count for PMA */
597 u64 z_port_xmit_discards; /* starting count for PMA */
598 u64 z_port_xmit_data; /* starting count for PMA */
599 u64 z_port_rcv_data; /* starting count for PMA */
600 u64 z_port_xmit_packets; /* starting count for PMA */
601 u64 z_port_rcv_packets; /* starting count for PMA */
602 u32 z_pkey_violations; /* starting count for PMA */
603 u32 z_local_link_integrity_errors; /* starting count for PMA */
604 u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */
605 u32 z_vl15_dropped; /* starting count for PMA */
606 u32 n_rc_resends;
607 u32 n_rc_acks;
608 u32 n_rc_qacks;
609 u32 n_seq_naks;
610 u32 n_rdma_seq;
611 u32 n_rnr_naks;
612 u32 n_other_naks;
613 u32 n_timeouts;
614 u32 n_pkt_drops;
615 u32 n_vl15_dropped;
616 u32 n_wqe_errs;
617 u32 n_rdma_dup_busy;
618 u32 n_piowait;
619 u32 n_unaligned;
620 u32 port_cap_flags;
621 u32 pma_sample_start;
622 u32 pma_sample_interval;
623 __be16 pma_counter_select[5];
624 u16 pma_tag;
625 u16 qkey_violations;
626 u16 mkey_violations;
627 u16 mkey_lease_period;
628 u16 pending_index; /* which pending queue is active */
629 u8 pma_sample_status;
630 u8 subnet_timeout;
631 u8 vl_high_limit;
632 struct ipath_opcode_stats opstats[128];
633};
634
635struct ipath_verbs_counters {
636 u64 symbol_error_counter;
637 u64 link_error_recovery_counter;
638 u64 link_downed_counter;
639 u64 port_rcv_errors;
640 u64 port_rcv_remphys_errors;
641 u64 port_xmit_discards;
642 u64 port_xmit_data;
643 u64 port_rcv_data;
644 u64 port_xmit_packets;
645 u64 port_rcv_packets;
646 u32 local_link_integrity_errors;
647 u32 excessive_buffer_overrun_errors;
648 u32 vl15_dropped;
649};
650
651struct ipath_verbs_txreq {
652 struct ipath_qp *qp;
653 struct ipath_swqe *wqe;
654 u32 map_len;
655 u32 len;
656 struct ipath_sge_state *ss;
657 struct ipath_pio_header hdr;
658 struct ipath_sdma_txreq txreq;
659};
660
661static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
662{
663 return container_of(ibmr, struct ipath_mr, ibmr);
664}
665
666static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
667{
668 return container_of(ibpd, struct ipath_pd, ibpd);
669}
670
671static inline struct ipath_ah *to_iah(struct ib_ah *ibah)
672{
673 return container_of(ibah, struct ipath_ah, ibah);
674}
675
676static inline struct ipath_cq *to_icq(struct ib_cq *ibcq)
677{
678 return container_of(ibcq, struct ipath_cq, ibcq);
679}
680
681static inline struct ipath_srq *to_isrq(struct ib_srq *ibsrq)
682{
683 return container_of(ibsrq, struct ipath_srq, ibsrq);
684}
685
686static inline struct ipath_qp *to_iqp(struct ib_qp *ibqp)
687{
688 return container_of(ibqp, struct ipath_qp, ibqp);
689}
690
691static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
692{
693 return container_of(ibdev, struct ipath_ibdev, ibdev);
694}
695
696/*
697 * This must be called with s_lock held.
698 */
699static inline void ipath_schedule_send(struct ipath_qp *qp)
700{
701 if (qp->s_flags & IPATH_S_ANY_WAIT)
702 qp->s_flags &= ~IPATH_S_ANY_WAIT;
703 if (!(qp->s_flags & IPATH_S_BUSY))
704 tasklet_hi_schedule(&qp->s_task);
705}
706
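A minimal example of honouring that locking rule from a caller (the wrapper name is ours):

static void example_kick_sender(struct ipath_qp *qp)
{
	unsigned long flags;

	spin_lock_irqsave(&qp->s_lock, flags);	/* ipath_schedule_send() requires s_lock */
	ipath_schedule_send(qp);
	spin_unlock_irqrestore(&qp->s_lock, flags);
}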
707int ipath_process_mad(struct ib_device *ibdev,
708 int mad_flags,
709 u8 port_num,
710 const struct ib_wc *in_wc,
711 const struct ib_grh *in_grh,
712 const struct ib_mad_hdr *in, size_t in_mad_size,
713 struct ib_mad_hdr *out, size_t *out_mad_size,
714 u16 *out_mad_pkey_index);
715
716/*
717 * Compare the lower 24 bits of the two values.
718 * Returns an integer less than, equal to, or greater than zero.
719 */
720static inline int ipath_cmp24(u32 a, u32 b)
721{
722 return (((int) a) - ((int) b)) << 8;
723}
724
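A quick check of the modular 24-bit ordering this gives (illustrative arithmetic, relying on the kernel's usual two's-complement behaviour):

/* PSNs wrap at 24 bits, so 0x000001 should compare "newer" than 0xFFFFFF:
 * (1 - 0xFFFFFF) << 8 == 0xFF000002 << 8 == 0x00000200 > 0, as desired,
 * while ipath_cmp24(5, 5) == 0 and ipath_cmp24(4, 5) < 0.
 */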
725struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
726
727int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
728 u64 *rwords, u64 *spkts, u64 *rpkts,
729 u64 *xmit_wait);
730
731int ipath_get_counters(struct ipath_devdata *dd,
732 struct ipath_verbs_counters *cntrs);
733
734int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
735
736int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
737
738int ipath_mcast_tree_empty(void);
739
740__be32 ipath_compute_aeth(struct ipath_qp *qp);
741
742struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn);
743
744struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
745 struct ib_qp_init_attr *init_attr,
746 struct ib_udata *udata);
747
748int ipath_destroy_qp(struct ib_qp *ibqp);
749
750int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
751
752int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
753 int attr_mask, struct ib_udata *udata);
754
755int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
756 int attr_mask, struct ib_qp_init_attr *init_attr);
757
758unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
759
760int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
761
762void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
763
764unsigned ipath_ib_rate_to_mult(enum ib_rate rate);
765
766int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
767 u32 hdrwords, struct ipath_sge_state *ss, u32 len);
768
769void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
770
771void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
772
773void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
774 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
775
776void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
777 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
778
779void ipath_restart_rc(struct ipath_qp *qp, u32 psn);
780
781void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err);
782
783int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr);
784
785void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
786 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
787
788int ipath_alloc_lkey(struct ipath_lkey_table *rkt,
789 struct ipath_mregion *mr);
790
791void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
792
793int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
794 struct ib_sge *sge, int acc);
795
796int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
797 u32 len, u64 vaddr, u32 rkey, int acc);
798
799int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
800 struct ib_recv_wr **bad_wr);
801
802struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
803 struct ib_srq_init_attr *srq_init_attr,
804 struct ib_udata *udata);
805
806int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
807 enum ib_srq_attr_mask attr_mask,
808 struct ib_udata *udata);
809
810int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
811
812int ipath_destroy_srq(struct ib_srq *ibsrq);
813
814void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
815
816int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
817
818struct ib_cq *ipath_create_cq(struct ib_device *ibdev,
819 const struct ib_cq_init_attr *attr,
820 struct ib_ucontext *context,
821 struct ib_udata *udata);
822
823int ipath_destroy_cq(struct ib_cq *ibcq);
824
825int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
826
827int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
828
829struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc);
830
831struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
832 u64 virt_addr, int mr_access_flags,
833 struct ib_udata *udata);
834
835int ipath_dereg_mr(struct ib_mr *ibmr);
836
837struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
838 struct ib_fmr_attr *fmr_attr);
839
840int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
841 int list_len, u64 iova);
842
843int ipath_unmap_fmr(struct list_head *fmr_list);
844
845int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
846
847void ipath_release_mmap_info(struct kref *ref);
848
849struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
850 u32 size,
851 struct ib_ucontext *context,
852 void *obj);
853
854void ipath_update_mmap_info(struct ipath_ibdev *dev,
855 struct ipath_mmap_info *ip,
856 u32 size, void *obj);
857
858int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
859
860void ipath_insert_rnr_queue(struct ipath_qp *qp);
861
862int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
863 u32 *lengthp, struct ipath_sge_state *ss);
864
865int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
866
867u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
868 struct ib_global_route *grh, u32 hwords, u32 nwords);
869
870void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
871 struct ipath_other_headers *ohdr,
872 u32 bth0, u32 bth2);
873
874void ipath_do_send(unsigned long data);
875
876void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
877 enum ib_wc_status status);
878
879int ipath_make_rc_req(struct ipath_qp *qp);
880
881int ipath_make_uc_req(struct ipath_qp *qp);
882
883int ipath_make_ud_req(struct ipath_qp *qp);
884
885int ipath_register_ib_device(struct ipath_devdata *);
886
887void ipath_unregister_ib_device(struct ipath_ibdev *);
888
889void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
890
891int ipath_ib_piobufavail(struct ipath_ibdev *);
892
893unsigned ipath_get_npkeys(struct ipath_devdata *);
894
895u32 ipath_get_cr_errpkey(struct ipath_devdata *);
896
897unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
898
899extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
900
901/*
902 * Below converts HCA-specific LinkTrainingState to IB PhysPortState
903 * values.
904 */
905extern const u8 ipath_cvt_physportstate[];
906#define IB_PHYSPORTSTATE_SLEEP 1
907#define IB_PHYSPORTSTATE_POLL 2
908#define IB_PHYSPORTSTATE_DISABLED 3
909#define IB_PHYSPORTSTATE_CFG_TRAIN 4
910#define IB_PHYSPORTSTATE_LINKUP 5
911#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6
912
913extern const int ib_ipath_state_ops[];
914
915extern unsigned int ib_ipath_lkey_table_size;
916
917extern unsigned int ib_ipath_max_cqes;
918
919extern unsigned int ib_ipath_max_cqs;
920
921extern unsigned int ib_ipath_max_qp_wrs;
922
923extern unsigned int ib_ipath_max_qps;
924
925extern unsigned int ib_ipath_max_sges;
926
927extern unsigned int ib_ipath_max_mcast_grps;
928
929extern unsigned int ib_ipath_max_mcast_qp_attached;
930
931extern unsigned int ib_ipath_max_srqs;
932
933extern unsigned int ib_ipath_max_srq_sges;
934
935extern unsigned int ib_ipath_max_srq_wrs;
936
937extern const u32 ib_ipath_rnr_table[];
938
939extern struct ib_dma_mapping_ops ipath_dma_mapping_ops;
940
941#endif /* IPATH_VERBS_H */
diff --git a/drivers/staging/rdma/ipath/ipath_verbs_mcast.c b/drivers/staging/rdma/ipath/ipath_verbs_mcast.c
deleted file mode 100644
index 72d476fa5b8f..000000000000
--- a/drivers/staging/rdma/ipath/ipath_verbs_mcast.c
+++ /dev/null
@@ -1,363 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/rculist.h>
35#include <linux/slab.h>
36
37#include "ipath_verbs.h"
38
39/*
40 * Global table of GID to attached QPs.
41 * The table is global to all ipath devices since a send from one QP/device
42 * needs to be locally routed to any locally attached QPs on the same
43 * or different device.
44 */
45static struct rb_root mcast_tree;
46static DEFINE_SPINLOCK(mcast_lock);
47
48/**
49 * ipath_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
50 * @qp: the QP to link
51 */
52static struct ipath_mcast_qp *ipath_mcast_qp_alloc(struct ipath_qp *qp)
53{
54 struct ipath_mcast_qp *mqp;
55
56 mqp = kmalloc(sizeof *mqp, GFP_KERNEL);
57 if (!mqp)
58 goto bail;
59
60 mqp->qp = qp;
61 atomic_inc(&qp->refcount);
62
63bail:
64 return mqp;
65}
66
67static void ipath_mcast_qp_free(struct ipath_mcast_qp *mqp)
68{
69 struct ipath_qp *qp = mqp->qp;
70
71 /* Notify ipath_destroy_qp() if it is waiting. */
72 if (atomic_dec_and_test(&qp->refcount))
73 wake_up(&qp->wait);
74
75 kfree(mqp);
76}
77
78/**
79 * ipath_mcast_alloc - allocate the multicast GID structure
80 * @mgid: the multicast GID
81 *
82 * A list of QPs will be attached to this structure.
83 */
84static struct ipath_mcast *ipath_mcast_alloc(union ib_gid *mgid)
85{
86 struct ipath_mcast *mcast;
87
88 mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
89 if (!mcast)
90 goto bail;
91
92 mcast->mgid = *mgid;
93 INIT_LIST_HEAD(&mcast->qp_list);
94 init_waitqueue_head(&mcast->wait);
95 atomic_set(&mcast->refcount, 0);
96 mcast->n_attached = 0;
97
98bail:
99 return mcast;
100}
101
102static void ipath_mcast_free(struct ipath_mcast *mcast)
103{
104 struct ipath_mcast_qp *p, *tmp;
105
106 list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
107 ipath_mcast_qp_free(p);
108
109 kfree(mcast);
110}
111
112/**
113 * ipath_mcast_find - search the global table for the given multicast GID
114 * @mgid: the multicast GID to search for
115 *
116 * Returns NULL if not found.
117 *
118 * The caller is responsible for decrementing the reference count if found.
119 */
120struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid)
121{
122 struct rb_node *n;
123 unsigned long flags;
124 struct ipath_mcast *mcast;
125
126 spin_lock_irqsave(&mcast_lock, flags);
127 n = mcast_tree.rb_node;
128 while (n) {
129 int ret;
130
131 mcast = rb_entry(n, struct ipath_mcast, rb_node);
132
133 ret = memcmp(mgid->raw, mcast->mgid.raw,
134 sizeof(union ib_gid));
135 if (ret < 0)
136 n = n->rb_left;
137 else if (ret > 0)
138 n = n->rb_right;
139 else {
140 atomic_inc(&mcast->refcount);
141 spin_unlock_irqrestore(&mcast_lock, flags);
142 goto bail;
143 }
144 }
145 spin_unlock_irqrestore(&mcast_lock, flags);
146
147 mcast = NULL;
148
149bail:
150 return mcast;
151}
152
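A sketch of a caller that honours the reference-count contract above (the delivery loop is elided; the function name is hypothetical and this is not the driver's receive path verbatim):

static int example_deliver_to_group(union ib_gid *mgid)
{
	struct ipath_mcast *mcast = ipath_mcast_find(mgid);
	struct ipath_mcast_qp *p;

	if (mcast == NULL)
		return 0;
	rcu_read_lock();
	list_for_each_entry_rcu(p, &mcast->qp_list, list) {
		/* ... hand the packet to p->qp ... */
	}
	rcu_read_unlock();
	/* drop the reference taken by ipath_mcast_find() */
	if (atomic_dec_return(&mcast->refcount) <= 1)
		wake_up(&mcast->wait);
	return 1;
}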
153/**
154 * ipath_mcast_add - insert mcast GID into table and attach QP struct
155 * @mcast: the mcast GID table
156 * @mqp: the QP to attach
157 *
158 * Return zero if both were added. Return EEXIST if the GID was already in
159 * the table but the QP was added. Return ESRCH if the QP was already
160 * attached and neither structure was added.
161 */
162static int ipath_mcast_add(struct ipath_ibdev *dev,
163 struct ipath_mcast *mcast,
164 struct ipath_mcast_qp *mqp)
165{
166 struct rb_node **n = &mcast_tree.rb_node;
167 struct rb_node *pn = NULL;
168 int ret;
169
170 spin_lock_irq(&mcast_lock);
171
172 while (*n) {
173 struct ipath_mcast *tmcast;
174 struct ipath_mcast_qp *p;
175
176 pn = *n;
177 tmcast = rb_entry(pn, struct ipath_mcast, rb_node);
178
179 ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
180 sizeof(union ib_gid));
181 if (ret < 0) {
182 n = &pn->rb_left;
183 continue;
184 }
185 if (ret > 0) {
186 n = &pn->rb_right;
187 continue;
188 }
189
190 /* Search the QP list to see if this is already there. */
191 list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
192 if (p->qp == mqp->qp) {
193 ret = ESRCH;
194 goto bail;
195 }
196 }
197 if (tmcast->n_attached == ib_ipath_max_mcast_qp_attached) {
198 ret = ENOMEM;
199 goto bail;
200 }
201
202 tmcast->n_attached++;
203
204 list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
205 ret = EEXIST;
206 goto bail;
207 }
208
209 spin_lock(&dev->n_mcast_grps_lock);
210 if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) {
211 spin_unlock(&dev->n_mcast_grps_lock);
212 ret = ENOMEM;
213 goto bail;
214 }
215
216 dev->n_mcast_grps_allocated++;
217 spin_unlock(&dev->n_mcast_grps_lock);
218
219 mcast->n_attached++;
220
221 list_add_tail_rcu(&mqp->list, &mcast->qp_list);
222
223 atomic_inc(&mcast->refcount);
224 rb_link_node(&mcast->rb_node, pn, n);
225 rb_insert_color(&mcast->rb_node, &mcast_tree);
226
227 ret = 0;
228
229bail:
230 spin_unlock_irq(&mcast_lock);
231
232 return ret;
233}
234
235int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
236{
237 struct ipath_qp *qp = to_iqp(ibqp);
238 struct ipath_ibdev *dev = to_idev(ibqp->device);
239 struct ipath_mcast *mcast;
240 struct ipath_mcast_qp *mqp;
241 int ret;
242
243 /*
244	 * Allocate data structures since it's better to do this outside of
245 * spin locks and it will most likely be needed.
246 */
247 mcast = ipath_mcast_alloc(gid);
248 if (mcast == NULL) {
249 ret = -ENOMEM;
250 goto bail;
251 }
252 mqp = ipath_mcast_qp_alloc(qp);
253 if (mqp == NULL) {
254 ipath_mcast_free(mcast);
255 ret = -ENOMEM;
256 goto bail;
257 }
258 switch (ipath_mcast_add(dev, mcast, mqp)) {
259 case ESRCH:
260 /* Neither was used: can't attach the same QP twice. */
261 ipath_mcast_qp_free(mqp);
262 ipath_mcast_free(mcast);
263 ret = -EINVAL;
264 goto bail;
265 case EEXIST: /* The mcast wasn't used */
266 ipath_mcast_free(mcast);
267 break;
268 case ENOMEM:
269 /* Exceeded the maximum number of mcast groups. */
270 ipath_mcast_qp_free(mqp);
271 ipath_mcast_free(mcast);
272 ret = -ENOMEM;
273 goto bail;
274 default:
275 break;
276 }
277
278 ret = 0;
279
280bail:
281 return ret;
282}
283
284int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
285{
286 struct ipath_qp *qp = to_iqp(ibqp);
287 struct ipath_ibdev *dev = to_idev(ibqp->device);
288 struct ipath_mcast *mcast = NULL;
289 struct ipath_mcast_qp *p, *tmp;
290 struct rb_node *n;
291 int last = 0;
292 int ret;
293
294 spin_lock_irq(&mcast_lock);
295
296 /* Find the GID in the mcast table. */
297 n = mcast_tree.rb_node;
298 while (1) {
299 if (n == NULL) {
300 spin_unlock_irq(&mcast_lock);
301 ret = -EINVAL;
302 goto bail;
303 }
304
305 mcast = rb_entry(n, struct ipath_mcast, rb_node);
306 ret = memcmp(gid->raw, mcast->mgid.raw,
307 sizeof(union ib_gid));
308 if (ret < 0)
309 n = n->rb_left;
310 else if (ret > 0)
311 n = n->rb_right;
312 else
313 break;
314 }
315
316 /* Search the QP list. */
317 list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
318 if (p->qp != qp)
319 continue;
320 /*
321 * We found it, so remove it, but don't poison the forward
322 * link until we are sure there are no list walkers.
323 */
324 list_del_rcu(&p->list);
325 mcast->n_attached--;
326
327 /* If this was the last attached QP, remove the GID too. */
328 if (list_empty(&mcast->qp_list)) {
329 rb_erase(&mcast->rb_node, &mcast_tree);
330 last = 1;
331 }
332 break;
333 }
334
335 spin_unlock_irq(&mcast_lock);
336
337 if (p) {
338 /*
339 * Wait for any list walkers to finish before freeing the
340 * list element.
341 */
342 wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
343 ipath_mcast_qp_free(p);
344 }
345 if (last) {
346 atomic_dec(&mcast->refcount);
347 wait_event(mcast->wait, !atomic_read(&mcast->refcount));
348 ipath_mcast_free(mcast);
349 spin_lock_irq(&dev->n_mcast_grps_lock);
350 dev->n_mcast_grps_allocated--;
351 spin_unlock_irq(&dev->n_mcast_grps_lock);
352 }
353
354 ret = 0;
355
356bail:
357 return ret;
358}
359
360int ipath_mcast_tree_empty(void)
361{
362 return mcast_tree.rb_node == NULL;
363}
diff --git a/drivers/staging/rdma/ipath/ipath_wc_ppc64.c b/drivers/staging/rdma/ipath/ipath_wc_ppc64.c
deleted file mode 100644
index 1a7e20a75149..000000000000
--- a/drivers/staging/rdma/ipath/ipath_wc_ppc64.c
+++ /dev/null
@@ -1,49 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33/*
34 * This file is conditionally built on PowerPC only. Otherwise weak symbol
35 * versions of the functions exported from here are used.
36 */
37
38#include "ipath_kernel.h"
39
40/**
41 * ipath_enable_wc - enable write combining for MMIO writes to the device
42 * @dd: infinipath device
43 *
44 * Nothing to do on PowerPC, so just return without error.
45 */
46int ipath_enable_wc(struct ipath_devdata *dd)
47{
48 return 0;
49}
diff --git a/drivers/staging/rdma/ipath/ipath_wc_x86_64.c b/drivers/staging/rdma/ipath/ipath_wc_x86_64.c
deleted file mode 100644
index 7b6e4c843e19..000000000000
--- a/drivers/staging/rdma/ipath/ipath_wc_x86_64.c
+++ /dev/null
@@ -1,144 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34/*
35 * This file is conditionally built on x86_64 only. Otherwise weak symbol
36 * versions of the functions exported from here are used.
37 */
38
39#include <linux/pci.h>
40#include <asm/processor.h>
41
42#include "ipath_kernel.h"
43
44/**
45 * ipath_enable_wc - enable write combining for MMIO writes to the device
46 * @dd: infinipath device
47 *
48 * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
49 * write combining.
50 */
51int ipath_enable_wc(struct ipath_devdata *dd)
52{
53 int ret = 0;
54 u64 pioaddr, piolen;
55 unsigned bits;
56 const unsigned long addr = pci_resource_start(dd->pcidev, 0);
57 const size_t len = pci_resource_len(dd->pcidev, 0);
58
59 /*
60 * Set the PIO buffers to be WCCOMB, so we get HT bursts to the
61 * chip. Linux (possibly the hardware) requires it to be on a power
62 * of 2 address matching the length (which has to be a power of 2).
63 * For rev1, that means the base address, for rev2, it will be just
64 * the PIO buffers themselves.
65 * For chips with two sets of buffers, the calculations are
66 * somewhat more complicated; we need to sum, and the piobufbase
67 * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
68 * The buffers are still packed, so a single range covers both.
69 */
70 if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */
71 unsigned long pio2kbase, pio4kbase;
72 pio2kbase = dd->ipath_piobufbase & 0xffffffffUL;
73 pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL;
74 if (pio2kbase < pio4kbase) { /* all, for now */
75 pioaddr = addr + pio2kbase;
76 piolen = pio4kbase - pio2kbase +
77 dd->ipath_piobcnt4k * dd->ipath_4kalign;
78 } else {
79 pioaddr = addr + pio4kbase;
80 piolen = pio2kbase - pio4kbase +
81 dd->ipath_piobcnt2k * dd->ipath_palign;
82 }
83 } else { /* single buffer size (2K, currently) */
84 pioaddr = addr + dd->ipath_piobufbase;
85 piolen = dd->ipath_piobcnt2k * dd->ipath_palign +
86 dd->ipath_piobcnt4k * dd->ipath_4kalign;
87 }
88
89 for (bits = 0; !(piolen & (1ULL << bits)); bits++)
90 /* do nothing */ ;
91
92 if (piolen != (1ULL << bits)) {
93 piolen >>= bits;
94 while (piolen >>= 1)
95 bits++;
96 piolen = 1ULL << (bits + 1);
97 }
98 if (pioaddr & (piolen - 1)) {
99 u64 atmp;
100 ipath_dbg("pioaddr %llx not on right boundary for size "
101 "%llx, fixing\n",
102 (unsigned long long) pioaddr,
103 (unsigned long long) piolen);
104 atmp = pioaddr & ~(piolen - 1);
105 if (atmp < addr || (atmp + piolen) > (addr + len)) {
106 ipath_dev_err(dd, "No way to align address/size "
107 "(%llx/%llx), no WC mtrr\n",
108 (unsigned long long) atmp,
109 (unsigned long long) piolen << 1);
110 ret = -ENODEV;
111 } else {
112 ipath_dbg("changing WC base from %llx to %llx, "
113 "len from %llx to %llx\n",
114 (unsigned long long) pioaddr,
115 (unsigned long long) atmp,
116 (unsigned long long) piolen,
117 (unsigned long long) piolen << 1);
118 pioaddr = atmp;
119 piolen <<= 1;
120 }
121 }
122
123 if (!ret) {
124 dd->wc_cookie = arch_phys_wc_add(pioaddr, piolen);
125 if (dd->wc_cookie < 0) {
126			ipath_dev_err(dd, "Setting mtrr failed on PIO buffers\n");
127 ret = -ENODEV;
128 } else if (dd->wc_cookie == 0)
129 ipath_cdbg(VERBOSE, "Set mtrr for chip to WC not needed\n");
130 else
131 ipath_cdbg(VERBOSE, "Set mtrr for chip to WC\n");
132 }
133
134 return ret;
135}
136
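The bit loop above rounds piolen up to the next power of two (an MTRR/write-combining range must have a power-of-two length aligned to that length). For the lengths handled here the generic helper from <linux/log2.h> computes the same value; a sketch, assuming a 64-bit unsigned long:

static inline u64 example_round_wc_len(u64 piolen)
{
	/* equivalent to the explicit shift loop for non-zero lengths */
	return roundup_pow_of_two(piolen);
}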
137/**
138 * ipath_disable_wc - disable write combining for MMIO writes to the device
139 * @dd: infinipath device
140 */
141void ipath_disable_wc(struct ipath_devdata *dd)
142{
143 arch_phys_wc_del(dd->wc_cookie);
144}