Diffstat (limited to 'drivers/infiniband')
61 files changed, 25497 insertions, 125 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index bdf0891a92dd..afc612b8577d 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -30,6 +30,7 @@ config INFINIBAND_USER_ACCESS
 	  <http://www.openib.org>.
 
 source "drivers/infiniband/hw/mthca/Kconfig"
+source "drivers/infiniband/hw/ipath/Kconfig"
 
 source "drivers/infiniband/ulp/ipoib/Kconfig"
 
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index a43fb34cca94..eea27322a22d 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_INFINIBAND)		+= core/
 obj-$(CONFIG_INFINIBAND_MTHCA)		+= hw/mthca/
+obj-$(CONFIG_IPATH_CORE)		+= hw/ipath/
 obj-$(CONFIG_INFINIBAND_IPOIB)		+= ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP)		+= ulp/srp/
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index c57a3871184c..50364c0b090c 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -302,7 +302,7 @@ static void ib_cache_setup_one(struct ib_device *device)
 		kmalloc(sizeof *device->cache.pkey_cache *
 			(end_port(device) - start_port(device) + 1), GFP_KERNEL);
 	device->cache.gid_cache =
-		kmalloc(sizeof *device->cache.pkey_cache *
+		kmalloc(sizeof *device->cache.gid_cache *
 			(end_port(device) - start_port(device) + 1), GFP_KERNEL);
 
 	if (!device->cache.pkey_cache || !device->cache.gid_cache) {
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ba54c856b0e5..469b6923a2e2 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -228,10 +228,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 			goto error1;
 		}
 		/* Make sure class supplied is consistent with RMPP */
-		if (ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
-			if (!rmpp_version)
-				goto error1;
-		} else {
+		if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
 			if (rmpp_version)
 				goto error1;
 		}
@@ -2311,6 +2308,7 @@ static void local_completions(void *data)
 		local = list_entry(mad_agent_priv->local_list.next,
 				   struct ib_mad_local_private,
 				   completion_list);
+		list_del(&local->completion_list);
 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 		if (local->mad_priv) {
 			recv_mad_agent = local->recv_mad_agent;
@@ -2362,7 +2360,6 @@ local_send_completion:
 						   &mad_send_wc);
 
 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
-		list_del(&local->completion_list);
 		atomic_dec(&mad_agent_priv->refcount);
 		if (!recv)
 			kmem_cache_free(ib_mad_cache, local->mad_priv);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index cae0845f472a..b78e7dc69330 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -45,6 +45,40 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_cache.h>
 
+int ib_rate_to_mult(enum ib_rate rate)
+{
+	switch (rate) {
+	case IB_RATE_2_5_GBPS: return  1;
+	case IB_RATE_5_GBPS:   return  2;
+	case IB_RATE_10_GBPS:  return  4;
+	case IB_RATE_20_GBPS:  return  8;
+	case IB_RATE_30_GBPS:  return 12;
+	case IB_RATE_40_GBPS:  return 16;
+	case IB_RATE_60_GBPS:  return 24;
+	case IB_RATE_80_GBPS:  return 32;
+	case IB_RATE_120_GBPS: return 48;
+	default:	       return -1;
+	}
+}
+EXPORT_SYMBOL(ib_rate_to_mult);
+
+enum ib_rate mult_to_ib_rate(int mult)
+{
+	switch (mult) {
+	case 1:  return IB_RATE_2_5_GBPS;
+	case 2:  return IB_RATE_5_GBPS;
+	case 4:  return IB_RATE_10_GBPS;
+	case 8:  return IB_RATE_20_GBPS;
+	case 12: return IB_RATE_30_GBPS;
+	case 16: return IB_RATE_40_GBPS;
+	case 24: return IB_RATE_60_GBPS;
+	case 32: return IB_RATE_80_GBPS;
+	case 48: return IB_RATE_120_GBPS;
+	default: return IB_RATE_PORT_CURRENT;
+	}
+}
+EXPORT_SYMBOL(mult_to_ib_rate);
+
 /* Protection domains */
 
 struct ib_pd *ib_alloc_pd(struct ib_device *device)
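Editor's note: the two exported helpers added above are inverses over the defined multipliers of the 2.5 Gbps base rate; out-of-range inputs fall back to -1 and IB_RATE_PORT_CURRENT respectively. A minimal usage sketch follows (illustrative only, not part of the patch; the function name is hypothetical and only the helpers plus the ib_rate enum from <rdma/ib_verbs.h> are assumed):

	#include <rdma/ib_verbs.h>

	/* Round-trip a rate through its multiplier: 20 Gbps <-> 8 x 2.5 Gbps. */
	static int ipath_example_rate_round_trip(void)
	{
		int mult = ib_rate_to_mult(IB_RATE_20_GBPS);	/* 8 */
		enum ib_rate rate = mult_to_ib_rate(mult);	/* IB_RATE_20_GBPS */

		return mult == 8 && rate == IB_RATE_20_GBPS;
	}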
diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig
new file mode 100644
index 000000000000..9ea67c409b6d
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/Kconfig
@@ -0,0 +1,16 @@
1 | config IPATH_CORE | ||
2 | tristate "PathScale InfiniPath Driver" | ||
3 | depends on 64BIT && PCI_MSI && NET | ||
4 | ---help--- | ||
5 | This is a low-level driver for PathScale InfiniPath host channel | ||
6 | adapters (HCAs) based on the HT-400 and PE-800 chips. | ||
7 | |||
8 | config INFINIBAND_IPATH | ||
9 | tristate "PathScale InfiniPath Verbs Driver" | ||
10 | depends on IPATH_CORE && INFINIBAND | ||
11 | ---help--- | ||
12 | This is a driver that provides InfiniBand verbs support for | ||
13 | PathScale InfiniPath host channel adapters (HCAs). This | ||
14 | allows these devices to be used with both kernel upper level | ||
15 | protocols such as IP-over-InfiniBand as well as with userspace | ||
16 | applications (in conjunction with InfiniBand userspace access). | ||
diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
new file mode 100644
index 000000000000..b4d084abfd22
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -0,0 +1,36 @@
1 | EXTRA_CFLAGS += -DIPATH_IDSTR='"PathScale kernel.org driver"' \ | ||
2 | -DIPATH_KERN_TYPE=0 | ||
3 | |||
4 | obj-$(CONFIG_IPATH_CORE) += ipath_core.o | ||
5 | obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o | ||
6 | |||
7 | ipath_core-y := \ | ||
8 | ipath_diag.o \ | ||
9 | ipath_driver.o \ | ||
10 | ipath_eeprom.o \ | ||
11 | ipath_file_ops.o \ | ||
12 | ipath_fs.o \ | ||
13 | ipath_ht400.o \ | ||
14 | ipath_init_chip.o \ | ||
15 | ipath_intr.o \ | ||
16 | ipath_layer.o \ | ||
17 | ipath_pe800.o \ | ||
18 | ipath_stats.o \ | ||
19 | ipath_sysfs.o \ | ||
20 | ipath_user_pages.o | ||
21 | |||
22 | ipath_core-$(CONFIG_X86_64) += ipath_wc_x86_64.o | ||
23 | |||
24 | ib_ipath-y := \ | ||
25 | ipath_cq.o \ | ||
26 | ipath_keys.o \ | ||
27 | ipath_mad.o \ | ||
28 | ipath_mr.o \ | ||
29 | ipath_qp.o \ | ||
30 | ipath_rc.o \ | ||
31 | ipath_ruc.o \ | ||
32 | ipath_srq.o \ | ||
33 | ipath_uc.o \ | ||
34 | ipath_ud.o \ | ||
35 | ipath_verbs.o \ | ||
36 | ipath_verbs_mcast.o | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
new file mode 100644
index 000000000000..48a55247b832
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -0,0 +1,616 @@
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #ifndef _IPATH_COMMON_H | ||
34 | #define _IPATH_COMMON_H | ||
35 | |||
36 | /* | ||
37 | * This file contains defines, structures, etc. that are used | ||
38 | * to communicate between kernel and user code. | ||
39 | */ | ||
40 | |||
41 | /* This is the IEEE-assigned OUI for PathScale, Inc. */ | ||
42 | #define IPATH_SRC_OUI_1 0x00 | ||
43 | #define IPATH_SRC_OUI_2 0x11 | ||
44 | #define IPATH_SRC_OUI_3 0x75 | ||
45 | |||
46 | /* version of protocol header (known to chip also). In the long run, | ||
47 | * we should be able to generate and accept a range of version numbers; | ||
48 | * for now we only accept one, and it's compiled in. | ||
49 | */ | ||
50 | #define IPS_PROTO_VERSION 2 | ||
51 | |||
52 | /* | ||
53 | * These are compile time constants that you may want to enable or disable | ||
54 | * if you are trying to debug problems with code or performance. | ||
55 | * IPATH_VERBOSE_TRACING define as 1 if you want additional tracing in | ||
56 | * fastpath code | ||
57 | * IPATH_TRACE_REGWRITES define as 1 if you want register writes to be | ||
58 | * traced in fastpath code | ||
59 | * _IPATH_TRACING define as 0 if you want to remove all tracing in a | ||
60 | * compilation unit | ||
61 | * _IPATH_DEBUGGING define as 0 if you want to remove debug prints | ||
62 | */ | ||
63 | |||
64 | /* | ||
65 | * The value in the BTH QP field that InfiniPath uses to differentiate | ||
66 | * an infinipath protocol IB packet vs standard IB transport | ||
67 | */ | ||
68 | #define IPATH_KD_QP 0x656b79 | ||
69 | |||
70 | /* | ||
71 | * valid states passed to ipath_set_linkstate() user call | ||
72 | */ | ||
73 | #define IPATH_IB_LINKDOWN 0 | ||
74 | #define IPATH_IB_LINKARM 1 | ||
75 | #define IPATH_IB_LINKACTIVE 2 | ||
76 | #define IPATH_IB_LINKINIT 3 | ||
77 | #define IPATH_IB_LINKDOWN_SLEEP 4 | ||
78 | #define IPATH_IB_LINKDOWN_DISABLE 5 | ||
79 | |||
80 | /* | ||
81 | * stats maintained by the driver. For now, at least, this is global | ||
82 | * to all minor devices. | ||
83 | */ | ||
84 | struct infinipath_stats { | ||
85 | /* number of interrupts taken */ | ||
86 | __u64 sps_ints; | ||
87 | /* number of interrupts for errors */ | ||
88 | __u64 sps_errints; | ||
89 | /* number of errors from chip (not incl. packet errors or CRC) */ | ||
90 | __u64 sps_errs; | ||
91 | /* number of packet errors from chip other than CRC */ | ||
92 | __u64 sps_pkterrs; | ||
93 | /* number of packets with CRC errors (ICRC and VCRC) */ | ||
94 | __u64 sps_crcerrs; | ||
95 | /* number of hardware errors reported (parity, etc.) */ | ||
96 | __u64 sps_hwerrs; | ||
97 | /* number of times IB link changed state unexpectedly */ | ||
98 | __u64 sps_iblink; | ||
99 | /* no longer used; left for compatibility */ | ||
100 | __u64 sps_unused3; | ||
101 | /* number of kernel (port0) packets received */ | ||
102 | __u64 sps_port0pkts; | ||
103 | /* number of "ethernet" packets sent by driver */ | ||
104 | __u64 sps_ether_spkts; | ||
105 | /* number of "ethernet" packets received by driver */ | ||
106 | __u64 sps_ether_rpkts; | ||
107 | /* number of SMA packets sent by driver */ | ||
108 | __u64 sps_sma_spkts; | ||
109 | /* number of SMA packets received by driver */ | ||
110 | __u64 sps_sma_rpkts; | ||
111 | /* number of times all ports rcvhdrq was full and packet dropped */ | ||
112 | __u64 sps_hdrqfull; | ||
113 | /* number of times all ports egrtid was full and packet dropped */ | ||
114 | __u64 sps_etidfull; | ||
115 | /* | ||
116 | * number of times we tried to send from driver, but no pio buffers | ||
117 | * avail | ||
118 | */ | ||
119 | __u64 sps_nopiobufs; | ||
120 | /* number of ports currently open */ | ||
121 | __u64 sps_ports; | ||
122 | /* list of pkeys (other than default) accepted (0 means not set) */ | ||
123 | __u16 sps_pkeys[4]; | ||
124 | /* lids for up to 4 infinipaths, indexed by infinipath # */ | ||
125 | __u16 sps_lid[4]; | ||
126 | /* number of user ports per chip (not IB ports) */ | ||
127 | __u32 sps_nports; | ||
128 | /* not our interrupt, or already handled */ | ||
129 | __u32 sps_nullintr; | ||
130 | /* max number of packets handled per receive call */ | ||
131 | __u32 sps_maxpkts_call; | ||
132 | /* avg number of packets handled per receive call */ | ||
133 | __u32 sps_avgpkts_call; | ||
134 | /* total number of pages locked */ | ||
135 | __u64 sps_pagelocks; | ||
136 | /* total number of pages unlocked */ | ||
137 | __u64 sps_pageunlocks; | ||
138 | /* | ||
139 | * Number of packets dropped in kernel other than errors (ether | ||
140 | * packets if ipath not configured, sma/mad, etc.) | ||
141 | */ | ||
142 | __u64 sps_krdrops; | ||
143 | /* mlids for up to 4 infinipaths, indexed by infinipath # */ | ||
144 | __u16 sps_mlid[4]; | ||
145 | /* pad for future growth */ | ||
146 | __u64 __sps_pad[45]; | ||
147 | }; | ||
148 | |||
149 | /* | ||
150 | * These are the status bits readable (in ascii form, 64bit value) | ||
151 | * from the "status" sysfs file. | ||
152 | */ | ||
153 | #define IPATH_STATUS_INITTED 0x1 /* basic initialization done */ | ||
154 | #define IPATH_STATUS_DISABLED 0x2 /* hardware disabled */ | ||
155 | /* Device has been disabled via admin request */ | ||
156 | #define IPATH_STATUS_ADMIN_DISABLED 0x4 | ||
157 | #define IPATH_STATUS_OIB_SMA 0x8 /* ipath_mad kernel SMA running */ | ||
158 | #define IPATH_STATUS_SMA 0x10 /* user SMA running */ | ||
159 | /* Chip has been found and initted */ | ||
160 | #define IPATH_STATUS_CHIP_PRESENT 0x20 | ||
161 | /* IB link is at ACTIVE, usable for data traffic */ | ||
162 | #define IPATH_STATUS_IB_READY 0x40 | ||
163 | /* link is configured, LID, MTU, etc. have been set */ | ||
164 | #define IPATH_STATUS_IB_CONF 0x80 | ||
165 | /* no link established, probably no cable */ | ||
166 | #define IPATH_STATUS_IB_NOCABLE 0x100 | ||
167 | /* A Fatal hardware error has occurred. */ | ||
168 | #define IPATH_STATUS_HWERROR 0x200 | ||
169 | |||
170 | /* | ||
171 | * The list of usermode accessible registers. Also see Reg_* later in file. | ||
172 | */ | ||
173 | typedef enum _ipath_ureg { | ||
174 | /* (RO) DMA RcvHdr to be used next. */ | ||
175 | ur_rcvhdrtail = 0, | ||
176 | /* (RW) RcvHdr entry to be processed next by host. */ | ||
177 | ur_rcvhdrhead = 1, | ||
178 | /* (RO) Index of next Eager index to use. */ | ||
179 | ur_rcvegrindextail = 2, | ||
180 | /* (RW) Eager TID to be processed next */ | ||
181 | ur_rcvegrindexhead = 3, | ||
182 | /* For internal use only; max register number. */ | ||
183 | _IPATH_UregMax | ||
184 | } ipath_ureg; | ||
185 | |||
186 | /* bit values for spi_runtime_flags */ | ||
187 | #define IPATH_RUNTIME_HT 0x1 | ||
188 | #define IPATH_RUNTIME_PCIE 0x2 | ||
189 | #define IPATH_RUNTIME_FORCE_WC_ORDER 0x4 | ||
190 | #define IPATH_RUNTIME_RCVHDR_COPY 0x8 | ||
191 | |||
192 | /* | ||
193 | * This structure is returned by ipath_userinit() immediately after | ||
194 | * open to get implementation-specific info, and info specific to this | ||
195 | * instance. | ||
196 | * | ||
197 | * This struct must have explicit pad fields where type sizes | ||
198 | * may result in different alignments between 32 and 64 bit | ||
199 | * programs, since the 64 bit kernel requires the user code | ||
200 | * to have matching offsets | ||
201 | */ | ||
202 | struct ipath_base_info { | ||
203 | /* version of hardware, for feature checking. */ | ||
204 | __u32 spi_hw_version; | ||
205 | /* version of software, for feature checking. */ | ||
206 | __u32 spi_sw_version; | ||
207 | /* InfiniPath port assigned, goes into sent packets */ | ||
208 | __u32 spi_port; | ||
209 | /* | ||
210 | * IB MTU, packets IB data must be less than this. | ||
211 | * The MTU is in bytes, and will be a multiple of 4 bytes. | ||
212 | */ | ||
213 | __u32 spi_mtu; | ||
214 | /* | ||
215 | * Size of a PIO buffer. Any given packet's total size must be less | ||
216 | * than this (in words). Included is the starting control word, so | ||
217 | * if 513 is returned, then total pkt size is 512 words or less. | ||
218 | */ | ||
219 | __u32 spi_piosize; | ||
220 | /* size of the TID cache in infinipath, in entries */ | ||
221 | __u32 spi_tidcnt; | ||
222 | /* size of the TID Eager list in infinipath, in entries */ | ||
223 | __u32 spi_tidegrcnt; | ||
224 | /* size of a single receive header queue entry. */ | ||
225 | __u32 spi_rcvhdrent_size; | ||
226 | /* | ||
227 | * Count of receive header queue entries allocated. | ||
228 | * This may be less than the spu_rcvhdrcnt passed in! | ||
229 | */ | ||
230 | __u32 spi_rcvhdr_cnt; | ||
231 | |||
232 | /* per-chip and other runtime features bitmap (IPATH_RUNTIME_*) */ | ||
233 | __u32 spi_runtime_flags; | ||
234 | |||
235 | /* address where receive buffer queue is mapped into */ | ||
236 | __u64 spi_rcvhdr_base; | ||
237 | |||
238 | /* user program. */ | ||
239 | |||
240 | /* base address of eager TID receive buffers. */ | ||
241 | __u64 spi_rcv_egrbufs; | ||
242 | |||
243 | /* Allocated by initialization code, not by protocol. */ | ||
244 | |||
245 | /* | ||
246 | * Size of each TID buffer in host memory, starting at | ||
247 | * spi_rcv_egrbufs. The buffers are virtually contiguous. | ||
248 | */ | ||
249 | __u32 spi_rcv_egrbufsize; | ||
250 | /* | ||
251 | * The special QP (queue pair) value that identifies an infinipath | ||
252 | * protocol packet from standard IB packets. More, probably much | ||
253 | * more, to be added. | ||
254 | */ | ||
255 | __u32 spi_qpair; | ||
256 | |||
257 | /* | ||
258 | * User register base for init code, not to be used directly by | ||
259 | * protocol or applications. | ||
260 | */ | ||
261 | __u64 __spi_uregbase; | ||
262 | /* | ||
263 | * Maximum buffer size in bytes that can be used in a single TID | ||
264 | * entry (assuming the buffer is aligned to this boundary). This is | ||
265 | * the minimum of what the hardware and software support Guaranteed | ||
266 | * to be a power of 2. | ||
267 | */ | ||
268 | __u32 spi_tid_maxsize; | ||
269 | /* | ||
270 | * alignment of each pio send buffer (byte count | ||
271 | * to add to spi_piobufbase to get to second buffer) | ||
272 | */ | ||
273 | __u32 spi_pioalign; | ||
274 | /* | ||
275 | * The index of the first pio buffer available to this process; | ||
276 | * needed to do lookup in spi_pioavailaddr; not added to | ||
277 | * spi_piobufbase. | ||
278 | */ | ||
279 | __u32 spi_pioindex; | ||
280 | /* number of buffers mapped for this process */ | ||
281 | __u32 spi_piocnt; | ||
282 | |||
283 | /* | ||
284 | * Base address of writeonly pio buffers for this process. | ||
285 | * Each buffer has spi_piosize words, and is aligned on spi_pioalign | ||
286 | * boundaries. spi_piocnt buffers are mapped from this address | ||
287 | */ | ||
288 | __u64 spi_piobufbase; | ||
289 | |||
290 | /* | ||
291 | * Base address of readonly memory copy of the pioavail registers. | ||
292 | * There are 2 bits for each buffer. | ||
293 | */ | ||
294 | __u64 spi_pioavailaddr; | ||
295 | |||
296 | /* | ||
297 | * Address where driver updates a copy of the interface and driver | ||
298 | * status (IPATH_STATUS_*) as a 64 bit value. It's followed by a | ||
299 | * string indicating hardware error, if there was one. | ||
300 | */ | ||
301 | __u64 spi_status; | ||
302 | |||
303 | /* number of chip ports available to user processes */ | ||
304 | __u32 spi_nports; | ||
305 | /* unit number of chip we are using */ | ||
306 | __u32 spi_unit; | ||
307 | /* num bufs in each contiguous set */ | ||
308 | __u32 spi_rcv_egrperchunk; | ||
309 | /* size in bytes of each contiguous set */ | ||
310 | __u32 spi_rcv_egrchunksize; | ||
311 | /* total size of mmap to cover full rcvegrbuffers */ | ||
312 | __u32 spi_rcv_egrbuftotlen; | ||
313 | } __attribute__ ((aligned(8))); | ||
314 | |||
315 | |||
316 | /* | ||
317 | * This version number is given to the driver by the user code during | ||
318 | * initialization in the spu_userversion field of ipath_user_info, so | ||
319 | * the driver can check for compatibility with user code. | ||
320 | * | ||
321 | * The major version changes when data structures | ||
322 | * change in an incompatible way. The driver must be the same or higher | ||
323 | * for initialization to succeed. In some cases, a higher version | ||
324 | * driver will not interoperate with older software, and initialization | ||
325 | * will return an error. | ||
326 | */ | ||
327 | #define IPATH_USER_SWMAJOR 1 | ||
328 | |||
329 | /* | ||
330 | * Minor version differences are always compatible | ||
331 | * a within a major version, however if if user software is larger | ||
332 | * than driver software, some new features and/or structure fields | ||
333 | * may not be implemented; the user code must deal with this if it | ||
334 | * cares, or it must abort after initialization reports the difference | ||
335 | */ | ||
336 | #define IPATH_USER_SWMINOR 2 | ||
337 | |||
338 | #define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR) | ||
339 | |||
340 | #define IPATH_KERN_TYPE 0 | ||
341 | |||
342 | /* | ||
343 | * Similarly, this is the kernel version going back to the user. It's | ||
344 | * slightly different, in that we want to tell if the driver was built as | ||
345 | * part of a PathScale release, or from OpenIB, kernel.org, | ||
346 | * or a standard distribution, for support reasons. The high bit is 0 for | ||
347 | * non-PathScale, and 1 for PathScale-built/supplied. | ||
348 | * | ||
349 | * It's returned by the driver to the user code during initialization in the | ||
350 | * spi_sw_version field of ipath_base_info, so the user code can in turn | ||
351 | * check for compatibility with the kernel. | ||
352 | */ | ||
353 | #define IPATH_KERN_SWVERSION ((IPATH_KERN_TYPE<<31) | IPATH_USER_SWVERSION) | ||
354 | |||
355 | /* | ||
356 | * This structure is passed to ipath_userinit() to tell the driver where | ||
357 | * user code buffers are, sizes, etc. The offsets and sizes of the | ||
358 | * fields must remain unchanged, for binary compatibility. It can | ||
359 | * be extended, if userversion is changed so user code can tell, if needed | ||
360 | */ | ||
361 | struct ipath_user_info { | ||
362 | /* | ||
363 | * version of user software, to detect compatibility issues. | ||
364 | * Should be set to IPATH_USER_SWVERSION. | ||
365 | */ | ||
366 | __u32 spu_userversion; | ||
367 | |||
368 | /* desired number of receive header queue entries */ | ||
369 | __u32 spu_rcvhdrcnt; | ||
370 | |||
371 | /* size of struct base_info to write to */ | ||
372 | __u32 spu_base_info_size; | ||
373 | |||
374 | /* | ||
375 | * number of words in KD protocol header | ||
376 | * This tells InfiniPath how many words to copy to rcvhdrq. If 0, | ||
377 | * kernel uses a default. Once set, attempts to set any other value | ||
378 | * are an error (EAGAIN) until driver is reloaded. | ||
379 | */ | ||
380 | __u32 spu_rcvhdrsize; | ||
381 | |||
382 | /* | ||
383 | * cache line aligned (64 byte) user address to | ||
384 | * which the rcvhdrtail register will be written by infinipath | ||
385 | * whenever it changes, so that no chip registers are read in | ||
386 | * the performance path. | ||
387 | */ | ||
388 | __u64 spu_rcvhdraddr; | ||
389 | |||
390 | /* | ||
391 | * address of struct base_info to write to | ||
392 | */ | ||
393 | __u64 spu_base_info; | ||
394 | |||
395 | } __attribute__ ((aligned(8))); | ||
396 | |||
397 | /* User commands. */ | ||
398 | |||
399 | #define IPATH_CMD_MIN 16 | ||
400 | |||
401 | #define IPATH_CMD_USER_INIT 16 /* set up userspace */ | ||
402 | #define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */ | ||
403 | #define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */ | ||
404 | #define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */ | ||
405 | #define IPATH_CMD_TID_FREE 20 /* free expected TID entries */ | ||
406 | #define IPATH_CMD_SET_PART_KEY 21 /* add partition key */ | ||
407 | |||
408 | #define IPATH_CMD_MAX 21 | ||
409 | |||
410 | struct ipath_port_info { | ||
411 | __u32 num_active; /* number of active units */ | ||
412 | __u32 unit; /* unit (chip) assigned to caller */ | ||
413 | __u32 port; /* port on unit assigned to caller */ | ||
414 | }; | ||
415 | |||
416 | struct ipath_tid_info { | ||
417 | __u32 tidcnt; | ||
418 | /* make structure same size in 32 and 64 bit */ | ||
419 | __u32 tid__unused; | ||
420 | /* virtual address of first page in transfer */ | ||
421 | __u64 tidvaddr; | ||
422 | /* pointer (same size 32/64 bit) to __u16 tid array */ | ||
423 | __u64 tidlist; | ||
424 | |||
425 | /* | ||
426 | * pointer (same size 32/64 bit) to bitmap of TIDs used | ||
427 | * for this call; checked for being large enough at open | ||
428 | */ | ||
429 | __u64 tidmap; | ||
430 | }; | ||
431 | |||
432 | struct ipath_cmd { | ||
433 | __u32 type; /* command type */ | ||
434 | union { | ||
435 | struct ipath_tid_info tid_info; | ||
436 | struct ipath_user_info user_info; | ||
437 | /* address in userspace of struct ipath_port_info to | ||
438 | write result to */ | ||
439 | __u64 port_info; | ||
440 | /* enable/disable receipt of packets */ | ||
441 | __u32 recv_ctrl; | ||
442 | /* partition key to set */ | ||
443 | __u16 part_key; | ||
444 | } cmd; | ||
445 | }; | ||
446 | |||
447 | struct ipath_iovec { | ||
448 | /* Pointer to data, but same size 32 and 64 bit */ | ||
449 | __u64 iov_base; | ||
450 | |||
451 | /* | ||
452 | * Length of data; don't need 64 bits, but want | ||
453 | * ipath_sendpkt to remain same size as before 32 bit changes, so... | ||
454 | */ | ||
455 | __u64 iov_len; | ||
456 | }; | ||
457 | |||
458 | /* | ||
459 | * Describes a single packet for send. Each packet can have one or more | ||
460 | * buffers, but the total length (exclusive of IB headers) must be less | ||
461 | * than the MTU, and if using the PIO method, entire packet length, | ||
462 | * including IB headers, must be less than the ipath_piosize value (words). | ||
463 | * Use of this necessitates including sys/uio.h | ||
464 | */ | ||
465 | struct __ipath_sendpkt { | ||
466 | __u32 sps_flags; /* flags for packet (TBD) */ | ||
467 | __u32 sps_cnt; /* number of entries to use in sps_iov */ | ||
468 | /* array of iov's describing packet. TEMPORARY */ | ||
469 | struct ipath_iovec sps_iov[4]; | ||
470 | }; | ||
471 | |||
472 | /* Passed into SMA special file's ->read and ->write methods. */ | ||
473 | struct ipath_sma_pkt | ||
474 | { | ||
475 | __u32 unit; /* unit on which to send packet */ | ||
476 | __u64 data; /* address of payload in userspace */ | ||
477 | __u32 len; /* length of payload */ | ||
478 | }; | ||
479 | |||
480 | /* | ||
481 | * Data layout in I2C flash (for GUID, etc.) | ||
482 | * All fields are little-endian binary unless otherwise stated | ||
483 | */ | ||
484 | #define IPATH_FLASH_VERSION 1 | ||
485 | struct ipath_flash { | ||
486 | /* flash layout version (IPATH_FLASH_VERSION) */ | ||
487 | __u8 if_fversion; | ||
488 | /* checksum protecting if_length bytes */ | ||
489 | __u8 if_csum; | ||
490 | /* | ||
491 | * valid length (in use, protected by if_csum), including | ||
492 | * if_fversion and if_csum themselves | ||
493 | */ | ||
494 | __u8 if_length; | ||
495 | /* the GUID, in network order */ | ||
496 | __u8 if_guid[8]; | ||
497 | /* number of GUIDs to use, starting from if_guid */ | ||
498 | __u8 if_numguid; | ||
499 | /* the board serial number, in ASCII */ | ||
500 | char if_serial[12]; | ||
501 | /* board mfg date (YYYYMMDD ASCII) */ | ||
502 | char if_mfgdate[8]; | ||
503 | /* last board rework/test date (YYYYMMDD ASCII) */ | ||
504 | char if_testdate[8]; | ||
505 | /* logging of error counts, TBD */ | ||
506 | __u8 if_errcntp[4]; | ||
507 | /* powered on hours, updated at driver unload */ | ||
508 | __u8 if_powerhour[2]; | ||
509 | /* ASCII free-form comment field */ | ||
510 | char if_comment[32]; | ||
511 | /* 78 bytes used, min flash size is 128 bytes */ | ||
512 | __u8 if_future[50]; | ||
513 | }; | ||
514 | |||
515 | /* | ||
516 | * These are the counters implemented in the chip, and are listed in order. | ||
517 | * The InterCaps naming is taken straight from the chip spec. | ||
518 | */ | ||
519 | struct infinipath_counters { | ||
520 | __u64 LBIntCnt; | ||
521 | __u64 LBFlowStallCnt; | ||
522 | __u64 Reserved1; | ||
523 | __u64 TxUnsupVLErrCnt; | ||
524 | __u64 TxDataPktCnt; | ||
525 | __u64 TxFlowPktCnt; | ||
526 | __u64 TxDwordCnt; | ||
527 | __u64 TxLenErrCnt; | ||
528 | __u64 TxMaxMinLenErrCnt; | ||
529 | __u64 TxUnderrunCnt; | ||
530 | __u64 TxFlowStallCnt; | ||
531 | __u64 TxDroppedPktCnt; | ||
532 | __u64 RxDroppedPktCnt; | ||
533 | __u64 RxDataPktCnt; | ||
534 | __u64 RxFlowPktCnt; | ||
535 | __u64 RxDwordCnt; | ||
536 | __u64 RxLenErrCnt; | ||
537 | __u64 RxMaxMinLenErrCnt; | ||
538 | __u64 RxICRCErrCnt; | ||
539 | __u64 RxVCRCErrCnt; | ||
540 | __u64 RxFlowCtrlErrCnt; | ||
541 | __u64 RxBadFormatCnt; | ||
542 | __u64 RxLinkProblemCnt; | ||
543 | __u64 RxEBPCnt; | ||
544 | __u64 RxLPCRCErrCnt; | ||
545 | __u64 RxBufOvflCnt; | ||
546 | __u64 RxTIDFullErrCnt; | ||
547 | __u64 RxTIDValidErrCnt; | ||
548 | __u64 RxPKeyMismatchCnt; | ||
549 | __u64 RxP0HdrEgrOvflCnt; | ||
550 | __u64 RxP1HdrEgrOvflCnt; | ||
551 | __u64 RxP2HdrEgrOvflCnt; | ||
552 | __u64 RxP3HdrEgrOvflCnt; | ||
553 | __u64 RxP4HdrEgrOvflCnt; | ||
554 | __u64 RxP5HdrEgrOvflCnt; | ||
555 | __u64 RxP6HdrEgrOvflCnt; | ||
556 | __u64 RxP7HdrEgrOvflCnt; | ||
557 | __u64 RxP8HdrEgrOvflCnt; | ||
558 | __u64 Reserved6; | ||
559 | __u64 Reserved7; | ||
560 | __u64 IBStatusChangeCnt; | ||
561 | __u64 IBLinkErrRecoveryCnt; | ||
562 | __u64 IBLinkDownedCnt; | ||
563 | __u64 IBSymbolErrCnt; | ||
564 | }; | ||
565 | |||
566 | /* | ||
567 | * The next set of defines are for packet headers, and chip register | ||
568 | * and memory bits that are visible to and/or used by user-mode software | ||
569 | * The other bits that are used only by the driver or diags are in | ||
570 | * ipath_registers.h | ||
571 | */ | ||
572 | |||
573 | /* RcvHdrFlags bits */ | ||
574 | #define INFINIPATH_RHF_LENGTH_MASK 0x7FF | ||
575 | #define INFINIPATH_RHF_LENGTH_SHIFT 0 | ||
576 | #define INFINIPATH_RHF_RCVTYPE_MASK 0x7 | ||
577 | #define INFINIPATH_RHF_RCVTYPE_SHIFT 11 | ||
578 | #define INFINIPATH_RHF_EGRINDEX_MASK 0x7FF | ||
579 | #define INFINIPATH_RHF_EGRINDEX_SHIFT 16 | ||
580 | #define INFINIPATH_RHF_H_ICRCERR 0x80000000 | ||
581 | #define INFINIPATH_RHF_H_VCRCERR 0x40000000 | ||
582 | #define INFINIPATH_RHF_H_PARITYERR 0x20000000 | ||
583 | #define INFINIPATH_RHF_H_LENERR 0x10000000 | ||
584 | #define INFINIPATH_RHF_H_MTUERR 0x08000000 | ||
585 | #define INFINIPATH_RHF_H_IHDRERR 0x04000000 | ||
586 | #define INFINIPATH_RHF_H_TIDERR 0x02000000 | ||
587 | #define INFINIPATH_RHF_H_MKERR 0x01000000 | ||
588 | #define INFINIPATH_RHF_H_IBERR 0x00800000 | ||
589 | #define INFINIPATH_RHF_L_SWA 0x00008000 | ||
590 | #define INFINIPATH_RHF_L_SWB 0x00004000 | ||
591 | |||
592 | /* infinipath header fields */ | ||
593 | #define INFINIPATH_I_VERS_MASK 0xF | ||
594 | #define INFINIPATH_I_VERS_SHIFT 28 | ||
595 | #define INFINIPATH_I_PORT_MASK 0xF | ||
596 | #define INFINIPATH_I_PORT_SHIFT 24 | ||
597 | #define INFINIPATH_I_TID_MASK 0x7FF | ||
598 | #define INFINIPATH_I_TID_SHIFT 13 | ||
599 | #define INFINIPATH_I_OFFSET_MASK 0x1FFF | ||
600 | #define INFINIPATH_I_OFFSET_SHIFT 0 | ||
601 | |||
602 | /* K_PktFlags bits */ | ||
603 | #define INFINIPATH_KPF_INTR 0x1 | ||
604 | |||
605 | /* SendPIO per-buffer control */ | ||
606 | #define INFINIPATH_SP_LENGTHP1_MASK 0x3FF | ||
607 | #define INFINIPATH_SP_LENGTHP1_SHIFT 0 | ||
608 | #define INFINIPATH_SP_INTR 0x80000000 | ||
609 | #define INFINIPATH_SP_TEST 0x40000000 | ||
610 | #define INFINIPATH_SP_TESTEBP 0x20000000 | ||
611 | |||
612 | /* SendPIOAvail bits */ | ||
613 | #define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1 | ||
614 | #define INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 0 | ||
615 | |||
616 | #endif /* _IPATH_COMMON_H */ | ||
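Editor's note: the RcvHdrFlags, header-field and SendPIO defines near the end of this header are plain mask/shift pairs. A small decoding sketch (illustrative only, not part of the patch; the helper names are hypothetical, only the defines above are assumed):

	#include "ipath_common.h"

	/* Extract one field from a receive-header-flags word. */
	static inline __u32 ipath_rhf_field(__u64 rhf, __u32 mask, __u32 shift)
	{
		return (__u32) ((rhf >> shift) & mask);
	}

	/* Example: pull the length, receive type and eager index fields. */
	static void ipath_rhf_decode_example(__u64 rhf, __u32 *len, __u32 *type,
					     __u32 *egr)
	{
		*len  = ipath_rhf_field(rhf, INFINIPATH_RHF_LENGTH_MASK,
					INFINIPATH_RHF_LENGTH_SHIFT);
		*type = ipath_rhf_field(rhf, INFINIPATH_RHF_RCVTYPE_MASK,
					INFINIPATH_RHF_RCVTYPE_SHIFT);
		*egr  = ipath_rhf_field(rhf, INFINIPATH_RHF_EGRINDEX_MASK,
					INFINIPATH_RHF_EGRINDEX_SHIFT);
	}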
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
new file mode 100644
index 000000000000..7ece1135ddfe
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -0,0 +1,295 @@
1 | /* | ||
2 | * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/err.h> | ||
34 | #include <linux/vmalloc.h> | ||
35 | |||
36 | #include "ipath_verbs.h" | ||
37 | |||
38 | /** | ||
39 | * ipath_cq_enter - add a new entry to the completion queue | ||
40 | * @cq: completion queue | ||
41 | * @entry: work completion entry to add | ||
42 | * @solicited: true if @entry is a solicited entry | ||
43 | * | ||
44 | * This may be called with either qp->s_lock or qp->r_rq.lock held. | ||
45 | */ | ||
46 | void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) | ||
47 | { | ||
48 | unsigned long flags; | ||
49 | u32 next; | ||
50 | |||
51 | spin_lock_irqsave(&cq->lock, flags); | ||
52 | |||
53 | if (cq->head == cq->ibcq.cqe) | ||
54 | next = 0; | ||
55 | else | ||
56 | next = cq->head + 1; | ||
57 | if (unlikely(next == cq->tail)) { | ||
58 | spin_unlock_irqrestore(&cq->lock, flags); | ||
59 | if (cq->ibcq.event_handler) { | ||
60 | struct ib_event ev; | ||
61 | |||
62 | ev.device = cq->ibcq.device; | ||
63 | ev.element.cq = &cq->ibcq; | ||
64 | ev.event = IB_EVENT_CQ_ERR; | ||
65 | cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); | ||
66 | } | ||
67 | return; | ||
68 | } | ||
69 | cq->queue[cq->head] = *entry; | ||
70 | cq->head = next; | ||
71 | |||
72 | if (cq->notify == IB_CQ_NEXT_COMP || | ||
73 | (cq->notify == IB_CQ_SOLICITED && solicited)) { | ||
74 | cq->notify = IB_CQ_NONE; | ||
75 | cq->triggered++; | ||
76 | /* | ||
77 | * This will cause send_complete() to be called in | ||
78 | * another thread. | ||
79 | */ | ||
80 | tasklet_hi_schedule(&cq->comptask); | ||
81 | } | ||
82 | |||
83 | spin_unlock_irqrestore(&cq->lock, flags); | ||
84 | |||
85 | if (entry->status != IB_WC_SUCCESS) | ||
86 | to_idev(cq->ibcq.device)->n_wqe_errs++; | ||
87 | } | ||
88 | |||
89 | /** | ||
90 | * ipath_poll_cq - poll for work completion entries | ||
91 | * @ibcq: the completion queue to poll | ||
92 | * @num_entries: the maximum number of entries to return | ||
93 | * @entry: pointer to array where work completions are placed | ||
94 | * | ||
95 | * Returns the number of completion entries polled. | ||
96 | * | ||
97 | * This may be called from interrupt context. Also called by ib_poll_cq() | ||
98 | * in the generic verbs code. | ||
99 | */ | ||
100 | int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) | ||
101 | { | ||
102 | struct ipath_cq *cq = to_icq(ibcq); | ||
103 | unsigned long flags; | ||
104 | int npolled; | ||
105 | |||
106 | spin_lock_irqsave(&cq->lock, flags); | ||
107 | |||
108 | for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { | ||
109 | if (cq->tail == cq->head) | ||
110 | break; | ||
111 | *entry = cq->queue[cq->tail]; | ||
112 | if (cq->tail == cq->ibcq.cqe) | ||
113 | cq->tail = 0; | ||
114 | else | ||
115 | cq->tail++; | ||
116 | } | ||
117 | |||
118 | spin_unlock_irqrestore(&cq->lock, flags); | ||
119 | |||
120 | return npolled; | ||
121 | } | ||
122 | |||
123 | static void send_complete(unsigned long data) | ||
124 | { | ||
125 | struct ipath_cq *cq = (struct ipath_cq *)data; | ||
126 | |||
127 | /* | ||
128 | * The completion handler will most likely rearm the notification | ||
129 | * and poll for all pending entries. If a new completion entry | ||
130 | * is added while we are in this routine, tasklet_hi_schedule() | ||
131 | * won't call us again until we return so we check triggered to | ||
132 | * see if we need to call the handler again. | ||
133 | */ | ||
134 | for (;;) { | ||
135 | u8 triggered = cq->triggered; | ||
136 | |||
137 | cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); | ||
138 | |||
139 | if (cq->triggered == triggered) | ||
140 | return; | ||
141 | } | ||
142 | } | ||
143 | |||
144 | /** | ||
145 | * ipath_create_cq - create a completion queue | ||
146 | * @ibdev: the device this completion queue is attached to | ||
147 | * @entries: the minimum size of the completion queue | ||
148 | * @context: unused by the InfiniPath driver | ||
149 | * @udata: unused by the InfiniPath driver | ||
150 | * | ||
151 | * Returns a pointer to the completion queue or negative errno values | ||
152 | * for failure. | ||
153 | * | ||
154 | * Called by ib_create_cq() in the generic verbs code. | ||
155 | */ | ||
156 | struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, | ||
157 | struct ib_ucontext *context, | ||
158 | struct ib_udata *udata) | ||
159 | { | ||
160 | struct ipath_cq *cq; | ||
161 | struct ib_wc *wc; | ||
162 | struct ib_cq *ret; | ||
163 | |||
164 | /* | ||
165 | * Need to use vmalloc() if we want to support large #s of | ||
166 | * entries. | ||
167 | */ | ||
168 | cq = kmalloc(sizeof(*cq), GFP_KERNEL); | ||
169 | if (!cq) { | ||
170 | ret = ERR_PTR(-ENOMEM); | ||
171 | goto bail; | ||
172 | } | ||
173 | |||
174 | /* | ||
175 | * Need to use vmalloc() if we want to support large #s of entries. | ||
176 | */ | ||
177 | wc = vmalloc(sizeof(*wc) * (entries + 1)); | ||
178 | if (!wc) { | ||
179 | kfree(cq); | ||
180 | ret = ERR_PTR(-ENOMEM); | ||
181 | goto bail; | ||
182 | } | ||
183 | /* | ||
184 | * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. | ||
185 | * The number of entries should be >= the number requested or return | ||
186 | * an error. | ||
187 | */ | ||
188 | cq->ibcq.cqe = entries; | ||
189 | cq->notify = IB_CQ_NONE; | ||
190 | cq->triggered = 0; | ||
191 | spin_lock_init(&cq->lock); | ||
192 | tasklet_init(&cq->comptask, send_complete, (unsigned long)cq); | ||
193 | cq->head = 0; | ||
194 | cq->tail = 0; | ||
195 | cq->queue = wc; | ||
196 | |||
197 | ret = &cq->ibcq; | ||
198 | |||
199 | bail: | ||
200 | return ret; | ||
201 | } | ||
202 | |||
203 | /** | ||
204 | * ipath_destroy_cq - destroy a completion queue | ||
205 | * @ibcq: the completion queue to destroy. | ||
206 | * | ||
207 | * Returns 0 for success. | ||
208 | * | ||
209 | * Called by ib_destroy_cq() in the generic verbs code. | ||
210 | */ | ||
211 | int ipath_destroy_cq(struct ib_cq *ibcq) | ||
212 | { | ||
213 | struct ipath_cq *cq = to_icq(ibcq); | ||
214 | |||
215 | tasklet_kill(&cq->comptask); | ||
216 | vfree(cq->queue); | ||
217 | kfree(cq); | ||
218 | |||
219 | return 0; | ||
220 | } | ||
221 | |||
222 | /** | ||
223 | * ipath_req_notify_cq - change the notification type for a completion queue | ||
224 | * @ibcq: the completion queue | ||
225 | * @notify: the type of notification to request | ||
226 | * | ||
227 | * Returns 0 for success. | ||
228 | * | ||
229 | * This may be called from interrupt context. Also called by | ||
230 | * ib_req_notify_cq() in the generic verbs code. | ||
231 | */ | ||
232 | int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) | ||
233 | { | ||
234 | struct ipath_cq *cq = to_icq(ibcq); | ||
235 | unsigned long flags; | ||
236 | |||
237 | spin_lock_irqsave(&cq->lock, flags); | ||
238 | /* | ||
239 | * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow | ||
240 | * any other transitions. | ||
241 | */ | ||
242 | if (cq->notify != IB_CQ_NEXT_COMP) | ||
243 | cq->notify = notify; | ||
244 | spin_unlock_irqrestore(&cq->lock, flags); | ||
245 | return 0; | ||
246 | } | ||
247 | |||
248 | int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) | ||
249 | { | ||
250 | struct ipath_cq *cq = to_icq(ibcq); | ||
251 | struct ib_wc *wc, *old_wc; | ||
252 | u32 n; | ||
253 | int ret; | ||
254 | |||
255 | /* | ||
256 | * Need to use vmalloc() if we want to support large #s of entries. | ||
257 | */ | ||
258 | wc = vmalloc(sizeof(*wc) * (cqe + 1)); | ||
259 | if (!wc) { | ||
260 | ret = -ENOMEM; | ||
261 | goto bail; | ||
262 | } | ||
263 | |||
264 | spin_lock_irq(&cq->lock); | ||
265 | if (cq->head < cq->tail) | ||
266 | n = cq->ibcq.cqe + 1 + cq->head - cq->tail; | ||
267 | else | ||
268 | n = cq->head - cq->tail; | ||
269 | if (unlikely((u32)cqe < n)) { | ||
270 | spin_unlock_irq(&cq->lock); | ||
271 | vfree(wc); | ||
272 | ret = -EOVERFLOW; | ||
273 | goto bail; | ||
274 | } | ||
275 | for (n = 0; cq->tail != cq->head; n++) { | ||
276 | wc[n] = cq->queue[cq->tail]; | ||
277 | if (cq->tail == cq->ibcq.cqe) | ||
278 | cq->tail = 0; | ||
279 | else | ||
280 | cq->tail++; | ||
281 | } | ||
282 | cq->ibcq.cqe = cqe; | ||
283 | cq->head = n; | ||
284 | cq->tail = 0; | ||
285 | old_wc = cq->queue; | ||
286 | cq->queue = wc; | ||
287 | spin_unlock_irq(&cq->lock); | ||
288 | |||
289 | vfree(old_wc); | ||
290 | |||
291 | ret = 0; | ||
292 | |||
293 | bail: | ||
294 | return ret; | ||
295 | } | ||
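Editor's note: the completion queue above is a ring of ibcq.cqe + 1 ib_wc slots; head == tail means empty, and an insertion that would advance head onto tail is reported as IB_EVENT_CQ_ERR rather than overwriting. The occupancy computation in ipath_resize_cq() can be read as a small helper (sketch only; the function name is hypothetical):

	/* Number of completions currently queued, given the ring layout above. */
	static u32 ipath_cq_used(const struct ipath_cq *cq)
	{
		if (cq->head < cq->tail)
			return cq->ibcq.cqe + 1 + cq->head - cq->tail;
		return cq->head - cq->tail;
	}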
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
new file mode 100644
index 000000000000..593e28969c69
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -0,0 +1,96 @@
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #ifndef _IPATH_DEBUG_H | ||
34 | #define _IPATH_DEBUG_H | ||
35 | |||
36 | #ifndef _IPATH_DEBUGGING /* debugging enabled or not */ | ||
37 | #define _IPATH_DEBUGGING 1 | ||
38 | #endif | ||
39 | |||
40 | #if _IPATH_DEBUGGING | ||
41 | |||
42 | /* | ||
43 | * Mask values for debugging. The scheme allows us to compile out any | ||
44 | * of the debug tracing stuff, and if compiled in, to enable or disable | ||
45 | * dynamically. This can be set at modprobe time also: | ||
46 | * modprobe infinipath.ko infinipath_debug=7 | ||
47 | */ | ||
48 | |||
49 | #define __IPATH_INFO 0x1 /* generic low verbosity stuff */ | ||
50 | #define __IPATH_DBG 0x2 /* generic debug */ | ||
51 | #define __IPATH_TRSAMPLE 0x8 /* generate trace buffer sample entries */ | ||
52 | /* leave some low verbosity spots open */ | ||
53 | #define __IPATH_VERBDBG 0x40 /* very verbose debug */ | ||
54 | #define __IPATH_PKTDBG 0x80 /* print packet data */ | ||
55 | /* print process startup (init)/exit messages */ | ||
56 | #define __IPATH_PROCDBG 0x100 | ||
57 | /* print mmap/nopage stuff, not using VDBG any more */ | ||
58 | #define __IPATH_MMDBG 0x200 | ||
59 | #define __IPATH_USER_SEND 0x1000 /* use user mode send */ | ||
60 | #define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */ | ||
61 | #define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */ | ||
62 | #define __IPATH_SMADBG 0x8000 /* sma packet debug */ | ||
63 | #define __IPATH_IPATHDBG 0x10000 /* Ethernet (IPATH) general debug on */ | ||
64 | #define __IPATH_IPATHWARN 0x20000 /* Ethernet (IPATH) warnings on */ | ||
65 | #define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors on */ | ||
66 | #define __IPATH_IPATHPD 0x80000 /* Ethernet (IPATH) packet dump on */ | ||
67 | #define __IPATH_IPATHTABLE 0x100000 /* Ethernet (IPATH) table dump on */ | ||
68 | |||
69 | #else /* _IPATH_DEBUGGING */ | ||
70 | |||
71 | /* | ||
72 | * define all of these even with debugging off, for the few places that do | ||
73 | * if(infinipath_debug & _IPATH_xyzzy), but in a way that will make the | ||
74 | * compiler eliminate the code | ||
75 | */ | ||
76 | |||
77 | #define __IPATH_INFO 0x0 /* generic low verbosity stuff */ | ||
78 | #define __IPATH_DBG 0x0 /* generic debug */ | ||
79 | #define __IPATH_TRSAMPLE 0x0 /* generate trace buffer sample entries */ | ||
80 | #define __IPATH_VERBDBG 0x0 /* very verbose debug */ | ||
81 | #define __IPATH_PKTDBG 0x0 /* print packet data */ | ||
82 | #define __IPATH_PROCDBG 0x0 /* print process startup (init)/exit messages */ | ||
83 | /* print mmap/nopage stuff, not using VDBG any more */ | ||
84 | #define __IPATH_MMDBG 0x0 | ||
85 | #define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */ | ||
86 | #define __IPATH_SMADBG 0x0 /* sma packet debug */ | ||
87 | #define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) general debug on */ | ||
87 | #define __IPATH_IPATHWARN 0x0 /* Ethernet (IPATH) warnings on */ | ||
88 | #define __IPATH_IPATHERR 0x0 /* Ethernet (IPATH) errors on */ | ||
89 | #define __IPATH_IPATHPD 0x0 /* Ethernet (IPATH) packet dump on */ | ||
90 | #define __IPATH_IPATHTABLE 0x0 /* Ethernet (IPATH) table dump on */ | ||
91 | |||
92 | #endif /* _IPATH_DEBUGGING */ | ||
93 | |||
94 | #define __IPATH_VERBOSEDBG __IPATH_VERBDBG | ||
95 | |||
96 | #endif /* _IPATH_DEBUG_H */ | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
new file mode 100644
index 000000000000..7d3fb6996b41
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -0,0 +1,367 @@
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | /* | ||
34 | * This file contains support for diagnostic functions. It is accessed by | ||
35 | * opening the ipath_diag device, normally minor number 129. Diagnostic use | ||
36 | * of the InfiniPath chip may render the chip or board unusable until the | ||
37 | * driver is unloaded, or in some cases, until the system is rebooted. | ||
38 | * | ||
39 | * Accesses to the chip through this interface are not similar to going | ||
40 | * through the /sys/bus/pci resource mmap interface. | ||
41 | */ | ||
42 | |||
43 | #include <linux/pci.h> | ||
44 | #include <asm/uaccess.h> | ||
45 | |||
46 | #include "ipath_common.h" | ||
47 | #include "ipath_kernel.h" | ||
48 | #include "ips_common.h" | ||
49 | #include "ipath_layer.h" | ||
50 | |||
51 | int ipath_diag_inuse; | ||
52 | static int diag_set_link; | ||
53 | |||
54 | static int ipath_diag_open(struct inode *in, struct file *fp); | ||
55 | static int ipath_diag_release(struct inode *in, struct file *fp); | ||
56 | static ssize_t ipath_diag_read(struct file *fp, char __user *data, | ||
57 | size_t count, loff_t *off); | ||
58 | static ssize_t ipath_diag_write(struct file *fp, const char __user *data, | ||
59 | size_t count, loff_t *off); | ||
60 | |||
61 | static struct file_operations diag_file_ops = { | ||
62 | .owner = THIS_MODULE, | ||
63 | .write = ipath_diag_write, | ||
64 | .read = ipath_diag_read, | ||
65 | .open = ipath_diag_open, | ||
66 | .release = ipath_diag_release | ||
67 | }; | ||
68 | |||
69 | static struct cdev *diag_cdev; | ||
70 | static struct class_device *diag_class_dev; | ||
71 | |||
72 | int ipath_diag_init(void) | ||
73 | { | ||
74 | return ipath_cdev_init(IPATH_DIAG_MINOR, "ipath_diag", | ||
75 | &diag_file_ops, &diag_cdev, &diag_class_dev); | ||
76 | } | ||
77 | |||
78 | void ipath_diag_cleanup(void) | ||
79 | { | ||
80 | ipath_cdev_cleanup(&diag_cdev, &diag_class_dev); | ||
81 | } | ||
82 | |||
83 | /** | ||
84 | * ipath_read_umem64 - read a 64-bit quantity from the chip into user space | ||
85 | * @dd: the infinipath device | ||
86 | * @uaddr: the location to store the data in user memory | ||
87 | * @caddr: the source chip address (full pointer, not offset) | ||
88 | * @count: number of bytes to copy (multiple of 32 bits) | ||
89 | * | ||
90 | * This function also localizes all chip memory accesses. | ||
91 | * The copy should be written such that we read full cacheline packets | ||
92 | * from the chip. This is usually used for a single qword | ||
93 | * | ||
94 | * NOTE: This assumes the chip address is 64-bit aligned. | ||
95 | */ | ||
96 | static int ipath_read_umem64(struct ipath_devdata *dd, void __user *uaddr, | ||
97 | const void __iomem *caddr, size_t count) | ||
98 | { | ||
99 | const u64 __iomem *reg_addr = caddr; | ||
100 | const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64)); | ||
101 | int ret; | ||
102 | |||
103 | /* not very efficient, but it works for now */ | ||
104 | if (reg_addr < dd->ipath_kregbase || | ||
105 | reg_end > dd->ipath_kregend) { | ||
106 | ret = -EINVAL; | ||
107 | goto bail; | ||
108 | } | ||
109 | while (reg_addr < reg_end) { | ||
110 | u64 data = readq(reg_addr); | ||
111 | if (copy_to_user(uaddr, &data, sizeof(u64))) { | ||
112 | ret = -EFAULT; | ||
113 | goto bail; | ||
114 | } | ||
115 | reg_addr++; | ||
116 | uaddr++; | ||
117 | } | ||
118 | ret = 0; | ||
119 | bail: | ||
120 | return ret; | ||
121 | } | ||
122 | |||
123 | /** | ||
124 | * ipath_write_umem64 - write a 64-bit quantity to the chip from user space | ||
125 | * @dd: the infinipath device | ||
126 | * @caddr: the destination chip address (full pointer, not offset) | ||
127 | * @uaddr: the source of the data in user memory | ||
128 | * @count: the number of bytes to copy (multiple of 32 bits) | ||
129 | * | ||
130 | * This is usually used for a single qword | ||
131 | * NOTE: This assumes the chip address is 64-bit aligned. | ||
132 | */ | ||
133 | |||
134 | static int ipath_write_umem64(struct ipath_devdata *dd, void __iomem *caddr, | ||
135 | const void __user *uaddr, size_t count) | ||
136 | { | ||
137 | u64 __iomem *reg_addr = caddr; | ||
138 | const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64)); | ||
139 | int ret; | ||
140 | |||
141 | /* not very efficient, but it works for now */ | ||
142 | if (reg_addr < dd->ipath_kregbase || | ||
143 | reg_end > dd->ipath_kregend) { | ||
144 | ret = -EINVAL; | ||
145 | goto bail; | ||
146 | } | ||
147 | while (reg_addr < reg_end) { | ||
148 | u64 data; | ||
149 | if (copy_from_user(&data, uaddr, sizeof(data))) { | ||
150 | ret = -EFAULT; | ||
151 | goto bail; | ||
152 | } | ||
153 | writeq(data, reg_addr); | ||
154 | |||
155 | reg_addr++; | ||
156 | uaddr++; | ||
157 | } | ||
158 | ret = 0; | ||
159 | bail: | ||
160 | return ret; | ||
161 | } | ||
162 | |||
163 | /** | ||
164 | * ipath_read_umem32 - read a 32-bit quantity from the chip into user space | ||
165 | * @dd: the infinipath device | ||
166 | * @uaddr: the location to store the data in user memory | ||
167 | * @caddr: the source chip address (full pointer, not offset) | ||
168 | * @count: number of bytes to copy | ||
169 | * | ||
170 | * read 32 bit values, not 64 bit; for memories that only | ||
171 | * support 32 bit reads; usually a single dword. | ||
172 | */ | ||
173 | static int ipath_read_umem32(struct ipath_devdata *dd, void __user *uaddr, | ||
174 | const void __iomem *caddr, size_t count) | ||
175 | { | ||
176 | const u32 __iomem *reg_addr = caddr; | ||
177 | const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32)); | ||
178 | int ret; | ||
179 | |||
180 | if (reg_addr < (u32 __iomem *) dd->ipath_kregbase || | ||
181 | reg_end > (u32 __iomem *) dd->ipath_kregend) { | ||
182 | ret = -EINVAL; | ||
183 | goto bail; | ||
184 | } | ||
185 | /* not very efficient, but it works for now */ | ||
186 | while (reg_addr < reg_end) { | ||
187 | u32 data = readl(reg_addr); | ||
188 | if (copy_to_user(uaddr, &data, sizeof(data))) { | ||
189 | ret = -EFAULT; | ||
190 | goto bail; | ||
191 | } | ||
192 | |||
193 | reg_addr++; | ||
194 | uaddr += sizeof(u32); /* void *, so step a full dword */ | ||
195 | } | ||
196 | ret = 0; | ||
197 | bail: | ||
198 | return ret; | ||
199 | } | ||
200 | |||
201 | /** | ||
202 | * ipath_write_umem32 - write a 32-bit quantity to the chip from user space | ||
203 | * @dd: the infinipath device | ||
204 | * @caddr: the destination chip address (full pointer, not offset) | ||
205 | * @uaddr: the source of the data in user memory | ||
206 | * @count: number of bytes to copy | ||
207 | * | ||
208 | * write 32 bit values, not 64 bit; for memories that only | ||
209 | * support 32 bit writes; usually a single dword. | ||
210 | */ | ||
211 | |||
212 | static int ipath_write_umem32(struct ipath_devdata *dd, void __iomem *caddr, | ||
213 | const void __user *uaddr, size_t count) | ||
214 | { | ||
215 | u32 __iomem *reg_addr = caddr; | ||
216 | const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32)); | ||
217 | int ret; | ||
218 | |||
219 | if (reg_addr < (u32 __iomem *) dd->ipath_kregbase || | ||
220 | reg_end > (u32 __iomem *) dd->ipath_kregend) { | ||
221 | ret = -EINVAL; | ||
222 | goto bail; | ||
223 | } | ||
224 | while (reg_addr < reg_end) { | ||
225 | u32 data; | ||
226 | if (copy_from_user(&data, uaddr, sizeof(data))) { | ||
227 | ret = -EFAULT; | ||
228 | goto bail; | ||
229 | } | ||
230 | writel(data, reg_addr); | ||
231 | |||
232 | reg_addr++; | ||
233 | uaddr += sizeof(u32); /* void *, so step a full dword */ | ||
234 | } | ||
235 | ret = 0; | ||
236 | bail: | ||
237 | return ret; | ||
238 | } | ||
239 | |||
240 | static int ipath_diag_open(struct inode *in, struct file *fp) | ||
241 | { | ||
242 | struct ipath_devdata *dd; | ||
243 | int unit = 0; /* XXX this is bogus */ | ||
244 | unsigned long flags; | ||
245 | int ret; | ||
246 | |||
247 | dd = ipath_lookup(unit); | ||
248 | |||
249 | mutex_lock(&ipath_mutex); | ||
250 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
251 | |||
252 | if (ipath_diag_inuse) { | ||
253 | ret = -EBUSY; | ||
254 | goto bail; | ||
255 | } | ||
256 | |||
257 | list_for_each_entry(dd, &ipath_dev_list, ipath_list) { | ||
258 | /* | ||
259 | * we need at least one infinipath device to be present | ||
260 | * (don't use INITTED, because we want to be able to open | ||
261 | * even if device is in freeze mode, which cleared INITTED). | ||
262 | * There is a small amount of risk to this, which is why we | ||
263 | * also verify kregbase is set. | ||
264 | */ | ||
265 | |||
266 | if (!(dd->ipath_flags & IPATH_PRESENT) || | ||
267 | !dd->ipath_kregbase) | ||
268 | continue; | ||
269 | |||
270 | ipath_diag_inuse = 1; | ||
271 | diag_set_link = 0; | ||
272 | ret = 0; | ||
273 | goto bail; | ||
274 | } | ||
275 | |||
276 | ret = -ENODEV; | ||
277 | |||
278 | bail: | ||
279 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
280 | mutex_unlock(&ipath_mutex); | ||
281 | |||
282 | /* Only expose a way to reset the device if we | ||
283 | make it into diag mode. */ | ||
284 | if (ret == 0) | ||
285 | ipath_expose_reset(&dd->pcidev->dev); | ||
286 | |||
287 | return ret; | ||
288 | } | ||
289 | |||
290 | static int ipath_diag_release(struct inode *i, struct file *f) | ||
291 | { | ||
292 | mutex_lock(&ipath_mutex); | ||
293 | ipath_diag_inuse = 0; | ||
294 | mutex_unlock(&ipath_mutex); | ||
295 | return 0; | ||
296 | } | ||
297 | |||
298 | static ssize_t ipath_diag_read(struct file *fp, char __user *data, | ||
299 | size_t count, loff_t *off) | ||
300 | { | ||
301 | int unit = 0; /* XXX provide for reads on other units some day */ | ||
302 | struct ipath_devdata *dd; | ||
303 | void __iomem *kreg_base; | ||
304 | ssize_t ret; | ||
305 | |||
306 | dd = ipath_lookup(unit); | ||
307 | if (!dd) { | ||
308 | ret = -ENODEV; | ||
309 | goto bail; | ||
310 | } | ||
311 | |||
312 | kreg_base = dd->ipath_kregbase; | ||
313 | |||
314 | if (count == 0) | ||
315 | ret = 0; | ||
316 | else if ((count % 4) || (*off % 4)) | ||
317 | /* address or length is not 32-bit aligned, hence invalid */ | ||
318 | ret = -EINVAL; | ||
319 | else if ((count % 8) || (*off % 8)) | ||
320 | /* address or length not 64-bit aligned; do 32-bit reads */ | ||
321 | ret = ipath_read_umem32(dd, data, kreg_base + *off, count); | ||
322 | else | ||
323 | ret = ipath_read_umem64(dd, data, kreg_base + *off, count); | ||
324 | |||
325 | if (ret >= 0) { | ||
326 | *off += count; | ||
327 | ret = count; | ||
328 | } | ||
329 | |||
330 | bail: | ||
331 | return ret; | ||
332 | } | ||
333 | |||
334 | static ssize_t ipath_diag_write(struct file *fp, const char __user *data, | ||
335 | size_t count, loff_t *off) | ||
336 | { | ||
337 | int unit = 0; /* XXX this is bogus */ | ||
338 | struct ipath_devdata *dd; | ||
339 | void __iomem *kreg_base; | ||
340 | ssize_t ret; | ||
341 | |||
342 | dd = ipath_lookup(unit); | ||
343 | if (!dd) { | ||
344 | ret = -ENODEV; | ||
345 | goto bail; | ||
346 | } | ||
347 | kreg_base = dd->ipath_kregbase; | ||
348 | |||
349 | if (count == 0) | ||
350 | ret = 0; | ||
351 | else if ((count % 4) || (*off % 4)) | ||
352 | /* address or length is not 32-bit aligned, hence invalid */ | ||
353 | ret = -EINVAL; | ||
354 | else if ((count % 8) || (*off % 8)) | ||
355 | /* address or length not 64-bit aligned; do 32-bit writes */ | ||
356 | ret = ipath_write_umem32(dd, kreg_base + *off, data, count); | ||
357 | else | ||
358 | ret = ipath_write_umem64(dd, kreg_base + *off, data, count); | ||
359 | |||
360 | if (ret >= 0) { | ||
361 | *off += count; | ||
362 | ret = count; | ||
363 | } | ||
364 | |||
365 | bail: | ||
366 | return ret; | ||
367 | } | ||
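The read and write entry points above dispatch purely on alignment: 32-bit-aligned offsets and counts go through the umem32 helpers, 64-bit-aligned ones through the umem64 helpers, with the file offset interpreted as an offset from the chip's kregbase. For illustration only, a minimal user-space sketch that exercises the 64-bit read path; the "/dev/ipath_diag" node name is an assumption here, since the cdev/class setup lives earlier in this file:

    #define _XOPEN_SOURCE 500
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            uint64_t val;
            int fd = open("/dev/ipath_diag", O_RDONLY);     /* assumed node name */

            if (fd < 0)
                    return 1;
            /* 8-byte count at an 8-byte-aligned offset -> ipath_read_umem64() */
            if (pread(fd, &val, sizeof(val), 0x100) == (ssize_t) sizeof(val))
                    printf("kreg at offset 0x100 = 0x%llx\n",
                           (unsigned long long) val);
            close(fd);
            return 0;
    }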
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c new file mode 100644 index 000000000000..e7617c3982ea --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_driver.c | |||
@@ -0,0 +1,1983 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/spinlock.h> | ||
34 | #include <linux/idr.h> | ||
35 | #include <linux/pci.h> | ||
36 | #include <linux/delay.h> | ||
37 | #include <linux/netdevice.h> | ||
38 | #include <linux/vmalloc.h> | ||
39 | |||
40 | #include "ipath_kernel.h" | ||
41 | #include "ips_common.h" | ||
42 | #include "ipath_layer.h" | ||
43 | |||
44 | static void ipath_update_pio_bufs(struct ipath_devdata *); | ||
45 | |||
46 | const char *ipath_get_unit_name(int unit) | ||
47 | { | ||
48 | static char iname[16]; | ||
49 | snprintf(iname, sizeof iname, "infinipath%u", unit); | ||
50 | return iname; | ||
51 | } | ||
52 | |||
53 | EXPORT_SYMBOL_GPL(ipath_get_unit_name); | ||
54 | |||
55 | #define DRIVER_LOAD_MSG "PathScale " IPATH_DRV_NAME " loaded: " | ||
56 | #define PFX IPATH_DRV_NAME ": " | ||
57 | |||
58 | /* | ||
59 | * The size has to be longer than this string, so we can append | ||
60 | * board/chip information to it in the init code. | ||
61 | */ | ||
62 | const char ipath_core_version[] = IPATH_IDSTR "\n"; | ||
63 | |||
64 | static struct idr unit_table; | ||
65 | DEFINE_SPINLOCK(ipath_devs_lock); | ||
66 | LIST_HEAD(ipath_dev_list); | ||
67 | |||
68 | wait_queue_head_t ipath_sma_state_wait; | ||
69 | |||
70 | unsigned ipath_debug = __IPATH_INFO; | ||
71 | |||
72 | module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO); | ||
73 | MODULE_PARM_DESC(debug, "mask for debug prints"); | ||
74 | EXPORT_SYMBOL_GPL(ipath_debug); | ||
75 | |||
76 | MODULE_LICENSE("GPL"); | ||
77 | MODULE_AUTHOR("PathScale <support@pathscale.com>"); | ||
78 | MODULE_DESCRIPTION("PathScale InfiniPath driver"); | ||
79 | |||
80 | const char *ipath_ibcstatus_str[] = { | ||
81 | "Disabled", | ||
82 | "LinkUp", | ||
83 | "PollActive", | ||
84 | "PollQuiet", | ||
85 | "SleepDelay", | ||
86 | "SleepQuiet", | ||
87 | "LState6", /* unused */ | ||
88 | "LState7", /* unused */ | ||
89 | "CfgDebounce", | ||
90 | "CfgRcvfCfg", | ||
91 | "CfgWaitRmt", | ||
92 | "CfgIdle", | ||
93 | "RecovRetrain", | ||
94 | "LState0xD", /* unused */ | ||
95 | "RecovWaitRmt", | ||
96 | "RecovIdle", | ||
97 | }; | ||
98 | |||
99 | /* | ||
100 | * These variables are initialized in the chip-specific files | ||
101 | * but are defined here. | ||
102 | */ | ||
103 | u16 ipath_gpio_sda_num, ipath_gpio_scl_num; | ||
104 | u64 ipath_gpio_sda, ipath_gpio_scl; | ||
105 | u64 infinipath_i_bitsextant; | ||
106 | ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant; | ||
107 | u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask; | ||
108 | |||
109 | static void __devexit ipath_remove_one(struct pci_dev *); | ||
110 | static int __devinit ipath_init_one(struct pci_dev *, | ||
111 | const struct pci_device_id *); | ||
112 | |||
113 | /* Only needed for registration, nothing else needs this info */ | ||
114 | #define PCI_VENDOR_ID_PATHSCALE 0x1fc1 | ||
115 | #define PCI_DEVICE_ID_INFINIPATH_HT 0xd | ||
116 | #define PCI_DEVICE_ID_INFINIPATH_PE800 0x10 | ||
117 | |||
118 | static const struct pci_device_id ipath_pci_tbl[] = { | ||
119 | {PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, | ||
120 | PCI_DEVICE_ID_INFINIPATH_HT)}, | ||
121 | {PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, | ||
122 | PCI_DEVICE_ID_INFINIPATH_PE800)}, | ||
123 | }; | ||
124 | |||
125 | MODULE_DEVICE_TABLE(pci, ipath_pci_tbl); | ||
126 | |||
127 | static struct pci_driver ipath_driver = { | ||
128 | .name = IPATH_DRV_NAME, | ||
129 | .probe = ipath_init_one, | ||
130 | .remove = __devexit_p(ipath_remove_one), | ||
131 | .id_table = ipath_pci_tbl, | ||
132 | }; | ||
133 | |||
134 | /* | ||
135 | * This is where port 0's rcvhdrtail register is written back; we also | ||
136 | * want nothing else sharing the cache line, so make it a cache line | ||
137 | * in size. Used for all units. | ||
138 | */ | ||
139 | volatile __le64 *ipath_port0_rcvhdrtail; | ||
140 | dma_addr_t ipath_port0_rcvhdrtail_dma; | ||
141 | static int port0_rcvhdrtail_refs; | ||
142 | |||
143 | static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev, | ||
144 | u32 *bar0, u32 *bar1) | ||
145 | { | ||
146 | int ret; | ||
147 | |||
148 | ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0); | ||
149 | if (ret) | ||
150 | ipath_dev_err(dd, "failed to read bar0 before enable: " | ||
151 | "error %d\n", -ret); | ||
152 | |||
153 | ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1); | ||
154 | if (ret) | ||
155 | ipath_dev_err(dd, "failed to read bar1 before enable: " | ||
156 | "error %d\n", -ret); | ||
157 | |||
158 | ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1); | ||
159 | } | ||
160 | |||
161 | static void ipath_free_devdata(struct pci_dev *pdev, | ||
162 | struct ipath_devdata *dd) | ||
163 | { | ||
164 | unsigned long flags; | ||
165 | |||
166 | pci_set_drvdata(pdev, NULL); | ||
167 | |||
168 | if (dd->ipath_unit != -1) { | ||
169 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
170 | idr_remove(&unit_table, dd->ipath_unit); | ||
171 | list_del(&dd->ipath_list); | ||
172 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
173 | } | ||
174 | dma_free_coherent(&pdev->dev, sizeof(*dd), dd, dd->ipath_dma_addr); | ||
175 | } | ||
176 | |||
177 | static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) | ||
178 | { | ||
179 | unsigned long flags; | ||
180 | struct ipath_devdata *dd; | ||
181 | dma_addr_t dma_addr; | ||
182 | int ret; | ||
183 | |||
184 | if (!idr_pre_get(&unit_table, GFP_KERNEL)) { | ||
185 | dd = ERR_PTR(-ENOMEM); | ||
186 | goto bail; | ||
187 | } | ||
188 | |||
189 | dd = dma_alloc_coherent(&pdev->dev, sizeof(*dd), &dma_addr, | ||
190 | GFP_KERNEL); | ||
191 | |||
192 | if (!dd) { | ||
193 | dd = ERR_PTR(-ENOMEM); | ||
194 | goto bail; | ||
195 | } | ||
196 | |||
197 | dd->ipath_dma_addr = dma_addr; | ||
198 | dd->ipath_unit = -1; | ||
199 | |||
200 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
201 | |||
202 | ret = idr_get_new(&unit_table, dd, &dd->ipath_unit); | ||
203 | if (ret < 0) { | ||
204 | printk(KERN_ERR IPATH_DRV_NAME | ||
205 | ": Could not allocate unit ID: error %d\n", -ret); | ||
206 | ipath_free_devdata(pdev, dd); | ||
207 | dd = ERR_PTR(ret); | ||
208 | goto bail_unlock; | ||
209 | } | ||
210 | |||
211 | dd->pcidev = pdev; | ||
212 | pci_set_drvdata(pdev, dd); | ||
213 | |||
214 | list_add(&dd->ipath_list, &ipath_dev_list); | ||
215 | |||
216 | bail_unlock: | ||
217 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
218 | |||
219 | bail: | ||
220 | return dd; | ||
221 | } | ||
222 | |||
223 | static inline struct ipath_devdata *__ipath_lookup(int unit) | ||
224 | { | ||
225 | return idr_find(&unit_table, unit); | ||
226 | } | ||
227 | |||
228 | struct ipath_devdata *ipath_lookup(int unit) | ||
229 | { | ||
230 | struct ipath_devdata *dd; | ||
231 | unsigned long flags; | ||
232 | |||
233 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
234 | dd = __ipath_lookup(unit); | ||
235 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
236 | |||
237 | return dd; | ||
238 | } | ||
239 | |||
240 | int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp) | ||
241 | { | ||
242 | int nunits, npresent, nup; | ||
243 | struct ipath_devdata *dd; | ||
244 | unsigned long flags; | ||
245 | u32 maxports; | ||
246 | |||
247 | nunits = npresent = nup = maxports = 0; | ||
248 | |||
249 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
250 | |||
251 | list_for_each_entry(dd, &ipath_dev_list, ipath_list) { | ||
252 | nunits++; | ||
253 | if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase) | ||
254 | npresent++; | ||
255 | if (dd->ipath_lid && | ||
256 | !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN | ||
257 | | IPATH_LINKUNK))) | ||
258 | nup++; | ||
259 | if (dd->ipath_cfgports > maxports) | ||
260 | maxports = dd->ipath_cfgports; | ||
261 | } | ||
262 | |||
263 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
264 | |||
265 | if (npresentp) | ||
266 | *npresentp = npresent; | ||
267 | if (nupp) | ||
268 | *nupp = nup; | ||
269 | if (maxportsp) | ||
270 | *maxportsp = maxports; | ||
271 | |||
272 | return nunits; | ||
273 | } | ||
274 | |||
275 | static int init_port0_rcvhdrtail(struct pci_dev *pdev) | ||
276 | { | ||
277 | int ret; | ||
278 | |||
279 | mutex_lock(&ipath_mutex); | ||
280 | |||
281 | if (!ipath_port0_rcvhdrtail) { | ||
282 | ipath_port0_rcvhdrtail = | ||
283 | dma_alloc_coherent(&pdev->dev, | ||
284 | IPATH_PORT0_RCVHDRTAIL_SIZE, | ||
285 | &ipath_port0_rcvhdrtail_dma, | ||
286 | GFP_KERNEL); | ||
287 | |||
288 | if (!ipath_port0_rcvhdrtail) { | ||
289 | ret = -ENOMEM; | ||
290 | goto bail; | ||
291 | } | ||
292 | } | ||
293 | port0_rcvhdrtail_refs++; | ||
294 | ret = 0; | ||
295 | |||
296 | bail: | ||
297 | mutex_unlock(&ipath_mutex); | ||
298 | |||
299 | return ret; | ||
300 | } | ||
301 | |||
302 | static void cleanup_port0_rcvhdrtail(struct pci_dev *pdev) | ||
303 | { | ||
304 | mutex_lock(&ipath_mutex); | ||
305 | |||
306 | if (!--port0_rcvhdrtail_refs) { | ||
307 | dma_free_coherent(&pdev->dev, IPATH_PORT0_RCVHDRTAIL_SIZE, | ||
308 | (void *) ipath_port0_rcvhdrtail, | ||
309 | ipath_port0_rcvhdrtail_dma); | ||
310 | ipath_port0_rcvhdrtail = NULL; | ||
311 | } | ||
312 | |||
313 | mutex_unlock(&ipath_mutex); | ||
314 | } | ||
315 | |||
316 | /* | ||
317 | * These next two routines are placeholders in case we don't have per-arch | ||
318 | * code for controlling write combining. If explicit control of write | ||
319 | * combining is not available, performance will probably be awful. | ||
320 | */ | ||
321 | |||
322 | int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd) | ||
323 | { | ||
324 | return -EOPNOTSUPP; | ||
325 | } | ||
326 | |||
327 | void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd) | ||
328 | { | ||
329 | } | ||
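As a hedged sketch only, this is roughly what a per-arch override of the weak hooks above might look like on x86_64, using the MTRR interface of that era; ipath_piobufbase and ipath_wc_cookie are assumed field names for this sketch, and the region bounds are illustrative rather than the driver's actual computation:

    #include <asm/mtrr.h>

    int ipath_enable_wc(struct ipath_devdata *dd)
    {
            int cookie;

            /* mark the PIO buffer region write-combining (illustrative bounds) */
            cookie = mtrr_add(dd->ipath_physaddr + dd->ipath_piobufbase,
                              dd->ipath_piobcnt2k * dd->ipath_palign,
                              MTRR_TYPE_WRCOMB, 1);
            if (cookie < 0)
                    return -EINVAL;
            dd->ipath_wc_cookie = cookie;   /* assumed field, saved for mtrr_del() */
            return 0;
    }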
330 | |||
331 | static int __devinit ipath_init_one(struct pci_dev *pdev, | ||
332 | const struct pci_device_id *ent) | ||
333 | { | ||
334 | int ret, len, j; | ||
335 | struct ipath_devdata *dd; | ||
336 | unsigned long long addr; | ||
337 | u32 bar0 = 0, bar1 = 0; | ||
338 | u8 rev; | ||
339 | |||
340 | ret = init_port0_rcvhdrtail(pdev); | ||
341 | if (ret < 0) { | ||
342 | printk(KERN_ERR IPATH_DRV_NAME | ||
343 | ": Could not allocate port0_rcvhdrtail: error %d\n", | ||
344 | -ret); | ||
345 | goto bail; | ||
346 | } | ||
347 | |||
348 | dd = ipath_alloc_devdata(pdev); | ||
349 | if (IS_ERR(dd)) { | ||
350 | ret = PTR_ERR(dd); | ||
351 | printk(KERN_ERR IPATH_DRV_NAME | ||
352 | ": Could not allocate devdata: error %d\n", -ret); | ||
353 | goto bail_rcvhdrtail; | ||
354 | } | ||
355 | |||
356 | ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit); | ||
357 | |||
358 | read_bars(dd, pdev, &bar0, &bar1); | ||
359 | |||
360 | ret = pci_enable_device(pdev); | ||
361 | if (ret) { | ||
362 | /* This can happen iff: | ||
363 | * | ||
364 | * We did a chip reset, and then failed to reprogram the | ||
365 | * BAR, or the chip reset due to an internal error. We then | ||
366 | * unloaded the driver and reloaded it. | ||
367 | * | ||
368 | * Both reset cases set the BAR back to initial state. For | ||
369 | * the latter case, the AER sticky error bit at offset 0x718 | ||
370 | * should be set, but the Linux kernel doesn't yet know | ||
371 | * about that, it appears. If the original BAR was retained | ||
372 | * in the kernel data structures, this may be OK. | ||
373 | */ | ||
374 | ipath_dev_err(dd, "enable unit %d failed: error %d\n", | ||
375 | dd->ipath_unit, -ret); | ||
376 | goto bail_devdata; | ||
377 | } | ||
378 | addr = pci_resource_start(pdev, 0); | ||
379 | len = pci_resource_len(pdev, 0); | ||
380 | ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %x, vend %x/%x " | ||
381 | "driver_data %lx\n", addr, len, pdev->irq, ent->vendor, | ||
382 | ent->device, ent->driver_data); | ||
383 | |||
384 | read_bars(dd, pdev, &bar0, &bar1); | ||
385 | |||
386 | if (!bar1 && !(bar0 & ~0xf)) { | ||
387 | if (addr) { | ||
388 | dev_info(&pdev->dev, "BAR is 0 (probable RESET), " | ||
389 | "rewriting as %llx\n", addr); | ||
390 | ret = pci_write_config_dword( | ||
391 | pdev, PCI_BASE_ADDRESS_0, addr); | ||
392 | if (ret) { | ||
393 | ipath_dev_err(dd, "rewrite of BAR0 " | ||
394 | "failed: err %d\n", -ret); | ||
395 | goto bail_disable; | ||
396 | } | ||
397 | ret = pci_write_config_dword( | ||
398 | pdev, PCI_BASE_ADDRESS_1, addr >> 32); | ||
399 | if (ret) { | ||
400 | ipath_dev_err(dd, "rewrite of BAR1 " | ||
401 | "failed: err %d\n", -ret); | ||
402 | goto bail_disable; | ||
403 | } | ||
404 | } else { | ||
405 | ipath_dev_err(dd, "BAR is 0 (probable RESET), " | ||
406 | "not usable until reboot\n"); | ||
407 | ret = -ENODEV; | ||
408 | goto bail_disable; | ||
409 | } | ||
410 | } | ||
411 | |||
412 | ret = pci_request_regions(pdev, IPATH_DRV_NAME); | ||
413 | if (ret) { | ||
414 | dev_info(&pdev->dev, "pci_request_regions unit %u fails: " | ||
415 | "err %d\n", dd->ipath_unit, -ret); | ||
416 | goto bail_disable; | ||
417 | } | ||
418 | |||
419 | ret = pci_set_dma_mask(pdev, DMA_64BIT_MASK); | ||
420 | if (ret) { | ||
421 | dev_info(&pdev->dev, "pci_set_dma_mask unit %u " | ||
422 | "fails: %d\n", dd->ipath_unit, ret); | ||
423 | goto bail_regions; | ||
424 | } | ||
425 | |||
426 | pci_set_master(pdev); | ||
427 | |||
428 | /* | ||
429 | * Save BARs to rewrite after device reset. Save all 64 bits of | ||
430 | * BAR, just in case. | ||
431 | */ | ||
432 | dd->ipath_pcibar0 = addr; | ||
433 | dd->ipath_pcibar1 = addr >> 32; | ||
434 | dd->ipath_deviceid = ent->device; /* save for later use */ | ||
435 | dd->ipath_vendorid = ent->vendor; | ||
436 | |||
437 | /* setup the chip-specific functions, as early as possible. */ | ||
438 | switch (ent->device) { | ||
439 | case PCI_DEVICE_ID_INFINIPATH_HT: | ||
440 | ipath_init_ht400_funcs(dd); | ||
441 | break; | ||
442 | case PCI_DEVICE_ID_INFINIPATH_PE800: | ||
443 | ipath_init_pe800_funcs(dd); | ||
444 | break; | ||
445 | default: | ||
446 | ipath_dev_err(dd, "Found unknown PathScale deviceid 0x%x, " | ||
447 | "failing\n", ent->device); | ||
448 | ret = -ENODEV; goto bail_regions; /* clean up regions, device and devdata */ | ||
449 | } | ||
450 | |||
451 | for (j = 0; j < 6; j++) { | ||
452 | if (!pdev->resource[j].start) | ||
453 | continue; | ||
454 | ipath_cdbg(VERBOSE, "BAR %d start %lx, end %lx, len %lx\n", | ||
455 | j, pdev->resource[j].start, | ||
456 | pdev->resource[j].end, | ||
457 | pci_resource_len(pdev, j)); | ||
458 | } | ||
459 | |||
460 | if (!addr) { | ||
461 | ipath_dev_err(dd, "No valid address in BAR 0!\n"); | ||
462 | ret = -ENODEV; | ||
463 | goto bail_regions; | ||
464 | } | ||
465 | |||
466 | dd->ipath_deviceid = ent->device; /* save for later use */ | ||
467 | dd->ipath_vendorid = ent->vendor; | ||
468 | |||
469 | ret = pci_read_config_byte(pdev, PCI_REVISION_ID, &rev); | ||
470 | if (ret) { | ||
471 | ipath_dev_err(dd, "Failed to read PCI revision ID unit " | ||
472 | "%u: err %d\n", dd->ipath_unit, -ret); | ||
473 | goto bail_regions; /* shouldn't ever happen */ | ||
474 | } | ||
475 | dd->ipath_pcirev = rev; | ||
476 | |||
477 | dd->ipath_kregbase = ioremap_nocache(addr, len); | ||
478 | |||
479 | if (!dd->ipath_kregbase) { | ||
480 | ipath_dbg("Unable to map io addr %llx to kvirt, failing\n", | ||
481 | addr); | ||
482 | ret = -ENOMEM; | ||
483 | goto bail_iounmap; | ||
484 | } | ||
485 | dd->ipath_kregend = (u64 __iomem *) | ||
486 | ((void __iomem *)dd->ipath_kregbase + len); | ||
487 | dd->ipath_physaddr = addr; /* used for io_remap, etc. */ | ||
488 | /* for user mmap */ | ||
489 | dd->ipath_kregvirt = (u64 __iomem *) phys_to_virt(addr); | ||
490 | ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p " | ||
491 | "kregvirt %p\n", addr, dd->ipath_kregbase, | ||
492 | dd->ipath_kregvirt); | ||
493 | |||
494 | /* | ||
495 | * clear ipath_flags here instead of in ipath_init_chip as it is set | ||
496 | * by ipath_setup_htconfig. | ||
497 | */ | ||
498 | dd->ipath_flags = 0; | ||
499 | |||
500 | if (dd->ipath_f_bus(dd, pdev)) | ||
501 | ipath_dev_err(dd, "Failed to setup config space; " | ||
502 | "continuing anyway\n"); | ||
503 | |||
504 | /* | ||
505 | * set up our interrupt handler; SA_SHIRQ probably not needed, | ||
506 | * since MSI interrupts shouldn't be shared, but it won't hurt for now. | ||
507 | * Check for a zero irq only after we return from the chip-specific | ||
508 | * bus setup, since that setup can change the irq. | ||
509 | */ | ||
510 | if (!pdev->irq) | ||
511 | ipath_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " | ||
512 | "work\n"); | ||
513 | else { | ||
514 | ret = request_irq(pdev->irq, ipath_intr, SA_SHIRQ, | ||
515 | IPATH_DRV_NAME, dd); | ||
516 | if (ret) { | ||
517 | ipath_dev_err(dd, "Couldn't setup irq handler, " | ||
518 | "irq=%u: %d\n", pdev->irq, ret); | ||
519 | goto bail_iounmap; | ||
520 | } | ||
521 | } | ||
522 | |||
523 | ret = ipath_init_chip(dd, 0); /* do the chip-specific init */ | ||
524 | if (ret) | ||
525 | goto bail_iounmap; | ||
526 | |||
527 | ret = ipath_enable_wc(dd); | ||
528 | |||
529 | if (ret) { | ||
530 | ipath_dev_err(dd, "Write combining not enabled " | ||
531 | "(err %d): performance may be poor\n", | ||
532 | -ret); | ||
533 | ret = 0; | ||
534 | } | ||
535 | |||
536 | ipath_device_create_group(&pdev->dev, dd); | ||
537 | ipathfs_add_device(dd); | ||
538 | ipath_user_add(dd); | ||
539 | ipath_layer_add(dd); | ||
540 | |||
541 | goto bail; | ||
542 | |||
543 | bail_iounmap: | ||
544 | iounmap((volatile void __iomem *) dd->ipath_kregbase); | ||
545 | |||
546 | bail_regions: | ||
547 | pci_release_regions(pdev); | ||
548 | |||
549 | bail_disable: | ||
550 | pci_disable_device(pdev); | ||
551 | |||
552 | bail_devdata: | ||
553 | ipath_free_devdata(pdev, dd); | ||
554 | |||
555 | bail_rcvhdrtail: | ||
556 | cleanup_port0_rcvhdrtail(pdev); | ||
557 | |||
558 | bail: | ||
559 | return ret; | ||
560 | } | ||
561 | |||
562 | static void __devexit ipath_remove_one(struct pci_dev *pdev) | ||
563 | { | ||
564 | struct ipath_devdata *dd; | ||
565 | |||
566 | ipath_cdbg(VERBOSE, "removing, pdev=%p\n", pdev); | ||
567 | if (!pdev) | ||
568 | return; | ||
569 | |||
570 | dd = pci_get_drvdata(pdev); | ||
571 | ipath_layer_del(dd); | ||
572 | ipath_user_del(dd); | ||
573 | ipathfs_remove_device(dd); | ||
574 | ipath_device_remove_group(&pdev->dev, dd); | ||
575 | ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, " | ||
576 | "unit %u\n", dd, (u32) dd->ipath_unit); | ||
577 | if (dd->ipath_kregbase) { | ||
578 | ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", | ||
579 | dd->ipath_kregbase); | ||
580 | iounmap((volatile void __iomem *) dd->ipath_kregbase); | ||
581 | dd->ipath_kregbase = NULL; | ||
582 | } | ||
583 | pci_release_regions(pdev); | ||
584 | ipath_cdbg(VERBOSE, "calling pci_disable_device\n"); | ||
585 | pci_disable_device(pdev); | ||
586 | |||
587 | ipath_free_devdata(pdev, dd); | ||
588 | cleanup_port0_rcvhdrtail(pdev); | ||
589 | } | ||
590 | |||
591 | /* general driver use */ | ||
592 | DEFINE_MUTEX(ipath_mutex); | ||
593 | |||
594 | static DEFINE_SPINLOCK(ipath_pioavail_lock); | ||
595 | |||
596 | /** | ||
597 | * ipath_disarm_piobufs - cancel a range of PIO buffers | ||
598 | * @dd: the infinipath device | ||
599 | * @first: the first PIO buffer to cancel | ||
600 | * @cnt: the number of PIO buffers to cancel | ||
601 | * | ||
602 | * cancel a range of PIO buffers, used when they might be armed, but | ||
603 | * not triggered. Used at init to ensure buffer state, and also user | ||
604 | * process close, in case it died while writing to a PIO buffer. | ||
605 | * Also used after errors. | ||
606 | */ | ||
607 | void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, | ||
608 | unsigned cnt) | ||
609 | { | ||
610 | unsigned i, last = first + cnt; | ||
611 | u64 sendctrl, sendorig; | ||
612 | |||
613 | ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first); | ||
614 | sendorig = dd->ipath_sendctrl | INFINIPATH_S_DISARM; | ||
615 | for (i = first; i < last; i++) { | ||
616 | sendctrl = sendorig | | ||
617 | (i << INFINIPATH_S_DISARMPIOBUF_SHIFT); | ||
618 | ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, | ||
619 | sendctrl); | ||
620 | } | ||
621 | |||
622 | /* | ||
623 | * Write it again with current value, in case ipath_sendctrl changed | ||
624 | * while we were looping; no critical bits that would require | ||
625 | * locking. | ||
626 | * | ||
627 | * Write a 0, and then the original value, reading scratch in | ||
628 | * between. This seems to avoid a chip timing race that causes | ||
629 | * pioavail updates to memory to stop. | ||
630 | */ | ||
631 | ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, | ||
632 | 0); | ||
633 | sendorig = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); | ||
634 | ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, | ||
635 | dd->ipath_sendctrl); | ||
636 | } | ||
637 | |||
638 | /** | ||
639 | * ipath_wait_linkstate - wait for an IB link state change to occur | ||
640 | * @dd: the infinipath device | ||
641 | * @state: the state to wait for | ||
642 | * @msecs: the number of milliseconds to wait | ||
643 | * | ||
644 | * Wait up to msecs milliseconds for an IB link state change to occur; | ||
645 | * for now, take the easy polling route. Currently used only by | ||
646 | * ipath_layer_set_linkstate. Returns 0 if the state is reached, otherwise | ||
647 | * -ETIMEDOUT. @state can have multiple states set, for any of several | ||
648 | * transitions. | ||
649 | */ | ||
650 | int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs) | ||
651 | { | ||
652 | dd->ipath_sma_state_wanted = state; | ||
653 | wait_event_interruptible_timeout(ipath_sma_state_wait, | ||
654 | (dd->ipath_flags & state), | ||
655 | msecs_to_jiffies(msecs)); | ||
656 | dd->ipath_sma_state_wanted = 0; | ||
657 | |||
658 | if (!(dd->ipath_flags & state)) { | ||
659 | u64 val; | ||
660 | ipath_cdbg(SMA, "Didn't reach linkstate %s within %u ms\n", | ||
661 | /* test INIT ahead of DOWN, both can be set */ | ||
662 | (state & IPATH_LINKINIT) ? "INIT" : | ||
663 | ((state & IPATH_LINKDOWN) ? "DOWN" : | ||
664 | ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")), | ||
665 | msecs); | ||
666 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); | ||
667 | ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n", | ||
668 | (unsigned long long) ipath_read_kreg64( | ||
669 | dd, dd->ipath_kregs->kr_ibcctrl), | ||
670 | (unsigned long long) val, | ||
671 | ipath_ibcstatus_str[val & 0xf]); | ||
672 | } | ||
673 | return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT; | ||
674 | } | ||
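A hedged usage sketch of the layered-driver path the comment above refers to; the 250 ms timeout is an arbitrary value chosen for illustration:

    /* after requesting the ARM state from the IB link controller: */
    if (ipath_wait_linkstate(dd, IPATH_LINKARMED, 250))
            return -ETIMEDOUT;      /* didn't reach ARM within 250 ms */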
675 | |||
676 | void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) | ||
677 | { | ||
678 | *buf = '\0'; | ||
679 | if (err & INFINIPATH_E_RHDRLEN) | ||
680 | strlcat(buf, "rhdrlen ", blen); | ||
681 | if (err & INFINIPATH_E_RBADTID) | ||
682 | strlcat(buf, "rbadtid ", blen); | ||
683 | if (err & INFINIPATH_E_RBADVERSION) | ||
684 | strlcat(buf, "rbadversion ", blen); | ||
685 | if (err & INFINIPATH_E_RHDR) | ||
686 | strlcat(buf, "rhdr ", blen); | ||
687 | if (err & INFINIPATH_E_RLONGPKTLEN) | ||
688 | strlcat(buf, "rlongpktlen ", blen); | ||
689 | if (err & INFINIPATH_E_RSHORTPKTLEN) | ||
690 | strlcat(buf, "rshortpktlen ", blen); | ||
691 | if (err & INFINIPATH_E_RMAXPKTLEN) | ||
692 | strlcat(buf, "rmaxpktlen ", blen); | ||
693 | if (err & INFINIPATH_E_RMINPKTLEN) | ||
694 | strlcat(buf, "rminpktlen ", blen); | ||
695 | if (err & INFINIPATH_E_RFORMATERR) | ||
696 | strlcat(buf, "rformaterr ", blen); | ||
697 | if (err & INFINIPATH_E_RUNSUPVL) | ||
698 | strlcat(buf, "runsupvl ", blen); | ||
699 | if (err & INFINIPATH_E_RUNEXPCHAR) | ||
700 | strlcat(buf, "runexpchar ", blen); | ||
701 | if (err & INFINIPATH_E_RIBFLOW) | ||
702 | strlcat(buf, "ribflow ", blen); | ||
703 | if (err & INFINIPATH_E_REBP) | ||
704 | strlcat(buf, "EBP ", blen); | ||
705 | if (err & INFINIPATH_E_SUNDERRUN) | ||
706 | strlcat(buf, "sunderrun ", blen); | ||
707 | if (err & INFINIPATH_E_SPIOARMLAUNCH) | ||
708 | strlcat(buf, "spioarmlaunch ", blen); | ||
709 | if (err & INFINIPATH_E_SUNEXPERRPKTNUM) | ||
710 | strlcat(buf, "sunexperrpktnum ", blen); | ||
711 | if (err & INFINIPATH_E_SDROPPEDDATAPKT) | ||
712 | strlcat(buf, "sdroppeddatapkt ", blen); | ||
713 | if (err & INFINIPATH_E_SDROPPEDSMPPKT) | ||
714 | strlcat(buf, "sdroppedsmppkt ", blen); | ||
715 | if (err & INFINIPATH_E_SMAXPKTLEN) | ||
716 | strlcat(buf, "smaxpktlen ", blen); | ||
717 | if (err & INFINIPATH_E_SMINPKTLEN) | ||
718 | strlcat(buf, "sminpktlen ", blen); | ||
719 | if (err & INFINIPATH_E_SUNSUPVL) | ||
720 | strlcat(buf, "sunsupVL ", blen); | ||
721 | if (err & INFINIPATH_E_SPKTLEN) | ||
722 | strlcat(buf, "spktlen ", blen); | ||
723 | if (err & INFINIPATH_E_INVALIDADDR) | ||
724 | strlcat(buf, "invalidaddr ", blen); | ||
725 | if (err & INFINIPATH_E_RICRC) | ||
726 | strlcat(buf, "CRC ", blen); | ||
727 | if (err & INFINIPATH_E_RVCRC) | ||
728 | strlcat(buf, "VCRC ", blen); | ||
729 | if (err & INFINIPATH_E_RRCVEGRFULL) | ||
730 | strlcat(buf, "rcvegrfull ", blen); | ||
731 | if (err & INFINIPATH_E_RRCVHDRFULL) | ||
732 | strlcat(buf, "rcvhdrfull ", blen); | ||
733 | if (err & INFINIPATH_E_IBSTATUSCHANGED) | ||
734 | strlcat(buf, "ibcstatuschg ", blen); | ||
735 | if (err & INFINIPATH_E_RIBLOSTLINK) | ||
736 | strlcat(buf, "riblostlink ", blen); | ||
737 | if (err & INFINIPATH_E_HARDWARE) | ||
738 | strlcat(buf, "hardware ", blen); | ||
739 | if (err & INFINIPATH_E_RESET) | ||
740 | strlcat(buf, "reset ", blen); | ||
741 | } | ||
742 | |||
743 | /** | ||
744 | * get_rhf_errstring - decode RHF errors | ||
745 | * @err: the err number | ||
746 | * @msg: the output buffer | ||
747 | * @len: the length of the output buffer | ||
748 | * | ||
749 | * only used one place now, may want more later | ||
750 | */ | ||
751 | static void get_rhf_errstring(u32 err, char *msg, size_t len) | ||
752 | { | ||
753 | /* start empty, so if there are no errors the message stays empty */ | ||
754 | *msg = '\0'; | ||
755 | |||
756 | if (err & INFINIPATH_RHF_H_ICRCERR) | ||
757 | strlcat(msg, "icrcerr ", len); | ||
758 | if (err & INFINIPATH_RHF_H_VCRCERR) | ||
759 | strlcat(msg, "vcrcerr ", len); | ||
760 | if (err & INFINIPATH_RHF_H_PARITYERR) | ||
761 | strlcat(msg, "parityerr ", len); | ||
762 | if (err & INFINIPATH_RHF_H_LENERR) | ||
763 | strlcat(msg, "lenerr ", len); | ||
764 | if (err & INFINIPATH_RHF_H_MTUERR) | ||
765 | strlcat(msg, "mtuerr ", len); | ||
766 | if (err & INFINIPATH_RHF_H_IHDRERR) | ||
767 | /* infinipath hdr checksum error */ | ||
768 | strlcat(msg, "ipathhdrerr ", len); | ||
769 | if (err & INFINIPATH_RHF_H_TIDERR) | ||
770 | strlcat(msg, "tiderr ", len); | ||
771 | if (err & INFINIPATH_RHF_H_MKERR) | ||
772 | /* bad port, offset, etc. */ | ||
773 | strlcat(msg, "invalid ipathhdr ", len); | ||
774 | if (err & INFINIPATH_RHF_H_IBERR) | ||
775 | strlcat(msg, "iberr ", len); | ||
776 | if (err & INFINIPATH_RHF_L_SWA) | ||
777 | strlcat(msg, "swA ", len); | ||
778 | if (err & INFINIPATH_RHF_L_SWB) | ||
779 | strlcat(msg, "swB ", len); | ||
780 | } | ||
781 | |||
782 | /** | ||
783 | * ipath_get_egrbuf - get an eager buffer | ||
784 | * @dd: the infinipath device | ||
785 | * @bufnum: the eager buffer to get | ||
786 | * @err: unused | ||
787 | * | ||
788 | * must only be called if ipath_pd[port] is known to be allocated | ||
789 | */ | ||
790 | static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum, | ||
791 | int err) | ||
792 | { | ||
793 | return dd->ipath_port0_skbs ? | ||
794 | (void *)dd->ipath_port0_skbs[bufnum]->data : NULL; | ||
795 | } | ||
796 | |||
797 | /** | ||
798 | * ipath_alloc_skb - allocate an skb and buffer with possible constraints | ||
799 | * @dd: the infinipath device | ||
800 | * @gfp_mask: the sk_buff GFP mask | ||
801 | */ | ||
802 | struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, | ||
803 | gfp_t gfp_mask) | ||
804 | { | ||
805 | struct sk_buff *skb; | ||
806 | u32 len; | ||
807 | |||
808 | /* | ||
809 | * The only fully supported way to handle this is to allocate lots of | ||
810 | * extra space, align as needed, and then do skb_reserve(). That wastes | ||
811 | * a lot of memory... I'll have to hack this into infinipath_copy | ||
812 | * also. | ||
813 | */ | ||
814 | |||
815 | /* | ||
816 | * We need 4 extra bytes for unaligned transfer copying | ||
817 | */ | ||
818 | if (dd->ipath_flags & IPATH_4BYTE_TID) { | ||
819 | /* we need a 4KB multiple alignment, and there is no way | ||
820 | * to do it except to allocate extra and then skb_reserve | ||
821 | * enough to bring it up to the right alignment. | ||
822 | */ | ||
823 | len = dd->ipath_ibmaxlen + 4 + (1 << 11) - 1; | ||
824 | } | ||
825 | else | ||
826 | len = dd->ipath_ibmaxlen + 4; | ||
827 | skb = __dev_alloc_skb(len, gfp_mask); | ||
828 | if (!skb) { | ||
829 | ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n", | ||
830 | len); | ||
831 | goto bail; | ||
832 | } | ||
833 | if (dd->ipath_flags & IPATH_4BYTE_TID) { | ||
834 | u32 una = ((1 << 11) - 1) & (unsigned long)(skb->data + 4); | ||
835 | if (una) | ||
836 | skb_reserve(skb, 4 + (1 << 11) - una); | ||
837 | else | ||
838 | skb_reserve(skb, 4); | ||
839 | } else | ||
840 | skb_reserve(skb, 4); | ||
841 | |||
842 | bail: | ||
843 | return skb; | ||
844 | } | ||
845 | |||
846 | /** | ||
847 | * ipath_rcv_layer - receive a packet for the layered (ethernet) driver | ||
848 | * @dd: the infinipath device | ||
849 | * @etail: the sk_buff number | ||
850 | * @tlen: the total packet length | ||
851 | * @hdr: the ethernet header | ||
852 | * | ||
853 | * Separate routine for better overall optimization | ||
854 | */ | ||
855 | static void ipath_rcv_layer(struct ipath_devdata *dd, u32 etail, | ||
856 | u32 tlen, struct ether_header *hdr) | ||
857 | { | ||
858 | u32 elen; | ||
859 | u8 pad, *bthbytes; | ||
860 | struct sk_buff *skb, *nskb; | ||
861 | |||
862 | if (dd->ipath_port0_skbs && hdr->sub_opcode == OPCODE_ENCAP) { | ||
863 | /* | ||
864 | * Allocate a new sk_buff to replace the one we give | ||
865 | * to the network stack. | ||
866 | */ | ||
867 | nskb = ipath_alloc_skb(dd, GFP_ATOMIC); | ||
868 | if (!nskb) { | ||
869 | /* count OK packets that we drop */ | ||
870 | ipath_stats.sps_krdrops++; | ||
871 | return; | ||
872 | } | ||
873 | |||
874 | bthbytes = (u8 *) hdr->bth; | ||
875 | pad = (bthbytes[1] >> 4) & 3; | ||
876 | /* +CRC32 */ | ||
877 | elen = tlen - (sizeof(*hdr) + pad + sizeof(u32)); | ||
878 | |||
879 | skb = dd->ipath_port0_skbs[etail]; | ||
880 | dd->ipath_port0_skbs[etail] = nskb; | ||
881 | skb_put(skb, elen); | ||
882 | |||
883 | dd->ipath_f_put_tid(dd, etail + (u64 __iomem *) | ||
884 | ((char __iomem *) dd->ipath_kregbase | ||
885 | + dd->ipath_rcvegrbase), 0, | ||
886 | virt_to_phys(nskb->data)); | ||
887 | |||
888 | __ipath_layer_rcv(dd, hdr, skb); | ||
889 | |||
890 | /* another ether packet received */ | ||
891 | ipath_stats.sps_ether_rpkts++; | ||
892 | } | ||
893 | else if (hdr->sub_opcode == OPCODE_LID_ARP) | ||
894 | __ipath_layer_rcv_lid(dd, hdr); | ||
895 | } | ||
896 | |||
897 | /* | ||
898 | * ipath_kreceive - receive a packet | ||
899 | * @dd: the infinipath device | ||
900 | * | ||
901 | * called from interrupt handler for errors or receive interrupt | ||
902 | */ | ||
903 | void ipath_kreceive(struct ipath_devdata *dd) | ||
904 | { | ||
905 | u64 *rc; | ||
906 | void *ebuf; | ||
907 | const u32 rsize = dd->ipath_rcvhdrentsize; /* words */ | ||
908 | const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */ | ||
909 | u32 etail = -1, l, hdrqtail; | ||
910 | struct ips_message_header *hdr; | ||
911 | u32 eflags, i, etype, tlen, pkttot = 0; | ||
912 | static u64 totcalls; /* stats, may eventually remove */ | ||
913 | char emsg[128]; | ||
914 | |||
915 | if (!dd->ipath_hdrqtailptr) { | ||
916 | ipath_dev_err(dd, | ||
917 | "hdrqtailptr not set, can't do receives\n"); | ||
918 | goto bail; | ||
919 | } | ||
920 | |||
921 | /* There is already a thread processing this queue. */ | ||
922 | if (test_and_set_bit(0, &dd->ipath_rcv_pending)) | ||
923 | goto bail; | ||
924 | |||
925 | if (dd->ipath_port0head == | ||
926 | (u32)le64_to_cpu(*dd->ipath_hdrqtailptr)) | ||
927 | goto done; | ||
928 | |||
929 | gotmore: | ||
930 | /* | ||
931 | * read only once at start. If in flood situation, this helps | ||
932 | * performance slightly. If more arrive while we are processing, | ||
933 | * we'll come back here and do them | ||
934 | */ | ||
935 | hdrqtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr); | ||
936 | |||
937 | for (i = 0, l = dd->ipath_port0head; l != hdrqtail; i++) { | ||
938 | u32 qp; | ||
939 | u8 *bthbytes; | ||
940 | |||
941 | rc = (u64 *) (dd->ipath_pd[0]->port_rcvhdrq + (l << 2)); | ||
942 | hdr = (struct ips_message_header *)&rc[1]; | ||
943 | /* | ||
944 | * could make a network order version of IPATH_KD_QP, and | ||
945 | * do the obvious shift before masking to speed this up. | ||
946 | */ | ||
947 | qp = ntohl(hdr->bth[1]) & 0xffffff; | ||
948 | bthbytes = (u8 *) hdr->bth; | ||
949 | |||
950 | eflags = ips_get_hdr_err_flags((__le32 *) rc); | ||
951 | etype = ips_get_rcv_type((__le32 *) rc); | ||
952 | /* total length */ | ||
953 | tlen = ips_get_length_in_bytes((__le32 *) rc); | ||
954 | ebuf = NULL; | ||
955 | if (etype != RCVHQ_RCV_TYPE_EXPECTED) { | ||
956 | /* | ||
957 | * it turns out that the chip uses an eager buffer | ||
958 | * for all non-expected packets, whether it "needs" | ||
959 | * one or not. So always get the index, but don't | ||
960 | * set ebuf (so we try to copy data) unless the | ||
961 | * length requires it. | ||
962 | */ | ||
963 | etail = ips_get_index((__le32 *) rc); | ||
964 | if (tlen > sizeof(*hdr) || | ||
965 | etype == RCVHQ_RCV_TYPE_NON_KD) | ||
966 | ebuf = ipath_get_egrbuf(dd, etail, 0); | ||
967 | } | ||
968 | |||
969 | /* | ||
970 | * both tiderr and ipathhdrerr are set for all plain IB | ||
971 | * packets; only ipathhdrerr should be set. | ||
972 | */ | ||
973 | |||
974 | if (etype != RCVHQ_RCV_TYPE_NON_KD && etype != | ||
975 | RCVHQ_RCV_TYPE_ERROR && ips_get_ipath_ver( | ||
976 | hdr->iph.ver_port_tid_offset) != | ||
977 | IPS_PROTO_VERSION) { | ||
978 | ipath_cdbg(PKT, "Bad InfiniPath protocol version " | ||
979 | "%x\n", etype); | ||
980 | } | ||
981 | |||
982 | if (eflags & ~(INFINIPATH_RHF_H_TIDERR | | ||
983 | INFINIPATH_RHF_H_IHDRERR)) { | ||
984 | get_rhf_errstring(eflags, emsg, sizeof emsg); | ||
985 | ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u " | ||
986 | "tlen=%x opcode=%x egridx=%x: %s\n", | ||
987 | eflags, l, etype, tlen, bthbytes[0], | ||
988 | ips_get_index((__le32 *) rc), emsg); | ||
989 | } else if (etype == RCVHQ_RCV_TYPE_NON_KD) { | ||
990 | int ret = __ipath_verbs_rcv(dd, rc + 1, | ||
991 | ebuf, tlen); | ||
992 | if (ret == -ENODEV) | ||
993 | ipath_cdbg(VERBOSE, | ||
994 | "received IB packet, " | ||
995 | "not SMA (QP=%x)\n", qp); | ||
996 | } else if (etype == RCVHQ_RCV_TYPE_EAGER) { | ||
997 | if (qp == IPATH_KD_QP && | ||
998 | bthbytes[0] == ipath_layer_rcv_opcode && | ||
999 | ebuf) | ||
1000 | ipath_rcv_layer(dd, etail, tlen, | ||
1001 | (struct ether_header *)hdr); | ||
1002 | else | ||
1003 | ipath_cdbg(PKT, "typ %x, opcode %x (eager, " | ||
1004 | "qp=%x), len %x; ignored\n", | ||
1005 | etype, bthbytes[0], qp, tlen); | ||
1006 | } | ||
1007 | else if (etype == RCVHQ_RCV_TYPE_EXPECTED) | ||
1008 | ipath_dbg("Bug: Expected TID, opcode %x; ignored\n", | ||
1009 | be32_to_cpu(hdr->bth[0]) & 0xff); | ||
1010 | else if (eflags & (INFINIPATH_RHF_H_TIDERR | | ||
1011 | INFINIPATH_RHF_H_IHDRERR)) { | ||
1012 | /* | ||
1013 | * This is a type 3 packet, only the LRH is in the | ||
1014 | * rcvhdrq, the rest of the header is in the eager | ||
1015 | * buffer. | ||
1016 | */ | ||
1017 | u8 opcode; | ||
1018 | if (ebuf) { | ||
1019 | bthbytes = (u8 *) ebuf; | ||
1020 | opcode = *bthbytes; | ||
1021 | } | ||
1022 | else | ||
1023 | opcode = 0; | ||
1024 | get_rhf_errstring(eflags, emsg, sizeof emsg); | ||
1025 | ipath_dbg("Err %x (%s), opcode %x, egrbuf %x, " | ||
1026 | "len %x\n", eflags, emsg, opcode, etail, | ||
1027 | tlen); | ||
1028 | } else { | ||
1029 | /* | ||
1030 | * error packet, type of error unknown. | ||
1031 | * Probably type 3, but we don't know, so don't | ||
1032 | * even try to print the opcode, etc. | ||
1033 | */ | ||
1034 | ipath_dbg("Error Pkt, but no eflags! egrbuf %x, " | ||
1035 | "len %x\nhdrq@%lx;hdrq+%x rhf: %llx; " | ||
1036 | "hdr %llx %llx %llx %llx %llx\n", | ||
1037 | etail, tlen, (unsigned long) rc, l, | ||
1038 | (unsigned long long) rc[0], | ||
1039 | (unsigned long long) rc[1], | ||
1040 | (unsigned long long) rc[2], | ||
1041 | (unsigned long long) rc[3], | ||
1042 | (unsigned long long) rc[4], | ||
1043 | (unsigned long long) rc[5]); | ||
1044 | } | ||
1045 | l += rsize; | ||
1046 | if (l >= maxcnt) | ||
1047 | l = 0; | ||
1048 | /* | ||
1049 | * update for each packet, to help prevent overflows if we | ||
1050 | * have lots of packets. | ||
1051 | */ | ||
1052 | (void)ipath_write_ureg(dd, ur_rcvhdrhead, | ||
1053 | dd->ipath_rhdrhead_intr_off | l, 0); | ||
1054 | if (etype != RCVHQ_RCV_TYPE_EXPECTED) | ||
1055 | (void)ipath_write_ureg(dd, ur_rcvegrindexhead, | ||
1056 | etail, 0); | ||
1057 | } | ||
1058 | |||
1059 | pkttot += i; | ||
1060 | |||
1061 | dd->ipath_port0head = l; | ||
1062 | |||
1063 | if (hdrqtail != (u32)le64_to_cpu(*dd->ipath_hdrqtailptr)) | ||
1064 | /* more arrived while we handled first batch */ | ||
1065 | goto gotmore; | ||
1066 | |||
1067 | if (pkttot > ipath_stats.sps_maxpkts_call) | ||
1068 | ipath_stats.sps_maxpkts_call = pkttot; | ||
1069 | ipath_stats.sps_port0pkts += pkttot; | ||
1070 | ipath_stats.sps_avgpkts_call = | ||
1071 | ipath_stats.sps_port0pkts / ++totcalls; | ||
1072 | |||
1073 | done: | ||
1074 | clear_bit(0, &dd->ipath_rcv_pending); | ||
1075 | smp_mb__after_clear_bit(); | ||
1076 | |||
1077 | bail:; | ||
1078 | } | ||
1079 | |||
1080 | /** | ||
1081 | * ipath_update_pio_bufs - update shadow copy of the PIO availability map | ||
1082 | * @dd: the infinipath device | ||
1083 | * | ||
1084 | * called whenever our local copy indicates we have run out of send buffers. | ||
1085 | * NOTE: This can be called from interrupt context by some code | ||
1086 | * and from non-interrupt context by ipath_getpiobuf(). | ||
1087 | */ | ||
1088 | |||
1089 | static void ipath_update_pio_bufs(struct ipath_devdata *dd) | ||
1090 | { | ||
1091 | unsigned long flags; | ||
1092 | int i; | ||
1093 | const unsigned piobregs = (unsigned)dd->ipath_pioavregs; | ||
1094 | |||
1095 | /* If the generation (check) bits have changed, then we update the | ||
1096 | * busy bit for the corresponding PIO buffer. This algorithm will | ||
1097 | * modify positions to the value they already have in some cases | ||
1098 | * (i.e., no change), but it's faster than changing only the bits | ||
1099 | * that have changed. | ||
1100 | * | ||
1101 | * We would like to do this atomically, to avoid spinlocks in the | ||
1102 | * critical send path, but that's not really possible, given the | ||
1103 | * type of changes, and that this routine could be called on | ||
1104 | * multiple cpu's simultaneously, so we lock in this routine only, | ||
1105 | * to avoid conflicting updates; all we change is the shadow, and | ||
1106 | * it's a single 64 bit memory location, so by definition the update | ||
1107 | * is atomic in terms of what other cpu's can see in testing the | ||
1108 | * bits. The spin_lock overhead isn't too bad, since it only | ||
1109 | * happens when all buffers are in use, so only CPU overhead, not | ||
1110 | * latency or bandwidth, is affected. | ||
1111 | */ | ||
1112 | #define _IPATH_ALL_CHECKBITS 0x5555555555555555ULL | ||
1113 | if (!dd->ipath_pioavailregs_dma) { | ||
1114 | ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n"); | ||
1115 | return; | ||
1116 | } | ||
1117 | if (ipath_debug & __IPATH_VERBDBG) { | ||
1118 | /* only if packet debug and verbose */ | ||
1119 | volatile __le64 *dma = dd->ipath_pioavailregs_dma; | ||
1120 | unsigned long *shadow = dd->ipath_pioavailshadow; | ||
1121 | |||
1122 | ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, " | ||
1123 | "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx " | ||
1124 | "s3=%lx\n", | ||
1125 | (unsigned long long) le64_to_cpu(dma[0]), | ||
1126 | shadow[0], | ||
1127 | (unsigned long long) le64_to_cpu(dma[1]), | ||
1128 | shadow[1], | ||
1129 | (unsigned long long) le64_to_cpu(dma[2]), | ||
1130 | shadow[2], | ||
1131 | (unsigned long long) le64_to_cpu(dma[3]), | ||
1132 | shadow[3]); | ||
1133 | if (piobregs > 4) | ||
1134 | ipath_cdbg( | ||
1135 | PKT, "2nd group, dma4=%llx shad4=%lx, " | ||
1136 | "d5=%llx s5=%lx, d6=%llx s6=%lx, " | ||
1137 | "d7=%llx s7=%lx\n", | ||
1138 | (unsigned long long) le64_to_cpu(dma[4]), | ||
1139 | shadow[4], | ||
1140 | (unsigned long long) le64_to_cpu(dma[5]), | ||
1141 | shadow[5], | ||
1142 | (unsigned long long) le64_to_cpu(dma[6]), | ||
1143 | shadow[6], | ||
1144 | (unsigned long long) le64_to_cpu(dma[7]), | ||
1145 | shadow[7]); | ||
1146 | } | ||
1147 | spin_lock_irqsave(&ipath_pioavail_lock, flags); | ||
1148 | for (i = 0; i < piobregs; i++) { | ||
1149 | u64 pchbusy, pchg, piov, pnew; | ||
1150 | /* | ||
1151 | * Chip Errata: bug 6641; even and odd qwords>3 are swapped | ||
1152 | */ | ||
1153 | if (i > 3) { | ||
1154 | if (i & 1) | ||
1155 | piov = le64_to_cpu( | ||
1156 | dd->ipath_pioavailregs_dma[i - 1]); | ||
1157 | else | ||
1158 | piov = le64_to_cpu( | ||
1159 | dd->ipath_pioavailregs_dma[i + 1]); | ||
1160 | } else | ||
1161 | piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]); | ||
1162 | pchg = _IPATH_ALL_CHECKBITS & | ||
1163 | ~(dd->ipath_pioavailshadow[i] ^ piov); | ||
1164 | pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT; | ||
1165 | if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) { | ||
1166 | pnew = dd->ipath_pioavailshadow[i] & ~pchbusy; | ||
1167 | pnew |= piov & pchbusy; | ||
1168 | dd->ipath_pioavailshadow[i] = pnew; | ||
1169 | } | ||
1170 | } | ||
1171 | spin_unlock_irqrestore(&ipath_pioavail_lock, flags); | ||
1172 | } | ||
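The errata handling in the loop above amounts to a simple index swap. A small sketch of the mapping it implements (this helper does not exist in the driver; it only restates the if/else above):

    /*
     * For pioavail qword indices above 3, the chip DMAs even/odd pairs
     * swapped (chip errata, bug 6641), so shadow[i] is refreshed from
     * dma[i ^ 1]; indices 0..3 are used as-is.
     */
    static inline unsigned pioavail_dma_index(unsigned i)
    {
            return (i > 3) ? (i ^ 1) : i;
    }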
1173 | |||
1174 | /** | ||
1175 | * ipath_setrcvhdrsize - set the receive header size | ||
1176 | * @dd: the infinipath device | ||
1177 | * @rhdrsize: the receive header size | ||
1178 | * | ||
1179 | * called from user init code, and also layered driver init | ||
1180 | */ | ||
1181 | int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize) | ||
1182 | { | ||
1183 | int ret = 0; | ||
1184 | |||
1185 | if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) { | ||
1186 | if (dd->ipath_rcvhdrsize != rhdrsize) { | ||
1187 | dev_info(&dd->pcidev->dev, | ||
1188 | "Error: can't set protocol header " | ||
1189 | "size %u, already %u\n", | ||
1190 | rhdrsize, dd->ipath_rcvhdrsize); | ||
1191 | ret = -EAGAIN; | ||
1192 | } else | ||
1193 | ipath_cdbg(VERBOSE, "Reuse same protocol header " | ||
1194 | "size %u\n", dd->ipath_rcvhdrsize); | ||
1195 | } else if (rhdrsize > (dd->ipath_rcvhdrentsize - | ||
1196 | (sizeof(u64) / sizeof(u32)))) { | ||
1197 | ipath_dbg("Error: can't set protocol header size %u " | ||
1198 | "(> max %u)\n", rhdrsize, | ||
1199 | dd->ipath_rcvhdrentsize - | ||
1200 | (u32) (sizeof(u64) / sizeof(u32))); | ||
1201 | ret = -EOVERFLOW; | ||
1202 | } else { | ||
1203 | dd->ipath_flags |= IPATH_RCVHDRSZ_SET; | ||
1204 | dd->ipath_rcvhdrsize = rhdrsize; | ||
1205 | ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize, | ||
1206 | dd->ipath_rcvhdrsize); | ||
1207 | ipath_cdbg(VERBOSE, "Set protocol header size to %u\n", | ||
1208 | dd->ipath_rcvhdrsize); | ||
1209 | } | ||
1210 | return ret; | ||
1211 | } | ||
1212 | |||
1213 | /** | ||
1214 | * ipath_getpiobuf - find an available pio buffer | ||
1215 | * @dd: the infinipath device | ||
1216 | * @pbufnum: the buffer number is placed here | ||
1217 | * | ||
1218 | * do appropriate marking as busy, etc. | ||
1219 | * Returns a pointer to the buffer (and sets *pbufnum) if one is found, | ||
1220 | * otherwise NULL. Used by ipath_sma_send_pkt and ipath_layer_send | ||
1221 | */ | ||
1222 | u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum) | ||
1223 | { | ||
1224 | int i, j, starti, updated = 0; | ||
1225 | unsigned piobcnt, iter; | ||
1226 | unsigned long flags; | ||
1227 | unsigned long *shadow = dd->ipath_pioavailshadow; | ||
1228 | u32 __iomem *buf; | ||
1229 | |||
1230 | piobcnt = (unsigned)(dd->ipath_piobcnt2k | ||
1231 | + dd->ipath_piobcnt4k); | ||
1232 | starti = dd->ipath_lastport_piobuf; | ||
1233 | iter = piobcnt - starti; | ||
1234 | if (dd->ipath_upd_pio_shadow) { | ||
1235 | /* | ||
1236 | * Minor optimization. If we had no buffers on last call, | ||
1237 | * start out by doing the update; continue and do scan even | ||
1238 | * if no buffers were updated, to be paranoid | ||
1239 | */ | ||
1240 | ipath_update_pio_bufs(dd); | ||
1241 | /* we already updated here; don't update again if the scan fails */ | ||
1242 | updated = 1; | ||
1243 | i = starti; | ||
1244 | } else | ||
1245 | i = dd->ipath_lastpioindex; | ||
1246 | |||
1247 | rescan: | ||
1248 | /* | ||
1249 | * while test_and_set_bit() is atomic, we do that and then the | ||
1250 | * change_bit(), and the pair is not. See if this is the cause | ||
1251 | * of the remaining armlaunch errors. | ||
1252 | */ | ||
1253 | spin_lock_irqsave(&ipath_pioavail_lock, flags); | ||
1254 | for (j = 0; j < iter; j++, i++) { | ||
1255 | if (i >= piobcnt) | ||
1256 | i = starti; | ||
1257 | /* | ||
1258 | * To avoid bus lock overhead, we first find a candidate | ||
1259 | * buffer, then do the test and set, and continue if that | ||
1260 | * fails. | ||
1261 | */ | ||
1262 | if (test_bit((2 * i) + 1, shadow) || | ||
1263 | test_and_set_bit((2 * i) + 1, shadow)) | ||
1264 | continue; | ||
1265 | /* flip generation bit */ | ||
1266 | change_bit(2 * i, shadow); | ||
1267 | break; | ||
1268 | } | ||
1269 | spin_unlock_irqrestore(&ipath_pioavail_lock, flags); | ||
1270 | |||
1271 | if (j == iter) { | ||
1272 | volatile __le64 *dma = dd->ipath_pioavailregs_dma; | ||
1273 | |||
1274 | /* | ||
1275 | * first time through; shadow exhausted, but may be real | ||
1276 | * buffers available, so go see; if any updated, rescan | ||
1277 | * (once) | ||
1278 | */ | ||
1279 | if (!updated) { | ||
1280 | ipath_update_pio_bufs(dd); | ||
1281 | updated = 1; | ||
1282 | i = starti; | ||
1283 | goto rescan; | ||
1284 | } | ||
1285 | dd->ipath_upd_pio_shadow = 1; | ||
1286 | /* | ||
1287 | * not atomic, but if we lose one once in a while, that's OK | ||
1288 | */ | ||
1289 | ipath_stats.sps_nopiobufs++; | ||
1290 | if (!(++dd->ipath_consec_nopiobuf % 100000)) { | ||
1291 | ipath_dbg( | ||
1292 | "%u pio sends with no bufavail; dmacopy: " | ||
1293 | "%llx %llx %llx %llx; shadow: " | ||
1294 | "%lx %lx %lx %lx\n", | ||
1295 | dd->ipath_consec_nopiobuf, | ||
1296 | (unsigned long long) le64_to_cpu(dma[0]), | ||
1297 | (unsigned long long) le64_to_cpu(dma[1]), | ||
1298 | (unsigned long long) le64_to_cpu(dma[2]), | ||
1299 | (unsigned long long) le64_to_cpu(dma[3]), | ||
1300 | shadow[0], shadow[1], shadow[2], | ||
1301 | shadow[3]); | ||
1302 | /* | ||
1303 | * 4 buffers per byte, 4 registers above, cover rest | ||
1304 | * below | ||
1305 | */ | ||
1306 | if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > | ||
1307 | (sizeof(shadow[0]) * 4 * 4)) | ||
1308 | ipath_dbg("2nd group: dmacopy: %llx %llx " | ||
1309 | "%llx %llx; shadow: %lx %lx " | ||
1310 | "%lx %lx\n", | ||
1311 | (unsigned long long) | ||
1312 | le64_to_cpu(dma[4]), | ||
1313 | (unsigned long long) | ||
1314 | le64_to_cpu(dma[5]), | ||
1315 | (unsigned long long) | ||
1316 | le64_to_cpu(dma[6]), | ||
1317 | (unsigned long long) | ||
1318 | le64_to_cpu(dma[7]), | ||
1319 | shadow[4], shadow[5], | ||
1320 | shadow[6], shadow[7]); | ||
1321 | } | ||
1322 | buf = NULL; | ||
1323 | goto bail; | ||
1324 | } | ||
1325 | |||
1326 | if (updated) | ||
1327 | /* | ||
1328 | * ran out of bufs, now some (at least this one we just | ||
1329 | * got) are now available, so tell the layered driver. | ||
1330 | */ | ||
1331 | __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE); | ||
1332 | |||
1333 | /* | ||
1334 | * set next starting place. Since it's just an optimization, | ||
1335 | * it doesn't matter who wins on this, so no locking | ||
1336 | */ | ||
1337 | dd->ipath_lastpioindex = i + 1; | ||
1338 | if (dd->ipath_upd_pio_shadow) | ||
1339 | dd->ipath_upd_pio_shadow = 0; | ||
1340 | if (dd->ipath_consec_nopiobuf) | ||
1341 | dd->ipath_consec_nopiobuf = 0; | ||
1342 | if (i < dd->ipath_piobcnt2k) | ||
1343 | buf = (u32 __iomem *) (dd->ipath_pio2kbase + | ||
1344 | i * dd->ipath_palign); | ||
1345 | else | ||
1346 | buf = (u32 __iomem *) | ||
1347 | (dd->ipath_pio4kbase + | ||
1348 | (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign); | ||
1349 | ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n", | ||
1350 | i, (i < dd->ipath_piobcnt2k) ? 2 : 4, buf); | ||
1351 | if (pbufnum) | ||
1352 | *pbufnum = i; | ||
1353 | |||
1354 | bail: | ||
1355 | return buf; | ||
1356 | } | ||
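
The allocation loop above relies on a shadow bitmap with two bits per PIO buffer: bit 2*i+1 marks the buffer busy and bit 2*i is a generation toggle, and a cheap test_bit() filters out obviously busy buffers before paying for the locked test_and_set_bit(). Below is a minimal user-space sketch of that scheme, using C11 atomics in place of the kernel bitops; the names shadow_alloc and NBUF are illustrative, not driver symbols.

/*
 * Two bits per buffer in one shadow word: bit (2*i+1) = busy,
 * bit (2*i) = generation toggle.
 */
#include <stdio.h>
#include <stdatomic.h>

#define NBUF 16
static atomic_ulong shadow;             /* zero-initialized: all buffers free */

static int shadow_alloc(void)
{
        for (int i = 0; i < NBUF; i++) {
                unsigned long busy = 1UL << (2 * i + 1);
                unsigned long gen  = 1UL << (2 * i);

                /* cheap read first; skip buffers that already look busy */
                if (atomic_load(&shadow) & busy)
                        continue;
                /* atomic claim; if someone beat us, try the next candidate */
                if (atomic_fetch_or(&shadow, busy) & busy)
                        continue;
                atomic_fetch_xor(&shadow, gen);         /* flip generation bit */
                return i;
        }
        return -1;                                      /* no buffer available */
}

int main(void)
{
        printf("got buffer %d\n", shadow_alloc());
        printf("got buffer %d\n", shadow_alloc());
        return 0;
}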
1357 | |||
1358 | /** | ||
1359 | * ipath_create_rcvhdrq - create a receive header queue | ||
1360 | * @dd: the infinipath device | ||
1361 | * @pd: the port data | ||
1362 | * | ||
1363 | * this *must* be physically contiguous memory, and for now, | ||
1364 | * that limits it to what kmalloc can do. | ||
1365 | */ | ||
1366 | int ipath_create_rcvhdrq(struct ipath_devdata *dd, | ||
1367 | struct ipath_portdata *pd) | ||
1368 | { | ||
1369 | int ret = 0, amt; | ||
1370 | |||
1371 | amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * | ||
1372 | sizeof(u32), PAGE_SIZE); | ||
1373 | if (!pd->port_rcvhdrq) { | ||
1374 | /* | ||
1375 | * not using REPEAT isn't viable; at 128KB, we can easily | ||
1376 | * fail this. The problem with REPEAT is we can block here | ||
1377 | * "forever". There isn't an inbetween, unfortunately. We | ||
1378 | * could reduce the risk by never freeing the rcvhdrq except | ||
1379 | * at unload, but even then, the first time a port is used, | ||
1380 | * we could delay for some time... | ||
1381 | */ | ||
1382 | gfp_t gfp_flags = GFP_USER | __GFP_COMP; | ||
1383 | |||
1384 | pd->port_rcvhdrq = dma_alloc_coherent( | ||
1385 | &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys, | ||
1386 | gfp_flags); | ||
1387 | |||
1388 | if (!pd->port_rcvhdrq) { | ||
1389 | ipath_dev_err(dd, "attempt to allocate %d bytes " | ||
1390 | "for port %u rcvhdrq failed\n", | ||
1391 | amt, pd->port_port); | ||
1392 | ret = -ENOMEM; | ||
1393 | goto bail; | ||
1394 | } | ||
1395 | |||
1396 | pd->port_rcvhdrq_size = amt; | ||
1397 | |||
1398 | ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu " | ||
1399 | "for port %u rcvhdr Q\n", | ||
1400 | amt >> PAGE_SHIFT, pd->port_rcvhdrq, | ||
1401 | (unsigned long) pd->port_rcvhdrq_phys, | ||
1402 | (unsigned long) pd->port_rcvhdrq_size, | ||
1403 | pd->port_port); | ||
1404 | } else { | ||
1405 | /* | ||
1406 | * clear for security, sanity, and/or debugging, each | ||
1407 | * time we reuse | ||
1408 | */ | ||
1409 | memset(pd->port_rcvhdrq, 0, amt); | ||
1410 | } | ||
1411 | |||
1412 | /* | ||
1413 | * tell chip each time we init it, even if we are re-using previous | ||
1414 | * memory (we zero it at process close) | ||
1415 | */ | ||
1416 | ipath_cdbg(VERBOSE, "writing port %d rcvhdraddr as %lx\n", | ||
1417 | pd->port_port, (unsigned long) pd->port_rcvhdrq_phys); | ||
1418 | ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, | ||
1419 | pd->port_port, pd->port_rcvhdrq_phys); | ||
1420 | |||
1421 | ret = 0; | ||
1422 | bail: | ||
1423 | return ret; | ||
1424 | } | ||
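
The rcvhdrq size computed above is simply the entry count times the entry size (in 32-bit words) rounded up to a whole number of pages. A stand-alone sketch of that arithmetic follows, with made-up example counts rather than the chip defaults.

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        unsigned long rcvhdrcnt = 512;          /* example entry count */
        unsigned long rcvhdrentsize = 16;       /* example 32-bit words per entry */
        unsigned long amt = ALIGN(rcvhdrcnt * rcvhdrentsize * sizeof(unsigned int),
                                  PAGE_SIZE);

        printf("rcvhdrq allocation: %lu bytes (%lu pages)\n",
               amt, amt / PAGE_SIZE);
        return 0;
}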
1425 | |||
1426 | int ipath_waitfor_complete(struct ipath_devdata *dd, ipath_kreg reg_id, | ||
1427 | u64 bits_to_wait_for, u64 * valp) | ||
1428 | { | ||
1429 | unsigned long timeout; | ||
1430 | u64 lastval, val; | ||
1431 | int ret; | ||
1432 | |||
1433 | lastval = ipath_read_kreg64(dd, reg_id); | ||
1434 | /* wait a ridiculously long time */ | ||
1435 | timeout = jiffies + msecs_to_jiffies(5); | ||
1436 | do { | ||
1437 | val = ipath_read_kreg64(dd, reg_id); | ||
1438 | /* set so they have something, even on failures. */ | ||
1439 | *valp = val; | ||
1440 | if ((val & bits_to_wait_for) == bits_to_wait_for) { | ||
1441 | ret = 0; | ||
1442 | break; | ||
1443 | } | ||
1444 | if (val != lastval) | ||
1445 | ipath_cdbg(VERBOSE, "Changed from %llx to %llx, " | ||
1446 | "waiting for %llx bits\n", | ||
1447 | (unsigned long long) lastval, | ||
1448 | (unsigned long long) val, | ||
1449 | (unsigned long long) bits_to_wait_for); | ||
1450 | cond_resched(); | ||
1451 | if (time_after(jiffies, timeout)) { | ||
1452 | ipath_dbg("Didn't get bits %llx in register 0x%x, " | ||
1453 | "got %llx\n", | ||
1454 | (unsigned long long) bits_to_wait_for, | ||
1455 | reg_id, (unsigned long long) *valp); | ||
1456 | ret = -ENODEV; | ||
1457 | break; | ||
1458 | } | ||
1459 | } while (1); | ||
1460 | |||
1461 | return ret; | ||
1462 | } | ||
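
ipath_waitfor_complete() is the usual poll-until-bits-set-or-timeout pattern: read the register, return success once all requested bits are set, yield, and give up after the deadline. A stand-alone sketch of the same pattern using wall-clock time instead of jiffies; poll_reg() is a made-up stand-in for the hardware register read.

#include <stdio.h>
#include <stdint.h>
#include <time.h>

static uint64_t poll_reg(void)
{
        static int calls;
        return ++calls > 3 ? 0x5ULL : 0x1ULL;   /* pretend the bits appear later */
}

static int wait_for_bits(uint64_t bits, uint64_t *valp, long timeout_ms)
{
        struct timespec start, now;

        clock_gettime(CLOCK_MONOTONIC, &start);
        for (;;) {
                *valp = poll_reg();             /* set even on failure */
                if ((*valp & bits) == bits)
                        return 0;               /* all requested bits set */
                clock_gettime(CLOCK_MONOTONIC, &now);
                if ((now.tv_sec - start.tv_sec) * 1000 +
                    (now.tv_nsec - start.tv_nsec) / 1000000 > timeout_ms)
                        return -1;              /* timed out */
        }
}

int main(void)
{
        uint64_t val;

        printf("wait_for_bits: %d (val %llx)\n",
               wait_for_bits(0x4, &val, 5), (unsigned long long) val);
        return 0;
}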
1463 | |||
1464 | /** | ||
1465 | * ipath_waitfor_mdio_cmdready - wait for last command to complete | ||
1466 | * @dd: the infinipath device | ||
1467 | * | ||
1468 | * Like ipath_waitfor_complete(), but we wait for the CMDVALID bit to go | ||
1470 | * away, indicating the last command has completed. It doesn't return data. | ||
1470 | */ | ||
1471 | int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd) | ||
1472 | { | ||
1473 | unsigned long timeout; | ||
1474 | u64 val; | ||
1475 | int ret; | ||
1476 | |||
1477 | /* wait a ridiculously long time */ | ||
1478 | timeout = jiffies + msecs_to_jiffies(5); | ||
1479 | do { | ||
1480 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_mdio); | ||
1481 | if (!(val & IPATH_MDIO_CMDVALID)) { | ||
1482 | ret = 0; | ||
1483 | break; | ||
1484 | } | ||
1485 | cond_resched(); | ||
1486 | if (time_after(jiffies, timeout)) { | ||
1487 | ipath_dbg("CMDVALID stuck in mdio reg? (%llx)\n", | ||
1488 | (unsigned long long) val); | ||
1489 | ret = -ENODEV; | ||
1490 | break; | ||
1491 | } | ||
1492 | } while (1); | ||
1493 | |||
1494 | return ret; | ||
1495 | } | ||
1496 | |||
1497 | void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) | ||
1498 | { | ||
1499 | static const char *what[4] = { | ||
1500 | [0] = "DOWN", | ||
1501 | [INFINIPATH_IBCC_LINKCMD_INIT] = "INIT", | ||
1502 | [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED", | ||
1503 | [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE" | ||
1504 | }; | ||
1505 | ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate " | ||
1506 | "is %s\n", dd->ipath_unit, | ||
1507 | what[(which >> INFINIPATH_IBCC_LINKCMD_SHIFT) & | ||
1508 | INFINIPATH_IBCC_LINKCMD_MASK], | ||
1509 | ipath_ibcstatus_str[ | ||
1510 | (ipath_read_kreg64 | ||
1511 | (dd, dd->ipath_kregs->kr_ibcstatus) >> | ||
1512 | INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & | ||
1513 | INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]); | ||
1514 | |||
1515 | ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, | ||
1516 | dd->ipath_ibcctrl | which); | ||
1517 | } | ||
1518 | |||
1519 | /** | ||
1520 | * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register | ||
1521 | * @dd: the infinipath device | ||
1522 | * @regno: the register number to read | ||
1523 | * @port: the port containing the register | ||
1524 | * | ||
1525 | * Registers that vary with the chip implementation constants (port) | ||
1526 | * use this routine. | ||
1527 | */ | ||
1528 | u64 ipath_read_kreg64_port(const struct ipath_devdata *dd, ipath_kreg regno, | ||
1529 | unsigned port) | ||
1530 | { | ||
1531 | u16 where; | ||
1532 | |||
1533 | if (port < dd->ipath_portcnt && | ||
1534 | (regno == dd->ipath_kregs->kr_rcvhdraddr || | ||
1535 | regno == dd->ipath_kregs->kr_rcvhdrtailaddr)) | ||
1536 | where = regno + port; | ||
1537 | else | ||
1538 | where = -1; | ||
1539 | |||
1540 | return ipath_read_kreg64(dd, where); | ||
1541 | } | ||
1542 | |||
1543 | /** | ||
1544 | * ipath_write_kreg_port - write a device's per-port 64-bit kernel register | ||
1545 | * @dd: the infinipath device | ||
1546 | * @regno: the register number to write | ||
1547 | * @port: the port containing the register | ||
1548 | * @value: the value to write | ||
1549 | * | ||
1550 | * Registers that vary with the chip implementation constants (port) | ||
1551 | * use this routine. | ||
1552 | */ | ||
1553 | void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno, | ||
1554 | unsigned port, u64 value) | ||
1555 | { | ||
1556 | u16 where; | ||
1557 | |||
1558 | if (port < dd->ipath_portcnt && | ||
1559 | (regno == dd->ipath_kregs->kr_rcvhdraddr || | ||
1560 | regno == dd->ipath_kregs->kr_rcvhdrtailaddr)) | ||
1561 | where = regno + port; | ||
1562 | else | ||
1563 | where = -1; | ||
1564 | |||
1565 | ipath_write_kreg(dd, where, value); | ||
1566 | } | ||
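
Both per-port helpers above compute the register index the same way: the per-port registers (rcvhdraddr, rcvhdrtailaddr) are laid out as consecutive copies, so the port number is simply added to the base register index, and anything out of range is poisoned to an invalid index. A small sketch of that addressing, with a hypothetical base index.

#include <stdio.h>

#define KR_RCVHDRADDR 0x40                      /* hypothetical base register index */

static unsigned per_port_reg(unsigned regno, unsigned port, unsigned portcnt)
{
        if (port < portcnt)
                return regno + port;            /* consecutive per-port copies */
        return (unsigned) -1;                   /* out of range: poison the index */
}

int main(void)
{
        printf("port 0 -> 0x%x, port 3 -> 0x%x\n",
               per_port_reg(KR_RCVHDRADDR, 0, 8),
               per_port_reg(KR_RCVHDRADDR, 3, 8));
        return 0;
}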
1567 | |||
1568 | /** | ||
1569 | * ipath_shutdown_device - shut down a device | ||
1570 | * @dd: the infinipath device | ||
1571 | * | ||
1572 | * This is called to make the device quiet when we are about to | ||
1573 | * unload the driver, and also when the device is administratively | ||
1574 | * disabled. It does not free any data structures. | ||
1575 | * Everything it does has to be set up again by ipath_init_chip(dd,1) | ||
1576 | */ | ||
1577 | void ipath_shutdown_device(struct ipath_devdata *dd) | ||
1578 | { | ||
1579 | u64 val; | ||
1580 | |||
1581 | ipath_dbg("Shutting down the device\n"); | ||
1582 | |||
1583 | dd->ipath_flags |= IPATH_LINKUNK; | ||
1584 | dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN | | ||
1585 | IPATH_LINKINIT | IPATH_LINKARMED | | ||
1586 | IPATH_LINKACTIVE); | ||
1587 | *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF | | ||
1588 | IPATH_STATUS_IB_READY); | ||
1589 | |||
1590 | /* mask interrupts, but not errors */ | ||
1591 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); | ||
1592 | |||
1593 | dd->ipath_rcvctrl = 0; | ||
1594 | ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, | ||
1595 | dd->ipath_rcvctrl); | ||
1596 | |||
1597 | /* | ||
1598 | * gracefully stop all sends allowing any in progress to trickle out | ||
1599 | * first. | ||
1600 | */ | ||
1601 | ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0ULL); | ||
1602 | /* flush it */ | ||
1603 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); | ||
1604 | /* | ||
1605 | * enough for anything that's going to trickle out to have actually | ||
1606 | * done so. | ||
1607 | */ | ||
1608 | udelay(5); | ||
1609 | |||
1610 | /* | ||
1611 | * abort any armed or launched PIO buffers that didn't go. (self | ||
1612 | * clearing). Will cause any packet currently being transmitted to | ||
1613 | * go out with an EBP, and may also cause a short packet error on | ||
1614 | * the receiver. | ||
1615 | */ | ||
1616 | ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, | ||
1617 | INFINIPATH_S_ABORT); | ||
1618 | |||
1619 | ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE << | ||
1620 | INFINIPATH_IBCC_LINKINITCMD_SHIFT); | ||
1621 | |||
1622 | /* | ||
1623 | * we are shutting down, so tell the layered driver. We don't do | ||
1624 | * this on just a link state change, much like ethernet, a cable | ||
1625 | * unplug, etc. doesn't change driver state | ||
1626 | */ | ||
1627 | ipath_layer_intr(dd, IPATH_LAYER_INT_IF_DOWN); | ||
1628 | |||
1629 | /* disable IBC */ | ||
1630 | dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; | ||
1631 | ipath_write_kreg(dd, dd->ipath_kregs->kr_control, | ||
1632 | dd->ipath_control); | ||
1633 | |||
1634 | /* | ||
1635 | * clear SerdesEnable and turn the leds off; do this here because | ||
1636 | * we are unloading, so don't count on interrupts to move things along. | ||
1637 | * Turn the LEDs off explicitly for the same reason. | ||
1638 | */ | ||
1639 | dd->ipath_f_quiet_serdes(dd); | ||
1640 | dd->ipath_f_setextled(dd, 0, 0); | ||
1641 | |||
1642 | if (dd->ipath_stats_timer_active) { | ||
1643 | del_timer_sync(&dd->ipath_stats_timer); | ||
1644 | dd->ipath_stats_timer_active = 0; | ||
1645 | } | ||
1646 | |||
1647 | /* | ||
1648 | * clear all interrupts and errors, so that the next time the driver | ||
1649 | * is loaded or device is enabled, we know that whatever is set | ||
1650 | * happened while we were unloaded | ||
1651 | */ | ||
1652 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, | ||
1653 | ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED); | ||
1654 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); | ||
1655 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); | ||
1656 | } | ||
1657 | |||
1658 | /** | ||
1659 | * ipath_free_pddata - free a port's allocated data | ||
1660 | * @dd: the infinipath device | ||
1661 | * @port: the port | ||
1662 | * @freehdrq: free the port data structure if true | ||
1663 | * | ||
1664 | * when closing, free up any allocated data for a port, if the | ||
1665 | * reference count goes to zero | ||
1666 | * Note: this also optionally frees the portdata itself! | ||
1667 | * Any changes here have to be matched up with the reinit case | ||
1668 | * of ipath_init_chip(), which calls this routine on reinit after reset. | ||
1669 | */ | ||
1670 | void ipath_free_pddata(struct ipath_devdata *dd, u32 port, int freehdrq) | ||
1671 | { | ||
1672 | struct ipath_portdata *pd = dd->ipath_pd[port]; | ||
1673 | |||
1674 | if (!pd) | ||
1675 | return; | ||
1676 | if (freehdrq) | ||
1677 | /* | ||
1678 | * only clear and free portdata if we are going to also | ||
1679 | * release the hdrq, otherwise we leak the hdrq on each | ||
1680 | * open/close cycle | ||
1681 | */ | ||
1682 | dd->ipath_pd[port] = NULL; | ||
1683 | if (freehdrq && pd->port_rcvhdrq) { | ||
1684 | ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p " | ||
1685 | "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq, | ||
1686 | (unsigned long) pd->port_rcvhdrq_size); | ||
1687 | dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size, | ||
1688 | pd->port_rcvhdrq, pd->port_rcvhdrq_phys); | ||
1689 | pd->port_rcvhdrq = NULL; | ||
1690 | } | ||
1691 | if (port && pd->port_rcvegrbuf) { | ||
1692 | /* always free this */ | ||
1693 | if (pd->port_rcvegrbuf) { | ||
1694 | unsigned e; | ||
1695 | |||
1696 | for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) { | ||
1697 | void *base = pd->port_rcvegrbuf[e]; | ||
1698 | size_t size = pd->port_rcvegrbuf_size; | ||
1699 | |||
1700 | ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), " | ||
1701 | "chunk %u/%u\n", base, | ||
1702 | (unsigned long) size, | ||
1703 | e, pd->port_rcvegrbuf_chunks); | ||
1704 | dma_free_coherent( | ||
1705 | &dd->pcidev->dev, size, base, | ||
1706 | pd->port_rcvegrbuf_phys[e]); | ||
1707 | } | ||
1708 | vfree(pd->port_rcvegrbuf); | ||
1709 | pd->port_rcvegrbuf = NULL; | ||
1710 | vfree(pd->port_rcvegrbuf_phys); | ||
1711 | pd->port_rcvegrbuf_phys = NULL; | ||
1712 | } | ||
1713 | pd->port_rcvegrbuf_chunks = 0; | ||
1714 | } else if (port == 0 && dd->ipath_port0_skbs) { | ||
1715 | unsigned e; | ||
1716 | struct sk_buff **skbs = dd->ipath_port0_skbs; | ||
1717 | |||
1718 | dd->ipath_port0_skbs = NULL; | ||
1719 | ipath_cdbg(VERBOSE, "free closed port %d ipath_port0_skbs " | ||
1720 | "@ %p\n", pd->port_port, skbs); | ||
1721 | for (e = 0; e < dd->ipath_rcvegrcnt; e++) | ||
1722 | if (skbs[e]) | ||
1723 | dev_kfree_skb(skbs[e]); | ||
1724 | vfree(skbs); | ||
1725 | } | ||
1726 | if (freehdrq) { | ||
1727 | kfree(pd->port_tid_pg_list); | ||
1728 | kfree(pd); | ||
1729 | } | ||
1730 | } | ||
1731 | |||
1732 | static int __init infinipath_init(void) | ||
1733 | { | ||
1734 | int ret; | ||
1735 | |||
1736 | ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ipath_core_version); | ||
1737 | |||
1738 | /* | ||
1739 | * These must be called before the driver is registered with | ||
1740 | * the PCI subsystem. | ||
1741 | */ | ||
1742 | idr_init(&unit_table); | ||
1743 | if (!idr_pre_get(&unit_table, GFP_KERNEL)) { | ||
1744 | ret = -ENOMEM; | ||
1745 | goto bail; | ||
1746 | } | ||
1747 | |||
1748 | ret = pci_register_driver(&ipath_driver); | ||
1749 | if (ret < 0) { | ||
1750 | printk(KERN_ERR IPATH_DRV_NAME | ||
1751 | ": Unable to register driver: error %d\n", -ret); | ||
1752 | goto bail_unit; | ||
1753 | } | ||
1754 | |||
1755 | ret = ipath_driver_create_group(&ipath_driver.driver); | ||
1756 | if (ret < 0) { | ||
1757 | printk(KERN_ERR IPATH_DRV_NAME ": Unable to create driver " | ||
1758 | "sysfs entries: error %d\n", -ret); | ||
1759 | goto bail_pci; | ||
1760 | } | ||
1761 | |||
1762 | ret = ipath_init_ipathfs(); | ||
1763 | if (ret < 0) { | ||
1764 | printk(KERN_ERR IPATH_DRV_NAME ": Unable to create " | ||
1765 | "ipathfs: error %d\n", -ret); | ||
1766 | goto bail_group; | ||
1767 | } | ||
1768 | |||
1769 | goto bail; | ||
1770 | |||
1771 | bail_group: | ||
1772 | ipath_driver_remove_group(&ipath_driver.driver); | ||
1773 | |||
1774 | bail_pci: | ||
1775 | pci_unregister_driver(&ipath_driver); | ||
1776 | |||
1777 | bail_unit: | ||
1778 | idr_destroy(&unit_table); | ||
1779 | |||
1780 | bail: | ||
1781 | return ret; | ||
1782 | } | ||
1783 | |||
1784 | static void cleanup_device(struct ipath_devdata *dd) | ||
1785 | { | ||
1786 | int port; | ||
1787 | |||
1788 | ipath_shutdown_device(dd); | ||
1789 | |||
1790 | if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { | ||
1791 | /* can't do anything more with chip; needs re-init */ | ||
1792 | *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; | ||
1793 | if (dd->ipath_kregbase) { | ||
1794 | /* | ||
1795 | * if we haven't already cleaned up before these are | ||
1796 | * to ensure any register reads/writes "fail" until | ||
1797 | * re-init | ||
1798 | */ | ||
1799 | dd->ipath_kregbase = NULL; | ||
1800 | dd->ipath_kregvirt = NULL; | ||
1801 | dd->ipath_uregbase = 0; | ||
1802 | dd->ipath_sregbase = 0; | ||
1803 | dd->ipath_cregbase = 0; | ||
1804 | dd->ipath_kregsize = 0; | ||
1805 | } | ||
1806 | ipath_disable_wc(dd); | ||
1807 | } | ||
1808 | |||
1809 | if (dd->ipath_pioavailregs_dma) { | ||
1810 | dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, | ||
1811 | (void *) dd->ipath_pioavailregs_dma, | ||
1812 | dd->ipath_pioavailregs_phys); | ||
1813 | dd->ipath_pioavailregs_dma = NULL; | ||
1814 | } | ||
1815 | |||
1816 | if (dd->ipath_pageshadow) { | ||
1817 | struct page **tmpp = dd->ipath_pageshadow; | ||
1818 | int i, cnt = 0; | ||
1819 | |||
1820 | ipath_cdbg(VERBOSE, "Unlocking any expTID pages still " | ||
1821 | "locked\n"); | ||
1822 | for (port = 0; port < dd->ipath_cfgports; port++) { | ||
1823 | int port_tidbase = port * dd->ipath_rcvtidcnt; | ||
1824 | int maxtid = port_tidbase + dd->ipath_rcvtidcnt; | ||
1825 | for (i = port_tidbase; i < maxtid; i++) { | ||
1826 | if (!tmpp[i]) | ||
1827 | continue; | ||
1828 | ipath_release_user_pages(&tmpp[i], 1); | ||
1829 | tmpp[i] = NULL; | ||
1830 | cnt++; | ||
1831 | } | ||
1832 | } | ||
1833 | if (cnt) { | ||
1834 | ipath_stats.sps_pageunlocks += cnt; | ||
1835 | ipath_cdbg(VERBOSE, "There were still %u expTID " | ||
1836 | "entries locked\n", cnt); | ||
1837 | } | ||
1838 | if (ipath_stats.sps_pagelocks || | ||
1839 | ipath_stats.sps_pageunlocks) | ||
1840 | ipath_cdbg(VERBOSE, "%llu pages locked, %llu " | ||
1841 | "unlocked via ipath_m{un}lock\n", | ||
1842 | (unsigned long long) | ||
1843 | ipath_stats.sps_pagelocks, | ||
1844 | (unsigned long long) | ||
1845 | ipath_stats.sps_pageunlocks); | ||
1846 | |||
1847 | ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", | ||
1848 | dd->ipath_pageshadow); | ||
1849 | vfree(dd->ipath_pageshadow); | ||
1850 | dd->ipath_pageshadow = NULL; | ||
1851 | } | ||
1852 | |||
1853 | /* | ||
1854 | * free any resources still in use (usually just kernel ports) | ||
1855 | * at unload | ||
1856 | */ | ||
1857 | for (port = 0; port < dd->ipath_cfgports; port++) | ||
1858 | ipath_free_pddata(dd, port, 1); | ||
1859 | kfree(dd->ipath_pd); | ||
1860 | /* | ||
1861 | * debuggability, in case some cleanup path tries to use it | ||
1862 | * after this | ||
1863 | */ | ||
1864 | dd->ipath_pd = NULL; | ||
1865 | } | ||
1866 | |||
1867 | static void __exit infinipath_cleanup(void) | ||
1868 | { | ||
1869 | struct ipath_devdata *dd, *tmp; | ||
1870 | unsigned long flags; | ||
1871 | |||
1872 | ipath_exit_ipathfs(); | ||
1873 | |||
1874 | ipath_driver_remove_group(&ipath_driver.driver); | ||
1875 | |||
1876 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
1877 | |||
1878 | /* | ||
1879 | * turn off rcv, send, and interrupts for all ports, all drivers | ||
1880 | * should also hard reset the chip here? | ||
1881 | * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs | ||
1882 | * for all versions of the driver, if they were allocated | ||
1883 | */ | ||
1884 | list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { | ||
1885 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
1886 | |||
1887 | if (dd->ipath_kregbase) | ||
1888 | cleanup_device(dd); | ||
1889 | |||
1890 | if (dd->pcidev) { | ||
1891 | if (dd->pcidev->irq) { | ||
1892 | ipath_cdbg(VERBOSE, | ||
1893 | "unit %u free_irq of irq %x\n", | ||
1894 | dd->ipath_unit, dd->pcidev->irq); | ||
1895 | free_irq(dd->pcidev->irq, dd); | ||
1896 | } else | ||
1897 | ipath_dbg("irq is 0, not doing free_irq " | ||
1898 | "for unit %u\n", dd->ipath_unit); | ||
1899 | dd->pcidev = NULL; | ||
1900 | } | ||
1901 | |||
1902 | /* | ||
1903 | * we check for NULL here, because it's outside the kregbase | ||
1904 | * check, and we need to call it after the free_irq. Thus | ||
1905 | * it's possible that the function pointers were never | ||
1906 | * initialized. | ||
1907 | */ | ||
1908 | if (dd->ipath_f_cleanup) | ||
1909 | /* clean up chip-specific stuff */ | ||
1910 | dd->ipath_f_cleanup(dd); | ||
1911 | |||
1912 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
1913 | } | ||
1914 | |||
1915 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
1916 | |||
1917 | ipath_cdbg(VERBOSE, "Unregistering pci driver\n"); | ||
1918 | pci_unregister_driver(&ipath_driver); | ||
1919 | |||
1920 | idr_destroy(&unit_table); | ||
1921 | } | ||
1922 | |||
1923 | /** | ||
1924 | * ipath_reset_device - reset the chip if possible | ||
1925 | * @unit: the device to reset | ||
1926 | * | ||
1927 | * Whether or not reset is successful, we attempt to re-initialize the chip | ||
1928 | * (that is, much like a driver unload/reload). We clear the INITTED flag | ||
1929 | * so that the various entry points will fail until we reinitialize. For | ||
1930 | * now, we only allow this if no user ports are open that use chip resources | ||
1931 | */ | ||
1932 | int ipath_reset_device(int unit) | ||
1933 | { | ||
1934 | int ret, i; | ||
1935 | struct ipath_devdata *dd = ipath_lookup(unit); | ||
1936 | |||
1937 | if (!dd) { | ||
1938 | ret = -ENODEV; | ||
1939 | goto bail; | ||
1940 | } | ||
1941 | |||
1942 | dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit); | ||
1943 | |||
1944 | if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) { | ||
1945 | dev_info(&dd->pcidev->dev, "Invalid unit number %u or " | ||
1946 | "not initialized or not present\n", unit); | ||
1947 | ret = -ENXIO; | ||
1948 | goto bail; | ||
1949 | } | ||
1950 | |||
1951 | if (dd->ipath_pd) | ||
1952 | for (i = 1; i < dd->ipath_portcnt; i++) { | ||
1953 | if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) { | ||
1954 | ipath_dbg("unit %u port %d is in use " | ||
1955 | "(PID %u cmd %s), can't reset\n", | ||
1956 | unit, i, | ||
1957 | dd->ipath_pd[i]->port_pid, | ||
1958 | dd->ipath_pd[i]->port_comm); | ||
1959 | ret = -EBUSY; | ||
1960 | goto bail; | ||
1961 | } | ||
1962 | } | ||
1963 | |||
1964 | dd->ipath_flags &= ~IPATH_INITTED; | ||
1965 | ret = dd->ipath_f_reset(dd); | ||
1966 | if (ret != 1) | ||
1967 | ipath_dbg("reset was not successful\n"); | ||
1968 | ipath_dbg("Trying to reinitialize unit %u after reset attempt\n", | ||
1969 | unit); | ||
1970 | ret = ipath_init_chip(dd, 1); | ||
1971 | if (ret) | ||
1972 | ipath_dev_err(dd, "Reinitialize unit %u after " | ||
1973 | "reset failed with %d\n", unit, ret); | ||
1974 | else | ||
1975 | dev_info(&dd->pcidev->dev, "Reinitialized unit %u after " | ||
1976 | "resetting\n", unit); | ||
1977 | |||
1978 | bail: | ||
1979 | return ret; | ||
1980 | } | ||
1981 | |||
1982 | module_init(infinipath_init); | ||
1983 | module_exit(infinipath_cleanup); | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c new file mode 100644 index 000000000000..f11a900e8cd7 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c | |||
@@ -0,0 +1,613 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/delay.h> | ||
34 | #include <linux/pci.h> | ||
35 | #include <linux/vmalloc.h> | ||
36 | |||
37 | #include "ipath_kernel.h" | ||
38 | |||
39 | /* | ||
40 | * InfiniPath I2C driver for a serial eeprom. This is not a generic | ||
41 | * I2C interface. For a start, the device we're using (Atmel AT24C11) | ||
42 | * doesn't work like a regular I2C device. It looks like one | ||
43 | * electrically, but not logically. Normal I2C devices have a single | ||
44 | * 7-bit or 10-bit I2C address that they respond to. Valid 7-bit | ||
45 | * addresses range from 0x03 to 0x77. Addresses 0x00 to 0x02 and 0x78 | ||
46 | * to 0x7F are special reserved addresses (e.g. 0x00 is the "general | ||
47 | * call" address.) The Atmel device, on the other hand, responds to ALL | ||
48 | * 7-bit addresses. It's designed to be the only device on a given I2C | ||
49 | * bus. A 7-bit address corresponds to the memory address within the | ||
50 | * Atmel device itself. | ||
51 | * | ||
52 | * Also, the timing requirements mean more than simple software | ||
53 | * bitbanging, with readbacks from chip to ensure timing (simple udelay | ||
54 | * is not enough). | ||
55 | * | ||
56 | * This all means that accessing the device is specialized enough | ||
57 | * that using the standard kernel I2C bitbanging interface would be | ||
58 | * impossible. For example, the core I2C eeprom driver expects to find | ||
59 | * a device at one or more of a limited set of addresses only. It doesn't | ||
60 | * allow writing to an eeprom. It also doesn't provide any means of | ||
61 | * accessing eeprom contents from within the kernel, only via sysfs. | ||
62 | */ | ||
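
One consequence of the addressing quirk described above: the byte sent after the start condition is not a fixed device address but the memory offset shifted left one bit, with the low bit selecting read (1) or write (0). That is exactly what the (eeprom_offset << 1) | READ_CMD construction in ipath_eeprom_read() below builds. A tiny sketch of the start-byte encoding:

#include <stdio.h>

#define READ_CMD  1
#define WRITE_CMD 0

int main(void)
{
        unsigned offset = 0x10;                         /* byte offset in the eeprom */
        unsigned start_byte_rd = (offset << 1) | READ_CMD;
        unsigned start_byte_wr = (offset << 1) | WRITE_CMD;

        printf("read  start byte: 0x%02x\n", start_byte_rd);   /* 0x21 */
        printf("write start byte: 0x%02x\n", start_byte_wr);   /* 0x20 */
        return 0;
}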
63 | |||
64 | enum i2c_type { | ||
65 | i2c_line_scl = 0, | ||
66 | i2c_line_sda | ||
67 | }; | ||
68 | |||
69 | enum i2c_state { | ||
70 | i2c_line_low = 0, | ||
71 | i2c_line_high | ||
72 | }; | ||
73 | |||
74 | #define READ_CMD 1 | ||
75 | #define WRITE_CMD 0 | ||
76 | |||
77 | static int eeprom_init; | ||
78 | |||
79 | /* | ||
80 | * The gpioval manipulation really should be protected by spinlocks | ||
81 | * or be converted to use atomic operations. | ||
82 | */ | ||
83 | |||
84 | /** | ||
85 | * i2c_gpio_set - set a GPIO line | ||
86 | * @dd: the infinipath device | ||
87 | * @line: the line to set | ||
88 | * @new_line_state: the state to set | ||
89 | * | ||
90 | * Returns 0 if the line was set to the new state successfully, non-zero | ||
91 | * on error. | ||
92 | */ | ||
93 | static int i2c_gpio_set(struct ipath_devdata *dd, | ||
94 | enum i2c_type line, | ||
95 | enum i2c_state new_line_state) | ||
96 | { | ||
97 | u64 read_val, write_val, mask, *gpioval; | ||
98 | |||
99 | gpioval = &dd->ipath_gpio_out; | ||
100 | read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); | ||
101 | if (line == i2c_line_scl) | ||
102 | mask = ipath_gpio_scl; | ||
103 | else | ||
104 | mask = ipath_gpio_sda; | ||
105 | |||
106 | if (new_line_state == i2c_line_high) | ||
107 | /* tri-state the output rather than force high */ | ||
108 | write_val = read_val & ~mask; | ||
109 | else | ||
110 | /* config line to be an output */ | ||
111 | write_val = read_val | mask; | ||
112 | ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val); | ||
113 | |||
114 | /* set high and verify */ | ||
115 | if (new_line_state == i2c_line_high) | ||
116 | write_val = 0x1UL; | ||
117 | else | ||
118 | write_val = 0x0UL; | ||
119 | |||
120 | if (line == i2c_line_scl) { | ||
121 | write_val <<= ipath_gpio_scl_num; | ||
122 | *gpioval = *gpioval & ~(1UL << ipath_gpio_scl_num); | ||
123 | *gpioval |= write_val; | ||
124 | } else { | ||
125 | write_val <<= ipath_gpio_sda_num; | ||
126 | *gpioval = *gpioval & ~(1UL << ipath_gpio_sda_num); | ||
127 | *gpioval |= write_val; | ||
128 | } | ||
129 | ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval); | ||
130 | |||
131 | return 0; | ||
132 | } | ||
133 | |||
134 | /** | ||
135 | * i2c_gpio_get - get a GPIO line state | ||
136 | * @dd: the infinipath device | ||
137 | * @line: the line to get | ||
138 | * @curr_statep: where to put the line state | ||
139 | * | ||
140 | * Returns 0 if the line state was read successfully, non-zero | ||
141 | * on error. curr_state is not set on error. | ||
142 | */ | ||
143 | static int i2c_gpio_get(struct ipath_devdata *dd, | ||
144 | enum i2c_type line, | ||
145 | enum i2c_state *curr_statep) | ||
146 | { | ||
147 | u64 read_val, write_val, mask; | ||
148 | int ret; | ||
149 | |||
150 | /* check args */ | ||
151 | if (curr_statep == NULL) { | ||
152 | ret = 1; | ||
153 | goto bail; | ||
154 | } | ||
155 | |||
156 | read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl); | ||
157 | /* config line to be an input */ | ||
158 | if (line == i2c_line_scl) | ||
159 | mask = ipath_gpio_scl; | ||
160 | else | ||
161 | mask = ipath_gpio_sda; | ||
162 | write_val = read_val & ~mask; | ||
163 | ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val); | ||
164 | read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); | ||
165 | |||
166 | if (read_val & mask) | ||
167 | *curr_statep = i2c_line_high; | ||
168 | else | ||
169 | *curr_statep = i2c_line_low; | ||
170 | |||
171 | ret = 0; | ||
172 | |||
173 | bail: | ||
174 | return ret; | ||
175 | } | ||
176 | |||
177 | /** | ||
178 | * i2c_wait_for_writes - wait for a write | ||
179 | * @dd: the infinipath device | ||
180 | * | ||
181 | * We use this instead of udelay directly, so we can make sure | ||
182 | * that previous register writes have been flushed all the way | ||
183 | * to the chip. Since we are delaying anyway, the cost doesn't | ||
184 | * hurt, and makes the bit twiddling more regular | ||
185 | */ | ||
186 | static void i2c_wait_for_writes(struct ipath_devdata *dd) | ||
187 | { | ||
188 | (void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); | ||
189 | } | ||
190 | |||
191 | static void scl_out(struct ipath_devdata *dd, u8 bit) | ||
192 | { | ||
193 | i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low); | ||
194 | |||
195 | i2c_wait_for_writes(dd); | ||
196 | } | ||
197 | |||
198 | static void sda_out(struct ipath_devdata *dd, u8 bit) | ||
199 | { | ||
200 | i2c_gpio_set(dd, i2c_line_sda, bit ? i2c_line_high : i2c_line_low); | ||
201 | |||
202 | i2c_wait_for_writes(dd); | ||
203 | } | ||
204 | |||
205 | static u8 sda_in(struct ipath_devdata *dd, int wait) | ||
206 | { | ||
207 | enum i2c_state bit; | ||
208 | |||
209 | if (i2c_gpio_get(dd, i2c_line_sda, &bit)) | ||
210 | ipath_dbg("get bit failed!\n"); | ||
211 | |||
212 | if (wait) | ||
213 | i2c_wait_for_writes(dd); | ||
214 | |||
215 | return bit == i2c_line_high ? 1U : 0; | ||
216 | } | ||
217 | |||
218 | /** | ||
219 | * i2c_ackrcv - see if ack following write is true | ||
220 | * @dd: the infinipath device | ||
221 | */ | ||
222 | static int i2c_ackrcv(struct ipath_devdata *dd) | ||
223 | { | ||
224 | u8 ack_received; | ||
225 | |||
226 | /* AT ENTRY SCL = LOW */ | ||
227 | /* change direction, ignore data */ | ||
228 | ack_received = sda_in(dd, 1); | ||
229 | scl_out(dd, i2c_line_high); | ||
230 | ack_received = sda_in(dd, 1) == 0; | ||
231 | scl_out(dd, i2c_line_low); | ||
232 | return ack_received; | ||
233 | } | ||
234 | |||
235 | /** | ||
236 | * wr_byte - write a byte, one bit at a time | ||
237 | * @dd: the infinipath device | ||
238 | * @data: the byte to write | ||
239 | * | ||
240 | * Returns 0 if we got the following ack, otherwise 1 | ||
241 | */ | ||
242 | static int wr_byte(struct ipath_devdata *dd, u8 data) | ||
243 | { | ||
244 | int bit_cntr; | ||
245 | u8 bit; | ||
246 | |||
247 | for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) { | ||
248 | bit = (data >> bit_cntr) & 1; | ||
249 | sda_out(dd, bit); | ||
250 | scl_out(dd, i2c_line_high); | ||
251 | scl_out(dd, i2c_line_low); | ||
252 | } | ||
253 | return (!i2c_ackrcv(dd)) ? 1 : 0; | ||
254 | } | ||
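
wr_byte() shifts the byte out most-significant bit first, one SCL pulse per bit, then samples the ack. A stand-alone sketch of the MSB-first shift; put_bit() is a made-up stand-in for the sda_out()/scl_out() pulse pair.

#include <stdio.h>
#include <stdint.h>

static void put_bit(uint8_t bit)
{
        putchar(bit ? '1' : '0');       /* would drive SDA, then pulse SCL */
}

static void wr_byte_sketch(uint8_t data)
{
        for (int bit_cntr = 7; bit_cntr >= 0; bit_cntr--)
                put_bit((data >> bit_cntr) & 1);
        putchar('\n');
}

int main(void)
{
        wr_byte_sketch(0xA5);           /* prints 10100101 */
        return 0;
}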
255 | |||
256 | static void send_ack(struct ipath_devdata *dd) | ||
257 | { | ||
258 | sda_out(dd, i2c_line_low); | ||
259 | scl_out(dd, i2c_line_high); | ||
260 | scl_out(dd, i2c_line_low); | ||
261 | sda_out(dd, i2c_line_high); | ||
262 | } | ||
263 | |||
264 | /** | ||
265 | * i2c_startcmd - transmit the start condition, followed by address/cmd | ||
266 | * @dd: the infinipath device | ||
267 | * @offset_dir: direction byte | ||
268 | * | ||
269 | * (both clock/data high, clock high, data low while clock is high) | ||
270 | */ | ||
271 | static int i2c_startcmd(struct ipath_devdata *dd, u8 offset_dir) | ||
272 | { | ||
273 | int res; | ||
274 | |||
275 | /* issue start sequence */ | ||
276 | sda_out(dd, i2c_line_high); | ||
277 | scl_out(dd, i2c_line_high); | ||
278 | sda_out(dd, i2c_line_low); | ||
279 | scl_out(dd, i2c_line_low); | ||
280 | |||
281 | /* issue length and direction byte */ | ||
282 | res = wr_byte(dd, offset_dir); | ||
283 | |||
284 | if (res) | ||
285 | ipath_cdbg(VERBOSE, "No ack to complete start\n"); | ||
286 | |||
287 | return res; | ||
288 | } | ||
289 | |||
290 | /** | ||
291 | * stop_cmd - transmit the stop condition | ||
292 | * @dd: the infinipath device | ||
293 | * | ||
294 | * (both clock/data low, clock high, data high while clock is high) | ||
295 | */ | ||
296 | static void stop_cmd(struct ipath_devdata *dd) | ||
297 | { | ||
298 | scl_out(dd, i2c_line_low); | ||
299 | sda_out(dd, i2c_line_low); | ||
300 | scl_out(dd, i2c_line_high); | ||
301 | sda_out(dd, i2c_line_high); | ||
302 | udelay(2); | ||
303 | } | ||
304 | |||
305 | /** | ||
306 | * eeprom_reset - reset I2C communication | ||
307 | * @dd: the infinipath device | ||
308 | */ | ||
309 | |||
310 | static int eeprom_reset(struct ipath_devdata *dd) | ||
311 | { | ||
312 | int clock_cycles_left = 9; | ||
313 | u64 *gpioval = &dd->ipath_gpio_out; | ||
314 | int ret; | ||
315 | |||
316 | eeprom_init = 1; | ||
317 | *gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out); | ||
318 | ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg " | ||
319 | "is %llx\n", (unsigned long long) *gpioval); | ||
320 | |||
321 | /* | ||
322 | * This is to get the i2c into a known state, by first going low, | ||
323 | * then tristate sda (and then tristate scl as first thing | ||
324 | * in loop) | ||
325 | */ | ||
326 | scl_out(dd, i2c_line_low); | ||
327 | sda_out(dd, i2c_line_high); | ||
328 | |||
329 | while (clock_cycles_left--) { | ||
330 | scl_out(dd, i2c_line_high); | ||
331 | |||
332 | if (sda_in(dd, 0)) { | ||
333 | sda_out(dd, i2c_line_low); | ||
334 | scl_out(dd, i2c_line_low); | ||
335 | ret = 0; | ||
336 | goto bail; | ||
337 | } | ||
338 | |||
339 | scl_out(dd, i2c_line_low); | ||
340 | } | ||
341 | |||
342 | ret = 1; | ||
343 | |||
344 | bail: | ||
345 | return ret; | ||
346 | } | ||
347 | |||
348 | /** | ||
349 | * ipath_eeprom_read - receives bytes from the eeprom via I2C | ||
350 | * @dd: the infinipath device | ||
351 | * @eeprom_offset: address to read from | ||
352 | * @buffer: where to store result | ||
353 | * @len: number of bytes to receive | ||
354 | */ | ||
355 | |||
356 | int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset, | ||
357 | void *buffer, int len) | ||
358 | { | ||
359 | /* compiler complains unless initialized */ | ||
360 | u8 single_byte = 0; | ||
361 | int bit_cntr; | ||
362 | int ret; | ||
363 | |||
364 | if (!eeprom_init) | ||
365 | eeprom_reset(dd); | ||
366 | |||
367 | eeprom_offset = (eeprom_offset << 1) | READ_CMD; | ||
368 | |||
369 | if (i2c_startcmd(dd, eeprom_offset)) { | ||
370 | ipath_dbg("Failed startcmd\n"); | ||
371 | stop_cmd(dd); | ||
372 | ret = 1; | ||
373 | goto bail; | ||
374 | } | ||
375 | |||
376 | /* | ||
377 | * eeprom keeps clocking data out as long as we ack, automatically | ||
378 | * incrementing the address. | ||
379 | */ | ||
380 | while (len-- > 0) { | ||
381 | /* get data */ | ||
382 | single_byte = 0; | ||
383 | for (bit_cntr = 8; bit_cntr; bit_cntr--) { | ||
384 | u8 bit; | ||
385 | scl_out(dd, i2c_line_high); | ||
386 | bit = sda_in(dd, 0); | ||
387 | single_byte |= bit << (bit_cntr - 1); | ||
388 | scl_out(dd, i2c_line_low); | ||
389 | } | ||
390 | |||
391 | /* send ack if not the last byte */ | ||
392 | if (len) | ||
393 | send_ack(dd); | ||
394 | |||
395 | *((u8 *) buffer) = single_byte; | ||
396 | buffer++; | ||
397 | } | ||
398 | |||
399 | stop_cmd(dd); | ||
400 | |||
401 | ret = 0; | ||
402 | |||
403 | bail: | ||
404 | return ret; | ||
405 | } | ||
406 | |||
407 | /** | ||
408 | * ipath_eeprom_write - writes data to the eeprom via I2C | ||
409 | * @dd: the infinipath device | ||
410 | * @eeprom_offset: where to place data | ||
411 | * @buffer: data to write | ||
412 | * @len: number of bytes to write | ||
413 | */ | ||
414 | int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset, | ||
415 | const void *buffer, int len) | ||
416 | { | ||
417 | u8 single_byte; | ||
418 | int sub_len; | ||
419 | const u8 *bp = buffer; | ||
420 | int max_wait_time, i; | ||
421 | int ret; | ||
422 | |||
423 | if (!eeprom_init) | ||
424 | eeprom_reset(dd); | ||
425 | |||
426 | while (len > 0) { | ||
427 | if (i2c_startcmd(dd, (eeprom_offset << 1) | WRITE_CMD)) { | ||
428 | ipath_dbg("Failed to start cmd offset %u\n", | ||
429 | eeprom_offset); | ||
430 | goto failed_write; | ||
431 | } | ||
432 | |||
433 | sub_len = min(len, 4); | ||
434 | eeprom_offset += sub_len; | ||
435 | len -= sub_len; | ||
436 | |||
437 | for (i = 0; i < sub_len; i++) { | ||
438 | if (wr_byte(dd, *bp++)) { | ||
439 | ipath_dbg("no ack after byte %u/%u (%u " | ||
440 | "total remain)\n", i, sub_len, | ||
441 | len + sub_len - i); | ||
442 | goto failed_write; | ||
443 | } | ||
444 | } | ||
445 | |||
446 | stop_cmd(dd); | ||
447 | |||
448 | /* | ||
449 | * wait for write complete by waiting for a successful | ||
450 | * read (the chip replies with a zero after the write | ||
451 | * cmd completes, and before it writes to the eeprom). | ||
452 | * The startcmd for the read will fail the ack until | ||
453 | * the writes have completed. We do this inline to avoid | ||
454 | * the debug prints that are in the real read routine | ||
455 | * if the startcmd fails. | ||
456 | */ | ||
457 | max_wait_time = 100; | ||
458 | while (i2c_startcmd(dd, READ_CMD)) { | ||
459 | stop_cmd(dd); | ||
460 | if (!--max_wait_time) { | ||
461 | ipath_dbg("Did not get successful read to " | ||
462 | "complete write\n"); | ||
463 | goto failed_write; | ||
464 | } | ||
465 | } | ||
466 | /* now read the zero byte */ | ||
467 | for (i = single_byte = 0; i < 8; i++) { | ||
468 | u8 bit; | ||
469 | scl_out(dd, i2c_line_high); | ||
470 | bit = sda_in(dd, 0); | ||
471 | scl_out(dd, i2c_line_low); | ||
472 | single_byte <<= 1; | ||
473 | single_byte |= bit; | ||
474 | } | ||
475 | stop_cmd(dd); | ||
476 | } | ||
477 | |||
478 | ret = 0; | ||
479 | goto bail; | ||
480 | |||
481 | failed_write: | ||
482 | stop_cmd(dd); | ||
483 | ret = 1; | ||
484 | |||
485 | bail: | ||
486 | return ret; | ||
487 | } | ||
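
The write path above does not poll a status register to detect write completion; it keeps issuing a read start condition until the part finally acks it, bounded by a retry counter. A minimal sketch of that acked-start polling loop; try_start() is a made-up stand-in for i2c_startcmd() followed by stop_cmd().

#include <stdio.h>

static int try_start(void)
{
        static int busy = 3;            /* pretend the device is busy 3 times */
        return busy-- > 0;              /* non-zero means "no ack yet" */
}

static int wait_write_complete(void)
{
        int max_wait_time = 100;        /* bounded number of retries */

        while (try_start()) {
                if (!--max_wait_time)
                        return -1;      /* device never acked: give up */
        }
        return 0;                       /* write cycle finished */
}

int main(void)
{
        printf("wait_write_complete: %d\n", wait_write_complete());
        return 0;
}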
488 | |||
489 | static u8 flash_csum(struct ipath_flash *ifp, int adjust) | ||
490 | { | ||
491 | u8 *ip = (u8 *) ifp; | ||
492 | u8 csum = 0, len; | ||
493 | |||
494 | for (len = 0; len < ifp->if_length; len++) | ||
495 | csum += *ip++; | ||
496 | csum -= ifp->if_csum; | ||
497 | csum = ~csum; | ||
498 | if (adjust) | ||
499 | ifp->if_csum = csum; | ||
500 | |||
501 | return csum; | ||
502 | } | ||
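
flash_csum() sums the first if_length bytes of the flash image, backs out the stored checksum byte so it does not include itself, and returns the one's complement; with adjust set it also writes the result back. A user-space sketch with a simplified stand-in layout (fake_flash is not the real struct ipath_flash):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct fake_flash {
        uint8_t if_length;              /* bytes covered by the checksum */
        uint8_t if_csum;                /* stored checksum */
        uint8_t if_data[14];
};

static uint8_t fake_csum(const struct fake_flash *ifp)
{
        const uint8_t *ip = (const uint8_t *) ifp;
        uint8_t csum = 0;

        for (uint8_t len = 0; len < ifp->if_length; len++)
                csum += *ip++;
        csum -= ifp->if_csum;           /* the checksum byte doesn't checksum itself */
        return (uint8_t) ~csum;
}

int main(void)
{
        struct fake_flash f = { .if_length = 16, .if_csum = 0 };

        memset(f.if_data, 0xAB, sizeof(f.if_data));
        f.if_csum = fake_csum(&f);      /* the "adjust" pass, done by hand here */
        printf("checksum 0x%02x, recheck %s\n", f.if_csum,
               fake_csum(&f) == f.if_csum ? "ok" : "bad");
        return 0;
}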
503 | |||
504 | /** | ||
505 | * ipath_get_guid - get the GUID from the i2c device | ||
506 | * @dd: the infinipath device | ||
507 | * | ||
508 | * When we add the multi-chip support, we will probably have to add | ||
509 | * the ability to use the number of guids field, and get the guid from | ||
510 | * the first chip's flash, to use for all of them. | ||
511 | */ | ||
512 | void ipath_get_guid(struct ipath_devdata *dd) | ||
513 | { | ||
514 | void *buf; | ||
515 | struct ipath_flash *ifp; | ||
516 | __be64 guid; | ||
517 | int len; | ||
518 | u8 csum, *bguid; | ||
519 | int t = dd->ipath_unit; | ||
520 | struct ipath_devdata *dd0 = ipath_lookup(0); | ||
521 | |||
522 | if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) { | ||
523 | u8 *bguid, oguid; | ||
524 | dd->ipath_guid = dd0->ipath_guid; | ||
525 | bguid = (u8 *) & dd->ipath_guid; | ||
526 | |||
527 | oguid = bguid[7]; | ||
528 | bguid[7] += t; | ||
529 | if (oguid > bguid[7]) { | ||
530 | if (bguid[6] == 0xff) { | ||
531 | if (bguid[5] == 0xff) { | ||
532 | ipath_dev_err( | ||
533 | dd, | ||
534 | "Can't set %s GUID from " | ||
535 | "base, wraps to OUI!\n", | ||
536 | ipath_get_unit_name(t)); | ||
537 | dd->ipath_guid = 0; | ||
538 | goto bail; | ||
539 | } | ||
540 | bguid[5]++; | ||
541 | } | ||
542 | bguid[6]++; | ||
543 | } | ||
544 | dd->ipath_nguid = 1; | ||
545 | |||
546 | ipath_dbg("nguid %u, so adding %u to device 0 guid, " | ||
547 | "for %llx\n", | ||
548 | dd0->ipath_nguid, t, | ||
549 | (unsigned long long) be64_to_cpu(dd->ipath_guid)); | ||
550 | goto bail; | ||
551 | } | ||
552 | |||
553 | len = offsetof(struct ipath_flash, if_future); | ||
554 | buf = vmalloc(len); | ||
555 | if (!buf) { | ||
556 | ipath_dev_err(dd, "Couldn't allocate memory to read %u " | ||
557 | "bytes from eeprom for GUID\n", len); | ||
558 | goto bail; | ||
559 | } | ||
560 | |||
561 | if (ipath_eeprom_read(dd, 0, buf, len)) { | ||
562 | ipath_dev_err(dd, "Failed reading GUID from eeprom\n"); | ||
563 | goto done; | ||
564 | } | ||
565 | ifp = (struct ipath_flash *)buf; | ||
566 | |||
567 | csum = flash_csum(ifp, 0); | ||
568 | if (csum != ifp->if_csum) { | ||
569 | dev_info(&dd->pcidev->dev, "Bad I2C flash checksum: " | ||
570 | "0x%x, not 0x%x\n", csum, ifp->if_csum); | ||
571 | goto done; | ||
572 | } | ||
573 | if (*(__be64 *) ifp->if_guid == 0ULL || | ||
574 | *(__be64 *) ifp->if_guid == __constant_cpu_to_be64(-1LL)) { | ||
575 | ipath_dev_err(dd, "Invalid GUID %llx from flash; " | ||
576 | "ignoring\n", | ||
577 | *(unsigned long long *) ifp->if_guid); | ||
578 | /* don't allow GUID if all 0 or all 1's */ | ||
579 | goto done; | ||
580 | } | ||
581 | |||
582 | /* complain, but allow it */ | ||
583 | if (*(u64 *) ifp->if_guid == 0x100007511000000ULL) | ||
584 | dev_info(&dd->pcidev->dev, "Warning, GUID %llx is " | ||
585 | "default, probably not correct!\n", | ||
586 | *(unsigned long long *) ifp->if_guid); | ||
587 | |||
588 | bguid = ifp->if_guid; | ||
589 | if (!bguid[0] && !bguid[1] && !bguid[2]) { | ||
590 | /* original incorrect GUID format in flash; fix in | ||
591 | * core copy, by shifting up 2 octets; don't need to | ||
592 | * change top octet, since both it and shifted are | ||
593 | * 0.. */ | ||
594 | bguid[1] = bguid[3]; | ||
595 | bguid[2] = bguid[4]; | ||
596 | bguid[3] = bguid[4] = 0; | ||
597 | guid = *(__be64 *) ifp->if_guid; | ||
598 | ipath_cdbg(VERBOSE, "Old GUID format in flash, top 3 zero, " | ||
599 | "shifting 2 octets\n"); | ||
600 | } else | ||
601 | guid = *(__be64 *) ifp->if_guid; | ||
602 | dd->ipath_guid = guid; | ||
603 | dd->ipath_nguid = ifp->if_numguid; | ||
604 | memcpy(dd->ipath_serial, ifp->if_serial, | ||
605 | sizeof(ifp->if_serial)); | ||
606 | ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", | ||
607 | (unsigned long long) be64_to_cpu(dd->ipath_guid)); | ||
608 | |||
609 | done: | ||
610 | vfree(buf); | ||
611 | |||
612 | bail:; | ||
613 | } | ||
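
For secondary units, ipath_get_guid() above derives a GUID from unit 0's: add the unit number to the lowest octet and ripple any carry into the next two octets, bailing out rather than wrapping into the OUI. A stand-alone sketch of that carry logic, with example GUID bytes:

#include <stdio.h>
#include <stdint.h>

static int derive_guid(uint8_t guid[8], unsigned unit)
{
        uint8_t old = guid[7];

        guid[7] += unit;
        if (old > guid[7]) {            /* low octet wrapped: carry upward */
                if (guid[6] == 0xff) {
                        if (guid[5] == 0xff)
                                return -1;      /* would spill into the OUI */
                        guid[5]++;
                }
                guid[6]++;
        }
        return 0;
}

int main(void)
{
        uint8_t guid[8] = { 0x00, 0x11, 0x75, 0x00, 0x00, 0x00, 0x00, 0xfe };

        if (!derive_guid(guid, 3))
                printf("derived low octets: %02x %02x %02x\n",
                       guid[5], guid[6], guid[7]);
        return 0;
}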
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c new file mode 100644 index 000000000000..c347191f02bf --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c | |||
@@ -0,0 +1,1910 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/pci.h> | ||
34 | #include <linux/poll.h> | ||
35 | #include <linux/cdev.h> | ||
36 | #include <linux/swap.h> | ||
37 | #include <linux/vmalloc.h> | ||
38 | #include <asm/pgtable.h> | ||
39 | |||
40 | #include "ipath_kernel.h" | ||
41 | #include "ips_common.h" | ||
42 | #include "ipath_layer.h" | ||
43 | |||
44 | static int ipath_open(struct inode *, struct file *); | ||
45 | static int ipath_close(struct inode *, struct file *); | ||
46 | static ssize_t ipath_write(struct file *, const char __user *, size_t, | ||
47 | loff_t *); | ||
48 | static unsigned int ipath_poll(struct file *, struct poll_table_struct *); | ||
49 | static int ipath_mmap(struct file *, struct vm_area_struct *); | ||
50 | |||
51 | static struct file_operations ipath_file_ops = { | ||
52 | .owner = THIS_MODULE, | ||
53 | .write = ipath_write, | ||
54 | .open = ipath_open, | ||
55 | .release = ipath_close, | ||
56 | .poll = ipath_poll, | ||
57 | .mmap = ipath_mmap | ||
58 | }; | ||
59 | |||
60 | static int ipath_get_base_info(struct ipath_portdata *pd, | ||
61 | void __user *ubase, size_t ubase_size) | ||
62 | { | ||
63 | int ret = 0; | ||
64 | struct ipath_base_info *kinfo = NULL; | ||
65 | struct ipath_devdata *dd = pd->port_dd; | ||
66 | |||
67 | if (ubase_size < sizeof(*kinfo)) { | ||
68 | ipath_cdbg(PROC, | ||
69 | "Base size %lu, need %lu (version mismatch?)\n", | ||
70 | (unsigned long) ubase_size, | ||
71 | (unsigned long) sizeof(*kinfo)); | ||
72 | ret = -EINVAL; | ||
73 | goto bail; | ||
74 | } | ||
75 | |||
76 | kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL); | ||
77 | if (kinfo == NULL) { | ||
78 | ret = -ENOMEM; | ||
79 | goto bail; | ||
80 | } | ||
81 | |||
82 | ret = dd->ipath_f_get_base_info(pd, kinfo); | ||
83 | if (ret < 0) | ||
84 | goto bail; | ||
85 | |||
86 | kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt; | ||
87 | kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize; | ||
88 | kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt; | ||
89 | kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize; | ||
90 | /* | ||
91 | * have to mmap whole thing | ||
92 | */ | ||
93 | kinfo->spi_rcv_egrbuftotlen = | ||
94 | pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; | ||
95 | kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk; | ||
96 | kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen / | ||
97 | pd->port_rcvegrbuf_chunks; | ||
98 | kinfo->spi_tidcnt = dd->ipath_rcvtidcnt; | ||
99 | /* | ||
100 | * for this use, may be ipath_cfgports summed over all chips that | ||
101 | * are configured and present | ||
102 | */ | ||
103 | kinfo->spi_nports = dd->ipath_cfgports; | ||
104 | /* unit (chip/board) our port is on */ | ||
105 | kinfo->spi_unit = dd->ipath_unit; | ||
106 | /* for now, only a single page */ | ||
107 | kinfo->spi_tid_maxsize = PAGE_SIZE; | ||
108 | |||
109 | /* | ||
110 | * Doing this per port, and based on the skip value, etc. This has | ||
111 | * to be the actual buffer size, since the protocol code treats it | ||
112 | * as an array. | ||
113 | * | ||
114 | * These have to be set to user addresses in the user code via mmap. | ||
115 | * These values are used on return to user code for the mmap target | ||
116 | * addresses only. For 32 bit, same 44 bit address problem, so use | ||
117 | * the physical address, not virtual. Before 2.6.11, using the | ||
118 | * page_address() macro worked, but in 2.6.11, even that returns the | ||
119 | * full 64 bit address (upper bits all 1's). So far, using the | ||
120 | * physical addresses (or chip offsets, for chip mapping) works, but | ||
121 | * no doubt some future kernel release will change that, and we'll be | ||
122 | * on to yet another method of dealing with this | ||
123 | */ | ||
124 | kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys; | ||
125 | kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys; | ||
126 | kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys; | ||
127 | kinfo->spi_status = (u64) kinfo->spi_pioavailaddr + | ||
128 | (void *) dd->ipath_statusp - | ||
129 | (void *) dd->ipath_pioavailregs_dma; | ||
130 | kinfo->spi_piobufbase = (u64) pd->port_piobufs; | ||
131 | kinfo->__spi_uregbase = | ||
132 | dd->ipath_uregbase + dd->ipath_palign * pd->port_port; | ||
133 | |||
134 | kinfo->spi_pioindex = dd->ipath_pbufsport * (pd->port_port - 1); | ||
135 | kinfo->spi_piocnt = dd->ipath_pbufsport; | ||
136 | kinfo->spi_pioalign = dd->ipath_palign; | ||
137 | |||
138 | kinfo->spi_qpair = IPATH_KD_QP; | ||
139 | kinfo->spi_piosize = dd->ipath_ibmaxlen; | ||
140 | kinfo->spi_mtu = dd->ipath_ibmaxlen; /* maxlen, not ibmtu */ | ||
141 | kinfo->spi_port = pd->port_port; | ||
142 | kinfo->spi_sw_version = IPATH_USER_SWVERSION; | ||
143 | kinfo->spi_hw_version = dd->ipath_revision; | ||
144 | |||
145 | if (copy_to_user(ubase, kinfo, sizeof(*kinfo))) | ||
146 | ret = -EFAULT; | ||
147 | |||
148 | bail: | ||
149 | kfree(kinfo); | ||
150 | return ret; | ||
151 | } | ||
152 | |||
153 | /** | ||
154 | * ipath_tid_update - update a port TID | ||
155 | * @pd: the port | ||
156 | * @ti: the TID information | ||
157 | * | ||
158 | * The new implementation as of Oct 2004 is that the driver assigns | ||
159 | * the tid and returns it to the caller. To make it easier to | ||
160 | * catch bugs, and to reduce search time, we keep a cursor for | ||
161 | * each port, walking the shadow tid array to find one that's not | ||
162 | * in use. | ||
163 | * | ||
164 | * For now, if we can't allocate the full list, we fail, although | ||
165 | * in the long run, we'll allocate as many as we can, and the | ||
166 | * caller will deal with that by trying the remaining pages later. | ||
167 | * That means that when we fail, we have to mark the tids as not in | ||
168 | * use again, in our shadow copy. | ||
169 | * | ||
170 | * It's up to the caller to free the tids when they are done. | ||
171 | * We'll unlock the pages as they free them. | ||
172 | * | ||
173 | * Also, right now we are locking one page at a time, but since | ||
174 | * the intended use of this routine is for a single group of | ||
175 | * virtually contiguous pages, that should change to improve | ||
176 | * performance. | ||
177 | */ | ||
178 | static int ipath_tid_update(struct ipath_portdata *pd, | ||
179 | const struct ipath_tid_info *ti) | ||
180 | { | ||
181 | int ret = 0, ntids; | ||
182 | u32 tid, porttid, cnt, i, tidcnt; | ||
183 | u16 *tidlist; | ||
184 | struct ipath_devdata *dd = pd->port_dd; | ||
185 | u64 physaddr; | ||
186 | unsigned long vaddr; | ||
187 | u64 __iomem *tidbase; | ||
188 | unsigned long tidmap[8]; | ||
189 | struct page **pagep = NULL; | ||
190 | |||
191 | if (!dd->ipath_pageshadow) { | ||
192 | ret = -ENOMEM; | ||
193 | goto done; | ||
194 | } | ||
195 | |||
196 | cnt = ti->tidcnt; | ||
197 | if (!cnt) { | ||
198 | ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n", | ||
199 | (unsigned long long) ti->tidlist); | ||
200 | /* | ||
201 | * Should we treat as success? likely a bug | ||
202 | */ | ||
203 | ret = -EFAULT; | ||
204 | goto done; | ||
205 | } | ||
206 | tidcnt = dd->ipath_rcvtidcnt; | ||
207 | if (cnt >= tidcnt) { | ||
208 | /* make sure it all fits in port_tid_pg_list */ | ||
209 | dev_info(&dd->pcidev->dev, "Process tried to allocate %u " | ||
210 | "TIDs, only trying max (%u)\n", cnt, tidcnt); | ||
211 | cnt = tidcnt; | ||
212 | } | ||
213 | pagep = (struct page **)pd->port_tid_pg_list; | ||
214 | tidlist = (u16 *) (&pagep[cnt]); | ||
215 | |||
216 | memset(tidmap, 0, sizeof(tidmap)); | ||
217 | tid = pd->port_tidcursor; | ||
218 | /* before decrement; chip actual # */ | ||
219 | porttid = pd->port_port * tidcnt; | ||
220 | ntids = tidcnt; | ||
221 | tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) + | ||
222 | dd->ipath_rcvtidbase + | ||
223 | porttid * sizeof(*tidbase)); | ||
224 | |||
225 | ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n", | ||
226 | pd->port_port, cnt, tid, tidbase); | ||
227 | |||
228 | /* virtual address of first page in transfer */ | ||
229 | vaddr = ti->tidvaddr; | ||
230 | if (!access_ok(VERIFY_WRITE, (void __user *) vaddr, | ||
231 | cnt * PAGE_SIZE)) { | ||
232 | ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n", | ||
233 | (void *)vaddr, cnt); | ||
234 | ret = -EFAULT; | ||
235 | goto done; | ||
236 | } | ||
237 | ret = ipath_get_user_pages(vaddr, cnt, pagep); | ||
238 | if (ret) { | ||
239 | if (ret == -EBUSY) { | ||
240 | ipath_dbg("Failed to lock addr %p, %u pages " | ||
241 | "(already locked)\n", | ||
242 | (void *) vaddr, cnt); | ||
243 | /* | ||
244 | * for now, continue, and see what happens but with | ||
245 | * the new implementation, this should never happen, | ||
246 | * unless perhaps the user has mpin'ed the pages | ||
247 | * themselves (something we need to test) | ||
248 | */ | ||
249 | ret = 0; | ||
250 | } else { | ||
251 | dev_info(&dd->pcidev->dev, | ||
252 | "Failed to lock addr %p, %u pages: " | ||
253 | "errno %d\n", (void *) vaddr, cnt, -ret); | ||
254 | goto done; | ||
255 | } | ||
256 | } | ||
257 | for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) { | ||
258 | for (; ntids--; tid++) { | ||
259 | if (tid == tidcnt) | ||
260 | tid = 0; | ||
261 | if (!dd->ipath_pageshadow[porttid + tid]) | ||
262 | break; | ||
263 | } | ||
264 | if (ntids < 0) { | ||
265 | /* | ||
266 | * oops, wrapped all the way through their TIDs, | ||
267 | * and didn't have enough free; see comments at | ||
268 | * start of routine | ||
269 | */ | ||
270 | ipath_dbg("Not enough free TIDs for %u pages " | ||
271 | "(index %d), failing\n", cnt, i); | ||
272 | i--; /* last tidlist[i] not filled in */ | ||
273 | ret = -ENOMEM; | ||
274 | break; | ||
275 | } | ||
276 | tidlist[i] = tid; | ||
277 | ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, " | ||
278 | "vaddr %lx\n", i, tid, vaddr); | ||
279 | /* we "know" system pages and TID pages are same size */ | ||
280 | dd->ipath_pageshadow[porttid + tid] = pagep[i]; | ||
281 | /* | ||
282 | * don't need atomic or it's overhead | ||
283 | */ | ||
284 | __set_bit(tid, tidmap); | ||
285 | physaddr = page_to_phys(pagep[i]); | ||
286 | ipath_stats.sps_pagelocks++; | ||
287 | ipath_cdbg(VERBOSE, | ||
288 | "TID %u, vaddr %lx, physaddr %llx pgp %p\n", | ||
289 | tid, vaddr, (unsigned long long) physaddr, | ||
290 | pagep[i]); | ||
291 | dd->ipath_f_put_tid(dd, &tidbase[tid], 1, physaddr); | ||
292 | /* | ||
293 | * don't check this tid in ipath_portshadow, since we | ||
294 | * just filled it in; start with the next one. | ||
295 | */ | ||
296 | tid++; | ||
297 | } | ||
298 | |||
299 | if (ret) { | ||
300 | u32 limit; | ||
301 | cleanup: | ||
302 | /* jump here if copy out of updated info failed... */ | ||
303 | ipath_dbg("After failure (ret=%d), undo %d of %d entries\n", | ||
304 | -ret, i, cnt); | ||
305 | /* same code that's in ipath_free_tid() */ | ||
306 | limit = sizeof(tidmap) * BITS_PER_BYTE; | ||
307 | if (limit > tidcnt) | ||
308 | /* just in case size changes in future */ | ||
309 | limit = tidcnt; | ||
310 | tid = find_first_bit((const unsigned long *)tidmap, limit); | ||
311 | for (; tid < limit; tid++) { | ||
312 | if (!test_bit(tid, tidmap)) | ||
313 | continue; | ||
314 | if (dd->ipath_pageshadow[porttid + tid]) { | ||
315 | ipath_cdbg(VERBOSE, "Freeing TID %u\n", | ||
316 | tid); | ||
317 | dd->ipath_f_put_tid(dd, &tidbase[tid], 1, | ||
318 | dd->ipath_tidinvalid); | ||
319 | dd->ipath_pageshadow[porttid + tid] = NULL; | ||
320 | ipath_stats.sps_pageunlocks++; | ||
321 | } | ||
322 | } | ||
323 | ipath_release_user_pages(pagep, cnt); | ||
324 | } else { | ||
325 | /* | ||
326 | * Copy the updated array, with ipath_tid's filled in, back | ||
327 | * to user. Since we did the copy in already, this "should | ||
328 | * never fail". If it does, we have to clean up... | ||
329 | */ | ||
330 | if (copy_to_user((void __user *) | ||
331 | (unsigned long) ti->tidlist, | ||
332 | tidlist, cnt * sizeof(*tidlist))) { | ||
333 | ret = -EFAULT; | ||
334 | goto cleanup; | ||
335 | } | ||
336 | if (copy_to_user((void __user *) (unsigned long) ti->tidmap, | ||
337 | tidmap, sizeof tidmap)) { | ||
338 | ret = -EFAULT; | ||
339 | goto cleanup; | ||
340 | } | ||
341 | if (tid == tidcnt) | ||
342 | tid = 0; | ||
343 | pd->port_tidcursor = tid; | ||
344 | } | ||
345 | |||
346 | done: | ||
347 | if (ret) | ||
348 | ipath_dbg("Failed to map %u TID pages, failing with %d\n", | ||
349 | ti->tidcnt, -ret); | ||
350 | return ret; | ||
351 | } | ||
352 | |||
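For orientation, a minimal stand-alone sketch (not part of the patch) of how ipath_tid_update() carves the single per-open scratch buffer, pd->port_tid_pg_list, into the page-pointer array and the TID list that is later copied back to user space; generic pointer types stand in for struct page * here:

    #include <stdint.h>

    /* 'buf' stands in for pd->port_tid_pg_list; 'cnt' is the number of pages
     * being mapped, already clamped to tidcnt near the top of
     * ipath_tid_update(). */
    static void carve_tid_scratch(void *buf, unsigned int cnt,
                                  void ***pagep_out, uint16_t **tidlist_out)
    {
            void **pagep = buf;                          /* page pointers come first  */
            uint16_t *tidlist = (uint16_t *)&pagep[cnt]; /* 16-bit TID indices follow */

            *pagep_out = pagep;
            *tidlist_out = tidlist;
    }

try_alloc_port() further down sizes that buffer for rcvtidcnt page pointers plus rcvtidcnt 16-bit TIDs, which is why clamping cnt to at most tidcnt keeps both arrays inside it.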
353 | /** | ||
354 | * ipath_tid_free - free a port TID | ||
355 | * @pd: the port | ||
356 | * @ti: the TID info | ||
357 | * | ||
358 | * right now we are unlocking one page at a time, but since | ||
359 | * the intended use of this routine is for a single group of | ||
360 | * virtually contiguous pages, that should change to improve | ||
361 | * performance. We check that the TID is in range for this port | ||
362 | * but otherwise don't check validity; if the user makes an error and | ||
363 | * frees the wrong tid, it's only their own data that can thereby | ||
364 | * be corrupted. We do check that the TID was in use, for sanity. | ||
365 | * We always use our saved copy of the address, not the address that | ||
366 | * they pass in to us. | ||
367 | */ | ||
368 | |||
369 | static int ipath_tid_free(struct ipath_portdata *pd, | ||
370 | const struct ipath_tid_info *ti) | ||
371 | { | ||
372 | int ret = 0; | ||
373 | u32 tid, porttid, cnt, limit, tidcnt; | ||
374 | struct ipath_devdata *dd = pd->port_dd; | ||
375 | u64 __iomem *tidbase; | ||
376 | unsigned long tidmap[8]; | ||
377 | |||
378 | if (!dd->ipath_pageshadow) { | ||
379 | ret = -ENOMEM; | ||
380 | goto done; | ||
381 | } | ||
382 | |||
383 | if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap, | ||
384 | sizeof tidmap)) { | ||
385 | ret = -EFAULT; | ||
386 | goto done; | ||
387 | } | ||
388 | |||
389 | porttid = pd->port_port * dd->ipath_rcvtidcnt; | ||
390 | tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + | ||
391 | dd->ipath_rcvtidbase + | ||
392 | porttid * sizeof(*tidbase)); | ||
393 | |||
394 | tidcnt = dd->ipath_rcvtidcnt; | ||
395 | limit = sizeof(tidmap) * BITS_PER_BYTE; | ||
396 | if (limit > tidcnt) | ||
397 | /* just in case size changes in future */ | ||
398 | limit = tidcnt; | ||
399 | tid = find_first_bit(tidmap, limit); | ||
400 | ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) " | ||
401 | "set is %d, porttid %u\n", pd->port_port, ti->tidcnt, | ||
402 | limit, tid, porttid); | ||
403 | for (cnt = 0; tid < limit; tid++) { | ||
404 | /* | ||
405 | * small optimization; if we detect a run of 3 or so without | ||
406 | * any set, use find_first_bit again. That's mainly to | ||
407 | * accelerate the case where we wrapped, so we have some at | ||
408 | * the beginning, and some at the end, and a big gap | ||
409 | * in the middle. | ||
410 | */ | ||
411 | if (!test_bit(tid, tidmap)) | ||
412 | continue; | ||
413 | cnt++; | ||
414 | if (dd->ipath_pageshadow[porttid + tid]) { | ||
415 | ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n", | ||
416 | pd->port_pid, tid); | ||
417 | dd->ipath_f_put_tid(dd, &tidbase[tid], 1, | ||
418 | dd->ipath_tidinvalid); | ||
419 | ipath_release_user_pages( | ||
420 | &dd->ipath_pageshadow[porttid + tid], 1); | ||
421 | dd->ipath_pageshadow[porttid + tid] = NULL; | ||
422 | ipath_stats.sps_pageunlocks++; | ||
423 | } else | ||
424 | ipath_dbg("Unused tid %u, ignoring\n", tid); | ||
425 | } | ||
426 | if (cnt != ti->tidcnt) | ||
427 | ipath_dbg("passed in tidcnt %d, only %d bits set in map\n", | ||
428 | ti->tidcnt, cnt); | ||
429 | done: | ||
430 | if (ret) | ||
431 | ipath_dbg("Failed to unmap %u TID pages, failing with %d\n", | ||
432 | ti->tidcnt, -ret); | ||
433 | return ret; | ||
434 | } | ||
435 | |||
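Both this free path and the error-cleanup path in ipath_tid_update() bound their scan of the user-supplied bitmap the same way; as a small illustrative sketch (standard CHAR_BIT in place of the kernel's BITS_PER_BYTE, and tidmap assumed to be 8 unsigned longs as above):

    #include <limits.h>

    static unsigned int tidmap_scan_limit(unsigned int tidcnt)
    {
            /* size of the 8-word tidmap in bits, clamped to the number of
             * TIDs the chip actually implements */
            unsigned int limit = 8 * sizeof(unsigned long) * CHAR_BIT;

            return limit > tidcnt ? tidcnt : limit;
    }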
436 | /** | ||
437 | * ipath_set_part_key - set a partition key | ||
438 | * @pd: the port | ||
439 | * @key: the key | ||
440 | * | ||
441 | * We can have up to 4 active at a time (other than the default, which is | ||
442 | * always allowed). This is somewhat tricky, since multiple ports may set | ||
443 | * the same key, so we reference count them, and clean up at exit. All 4 | ||
444 | * partition keys are packed into a single infinipath register. It's an | ||
445 | * error for a process to set the same pkey multiple times. We provide no | ||
446 | * mechanism to de-allocate a pkey at this time; we may eventually need to | ||
447 | * do that. I use atomic operations and no locking, and make only a | ||
448 | * single pass through what's available. This should be more than | ||
449 | * adequate for some time. I'll think about spinlocks or the like if and | ||
450 | * when it becomes necessary. | ||
451 | */ | ||
452 | static int ipath_set_part_key(struct ipath_portdata *pd, u16 key) | ||
453 | { | ||
454 | struct ipath_devdata *dd = pd->port_dd; | ||
455 | int i, any = 0, pidx = -1; | ||
456 | u16 lkey = key & 0x7FFF; | ||
457 | int ret; | ||
458 | |||
459 | if (lkey == (IPS_DEFAULT_P_KEY & 0x7FFF)) { | ||
460 | /* nothing to do; this key always valid */ | ||
461 | ret = 0; | ||
462 | goto bail; | ||
463 | } | ||
464 | |||
465 | ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys " | ||
466 | "%hx:%x %hx:%x %hx:%x %hx:%x\n", | ||
467 | pd->port_port, key, dd->ipath_pkeys[0], | ||
468 | atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1], | ||
469 | atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2], | ||
470 | atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3], | ||
471 | atomic_read(&dd->ipath_pkeyrefs[3])); | ||
472 | |||
473 | if (!lkey) { | ||
474 | ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n", | ||
475 | pd->port_port); | ||
476 | ret = -EINVAL; | ||
477 | goto bail; | ||
478 | } | ||
479 | |||
480 | /* | ||
481 | * Set the full membership bit, because it has to be | ||
482 | * set in the register or the packet, and it seems | ||
483 | * cleaner to set in the register than to force all | ||
484 | * callers to set it. (see bug 4331) | ||
485 | */ | ||
486 | key |= 0x8000; | ||
487 | |||
488 | for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { | ||
489 | if (!pd->port_pkeys[i] && pidx == -1) | ||
490 | pidx = i; | ||
491 | if (pd->port_pkeys[i] == key) { | ||
492 | ipath_cdbg(VERBOSE, "p%u tries to set same pkey " | ||
493 | "(%x) more than once\n", | ||
494 | pd->port_port, key); | ||
495 | ret = -EEXIST; | ||
496 | goto bail; | ||
497 | } | ||
498 | } | ||
499 | if (pidx == -1) { | ||
500 | ipath_dbg("All pkeys for port %u already in use, " | ||
501 | "can't set %x\n", pd->port_port, key); | ||
502 | ret = -EBUSY; | ||
503 | goto bail; | ||
504 | } | ||
505 | for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { | ||
506 | if (!dd->ipath_pkeys[i]) { | ||
507 | any++; | ||
508 | continue; | ||
509 | } | ||
510 | if (dd->ipath_pkeys[i] == key) { | ||
511 | atomic_t *pkrefs = &dd->ipath_pkeyrefs[i]; | ||
512 | |||
513 | if (atomic_inc_return(pkrefs) > 1) { | ||
514 | pd->port_pkeys[pidx] = key; | ||
515 | ipath_cdbg(VERBOSE, "p%u set key %x " | ||
516 | "matches #%d, count now %d\n", | ||
517 | pd->port_port, key, i, | ||
518 | atomic_read(pkrefs)); | ||
519 | ret = 0; | ||
520 | goto bail; | ||
521 | } else { | ||
522 | /* | ||
523 | * lost race, decrement count, catch below | ||
524 | */ | ||
525 | atomic_dec(pkrefs); | ||
526 | ipath_cdbg(VERBOSE, "Lost race, count was " | ||
527 | "0, after dec, it's %d\n", | ||
528 | atomic_read(pkrefs)); | ||
529 | any++; | ||
530 | } | ||
531 | } | ||
532 | if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) { | ||
533 | /* | ||
534 | * It makes no sense to have both the limited and | ||
535 | * full membership PKEY set at the same time since | ||
536 | * the unlimited one will disable the limited one. | ||
537 | */ | ||
538 | ret = -EEXIST; | ||
539 | goto bail; | ||
540 | } | ||
541 | } | ||
542 | if (!any) { | ||
543 | ipath_dbg("port %u, all pkeys already in use, " | ||
544 | "can't set %x\n", pd->port_port, key); | ||
545 | ret = -EBUSY; | ||
546 | goto bail; | ||
547 | } | ||
548 | for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { | ||
549 | if (!dd->ipath_pkeys[i] && | ||
550 | atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) { | ||
551 | u64 pkey; | ||
552 | |||
553 | /* for ipathstats, etc. */ | ||
554 | ipath_stats.sps_pkeys[i] = lkey; | ||
555 | pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key; | ||
556 | pkey = | ||
557 | (u64) dd->ipath_pkeys[0] | | ||
558 | ((u64) dd->ipath_pkeys[1] << 16) | | ||
559 | ((u64) dd->ipath_pkeys[2] << 32) | | ||
560 | ((u64) dd->ipath_pkeys[3] << 48); | ||
561 | ipath_cdbg(PROC, "p%u set key %x in #%d, " | ||
562 | "portidx %d, new pkey reg %llx\n", | ||
563 | pd->port_port, key, i, pidx, | ||
564 | (unsigned long long) pkey); | ||
565 | ipath_write_kreg( | ||
566 | dd, dd->ipath_kregs->kr_partitionkey, pkey); | ||
567 | |||
568 | ret = 0; | ||
569 | goto bail; | ||
570 | } | ||
571 | } | ||
572 | ipath_dbg("port %u, all pkeys already in use 2nd pass, " | ||
573 | "can't set %x\n", pd->port_port, key); | ||
574 | ret = -EBUSY; | ||
575 | |||
576 | bail: | ||
577 | return ret; | ||
578 | } | ||
579 | |||
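The packing of the four 16-bit partition keys into the single 64-bit kr_partitionkey register, done inline above and again in ipath_clean_part_key() below, amounts to the following illustrative helper (a sketch, not part of the patch; note the key already has the 0x8000 full-membership bit set before it is stored):

    #include <stdint.h>

    static uint64_t pack_pkeys(const uint16_t pkeys[4])
    {
            return (uint64_t)pkeys[0] |
                   ((uint64_t)pkeys[1] << 16) |
                   ((uint64_t)pkeys[2] << 32) |
                   ((uint64_t)pkeys[3] << 48);
    }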
580 | /** | ||
581 | * ipath_manage_rcvq - manage a port's receive queue | ||
582 | * @pd: the port | ||
583 | * @start_stop: action to carry out | ||
584 | * | ||
585 | * start_stop == 0 disables receive on the port, for use in queue | ||
586 | * overflow conditions. start_stop == 1 re-enables, to be used to | ||
587 | * re-init the software copy of the head register. | ||
588 | */ | ||
589 | static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop) | ||
590 | { | ||
591 | struct ipath_devdata *dd = pd->port_dd; | ||
592 | u64 tval; | ||
593 | |||
594 | ipath_cdbg(PROC, "%sabling rcv for unit %u port %u\n", | ||
595 | start_stop ? "en" : "dis", dd->ipath_unit, | ||
596 | pd->port_port); | ||
597 | /* atomically set or clear the receive enable bit for this port. */ | ||
598 | if (start_stop) { | ||
599 | /* | ||
600 | * On enable, force in-memory copy of the tail register to | ||
601 | * 0, so that protocol code doesn't have to worry about | ||
602 | * whether or not the chip has yet updated the in-memory | ||
603 | * copy or not on return from the system call. The chip | ||
604 | * always resets its tail register back to 0 on a | ||
605 | * transition from disabled to enabled. This could cause a | ||
606 | * problem if software was broken, and did the enable w/o | ||
607 | * the disable, but eventually the in-memory copy will be | ||
608 | * updated and correct itself, even in the face of software | ||
609 | * bugs. | ||
610 | */ | ||
611 | *pd->port_rcvhdrtail_kvaddr = 0; | ||
612 | set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, | ||
613 | &dd->ipath_rcvctrl); | ||
614 | } else | ||
615 | clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, | ||
616 | &dd->ipath_rcvctrl); | ||
617 | ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, | ||
618 | dd->ipath_rcvctrl); | ||
619 | /* now be sure chip saw it before we return */ | ||
620 | tval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); | ||
621 | if (start_stop) { | ||
622 | /* | ||
623 | * And try to be sure that tail reg update has happened too. | ||
624 | * This should in theory interlock with the RXE changes to | ||
625 | * the tail register. Don't assign it to the tail register | ||
626 | * in memory copy, since we could overwrite an update by the | ||
627 | * chip if we did. | ||
628 | */ | ||
629 | tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); | ||
630 | } | ||
631 | /* always; new head should be equal to new tail; see above */ | ||
632 | return 0; | ||
633 | } | ||
634 | |||
635 | static void ipath_clean_part_key(struct ipath_portdata *pd, | ||
636 | struct ipath_devdata *dd) | ||
637 | { | ||
638 | int i, j, pchanged = 0; | ||
639 | u64 oldpkey; | ||
640 | |||
641 | /* for debugging only */ | ||
642 | oldpkey = (u64) dd->ipath_pkeys[0] | | ||
643 | ((u64) dd->ipath_pkeys[1] << 16) | | ||
644 | ((u64) dd->ipath_pkeys[2] << 32) | | ||
645 | ((u64) dd->ipath_pkeys[3] << 48); | ||
646 | |||
647 | for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { | ||
648 | if (!pd->port_pkeys[i]) | ||
649 | continue; | ||
650 | ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i, | ||
651 | pd->port_pkeys[i]); | ||
652 | for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) { | ||
653 | /* check for match independent of the global bit */ | ||
654 | if ((dd->ipath_pkeys[j] & 0x7fff) != | ||
655 | (pd->port_pkeys[i] & 0x7fff)) | ||
656 | continue; | ||
657 | if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) { | ||
658 | ipath_cdbg(VERBOSE, "p%u clear key " | ||
659 | "%x matches #%d\n", | ||
660 | pd->port_port, | ||
661 | pd->port_pkeys[i], j); | ||
662 | ipath_stats.sps_pkeys[j] = | ||
663 | dd->ipath_pkeys[j] = 0; | ||
664 | pchanged++; | ||
665 | } | ||
666 | else | ||
667 | ipath_cdbg(VERBOSE, "p%u key %x matches #%d, " | ||
668 | "but ref still %d\n", pd->port_port, | ||
669 | pd->port_pkeys[i], j, | ||
670 | atomic_read(&dd->ipath_pkeyrefs[j])); | ||
671 | break; | ||
672 | } | ||
673 | pd->port_pkeys[i] = 0; | ||
674 | } | ||
675 | if (pchanged) { | ||
676 | u64 pkey = (u64) dd->ipath_pkeys[0] | | ||
677 | ((u64) dd->ipath_pkeys[1] << 16) | | ||
678 | ((u64) dd->ipath_pkeys[2] << 32) | | ||
679 | ((u64) dd->ipath_pkeys[3] << 48); | ||
680 | ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, " | ||
681 | "new pkey reg %llx\n", pd->port_port, | ||
682 | (unsigned long long) oldpkey, | ||
683 | (unsigned long long) pkey); | ||
684 | ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, | ||
685 | pkey); | ||
686 | } | ||
687 | } | ||
688 | |||
689 | /** | ||
690 | * ipath_create_user_egr - allocate eager TID buffers | ||
691 | * @pd: the port to allocate TID buffers for | ||
692 | * | ||
693 | * This routine is now quite different for user and kernel, because | ||
694 | * the kernel uses skb's for the accelerated network performance. | ||
695 | * This is the user port version. | ||
696 | * | ||
697 | * Allocate the eager TID buffers and program them into infinipath. | ||
698 | * They are no longer completely contiguous; we do multiple allocation | ||
699 | * calls. | ||
700 | */ | ||
701 | static int ipath_create_user_egr(struct ipath_portdata *pd) | ||
702 | { | ||
703 | struct ipath_devdata *dd = pd->port_dd; | ||
704 | unsigned e, egrcnt, alloced, egrperchunk, chunk, egrsize, egroff; | ||
705 | size_t size; | ||
706 | int ret; | ||
707 | |||
708 | egrcnt = dd->ipath_rcvegrcnt; | ||
709 | /* TID number offset for this port */ | ||
710 | egroff = pd->port_port * egrcnt; | ||
711 | egrsize = dd->ipath_rcvegrbufsize; | ||
712 | ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid " | ||
713 | "offset %x, egrsize %u\n", egrcnt, egroff, egrsize); | ||
714 | |||
715 | /* | ||
716 | * to avoid wasting a lot of memory, we allocate 32KB chunks of | ||
717 | * physically contiguous memory, advance through it until used up | ||
718 | * and then allocate more. Of course, we need memory to store those | ||
719 | * extra pointers, now. Started out with 256KB, but under heavy | ||
720 | * memory pressure (creating large files and then copying them over | ||
721 | * NFS while doing lots of MPI jobs), we hit some allocation | ||
722 | * failures, even though we can sleep... (2.6.10). Still get | ||
723 | * failures at 64K. 32K is the lowest we can go without wasting | ||
724 | * more memory again. It seems likely that the coalescing in | ||
725 | * free_pages, etc. still has issues (as it has had previously | ||
726 | * during 2.6.x development). | ||
727 | */ | ||
728 | size = 0x8000; | ||
729 | alloced = ALIGN(egrsize * egrcnt, size); | ||
730 | egrperchunk = size / egrsize; | ||
731 | chunk = (egrcnt + egrperchunk - 1) / egrperchunk; | ||
732 | pd->port_rcvegrbuf_chunks = chunk; | ||
733 | pd->port_rcvegrbufs_perchunk = egrperchunk; | ||
734 | pd->port_rcvegrbuf_size = size; | ||
735 | pd->port_rcvegrbuf = vmalloc(chunk * sizeof(pd->port_rcvegrbuf[0])); | ||
736 | if (!pd->port_rcvegrbuf) { | ||
737 | ret = -ENOMEM; | ||
738 | goto bail; | ||
739 | } | ||
740 | pd->port_rcvegrbuf_phys = | ||
741 | vmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0])); | ||
742 | if (!pd->port_rcvegrbuf_phys) { | ||
743 | ret = -ENOMEM; | ||
744 | goto bail_rcvegrbuf; | ||
745 | } | ||
746 | for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) { | ||
747 | /* | ||
748 | * GFP_USER, but without GFP_FS, so buffer cache can be | ||
749 | * coalesced (we hope); otherwise, even at order 4, | ||
750 | * heavy filesystem activity makes these fail | ||
751 | */ | ||
752 | gfp_t gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; | ||
753 | |||
754 | pd->port_rcvegrbuf[e] = dma_alloc_coherent( | ||
755 | &dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e], | ||
756 | gfp_flags); | ||
757 | |||
758 | if (!pd->port_rcvegrbuf[e]) { | ||
759 | ret = -ENOMEM; | ||
760 | goto bail_rcvegrbuf_phys; | ||
761 | } | ||
762 | } | ||
763 | |||
764 | pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0]; | ||
765 | |||
766 | for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) { | ||
767 | dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk]; | ||
768 | unsigned i; | ||
769 | |||
770 | for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) { | ||
771 | dd->ipath_f_put_tid(dd, e + egroff + | ||
772 | (u64 __iomem *) | ||
773 | ((char __iomem *) | ||
774 | dd->ipath_kregbase + | ||
775 | dd->ipath_rcvegrbase), 0, pa); | ||
776 | pa += egrsize; | ||
777 | } | ||
778 | cond_resched(); /* don't hog the cpu */ | ||
779 | } | ||
780 | |||
781 | ret = 0; | ||
782 | goto bail; | ||
783 | |||
784 | bail_rcvegrbuf_phys: | ||
785 | for (e = 0; e < pd->port_rcvegrbuf_chunks && | ||
786 | pd->port_rcvegrbuf[e]; e++) | ||
787 | dma_free_coherent(&dd->pcidev->dev, size, | ||
788 | pd->port_rcvegrbuf[e], | ||
789 | pd->port_rcvegrbuf_phys[e]); | ||
790 | |||
791 | vfree(pd->port_rcvegrbuf_phys); | ||
792 | pd->port_rcvegrbuf_phys = NULL; | ||
793 | bail_rcvegrbuf: | ||
794 | vfree(pd->port_rcvegrbuf); | ||
795 | pd->port_rcvegrbuf = NULL; | ||
796 | bail: | ||
797 | return ret; | ||
798 | } | ||
799 | |||
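A worked example of the chunking arithmetic in ipath_create_user_egr(): with 32KB chunks, the number of eager buffers per chunk and the number of chunks fall out directly. The eager buffer size and count below are illustrative assumptions, not values taken from this patch:

    #include <stdio.h>

    int main(void)
    {
            unsigned int size = 0x8000;                /* 32KB DMA chunk        */
            unsigned int egrsize = 2048;               /* assumed rcvegrbufsize */
            unsigned int egrcnt = 2048;                /* assumed rcvegrcnt     */
            unsigned int egrperchunk = size / egrsize; /* 16 buffers per chunk  */
            unsigned int chunk = (egrcnt + egrperchunk - 1) / egrperchunk;

            printf("%u chunks of %u buffers each\n", chunk, egrperchunk); /* 128 x 16 */
            return 0;
    }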
800 | static int ipath_do_user_init(struct ipath_portdata *pd, | ||
801 | const struct ipath_user_info *uinfo) | ||
802 | { | ||
803 | int ret = 0; | ||
804 | struct ipath_devdata *dd = pd->port_dd; | ||
805 | u64 physaddr, uaddr, off, atmp; | ||
806 | struct page *pagep; | ||
807 | u32 head32; | ||
808 | u64 head; | ||
809 | |||
810 | /* for now, if major version is different, bail */ | ||
811 | if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) { | ||
812 | dev_info(&dd->pcidev->dev, | ||
813 | "User major version %d not same as driver " | ||
814 | "major %d\n", uinfo->spu_userversion >> 16, | ||
815 | IPATH_USER_SWMAJOR); | ||
816 | ret = -ENODEV; | ||
817 | goto done; | ||
818 | } | ||
819 | |||
820 | if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) | ||
821 | ipath_dbg("User minor version %d not same as driver " | ||
822 | "minor %d\n", uinfo->spu_userversion & 0xffff, | ||
823 | IPATH_USER_SWMINOR); | ||
824 | |||
825 | if (uinfo->spu_rcvhdrsize) { | ||
826 | ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize); | ||
827 | if (ret) | ||
828 | goto done; | ||
829 | } | ||
830 | |||
831 | /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ | ||
832 | |||
833 | /* set up for the rcvhdr Q tail register writeback to user memory */ | ||
834 | if (!uinfo->spu_rcvhdraddr || | ||
835 | !access_ok(VERIFY_WRITE, (u64 __user *) (unsigned long) | ||
836 | uinfo->spu_rcvhdraddr, sizeof(u64))) { | ||
837 | ipath_dbg("Port %d rcvhdrtail addr %llx not valid\n", | ||
838 | pd->port_port, | ||
839 | (unsigned long long) uinfo->spu_rcvhdraddr); | ||
840 | ret = -EINVAL; | ||
841 | goto done; | ||
842 | } | ||
843 | |||
844 | off = offset_in_page(uinfo->spu_rcvhdraddr); | ||
845 | uaddr = PAGE_MASK & (unsigned long) uinfo->spu_rcvhdraddr; | ||
846 | ret = ipath_get_user_pages_nocopy(uaddr, &pagep); | ||
847 | if (ret) { | ||
848 | dev_info(&dd->pcidev->dev, "Failed to lookup and lock " | ||
849 | "address %llx for rcvhdrtail: errno %d\n", | ||
850 | (unsigned long long) uinfo->spu_rcvhdraddr, -ret); | ||
851 | goto done; | ||
852 | } | ||
853 | ipath_stats.sps_pagelocks++; | ||
854 | pd->port_rcvhdrtail_uaddr = uaddr; | ||
855 | pd->port_rcvhdrtail_pagep = pagep; | ||
856 | pd->port_rcvhdrtail_kvaddr = | ||
857 | page_address(pagep); | ||
858 | pd->port_rcvhdrtail_kvaddr += off; | ||
859 | physaddr = page_to_phys(pagep) + off; | ||
860 | ipath_cdbg(VERBOSE, "port %d user addr %llx hdrtailaddr, %llx " | ||
861 | "physical (off=%llx)\n", | ||
862 | pd->port_port, | ||
863 | (unsigned long long) uinfo->spu_rcvhdraddr, | ||
864 | (unsigned long long) physaddr, (unsigned long long) off); | ||
865 | ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr, | ||
866 | pd->port_port, physaddr); | ||
867 | atmp = ipath_read_kreg64_port(dd, | ||
868 | dd->ipath_kregs->kr_rcvhdrtailaddr, | ||
869 | pd->port_port); | ||
870 | if (physaddr != atmp) { | ||
871 | ipath_dev_err(dd, | ||
872 | "Catastrophic software error, " | ||
873 | "RcvHdrTailAddr%u written as %llx, " | ||
874 | "read back as %llx\n", pd->port_port, | ||
875 | (unsigned long long) physaddr, | ||
876 | (unsigned long long) atmp); | ||
877 | ret = -EINVAL; | ||
878 | goto done; | ||
879 | } | ||
880 | |||
881 | /* for right now, kernel piobufs are at end, so port 1 is at 0 */ | ||
882 | pd->port_piobufs = dd->ipath_piobufbase + | ||
883 | dd->ipath_pbufsport * (pd->port_port - | ||
884 | 1) * dd->ipath_palign; | ||
885 | ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n", | ||
886 | pd->port_port, pd->port_piobufs); | ||
887 | |||
888 | /* | ||
889 | * Now allocate the rcvhdr Q and eager TIDs; skip the TID | ||
890 | * array for the time being. If pd->port_port > chip-supported, | ||
891 | * we will someday need extra handling here, e.g. by handling overflow | ||
892 | * through port 0. | ||
893 | */ | ||
894 | ret = ipath_create_rcvhdrq(dd, pd); | ||
895 | if (!ret) | ||
896 | ret = ipath_create_user_egr(pd); | ||
897 | if (ret) | ||
898 | goto done; | ||
899 | /* enable receives now */ | ||
900 | /* atomically set enable bit for this port */ | ||
901 | set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, | ||
902 | &dd->ipath_rcvctrl); | ||
903 | |||
904 | /* | ||
905 | * set the head registers for this port to the current values | ||
906 | * of the tail pointers, since we don't know if they were | ||
907 | * updated on last use of the port. | ||
908 | */ | ||
909 | head32 = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); | ||
910 | head = (u64) head32; | ||
911 | ipath_write_ureg(dd, ur_rcvhdrhead, head, pd->port_port); | ||
912 | head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); | ||
913 | ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); | ||
914 | dd->ipath_lastegrheads[pd->port_port] = -1; | ||
915 | dd->ipath_lastrcvhdrqtails[pd->port_port] = -1; | ||
916 | ipath_cdbg(VERBOSE, "Wrote port%d head %llx, egrhead %x from " | ||
917 | "tail regs\n", pd->port_port, | ||
918 | (unsigned long long) head, head32); | ||
919 | pd->port_tidcursor = 0; /* start at beginning after open */ | ||
920 | /* | ||
921 | * now enable the port; the tail registers will be written to memory | ||
922 | * by the chip as soon as it sees the write to | ||
923 | * dd->ipath_kregs->kr_rcvctrl. The update only happens on | ||
924 | * transition from 0 to 1, so clear it first, then set it as part of | ||
925 | * enabling the port. This will (very briefly) affect any other | ||
926 | * open ports, but it shouldn't be long enough to be an issue. | ||
927 | */ | ||
928 | ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, | ||
929 | dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); | ||
930 | ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, | ||
931 | dd->ipath_rcvctrl); | ||
932 | |||
933 | done: | ||
934 | return ret; | ||
935 | } | ||
936 | |||
937 | static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd, | ||
938 | u64 ureg) | ||
939 | { | ||
940 | unsigned long phys; | ||
941 | int ret; | ||
942 | |||
943 | /* it's the real hardware, so io_remap works */ | ||
944 | |||
945 | if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { | ||
946 | dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen " | ||
947 | "%lx > PAGE\n", vma->vm_end - vma->vm_start); | ||
948 | ret = -EFAULT; | ||
949 | } else { | ||
950 | phys = dd->ipath_physaddr + ureg; | ||
951 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); | ||
952 | |||
953 | vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; | ||
954 | ret = io_remap_pfn_range(vma, vma->vm_start, | ||
955 | phys >> PAGE_SHIFT, | ||
956 | vma->vm_end - vma->vm_start, | ||
957 | vma->vm_page_prot); | ||
958 | } | ||
959 | return ret; | ||
960 | } | ||
961 | |||
962 | static int mmap_piobufs(struct vm_area_struct *vma, | ||
963 | struct ipath_devdata *dd, | ||
964 | struct ipath_portdata *pd) | ||
965 | { | ||
966 | unsigned long phys; | ||
967 | int ret; | ||
968 | |||
969 | /* | ||
970 | * When we map the PIO buffers, we want to map them as writeonly, no | ||
971 | * read possible. | ||
972 | */ | ||
973 | |||
974 | if ((vma->vm_end - vma->vm_start) > | ||
975 | (dd->ipath_pbufsport * dd->ipath_palign)) { | ||
976 | dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: " | ||
977 | "reqlen %lx > PAGE\n", | ||
978 | vma->vm_end - vma->vm_start); | ||
979 | ret = -EFAULT; | ||
980 | goto bail; | ||
981 | } | ||
982 | |||
983 | phys = dd->ipath_physaddr + pd->port_piobufs; | ||
984 | /* | ||
985 | * Do *NOT* mark this as non-cached (PWT bit), or we don't get the | ||
986 | * write combining behavior we want on the PIO buffers! | ||
987 | * vma->vm_page_prot = | ||
988 | * pgprot_noncached(vma->vm_page_prot); | ||
989 | */ | ||
990 | |||
991 | if (vma->vm_flags & VM_READ) { | ||
992 | dev_info(&dd->pcidev->dev, | ||
993 | "Can't map piobufs as readable (flags=%lx)\n", | ||
994 | vma->vm_flags); | ||
995 | ret = -EPERM; | ||
996 | goto bail; | ||
997 | } | ||
998 | |||
999 | /* don't allow them to later change to readable with mprotect */ | ||
1000 | |||
1001 | vma->vm_flags &= ~VM_MAYWRITE; | ||
1002 | vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; | ||
1003 | |||
1004 | ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT, | ||
1005 | vma->vm_end - vma->vm_start, | ||
1006 | vma->vm_page_prot); | ||
1007 | bail: | ||
1008 | return ret; | ||
1009 | } | ||
1010 | |||
1011 | static int mmap_rcvegrbufs(struct vm_area_struct *vma, | ||
1012 | struct ipath_portdata *pd) | ||
1013 | { | ||
1014 | struct ipath_devdata *dd = pd->port_dd; | ||
1015 | unsigned long start, size; | ||
1016 | size_t total_size, i; | ||
1017 | dma_addr_t *phys; | ||
1018 | int ret; | ||
1019 | |||
1020 | if (!pd->port_rcvegrbuf) { | ||
1021 | ret = -EFAULT; | ||
1022 | goto bail; | ||
1023 | } | ||
1024 | |||
1025 | size = pd->port_rcvegrbuf_size; | ||
1026 | total_size = pd->port_rcvegrbuf_chunks * size; | ||
1027 | if ((vma->vm_end - vma->vm_start) > total_size) { | ||
1028 | dev_info(&dd->pcidev->dev, "FAIL on egr bufs: " | ||
1029 | "reqlen %lx > actual %lx\n", | ||
1030 | vma->vm_end - vma->vm_start, | ||
1031 | (unsigned long) total_size); | ||
1032 | ret = -EFAULT; | ||
1033 | goto bail; | ||
1034 | } | ||
1035 | |||
1036 | if (vma->vm_flags & VM_WRITE) { | ||
1037 | dev_info(&dd->pcidev->dev, "Can't map eager buffers as " | ||
1038 | "writable (flags=%lx)\n", vma->vm_flags); | ||
1039 | ret = -EPERM; | ||
1040 | goto bail; | ||
1041 | } | ||
1042 | |||
1043 | start = vma->vm_start; | ||
1044 | phys = pd->port_rcvegrbuf_phys; | ||
1045 | |||
1046 | /* don't allow them to later change to writeable with mprotect */ | ||
1047 | vma->vm_flags &= ~VM_MAYWRITE; | ||
1048 | |||
1049 | for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) { | ||
1050 | ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT, | ||
1051 | size, vma->vm_page_prot); | ||
1052 | if (ret < 0) | ||
1053 | goto bail; | ||
1054 | } | ||
1055 | ret = 0; | ||
1056 | |||
1057 | bail: | ||
1058 | return ret; | ||
1059 | } | ||
1060 | |||
1061 | static int mmap_rcvhdrq(struct vm_area_struct *vma, | ||
1062 | struct ipath_portdata *pd) | ||
1063 | { | ||
1064 | struct ipath_devdata *dd = pd->port_dd; | ||
1065 | size_t total_size; | ||
1066 | int ret; | ||
1067 | |||
1068 | /* | ||
1069 | * kmalloc'ed memory, physically contiguous; this is from | ||
1070 | * spi_rcvhdr_base; we allow the user to map it read-write so that | ||
1071 | * protocol code can write hdrq entries and directly poll | ||
1072 | * whether a hdrq entry has been written. | ||
1073 | */ | ||
1074 | total_size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * | ||
1075 | sizeof(u32), PAGE_SIZE); | ||
1076 | if ((vma->vm_end - vma->vm_start) > total_size) { | ||
1077 | dev_info(&dd->pcidev->dev, | ||
1078 | "FAIL on rcvhdrq: reqlen %lx > actual %lx\n", | ||
1079 | vma->vm_end - vma->vm_start, | ||
1080 | (unsigned long) total_size); | ||
1081 | ret = -EFAULT; | ||
1082 | goto bail; | ||
1083 | } | ||
1084 | |||
1085 | ret = remap_pfn_range(vma, vma->vm_start, | ||
1086 | pd->port_rcvhdrq_phys >> PAGE_SHIFT, | ||
1087 | vma->vm_end - vma->vm_start, | ||
1088 | vma->vm_page_prot); | ||
1089 | bail: | ||
1090 | return ret; | ||
1091 | } | ||
1092 | |||
1093 | static int mmap_pioavailregs(struct vm_area_struct *vma, | ||
1094 | struct ipath_portdata *pd) | ||
1095 | { | ||
1096 | struct ipath_devdata *dd = pd->port_dd; | ||
1097 | int ret; | ||
1098 | |||
1099 | /* | ||
1100 | * when we map the PIO bufferavail registers, we want to map them as | ||
1101 | * readonly, no write possible. | ||
1102 | * | ||
1103 | * kmalloc'ed memory, physically contiguous, one page only, readonly | ||
1104 | */ | ||
1105 | |||
1106 | if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { | ||
1107 | dev_info(&dd->pcidev->dev, "FAIL on pioavailregs_dma: " | ||
1108 | "reqlen %lx > actual %lx\n", | ||
1109 | vma->vm_end - vma->vm_start, | ||
1110 | (unsigned long) PAGE_SIZE); | ||
1111 | ret = -EFAULT; | ||
1112 | goto bail; | ||
1113 | } | ||
1114 | |||
1115 | if (vma->vm_flags & VM_WRITE) { | ||
1116 | dev_info(&dd->pcidev->dev, | ||
1117 | "Can't map pioavailregs as writable (flags=%lx)\n", | ||
1118 | vma->vm_flags); | ||
1119 | ret = -EPERM; | ||
1120 | goto bail; | ||
1121 | } | ||
1122 | |||
1123 | /* don't allow them to later change with mprotect */ | ||
1124 | vma->vm_flags &= ~VM_MAYWRITE; | ||
1125 | |||
1126 | ret = remap_pfn_range(vma, vma->vm_start, | ||
1127 | dd->ipath_pioavailregs_phys >> PAGE_SHIFT, | ||
1128 | PAGE_SIZE, vma->vm_page_prot); | ||
1129 | bail: | ||
1130 | return ret; | ||
1131 | } | ||
1132 | |||
1133 | /** | ||
1134 | * ipath_mmap - mmap various structures into user space | ||
1135 | * @fp: the file pointer | ||
1136 | * @vma: the VM area | ||
1137 | * | ||
1138 | * We use this to have a shared buffer between the kernel and the user code | ||
1139 | * for the rcvhdr queue, egr buffers, and the per-port user regs and pio | ||
1140 | * buffers in the chip. We have the open and close entries so we can bump | ||
1141 | * the ref count and keep the driver from being unloaded while still mapped. | ||
1142 | */ | ||
1143 | static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) | ||
1144 | { | ||
1145 | struct ipath_portdata *pd; | ||
1146 | struct ipath_devdata *dd; | ||
1147 | u64 pgaddr, ureg; | ||
1148 | int ret; | ||
1149 | |||
1150 | pd = port_fp(fp); | ||
1151 | dd = pd->port_dd; | ||
1152 | /* | ||
1153 | * This is the ipath_do_user_init() code, mapping the shared buffers | ||
1154 | * into the user process. The address referred to by vm_pgoff is the | ||
1155 | * virtual, not physical, address; we only do one mmap for each | ||
1156 | * space mapped. | ||
1157 | */ | ||
1158 | pgaddr = vma->vm_pgoff << PAGE_SHIFT; | ||
1159 | |||
1160 | /* | ||
1161 | * note that ureg does *NOT* have the kregvirt as part of it, to be | ||
1162 | * sure that for 32 bit programs, we don't end up trying to map a | ||
1163 | * > 44-bit address. Has to match ipath_get_base_info() code that sets | ||
1164 | * __spi_uregbase | ||
1165 | */ | ||
1166 | |||
1167 | ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; | ||
1168 | |||
1169 | ipath_cdbg(MM, "ushare: pgaddr %llx vm_start=%lx, vmlen %lx\n", | ||
1170 | (unsigned long long) pgaddr, vma->vm_start, | ||
1171 | vma->vm_end - vma->vm_start); | ||
1172 | |||
1173 | if (pgaddr == ureg) | ||
1174 | ret = mmap_ureg(vma, dd, ureg); | ||
1175 | else if (pgaddr == pd->port_piobufs) | ||
1176 | ret = mmap_piobufs(vma, dd, pd); | ||
1177 | else if (pgaddr == (u64) pd->port_rcvegr_phys) | ||
1178 | ret = mmap_rcvegrbufs(vma, pd); | ||
1179 | else if (pgaddr == (u64) pd->port_rcvhdrq_phys) | ||
1180 | ret = mmap_rcvhdrq(vma, pd); | ||
1181 | else if (pgaddr == dd->ipath_pioavailregs_phys) | ||
1182 | ret = mmap_pioavailregs(vma, pd); | ||
1183 | else | ||
1184 | ret = -EINVAL; | ||
1185 | |||
1186 | vma->vm_private_data = NULL; | ||
1187 | |||
1188 | if (ret < 0) | ||
1189 | dev_info(&dd->pcidev->dev, | ||
1190 | "Failure %d on addr %lx, off %lx\n", | ||
1191 | -ret, vma->vm_start, vma->vm_pgoff); | ||
1192 | |||
1193 | return ret; | ||
1194 | } | ||
1195 | |||
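From user space, the addresses handed back at init time double as mmap offsets; ipath_mmap() recovers them as vm_pgoff << PAGE_SHIFT and dispatches on which region they name. A hypothetical user-side sketch for the PIO buffers (piobufs_addr is a stand-in for the value the driver reported; it is not a name from this patch):

    #include <sys/types.h>
    #include <sys/mman.h>

    static void *map_piobufs(int fd, unsigned long piobufs_addr, size_t len)
    {
            /* write-only and shared; mmap_piobufs() above rejects VM_READ mappings */
            return mmap(NULL, len, PROT_WRITE, MAP_SHARED, fd, (off_t)piobufs_addr);
    }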
1196 | static unsigned int ipath_poll(struct file *fp, | ||
1197 | struct poll_table_struct *pt) | ||
1198 | { | ||
1199 | struct ipath_portdata *pd; | ||
1200 | u32 head, tail; | ||
1201 | int bit; | ||
1202 | struct ipath_devdata *dd; | ||
1203 | |||
1204 | pd = port_fp(fp); | ||
1205 | dd = pd->port_dd; | ||
1206 | |||
1207 | bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT; | ||
1208 | set_bit(bit, &dd->ipath_rcvctrl); | ||
1209 | |||
1210 | /* | ||
1211 | * Before blocking, make sure that head is still == tail, | ||
1212 | * reading from the chip, so we can be sure the interrupt | ||
1213 | * enable has made it to the chip. If not equal, disable | ||
1214 | * interrupt again and return immediately. This avoids races, | ||
1215 | * and the overhead of the chip read doesn't matter much at | ||
1216 | * this point, since we are waiting for something anyway. | ||
1217 | */ | ||
1218 | |||
1219 | ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, | ||
1220 | dd->ipath_rcvctrl); | ||
1221 | |||
1222 | head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port); | ||
1223 | tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); | ||
1224 | |||
1225 | if (tail == head) { | ||
1226 | set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); | ||
1227 | poll_wait(fp, &pd->port_wait, pt); | ||
1228 | |||
1229 | if (test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) { | ||
1230 | /* timed out, no packets received */ | ||
1231 | clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); | ||
1232 | pd->port_rcvwait_to++; | ||
1233 | } | ||
1234 | } | ||
1235 | else { | ||
1236 | /* it's already happened; don't do wait_event overhead */ | ||
1237 | pd->port_rcvnowait++; | ||
1238 | } | ||
1239 | |||
1240 | clear_bit(bit, &dd->ipath_rcvctrl); | ||
1241 | ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, | ||
1242 | dd->ipath_rcvctrl); | ||
1243 | |||
1244 | return 0; | ||
1245 | } | ||
1246 | |||
1247 | static int try_alloc_port(struct ipath_devdata *dd, int port, | ||
1248 | struct file *fp) | ||
1249 | { | ||
1250 | int ret; | ||
1251 | |||
1252 | if (!dd->ipath_pd[port]) { | ||
1253 | void *p, *ptmp; | ||
1254 | |||
1255 | p = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL); | ||
1256 | |||
1257 | /* | ||
1258 | * Allocate memory for use in ipath_tid_update() just once | ||
1259 | * at open, not per call. Reduces cost of expected send | ||
1260 | * setup. | ||
1261 | */ | ||
1262 | ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) + | ||
1263 | dd->ipath_rcvtidcnt * sizeof(struct page **), | ||
1264 | GFP_KERNEL); | ||
1265 | if (!p || !ptmp) { | ||
1266 | ipath_dev_err(dd, "Unable to allocate portdata " | ||
1267 | "memory, failing open\n"); | ||
1268 | ret = -ENOMEM; | ||
1269 | kfree(p); | ||
1270 | kfree(ptmp); | ||
1271 | goto bail; | ||
1272 | } | ||
1273 | dd->ipath_pd[port] = p; | ||
1274 | dd->ipath_pd[port]->port_port = port; | ||
1275 | dd->ipath_pd[port]->port_dd = dd; | ||
1276 | dd->ipath_pd[port]->port_tid_pg_list = ptmp; | ||
1277 | init_waitqueue_head(&dd->ipath_pd[port]->port_wait); | ||
1278 | } | ||
1279 | if (!dd->ipath_pd[port]->port_cnt) { | ||
1280 | dd->ipath_pd[port]->port_cnt = 1; | ||
1281 | fp->private_data = (void *) dd->ipath_pd[port]; | ||
1282 | ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n", | ||
1283 | current->comm, current->pid, dd->ipath_unit, | ||
1284 | port); | ||
1285 | dd->ipath_pd[port]->port_pid = current->pid; | ||
1286 | strncpy(dd->ipath_pd[port]->port_comm, current->comm, | ||
1287 | sizeof(dd->ipath_pd[port]->port_comm)); | ||
1288 | ipath_stats.sps_ports++; | ||
1289 | ret = 0; | ||
1290 | goto bail; | ||
1291 | } | ||
1292 | ret = -EBUSY; | ||
1293 | |||
1294 | bail: | ||
1295 | return ret; | ||
1296 | } | ||
1297 | |||
1298 | static inline int usable(struct ipath_devdata *dd) | ||
1299 | { | ||
1300 | return dd && | ||
1301 | (dd->ipath_flags & IPATH_PRESENT) && | ||
1302 | dd->ipath_kregbase && | ||
1303 | dd->ipath_lid && | ||
1304 | !(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED | ||
1305 | | IPATH_LINKUNK)); | ||
1306 | } | ||
1307 | |||
1308 | static int find_free_port(int unit, struct file *fp) | ||
1309 | { | ||
1310 | struct ipath_devdata *dd = ipath_lookup(unit); | ||
1311 | int ret, i; | ||
1312 | |||
1313 | if (!dd) { | ||
1314 | ret = -ENODEV; | ||
1315 | goto bail; | ||
1316 | } | ||
1317 | |||
1318 | if (!usable(dd)) { | ||
1319 | ret = -ENETDOWN; | ||
1320 | goto bail; | ||
1321 | } | ||
1322 | |||
1323 | for (i = 0; i < dd->ipath_cfgports; i++) { | ||
1324 | ret = try_alloc_port(dd, i, fp); | ||
1325 | if (ret != -EBUSY) | ||
1326 | goto bail; | ||
1327 | } | ||
1328 | ret = -EBUSY; | ||
1329 | |||
1330 | bail: | ||
1331 | return ret; | ||
1332 | } | ||
1333 | |||
1334 | static int find_best_unit(struct file *fp) | ||
1335 | { | ||
1336 | int ret = 0, i, prefunit = -1, devmax; | ||
1337 | int maxofallports, npresent, nup; | ||
1338 | int ndev; | ||
1339 | |||
1340 | (void) ipath_count_units(&npresent, &nup, &maxofallports); | ||
1341 | |||
1342 | /* | ||
1343 | * This code is present to allow a knowledgeable person to | ||
1344 | * specify the layout of processes to processors before opening | ||
1345 | * this driver, and then we'll assign the process to the "closest" | ||
1346 | * HT-400 to that processor (we assume reasonable connectivity, | ||
1347 | * for now). This code assumes that if affinity has been set | ||
1348 | * before this point, at most one cpu is set; for now this | ||
1349 | * is reasonable. I check for both cpus_empty() and cpus_full(), | ||
1350 | * in case some kernel variant sets none of the bits when no | ||
1351 | * affinity is set. 2.6.11 and 12 kernels have all present | ||
1352 | * cpus set. Some day we'll have to fix it up further to handle | ||
1353 | * a cpu subset. This algorithm fails for two HT-400's connected | ||
1354 | * in tunnel fashion. Eventually this needs real topology | ||
1355 | * information. There may be some issues with dual core numbering | ||
1356 | * as well. This needs more work prior to release. | ||
1357 | */ | ||
1358 | if (!cpus_empty(current->cpus_allowed) && | ||
1359 | !cpus_full(current->cpus_allowed)) { | ||
1360 | int ncpus = num_online_cpus(), curcpu = -1; | ||
1361 | for (i = 0; i < ncpus; i++) | ||
1362 | if (cpu_isset(i, current->cpus_allowed)) { | ||
1363 | ipath_cdbg(PROC, "%s[%u] affinity set for " | ||
1364 | "cpu %d\n", current->comm, | ||
1365 | current->pid, i); | ||
1366 | curcpu = i; | ||
1367 | } | ||
1368 | if (curcpu != -1) { | ||
1369 | if (npresent) { | ||
1370 | prefunit = curcpu / (ncpus / npresent); | ||
1371 | ipath_dbg("%s[%u] %d chips, %d cpus, " | ||
1372 | "%d cpus/chip, select unit %d\n", | ||
1373 | current->comm, current->pid, | ||
1374 | npresent, ncpus, ncpus / npresent, | ||
1375 | prefunit); | ||
1376 | } | ||
1377 | } | ||
1378 | } | ||
1379 | |||
1380 | /* | ||
1381 | * user ports start at 1, kernel port is 0 | ||
1382 | * For now, we do round-robin access across all chips | ||
1383 | */ | ||
1384 | |||
1385 | if (prefunit != -1) | ||
1386 | devmax = prefunit + 1; | ||
1387 | else | ||
1388 | devmax = ipath_count_units(NULL, NULL, NULL); | ||
1389 | recheck: | ||
1390 | for (i = 1; i < maxofallports; i++) { | ||
1391 | for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax; | ||
1392 | ndev++) { | ||
1393 | struct ipath_devdata *dd = ipath_lookup(ndev); | ||
1394 | |||
1395 | if (!usable(dd)) | ||
1396 | continue; /* can't use this unit */ | ||
1397 | if (i >= dd->ipath_cfgports) | ||
1398 | /* | ||
1399 | * Maxed out on users of this unit. Try | ||
1400 | * next. | ||
1401 | */ | ||
1402 | continue; | ||
1403 | ret = try_alloc_port(dd, i, fp); | ||
1404 | if (!ret) | ||
1405 | goto done; | ||
1406 | } | ||
1407 | } | ||
1408 | |||
1409 | if (npresent) { | ||
1410 | if (nup == 0) { | ||
1411 | ret = -ENETDOWN; | ||
1412 | ipath_dbg("No ports available (none initialized " | ||
1413 | "and ready)\n"); | ||
1414 | } else { | ||
1415 | if (prefunit > 0) { | ||
1416 | /* if started above 0, retry from 0 */ | ||
1417 | ipath_cdbg(PROC, | ||
1418 | "%s[%u] no ports on prefunit " | ||
1419 | "%d, clear and re-check\n", | ||
1420 | current->comm, current->pid, | ||
1421 | prefunit); | ||
1422 | devmax = ipath_count_units(NULL, NULL, | ||
1423 | NULL); | ||
1424 | prefunit = -1; | ||
1425 | goto recheck; | ||
1426 | } | ||
1427 | ret = -EBUSY; | ||
1428 | ipath_dbg("No ports available\n"); | ||
1429 | } | ||
1430 | } else { | ||
1431 | ret = -ENXIO; | ||
1432 | ipath_dbg("No boards found\n"); | ||
1433 | } | ||
1434 | |||
1435 | done: | ||
1436 | return ret; | ||
1437 | } | ||
1438 | |||
1439 | static int ipath_open(struct inode *in, struct file *fp) | ||
1440 | { | ||
1441 | int ret, minor; | ||
1442 | |||
1443 | mutex_lock(&ipath_mutex); | ||
1444 | |||
1445 | minor = iminor(in); | ||
1446 | ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n", | ||
1447 | (long)in->i_rdev, minor); | ||
1448 | |||
1449 | if (minor) | ||
1450 | ret = find_free_port(minor - 1, fp); | ||
1451 | else | ||
1452 | ret = find_best_unit(fp); | ||
1453 | |||
1454 | mutex_unlock(&ipath_mutex); | ||
1455 | return ret; | ||
1456 | } | ||
1457 | |||
1458 | /** | ||
1459 | * unlock_expected_tids - unlock any expected TID entries the port still had in use | ||
1460 | * @pd: port | ||
1461 | * | ||
1462 | * We don't actually update the chip here, because we do a bulk update | ||
1463 | * below, using ipath_f_clear_tids. | ||
1464 | */ | ||
1465 | static void unlock_expected_tids(struct ipath_portdata *pd) | ||
1466 | { | ||
1467 | struct ipath_devdata *dd = pd->port_dd; | ||
1468 | int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt; | ||
1469 | int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt; | ||
1470 | |||
1471 | ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n", | ||
1472 | pd->port_port); | ||
1473 | for (i = port_tidbase; i < maxtid; i++) { | ||
1474 | if (!dd->ipath_pageshadow[i]) | ||
1475 | continue; | ||
1476 | |||
1477 | ipath_release_user_pages_on_close(&dd->ipath_pageshadow[i], | ||
1478 | 1); | ||
1479 | dd->ipath_pageshadow[i] = NULL; | ||
1480 | cnt++; | ||
1481 | ipath_stats.sps_pageunlocks++; | ||
1482 | } | ||
1483 | if (cnt) | ||
1484 | ipath_cdbg(VERBOSE, "Port %u locked %u expTID entries\n", | ||
1485 | pd->port_port, cnt); | ||
1486 | |||
1487 | if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks) | ||
1488 | ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n", | ||
1489 | (unsigned long long) ipath_stats.sps_pagelocks, | ||
1490 | (unsigned long long) | ||
1491 | ipath_stats.sps_pageunlocks); | ||
1492 | } | ||
1493 | |||
1494 | static int ipath_close(struct inode *in, struct file *fp) | ||
1495 | { | ||
1496 | int ret = 0; | ||
1497 | struct ipath_portdata *pd; | ||
1498 | struct ipath_devdata *dd; | ||
1499 | unsigned port; | ||
1500 | |||
1501 | ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n", | ||
1502 | (long)in->i_rdev, fp->private_data); | ||
1503 | |||
1504 | mutex_lock(&ipath_mutex); | ||
1505 | |||
1506 | pd = port_fp(fp); | ||
1507 | port = pd->port_port; | ||
1508 | fp->private_data = NULL; | ||
1509 | dd = pd->port_dd; | ||
1510 | |||
1511 | if (pd->port_hdrqfull) { | ||
1512 | ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors " | ||
1513 | "during run\n", pd->port_comm, pd->port_pid, | ||
1514 | pd->port_hdrqfull); | ||
1515 | pd->port_hdrqfull = 0; | ||
1516 | } | ||
1517 | |||
1518 | if (pd->port_rcvwait_to || pd->port_piowait_to | ||
1519 | || pd->port_rcvnowait || pd->port_pionowait) { | ||
1520 | ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeo; " | ||
1521 | "%u rcv %u, pio already\n", | ||
1522 | pd->port_port, pd->port_rcvwait_to, | ||
1523 | pd->port_piowait_to, pd->port_rcvnowait, | ||
1524 | pd->port_pionowait); | ||
1525 | pd->port_rcvwait_to = pd->port_piowait_to = | ||
1526 | pd->port_rcvnowait = pd->port_pionowait = 0; | ||
1527 | } | ||
1528 | if (pd->port_flag) { | ||
1529 | ipath_dbg("port %u port_flag still set to 0x%lx\n", | ||
1530 | pd->port_port, pd->port_flag); | ||
1531 | pd->port_flag = 0; | ||
1532 | } | ||
1533 | |||
1534 | if (dd->ipath_kregbase) { | ||
1535 | if (pd->port_rcvhdrtail_uaddr) { | ||
1536 | pd->port_rcvhdrtail_uaddr = 0; | ||
1537 | pd->port_rcvhdrtail_kvaddr = NULL; | ||
1538 | ipath_release_user_pages_on_close( | ||
1539 | &pd->port_rcvhdrtail_pagep, 1); | ||
1540 | pd->port_rcvhdrtail_pagep = NULL; | ||
1541 | ipath_stats.sps_pageunlocks++; | ||
1542 | } | ||
1543 | ipath_write_kreg_port( | ||
1544 | dd, dd->ipath_kregs->kr_rcvhdrtailaddr, | ||
1545 | port, 0ULL); | ||
1546 | ipath_write_kreg_port( | ||
1547 | dd, dd->ipath_kregs->kr_rcvhdraddr, | ||
1548 | pd->port_port, 0); | ||
1549 | |||
1550 | /* clean up the pkeys for this port user */ | ||
1551 | ipath_clean_part_key(pd, dd); | ||
1552 | |||
1553 | if (port < dd->ipath_cfgports) { | ||
1554 | int i = dd->ipath_pbufsport * (port - 1); | ||
1555 | ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport); | ||
1556 | |||
1557 | /* atomically clear receive enable port. */ | ||
1558 | clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + port, | ||
1559 | &dd->ipath_rcvctrl); | ||
1560 | ipath_write_kreg( | ||
1561 | dd, | ||
1562 | dd->ipath_kregs->kr_rcvctrl, | ||
1563 | dd->ipath_rcvctrl); | ||
1564 | |||
1565 | if (dd->ipath_pageshadow) | ||
1566 | unlock_expected_tids(pd); | ||
1567 | ipath_stats.sps_ports--; | ||
1568 | ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n", | ||
1569 | pd->port_comm, pd->port_pid, | ||
1570 | dd->ipath_unit, port); | ||
1571 | } | ||
1572 | } | ||
1573 | |||
1574 | pd->port_cnt = 0; | ||
1575 | pd->port_pid = 0; | ||
1576 | |||
1577 | dd->ipath_f_clear_tids(dd, pd->port_port); | ||
1578 | |||
1579 | ipath_free_pddata(dd, pd->port_port, 0); | ||
1580 | |||
1581 | mutex_unlock(&ipath_mutex); | ||
1582 | |||
1583 | return ret; | ||
1584 | } | ||
1585 | |||
1586 | static int ipath_port_info(struct ipath_portdata *pd, | ||
1587 | struct ipath_port_info __user *uinfo) | ||
1588 | { | ||
1589 | struct ipath_port_info info; | ||
1590 | int nup; | ||
1591 | int ret; | ||
1592 | |||
1593 | (void) ipath_count_units(NULL, &nup, NULL); | ||
1594 | info.num_active = nup; | ||
1595 | info.unit = pd->port_dd->ipath_unit; | ||
1596 | info.port = pd->port_port; | ||
1597 | |||
1598 | if (copy_to_user(uinfo, &info, sizeof(info))) { | ||
1599 | ret = -EFAULT; | ||
1600 | goto bail; | ||
1601 | } | ||
1602 | ret = 0; | ||
1603 | |||
1604 | bail: | ||
1605 | return ret; | ||
1606 | } | ||
1607 | |||
1608 | static ssize_t ipath_write(struct file *fp, const char __user *data, | ||
1609 | size_t count, loff_t *off) | ||
1610 | { | ||
1611 | const struct ipath_cmd __user *ucmd; | ||
1612 | struct ipath_portdata *pd; | ||
1613 | const void __user *src; | ||
1614 | size_t consumed, copy; | ||
1615 | struct ipath_cmd cmd; | ||
1616 | ssize_t ret = 0; | ||
1617 | void *dest; | ||
1618 | |||
1619 | if (count < sizeof(cmd.type)) { | ||
1620 | ret = -EINVAL; | ||
1621 | goto bail; | ||
1622 | } | ||
1623 | |||
1624 | ucmd = (const struct ipath_cmd __user *) data; | ||
1625 | |||
1626 | if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) { | ||
1627 | ret = -EFAULT; | ||
1628 | goto bail; | ||
1629 | } | ||
1630 | |||
1631 | consumed = sizeof(cmd.type); | ||
1632 | |||
1633 | switch (cmd.type) { | ||
1634 | case IPATH_CMD_USER_INIT: | ||
1635 | copy = sizeof(cmd.cmd.user_info); | ||
1636 | dest = &cmd.cmd.user_info; | ||
1637 | src = &ucmd->cmd.user_info; | ||
1638 | break; | ||
1639 | case IPATH_CMD_RECV_CTRL: | ||
1640 | copy = sizeof(cmd.cmd.recv_ctrl); | ||
1641 | dest = &cmd.cmd.recv_ctrl; | ||
1642 | src = &ucmd->cmd.recv_ctrl; | ||
1643 | break; | ||
1644 | case IPATH_CMD_PORT_INFO: | ||
1645 | copy = sizeof(cmd.cmd.port_info); | ||
1646 | dest = &cmd.cmd.port_info; | ||
1647 | src = &ucmd->cmd.port_info; | ||
1648 | break; | ||
1649 | case IPATH_CMD_TID_UPDATE: | ||
1650 | case IPATH_CMD_TID_FREE: | ||
1651 | copy = sizeof(cmd.cmd.tid_info); | ||
1652 | dest = &cmd.cmd.tid_info; | ||
1653 | src = &ucmd->cmd.tid_info; | ||
1654 | break; | ||
1655 | case IPATH_CMD_SET_PART_KEY: | ||
1656 | copy = sizeof(cmd.cmd.part_key); | ||
1657 | dest = &cmd.cmd.part_key; | ||
1658 | src = &ucmd->cmd.part_key; | ||
1659 | break; | ||
1660 | default: | ||
1661 | ret = -EINVAL; | ||
1662 | goto bail; | ||
1663 | } | ||
1664 | |||
1665 | if ((count - consumed) < copy) { | ||
1666 | ret = -EINVAL; | ||
1667 | goto bail; | ||
1668 | } | ||
1669 | |||
1670 | if (copy_from_user(dest, src, copy)) { | ||
1671 | ret = -EFAULT; | ||
1672 | goto bail; | ||
1673 | } | ||
1674 | |||
1675 | consumed += copy; | ||
1676 | pd = port_fp(fp); | ||
1677 | |||
1678 | switch (cmd.type) { | ||
1679 | case IPATH_CMD_USER_INIT: | ||
1680 | ret = ipath_do_user_init(pd, &cmd.cmd.user_info); | ||
1681 | if (ret < 0) | ||
1682 | goto bail; | ||
1683 | ret = ipath_get_base_info( | ||
1684 | pd, (void __user *) (unsigned long) | ||
1685 | cmd.cmd.user_info.spu_base_info, | ||
1686 | cmd.cmd.user_info.spu_base_info_size); | ||
1687 | break; | ||
1688 | case IPATH_CMD_RECV_CTRL: | ||
1689 | ret = ipath_manage_rcvq(pd, cmd.cmd.recv_ctrl); | ||
1690 | break; | ||
1691 | case IPATH_CMD_PORT_INFO: | ||
1692 | ret = ipath_port_info(pd, | ||
1693 | (struct ipath_port_info __user *) | ||
1694 | (unsigned long) cmd.cmd.port_info); | ||
1695 | break; | ||
1696 | case IPATH_CMD_TID_UPDATE: | ||
1697 | ret = ipath_tid_update(pd, &cmd.cmd.tid_info); | ||
1698 | break; | ||
1699 | case IPATH_CMD_TID_FREE: | ||
1700 | ret = ipath_tid_free(pd, &cmd.cmd.tid_info); | ||
1701 | break; | ||
1702 | case IPATH_CMD_SET_PART_KEY: | ||
1703 | ret = ipath_set_part_key(pd, cmd.cmd.part_key); | ||
1704 | break; | ||
1705 | } | ||
1706 | |||
1707 | if (ret >= 0) | ||
1708 | ret = consumed; | ||
1709 | |||
1710 | bail: | ||
1711 | return ret; | ||
1712 | } | ||
1713 | |||
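The write() entry point above implements a small command ABI: user space writes a struct ipath_cmd whose type selects which union member the driver copies in and acts on. A hypothetical user-space sketch (struct ipath_cmd and IPATH_CMD_RECV_CTRL come from the driver's user-visible header, which is not part of this hunk, so the exact field types are assumed):

    #include <string.h>
    #include <unistd.h>

    /* re-enable receives on an open port; mirrors the IPATH_CMD_RECV_CTRL
     * case handled by ipath_write()/ipath_manage_rcvq() above */
    static int ipath_enable_recv(int fd)
    {
            struct ipath_cmd cmd;

            memset(&cmd, 0, sizeof(cmd));
            cmd.type = IPATH_CMD_RECV_CTRL;
            cmd.cmd.recv_ctrl = 1;          /* 1 = enable, 0 = disable */

            /* on success the driver returns the number of bytes it consumed */
            return write(fd, &cmd, sizeof(cmd)) < 0 ? -1 : 0;
    }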
1714 | static struct class *ipath_class; | ||
1715 | |||
1716 | static int init_cdev(int minor, char *name, struct file_operations *fops, | ||
1717 | struct cdev **cdevp, struct class_device **class_devp) | ||
1718 | { | ||
1719 | const dev_t dev = MKDEV(IPATH_MAJOR, minor); | ||
1720 | struct cdev *cdev = NULL; | ||
1721 | struct class_device *class_dev = NULL; | ||
1722 | int ret; | ||
1723 | |||
1724 | cdev = cdev_alloc(); | ||
1725 | if (!cdev) { | ||
1726 | printk(KERN_ERR IPATH_DRV_NAME | ||
1727 | ": Could not allocate cdev for minor %d, %s\n", | ||
1728 | minor, name); | ||
1729 | ret = -ENOMEM; | ||
1730 | goto done; | ||
1731 | } | ||
1732 | |||
1733 | cdev->owner = THIS_MODULE; | ||
1734 | cdev->ops = fops; | ||
1735 | kobject_set_name(&cdev->kobj, name); | ||
1736 | |||
1737 | ret = cdev_add(cdev, dev, 1); | ||
1738 | if (ret < 0) { | ||
1739 | printk(KERN_ERR IPATH_DRV_NAME | ||
1740 | ": Could not add cdev for minor %d, %s (err %d)\n", | ||
1741 | minor, name, -ret); | ||
1742 | goto err_cdev; | ||
1743 | } | ||
1744 | |||
1745 | class_dev = class_device_create(ipath_class, NULL, dev, NULL, name); | ||
1746 | |||
1747 | if (IS_ERR(class_dev)) { | ||
1748 | ret = PTR_ERR(class_dev); | ||
1749 | printk(KERN_ERR IPATH_DRV_NAME ": Could not create " | ||
1750 | "class_dev for minor %d, %s (err %d)\n", | ||
1751 | minor, name, -ret); | ||
1752 | goto err_cdev; | ||
1753 | } | ||
1754 | |||
1755 | goto done; | ||
1756 | |||
1757 | err_cdev: | ||
1758 | cdev_del(cdev); | ||
1759 | cdev = NULL; | ||
1760 | |||
1761 | done: | ||
1762 | if (ret >= 0) { | ||
1763 | *cdevp = cdev; | ||
1764 | *class_devp = class_dev; | ||
1765 | } else { | ||
1766 | *cdevp = NULL; | ||
1767 | *class_devp = NULL; | ||
1768 | } | ||
1769 | |||
1770 | return ret; | ||
1771 | } | ||
1772 | |||
1773 | int ipath_cdev_init(int minor, char *name, struct file_operations *fops, | ||
1774 | struct cdev **cdevp, struct class_device **class_devp) | ||
1775 | { | ||
1776 | return init_cdev(minor, name, fops, cdevp, class_devp); | ||
1777 | } | ||
1778 | |||
1779 | static void cleanup_cdev(struct cdev **cdevp, | ||
1780 | struct class_device **class_devp) | ||
1781 | { | ||
1782 | struct class_device *class_dev = *class_devp; | ||
1783 | |||
1784 | if (class_dev) { | ||
1785 | class_device_unregister(class_dev); | ||
1786 | *class_devp = NULL; | ||
1787 | } | ||
1788 | |||
1789 | if (*cdevp) { | ||
1790 | cdev_del(*cdevp); | ||
1791 | *cdevp = NULL; | ||
1792 | } | ||
1793 | } | ||
1794 | |||
1795 | void ipath_cdev_cleanup(struct cdev **cdevp, | ||
1796 | struct class_device **class_devp) | ||
1797 | { | ||
1798 | cleanup_cdev(cdevp, class_devp); | ||
1799 | } | ||
1800 | |||
1801 | static struct cdev *wildcard_cdev; | ||
1802 | static struct class_device *wildcard_class_dev; | ||
1803 | |||
1804 | static const dev_t dev = MKDEV(IPATH_MAJOR, 0); | ||
1805 | |||
1806 | static int user_init(void) | ||
1807 | { | ||
1808 | int ret; | ||
1809 | |||
1810 | ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME); | ||
1811 | if (ret < 0) { | ||
1812 | printk(KERN_ERR IPATH_DRV_NAME ": Could not register " | ||
1813 | "chrdev region (err %d)\n", -ret); | ||
1814 | goto done; | ||
1815 | } | ||
1816 | |||
1817 | ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME); | ||
1818 | |||
1819 | if (IS_ERR(ipath_class)) { | ||
1820 | ret = PTR_ERR(ipath_class); | ||
1821 | printk(KERN_ERR IPATH_DRV_NAME ": Could not create " | ||
1822 | "device class (err %d)\n", -ret); | ||
1823 | goto bail; | ||
1824 | } | ||
1825 | |||
1826 | goto done; | ||
1827 | bail: | ||
1828 | unregister_chrdev_region(dev, IPATH_NMINORS); | ||
1829 | done: | ||
1830 | return ret; | ||
1831 | } | ||
1832 | |||
1833 | static void user_cleanup(void) | ||
1834 | { | ||
1835 | if (ipath_class) { | ||
1836 | class_destroy(ipath_class); | ||
1837 | ipath_class = NULL; | ||
1838 | } | ||
1839 | |||
1840 | unregister_chrdev_region(dev, IPATH_NMINORS); | ||
1841 | } | ||
1842 | |||
1843 | static atomic_t user_count = ATOMIC_INIT(0); | ||
1844 | static atomic_t user_setup = ATOMIC_INIT(0); | ||
1845 | |||
1846 | int ipath_user_add(struct ipath_devdata *dd) | ||
1847 | { | ||
1848 | char name[10]; | ||
1849 | int ret; | ||
1850 | |||
1851 | if (atomic_inc_return(&user_count) == 1) { | ||
1852 | ret = user_init(); | ||
1853 | if (ret < 0) { | ||
1854 | ipath_dev_err(dd, "Unable to set up user support: " | ||
1855 | "error %d\n", -ret); | ||
1856 | goto bail; | ||
1857 | } | ||
1858 | ret = ipath_diag_init(); | ||
1859 | if (ret < 0) { | ||
1860 | ipath_dev_err(dd, "Unable to set up diag support: " | ||
1861 | "error %d\n", -ret); | ||
1862 | goto bail_sma; | ||
1863 | } | ||
1864 | |||
1865 | ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev, | ||
1866 | &wildcard_class_dev); | ||
1867 | if (ret < 0) { | ||
1868 | ipath_dev_err(dd, "Could not create wildcard " | ||
1869 | "minor: error %d\n", -ret); | ||
1870 | goto bail_diag; | ||
1871 | } | ||
1872 | |||
1873 | atomic_set(&user_setup, 1); | ||
1874 | } | ||
1875 | |||
1876 | snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit); | ||
1877 | |||
1878 | ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops, | ||
1879 | &dd->cdev, &dd->class_dev); | ||
1880 | if (ret < 0) | ||
1881 | ipath_dev_err(dd, "Could not create user minor %d, %s\n", | ||
1882 | dd->ipath_unit + 1, name); | ||
1883 | |||
1884 | goto bail; | ||
1885 | |||
1886 | bail_diag: | ||
1887 | ipath_diag_cleanup(); | ||
1888 | bail_sma: | ||
1889 | user_cleanup(); | ||
1890 | bail: | ||
1891 | return ret; | ||
1892 | } | ||
1893 | |||
1894 | void ipath_user_del(struct ipath_devdata *dd) | ||
1895 | { | ||
1896 | cleanup_cdev(&dd->cdev, &dd->class_dev); | ||
1897 | |||
1898 | if (atomic_dec_return(&user_count) == 0) { | ||
1899 | if (atomic_read(&user_setup) == 0) | ||
1900 | goto bail; | ||
1901 | |||
1902 | cleanup_cdev(&wildcard_cdev, &wildcard_class_dev); | ||
1903 | ipath_diag_cleanup(); | ||
1904 | user_cleanup(); | ||
1905 | |||
1906 | atomic_set(&user_setup, 0); | ||
1907 | } | ||
1908 | bail: | ||
1909 | return; | ||
1910 | } | ||
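
A minimal userspace sketch of how the minor numbering above maps to device nodes: minor 0 is the wildcard "ipath" device and each unit gets minor ipath_unit + 1 named "ipathN". The /dev paths below are assumptions; they depend on how udev names the nodes for the class devices registered by ipath_user_add().

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Open either the wildcard node (unit < 0) or a specific unit.  The
 * /dev/ipath* paths are assumptions about the udev naming of the class
 * devices created above. */
static int open_ipath_unit(int unit)
{
	char path[32];

	if (unit < 0)
		snprintf(path, sizeof(path), "/dev/ipath");
	else
		snprintf(path, sizeof(path), "/dev/ipath%d", unit);

	return open(path, O_RDWR);
}

int main(void)
{
	int fd = open_ipath_unit(-1);	/* wildcard: driver picks a unit */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	close(fd);
	return 0;
}
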
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c new file mode 100644 index 000000000000..e274120567e1 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_fs.c | |||
@@ -0,0 +1,605 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/version.h> | ||
34 | #include <linux/config.h> | ||
35 | #include <linux/module.h> | ||
36 | #include <linux/fs.h> | ||
37 | #include <linux/mount.h> | ||
38 | #include <linux/pagemap.h> | ||
39 | #include <linux/init.h> | ||
40 | #include <linux/namei.h> | ||
41 | #include <linux/pci.h> | ||
42 | |||
43 | #include "ipath_kernel.h" | ||
44 | |||
45 | #define IPATHFS_MAGIC 0x726a77 | ||
46 | |||
47 | static struct super_block *ipath_super; | ||
48 | |||
49 | static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, | ||
50 | int mode, struct file_operations *fops, | ||
51 | void *data) | ||
52 | { | ||
53 | int error; | ||
54 | struct inode *inode = new_inode(dir->i_sb); | ||
55 | |||
56 | if (!inode) { | ||
57 | error = -EPERM; | ||
58 | goto bail; | ||
59 | } | ||
60 | |||
61 | inode->i_mode = mode; | ||
62 | inode->i_uid = 0; | ||
63 | inode->i_gid = 0; | ||
64 | inode->i_blksize = PAGE_CACHE_SIZE; | ||
65 | inode->i_blocks = 0; | ||
66 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
67 | inode->u.generic_ip = data; | ||
68 | if ((mode & S_IFMT) == S_IFDIR) { | ||
69 | inode->i_op = &simple_dir_inode_operations; | ||
70 | inode->i_nlink++; | ||
71 | dir->i_nlink++; | ||
72 | } | ||
73 | |||
74 | inode->i_fop = fops; | ||
75 | |||
76 | d_instantiate(dentry, inode); | ||
77 | error = 0; | ||
78 | |||
79 | bail: | ||
80 | return error; | ||
81 | } | ||
82 | |||
83 | static int create_file(const char *name, mode_t mode, | ||
84 | struct dentry *parent, struct dentry **dentry, | ||
85 | struct file_operations *fops, void *data) | ||
86 | { | ||
87 | int error; | ||
88 | |||
89 | *dentry = NULL; | ||
90 | mutex_lock(&parent->d_inode->i_mutex); | ||
91 | *dentry = lookup_one_len(name, parent, strlen(name)); | ||
92 | if (!IS_ERR(*dentry)) | ||
93 | error = ipathfs_mknod(parent->d_inode, *dentry, | ||
94 | mode, fops, data); | ||
95 | else | ||
96 | error = PTR_ERR(*dentry); | ||
97 | mutex_unlock(&parent->d_inode->i_mutex); | ||
98 | |||
99 | return error; | ||
100 | } | ||
101 | |||
102 | static ssize_t atomic_stats_read(struct file *file, char __user *buf, | ||
103 | size_t count, loff_t *ppos) | ||
104 | { | ||
105 | return simple_read_from_buffer(buf, count, ppos, &ipath_stats, | ||
106 | sizeof ipath_stats); | ||
107 | } | ||
108 | |||
109 | static struct file_operations atomic_stats_ops = { | ||
110 | .read = atomic_stats_read, | ||
111 | }; | ||
112 | |||
113 | #define NUM_COUNTERS (sizeof(struct infinipath_counters) / sizeof(u64)) | ||
114 | |||
115 | static ssize_t atomic_counters_read(struct file *file, char __user *buf, | ||
116 | size_t count, loff_t *ppos) | ||
117 | { | ||
118 | u64 counters[NUM_COUNTERS]; | ||
119 | u16 i; | ||
120 | struct ipath_devdata *dd; | ||
121 | |||
122 | dd = file->f_dentry->d_inode->u.generic_ip; | ||
123 | |||
124 | for (i = 0; i < NUM_COUNTERS; i++) | ||
125 | counters[i] = ipath_snap_cntr(dd, i); | ||
126 | |||
127 | return simple_read_from_buffer(buf, count, ppos, counters, | ||
128 | sizeof counters); | ||
129 | } | ||
130 | |||
131 | static struct file_operations atomic_counters_ops = { | ||
132 | .read = atomic_counters_read, | ||
133 | }; | ||
134 | |||
135 | static ssize_t atomic_node_info_read(struct file *file, char __user *buf, | ||
136 | size_t count, loff_t *ppos) | ||
137 | { | ||
138 | u32 nodeinfo[10]; | ||
139 | struct ipath_devdata *dd; | ||
140 | u64 guid; | ||
141 | |||
142 | dd = file->f_dentry->d_inode->u.generic_ip; | ||
143 | |||
144 | guid = be64_to_cpu(dd->ipath_guid); | ||
145 | |||
146 | nodeinfo[0] = /* BaseVersion is SMA */ | ||
147 | /* ClassVersion is SMA */ | ||
148 | (1 << 8) /* NodeType */ | ||
149 | | (1 << 0); /* NumPorts */ | ||
150 | nodeinfo[1] = (u32) (guid >> 32); | ||
151 | nodeinfo[2] = (u32) (guid & 0xffffffff); | ||
152 | /* PortGUID == SystemImageGUID for us */ | ||
153 | nodeinfo[3] = nodeinfo[1]; | ||
154 | /* PortGUID == SystemImageGUID for us */ | ||
155 | nodeinfo[4] = nodeinfo[2]; | ||
156 | /* PortGUID == NodeGUID for us */ | ||
157 | nodeinfo[5] = nodeinfo[3]; | ||
158 | /* PortGUID == NodeGUID for us */ | ||
159 | nodeinfo[6] = nodeinfo[4]; | ||
160 | nodeinfo[7] = (4 << 16) /* we support 4 pkeys */ | ||
161 | | (dd->ipath_deviceid << 0); | ||
162 | /* our chip version as 16 bits major, 16 bits minor */ | ||
163 | nodeinfo[8] = dd->ipath_minrev | (dd->ipath_majrev << 16); | ||
164 | nodeinfo[9] = (dd->ipath_unit << 24) | (dd->ipath_vendorid << 0); | ||
165 | |||
166 | return simple_read_from_buffer(buf, count, ppos, nodeinfo, | ||
167 | sizeof nodeinfo); | ||
168 | } | ||
169 | |||
170 | static struct file_operations atomic_node_info_ops = { | ||
171 | .read = atomic_node_info_read, | ||
172 | }; | ||
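
A minimal userspace sketch of reading the node_info file written by atomic_node_info_read() above: ten host-order u32 words, with the node GUID in words 1-2 and the chip revision packed into word 8. The mount point and the "00" unit directory are assumptions (ipathfs must be mounted somewhere; the per-unit directory name comes from the "%02d" format used by create_device_files() later in this file).

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Mount point is an assumption; "00" is the unit-0 directory. */
	const char *path = "/ipathfs/00/node_info";
	uint32_t w[10];
	FILE *f = fopen(path, "rb");

	if (!f || fread(w, sizeof(w[0]), 10, f) != 10) {
		perror(path);
		return 1;
	}
	fclose(f);

	/* Words 1 and 2 carry the node GUID, high word first. */
	printf("GUID: 0x%08x%08x\n", w[1], w[2]);
	/* Word 8 packs the chip revision: major in the high 16 bits. */
	printf("chip rev: %u.%u\n", w[8] >> 16, w[8] & 0xffff);
	return 0;
}
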
173 | |||
174 | static ssize_t atomic_port_info_read(struct file *file, char __user *buf, | ||
175 | size_t count, loff_t *ppos) | ||
176 | { | ||
177 | u32 portinfo[13]; | ||
178 | u32 tmp, tmp2; | ||
179 | struct ipath_devdata *dd; | ||
180 | |||
181 | dd = file->f_dentry->d_inode->u.generic_ip; | ||
182 | |||
183 | /* so we only initialize non-zero fields. */ | ||
184 | memset(portinfo, 0, sizeof portinfo); | ||
185 | |||
186 | /* | ||
187 | * Notimpl yet M_Key (64) | ||
188 | * Notimpl yet GID (64) | ||
189 | */ | ||
190 | |||
191 | portinfo[4] = (dd->ipath_lid << 16); | ||
192 | |||
193 | /* | ||
194 | * Notimpl yet SMLID (should we store this in the driver, in case | ||
195 | * SMA dies?) CapabilityMask is 0, we don't support any of these | ||
196 | * DiagCode is 0; we don't store any diag info for now Notimpl yet | ||
197 | * M_KeyLeasePeriod (we don't support M_Key) | ||
198 | */ | ||
199 | |||
200 | /* LocalPortNum is whichever port number they ask for */ | ||
201 | portinfo[7] = (dd->ipath_unit << 24) | ||
202 | /* LinkWidthEnabled */ | ||
203 | | (2 << 16) | ||
204 | /* LinkWidthSupported (really 2, but not IB valid) */ | ||
205 | | (3 << 8) | ||
206 | /* LinkWidthActive */ | ||
207 | | (2 << 0); | ||
208 | tmp = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK; | ||
209 | tmp2 = 5; | ||
210 | if (tmp == IPATH_IBSTATE_INIT) | ||
211 | tmp = 2; | ||
212 | else if (tmp == IPATH_IBSTATE_ARM) | ||
213 | tmp = 3; | ||
214 | else if (tmp == IPATH_IBSTATE_ACTIVE) | ||
215 | tmp = 4; | ||
216 | else { | ||
217 | tmp = 0; /* down */ | ||
218 | tmp2 = tmp & 0xf; | ||
219 | } | ||
220 | |||
221 | portinfo[8] = (1 << 28) /* LinkSpeedSupported */ | ||
222 | | (tmp << 24) /* PortState */ | ||
223 | | (tmp2 << 20) /* PortPhysicalState */ | ||
224 | | (2 << 16) | ||
225 | |||
226 | /* LinkDownDefaultState */ | ||
227 | /* M_KeyProtectBits == 0 */ | ||
228 | /* NotImpl yet LMC == 0 (we can support all values) */ | ||
229 | | (1 << 4) /* LinkSpeedActive */ | ||
230 | | (1 << 0); /* LinkSpeedEnabled */ | ||
231 | switch (dd->ipath_ibmtu) { | ||
232 | case 4096: | ||
233 | tmp = 5; | ||
234 | break; | ||
235 | case 2048: | ||
236 | tmp = 4; | ||
237 | break; | ||
238 | case 1024: | ||
239 | tmp = 3; | ||
240 | break; | ||
241 | case 512: | ||
242 | tmp = 2; | ||
243 | break; | ||
244 | case 256: | ||
245 | tmp = 1; | ||
246 | break; | ||
247 | default: /* oops, something is wrong */ | ||
248 | ipath_dbg("Problem, ipath_ibmtu 0x%x not a valid IB MTU, " | ||
249 | "treat as 2048\n", dd->ipath_ibmtu); | ||
250 | tmp = 4; | ||
251 | break; | ||
252 | } | ||
253 | portinfo[9] = (tmp << 28) | ||
254 | /* NeighborMTU */ | ||
255 | /* Notimpl MasterSMSL */ | ||
256 | | (1 << 20) | ||
257 | |||
258 | /* VLCap */ | ||
259 | /* Notimpl InitType (actually, an SMA decision) */ | ||
260 | /* VLHighLimit is 0 (only one VL) */ | ||
261 | ; /* VLArbitrationHighCap is 0 (only one VL) */ | ||
262 | portinfo[10] = /* VLArbitrationLowCap is 0 (only one VL) */ | ||
263 | /* InitTypeReply is SMA decision */ | ||
264 | (5 << 16) /* MTUCap 4096 */ | ||
265 | | (7 << 13) /* VLStallCount */ | ||
266 | | (0x1f << 8) /* HOQLife */ | ||
267 | | (1 << 4) | ||
268 | |||
269 | /* OperationalVLs 0 */ | ||
270 | /* PartitionEnforcementInbound */ | ||
271 | /* PartitionEnforcementOutbound not enforced */ | ||
272 | /* FilterRawinbound not enforced */ | ||
273 | ; /* FilterRawOutbound not enforced */ | ||
274 | /* M_KeyViolations are not counted by hardware, SMA can count */ | ||
275 | tmp = ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey); | ||
276 | /* P_KeyViolations are counted by hardware. */ | ||
277 | portinfo[11] = ((tmp & 0xffff) << 0); | ||
278 | portinfo[12] = | ||
279 | /* Q_KeyViolations are not counted by hardware */ | ||
280 | (1 << 8) | ||
281 | |||
282 | /* GUIDCap */ | ||
283 | /* SubnetTimeOut handled by SMA */ | ||
284 | /* RespTimeValue handled by SMA */ | ||
285 | ; | ||
286 | /* LocalPhyErrors are programmed to max */ | ||
287 | portinfo[12] |= (0xf << 20) | ||
288 | | (0xf << 16) /* OverRunErrors are programmed to max */ | ||
289 | ; | ||
290 | |||
291 | return simple_read_from_buffer(buf, count, ppos, portinfo, | ||
292 | sizeof portinfo); | ||
293 | } | ||
294 | |||
295 | static struct file_operations atomic_port_info_ops = { | ||
296 | .read = atomic_port_info_read, | ||
297 | }; | ||
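
The NeighborMTU field above carries the IB MTU enumeration rather than a byte count. A small standalone sketch of that mapping, mirroring the switch in atomic_port_info_read(); the helper name is illustrative only.

#include <stdio.h>

static unsigned ib_mtu_enum_to_bytes(unsigned mtu_enum)
{
	switch (mtu_enum) {
	case 1: return 256;
	case 2: return 512;
	case 3: return 1024;
	case 4: return 2048;
	case 5: return 4096;
	default: return 0;	/* not a valid IB MTU enum */
	}
}

int main(void)
{
	unsigned e;

	for (e = 1; e <= 5; e++)
		printf("enum %u -> %u bytes\n", e, ib_mtu_enum_to_bytes(e));
	return 0;
}
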
298 | |||
299 | static ssize_t flash_read(struct file *file, char __user *buf, | ||
300 | size_t count, loff_t *ppos) | ||
301 | { | ||
302 | struct ipath_devdata *dd; | ||
303 | ssize_t ret; | ||
304 | loff_t pos; | ||
305 | char *tmp; | ||
306 | |||
307 | pos = *ppos; | ||
308 | |||
309 | if (pos < 0) { | ||
310 | ret = -EINVAL; | ||
311 | goto bail; | ||
312 | } | ||
313 | |||
314 | if (pos >= sizeof(struct ipath_flash)) { | ||
315 | ret = 0; | ||
316 | goto bail; | ||
317 | } | ||
318 | |||
319 | if (count > sizeof(struct ipath_flash) - pos) | ||
320 | count = sizeof(struct ipath_flash) - pos; | ||
321 | |||
322 | tmp = kmalloc(count, GFP_KERNEL); | ||
323 | if (!tmp) { | ||
324 | ret = -ENOMEM; | ||
325 | goto bail; | ||
326 | } | ||
327 | |||
328 | dd = file->f_dentry->d_inode->u.generic_ip; | ||
329 | if (ipath_eeprom_read(dd, pos, tmp, count)) { | ||
330 | ipath_dev_err(dd, "failed to read from flash\n"); | ||
331 | ret = -ENXIO; | ||
332 | goto bail_tmp; | ||
333 | } | ||
334 | |||
335 | if (copy_to_user(buf, tmp, count)) { | ||
336 | ret = -EFAULT; | ||
337 | goto bail_tmp; | ||
338 | } | ||
339 | |||
340 | *ppos = pos + count; | ||
341 | ret = count; | ||
342 | |||
343 | bail_tmp: | ||
344 | kfree(tmp); | ||
345 | |||
346 | bail: | ||
347 | return ret; | ||
348 | } | ||
349 | |||
350 | static ssize_t flash_write(struct file *file, const char __user *buf, | ||
351 | size_t count, loff_t *ppos) | ||
352 | { | ||
353 | struct ipath_devdata *dd; | ||
354 | ssize_t ret; | ||
355 | loff_t pos; | ||
356 | char *tmp; | ||
357 | |||
358 | pos = *ppos; | ||
359 | |||
360 | if (pos < 0) { | ||
361 | ret = -EINVAL; | ||
362 | goto bail; | ||
363 | } | ||
364 | |||
365 | if (pos >= sizeof(struct ipath_flash)) { | ||
366 | ret = 0; | ||
367 | goto bail; | ||
368 | } | ||
369 | |||
370 | if (count > sizeof(struct ipath_flash) - pos) | ||
371 | count = sizeof(struct ipath_flash) - pos; | ||
372 | |||
373 | tmp = kmalloc(count, GFP_KERNEL); | ||
374 | if (!tmp) { | ||
375 | ret = -ENOMEM; | ||
376 | goto bail; | ||
377 | } | ||
378 | |||
379 | if (copy_from_user(tmp, buf, count)) { | ||
380 | ret = -EFAULT; | ||
381 | goto bail_tmp; | ||
382 | } | ||
383 | |||
384 | dd = file->f_dentry->d_inode->u.generic_ip; | ||
385 | if (ipath_eeprom_write(dd, pos, tmp, count)) { | ||
386 | ret = -ENXIO; | ||
387 | ipath_dev_err(dd, "failed to write to flash\n"); | ||
388 | goto bail_tmp; | ||
389 | } | ||
390 | |||
391 | *ppos = pos + count; | ||
392 | ret = count; | ||
393 | |||
394 | bail_tmp: | ||
395 | kfree(tmp); | ||
396 | |||
397 | bail: | ||
398 | return ret; | ||
399 | } | ||
400 | |||
401 | static struct file_operations flash_ops = { | ||
402 | .read = flash_read, | ||
403 | .write = flash_write, | ||
404 | }; | ||
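
A minimal userspace sketch of pulling the EEPROM image through the flash file: flash_read() above clamps every transfer at sizeof(struct ipath_flash), so a single large read returns the whole image. The mount point, unit directory, and buffer size are assumptions.

#include <stdio.h>

int main(void)
{
	/* Path is an assumption (ipathfs mount point + unit directory). */
	const char *path = "/ipathfs/00/flash";
	unsigned char buf[4096];	/* arbitrary upper bound */
	size_t n;
	FILE *f = fopen(path, "rb");

	if (!f) {
		perror(path);
		return 1;
	}
	/* flash_read() stops at sizeof(struct ipath_flash), so this one
	 * read returns the complete flash contents. */
	n = fread(buf, 1, sizeof(buf), f);
	printf("read %zu bytes of flash\n", n);
	fclose(f);
	return 0;
}
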
405 | |||
406 | static int create_device_files(struct super_block *sb, | ||
407 | struct ipath_devdata *dd) | ||
408 | { | ||
409 | struct dentry *dir, *tmp; | ||
410 | char unit[10]; | ||
411 | int ret; | ||
412 | |||
413 | snprintf(unit, sizeof unit, "%02d", dd->ipath_unit); | ||
414 | ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir, | ||
415 | (struct file_operations *) &simple_dir_operations, | ||
416 | dd); | ||
417 | if (ret) { | ||
418 | printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret); | ||
419 | goto bail; | ||
420 | } | ||
421 | |||
422 | ret = create_file("atomic_counters", S_IFREG|S_IRUGO, dir, &tmp, | ||
423 | &atomic_counters_ops, dd); | ||
424 | if (ret) { | ||
425 | printk(KERN_ERR "create_file(%s/atomic_counters) " | ||
426 | "failed: %d\n", unit, ret); | ||
427 | goto bail; | ||
428 | } | ||
429 | |||
430 | ret = create_file("node_info", S_IFREG|S_IRUGO, dir, &tmp, | ||
431 | &atomic_node_info_ops, dd); | ||
432 | if (ret) { | ||
433 | printk(KERN_ERR "create_file(%s/node_info) " | ||
434 | "failed: %d\n", unit, ret); | ||
435 | goto bail; | ||
436 | } | ||
437 | |||
438 | ret = create_file("port_info", S_IFREG|S_IRUGO, dir, &tmp, | ||
439 | &atomic_port_info_ops, dd); | ||
440 | if (ret) { | ||
441 | printk(KERN_ERR "create_file(%s/port_info) " | ||
442 | "failed: %d\n", unit, ret); | ||
443 | goto bail; | ||
444 | } | ||
445 | |||
446 | ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp, | ||
447 | &flash_ops, dd); | ||
448 | if (ret) { | ||
449 | printk(KERN_ERR "create_file(%s/flash) " | ||
450 | "failed: %d\n", unit, ret); | ||
451 | goto bail; | ||
452 | } | ||
453 | |||
454 | bail: | ||
455 | return ret; | ||
456 | } | ||
457 | |||
458 | static void remove_file(struct dentry *parent, char *name) | ||
459 | { | ||
460 | struct dentry *tmp; | ||
461 | |||
462 | tmp = lookup_one_len(name, parent, strlen(name)); | ||
463 | |||
464 | spin_lock(&dcache_lock); | ||
465 | spin_lock(&tmp->d_lock); | ||
466 | if (!d_unhashed(tmp) && tmp->d_inode) { | ||
467 | dget_locked(tmp); | ||
468 | __d_drop(tmp); | ||
469 | spin_unlock(&tmp->d_lock); | ||
470 | spin_unlock(&dcache_lock); | ||
471 | simple_unlink(parent->d_inode, tmp); | ||
472 | } else { | ||
473 | spin_unlock(&tmp->d_lock); | ||
474 | spin_unlock(&dcache_lock); | ||
475 | } | ||
476 | } | ||
477 | |||
478 | static int remove_device_files(struct super_block *sb, | ||
479 | struct ipath_devdata *dd) | ||
480 | { | ||
481 | struct dentry *dir, *root; | ||
482 | char unit[10]; | ||
483 | int ret; | ||
484 | |||
485 | root = dget(sb->s_root); | ||
486 | mutex_lock(&root->d_inode->i_mutex); | ||
487 | snprintf(unit, sizeof unit, "%02d", dd->ipath_unit); | ||
488 | dir = lookup_one_len(unit, root, strlen(unit)); | ||
489 | |||
490 | if (IS_ERR(dir)) { | ||
491 | ret = PTR_ERR(dir); | ||
492 | printk(KERN_ERR "Lookup of %s failed\n", unit); | ||
493 | goto bail; | ||
494 | } | ||
495 | |||
496 | remove_file(dir, "flash"); | ||
497 | remove_file(dir, "port_info"); | ||
498 | remove_file(dir, "node_info"); | ||
499 | remove_file(dir, "atomic_counters"); | ||
500 | d_delete(dir); | ||
501 | ret = simple_rmdir(root->d_inode, dir); | ||
502 | |||
503 | bail: | ||
504 | mutex_unlock(&root->d_inode->i_mutex); | ||
505 | dput(root); | ||
506 | return ret; | ||
507 | } | ||
508 | |||
509 | static int ipathfs_fill_super(struct super_block *sb, void *data, | ||
510 | int silent) | ||
511 | { | ||
512 | struct ipath_devdata *dd, *tmp; | ||
513 | unsigned long flags; | ||
514 | int ret; | ||
515 | |||
516 | static struct tree_descr files[] = { | ||
517 | [1] = {"atomic_stats", &atomic_stats_ops, S_IRUGO}, | ||
518 | {""}, | ||
519 | }; | ||
520 | |||
521 | ret = simple_fill_super(sb, IPATHFS_MAGIC, files); | ||
522 | if (ret) { | ||
523 | printk(KERN_ERR "simple_fill_super failed: %d\n", ret); | ||
524 | goto bail; | ||
525 | } | ||
526 | |||
527 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
528 | |||
529 | list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { | ||
530 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
531 | ret = create_device_files(sb, dd); | ||
532 | if (ret) { | ||
533 | deactivate_super(sb); | ||
534 | goto bail; | ||
535 | } | ||
536 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
537 | } | ||
538 | |||
539 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
540 | |||
541 | bail: | ||
542 | return ret; | ||
543 | } | ||
544 | |||
545 | static struct super_block *ipathfs_get_sb(struct file_system_type *fs_type, | ||
546 | int flags, const char *dev_name, | ||
547 | void *data) | ||
548 | { | ||
549 | ipath_super = get_sb_single(fs_type, flags, data, | ||
550 | ipathfs_fill_super); | ||
551 | return ipath_super; | ||
552 | } | ||
553 | |||
554 | static void ipathfs_kill_super(struct super_block *s) | ||
555 | { | ||
556 | kill_litter_super(s); | ||
557 | ipath_super = NULL; | ||
558 | } | ||
559 | |||
560 | int ipathfs_add_device(struct ipath_devdata *dd) | ||
561 | { | ||
562 | int ret; | ||
563 | |||
564 | if (ipath_super == NULL) { | ||
565 | ret = 0; | ||
566 | goto bail; | ||
567 | } | ||
568 | |||
569 | ret = create_device_files(ipath_super, dd); | ||
570 | |||
571 | bail: | ||
572 | return ret; | ||
573 | } | ||
574 | |||
575 | int ipathfs_remove_device(struct ipath_devdata *dd) | ||
576 | { | ||
577 | int ret; | ||
578 | |||
579 | if (ipath_super == NULL) { | ||
580 | ret = 0; | ||
581 | goto bail; | ||
582 | } | ||
583 | |||
584 | ret = remove_device_files(ipath_super, dd); | ||
585 | |||
586 | bail: | ||
587 | return ret; | ||
588 | } | ||
589 | |||
590 | static struct file_system_type ipathfs_fs_type = { | ||
591 | .owner = THIS_MODULE, | ||
592 | .name = "ipathfs", | ||
593 | .get_sb = ipathfs_get_sb, | ||
594 | .kill_sb = ipathfs_kill_super, | ||
595 | }; | ||
596 | |||
597 | int __init ipath_init_ipathfs(void) | ||
598 | { | ||
599 | return register_filesystem(&ipathfs_fs_type); | ||
600 | } | ||
601 | |||
602 | void __exit ipath_exit_ipathfs(void) | ||
603 | { | ||
604 | unregister_filesystem(&ipathfs_fs_type); | ||
605 | } | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_ht400.c b/drivers/infiniband/hw/ipath/ipath_ht400.c new file mode 100644 index 000000000000..4652435998f3 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_ht400.c | |||
@@ -0,0 +1,1586 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | /* | ||
34 | * This file contains all of the code that is specific to the InfiniPath | ||
35 | * HT-400 chip. | ||
36 | */ | ||
37 | |||
38 | #include <linux/pci.h> | ||
39 | #include <linux/delay.h> | ||
40 | |||
41 | #include "ipath_kernel.h" | ||
42 | #include "ipath_registers.h" | ||
43 | |||
44 | /* | ||
45 | * This lists the InfiniPath HT400 registers, in the actual chip layout. | ||
46 | * This structure should never be directly accessed. | ||
47 | * | ||
48 | * The names are in InterCap form because they're taken straight from | ||
49 | * the chip specification. Since they're only used in this file, they | ||
50 | * don't pollute the rest of the source. | ||
51 | */ | ||
52 | |||
53 | struct _infinipath_do_not_use_kernel_regs { | ||
54 | unsigned long long Revision; | ||
55 | unsigned long long Control; | ||
56 | unsigned long long PageAlign; | ||
57 | unsigned long long PortCnt; | ||
58 | unsigned long long DebugPortSelect; | ||
59 | unsigned long long DebugPort; | ||
60 | unsigned long long SendRegBase; | ||
61 | unsigned long long UserRegBase; | ||
62 | unsigned long long CounterRegBase; | ||
63 | unsigned long long Scratch; | ||
64 | unsigned long long ReservedMisc1; | ||
65 | unsigned long long InterruptConfig; | ||
66 | unsigned long long IntBlocked; | ||
67 | unsigned long long IntMask; | ||
68 | unsigned long long IntStatus; | ||
69 | unsigned long long IntClear; | ||
70 | unsigned long long ErrorMask; | ||
71 | unsigned long long ErrorStatus; | ||
72 | unsigned long long ErrorClear; | ||
73 | unsigned long long HwErrMask; | ||
74 | unsigned long long HwErrStatus; | ||
75 | unsigned long long HwErrClear; | ||
76 | unsigned long long HwDiagCtrl; | ||
77 | unsigned long long MDIO; | ||
78 | unsigned long long IBCStatus; | ||
79 | unsigned long long IBCCtrl; | ||
80 | unsigned long long ExtStatus; | ||
81 | unsigned long long ExtCtrl; | ||
82 | unsigned long long GPIOOut; | ||
83 | unsigned long long GPIOMask; | ||
84 | unsigned long long GPIOStatus; | ||
85 | unsigned long long GPIOClear; | ||
86 | unsigned long long RcvCtrl; | ||
87 | unsigned long long RcvBTHQP; | ||
88 | unsigned long long RcvHdrSize; | ||
89 | unsigned long long RcvHdrCnt; | ||
90 | unsigned long long RcvHdrEntSize; | ||
91 | unsigned long long RcvTIDBase; | ||
92 | unsigned long long RcvTIDCnt; | ||
93 | unsigned long long RcvEgrBase; | ||
94 | unsigned long long RcvEgrCnt; | ||
95 | unsigned long long RcvBufBase; | ||
96 | unsigned long long RcvBufSize; | ||
97 | unsigned long long RxIntMemBase; | ||
98 | unsigned long long RxIntMemSize; | ||
99 | unsigned long long RcvPartitionKey; | ||
100 | unsigned long long ReservedRcv[10]; | ||
101 | unsigned long long SendCtrl; | ||
102 | unsigned long long SendPIOBufBase; | ||
103 | unsigned long long SendPIOSize; | ||
104 | unsigned long long SendPIOBufCnt; | ||
105 | unsigned long long SendPIOAvailAddr; | ||
106 | unsigned long long TxIntMemBase; | ||
107 | unsigned long long TxIntMemSize; | ||
108 | unsigned long long ReservedSend[9]; | ||
109 | unsigned long long SendBufferError; | ||
110 | unsigned long long SendBufferErrorCONT1; | ||
111 | unsigned long long SendBufferErrorCONT2; | ||
112 | unsigned long long SendBufferErrorCONT3; | ||
113 | unsigned long long ReservedSBE[4]; | ||
114 | unsigned long long RcvHdrAddr0; | ||
115 | unsigned long long RcvHdrAddr1; | ||
116 | unsigned long long RcvHdrAddr2; | ||
117 | unsigned long long RcvHdrAddr3; | ||
118 | unsigned long long RcvHdrAddr4; | ||
119 | unsigned long long RcvHdrAddr5; | ||
120 | unsigned long long RcvHdrAddr6; | ||
121 | unsigned long long RcvHdrAddr7; | ||
122 | unsigned long long RcvHdrAddr8; | ||
123 | unsigned long long ReservedRHA[7]; | ||
124 | unsigned long long RcvHdrTailAddr0; | ||
125 | unsigned long long RcvHdrTailAddr1; | ||
126 | unsigned long long RcvHdrTailAddr2; | ||
127 | unsigned long long RcvHdrTailAddr3; | ||
128 | unsigned long long RcvHdrTailAddr4; | ||
129 | unsigned long long RcvHdrTailAddr5; | ||
130 | unsigned long long RcvHdrTailAddr6; | ||
131 | unsigned long long RcvHdrTailAddr7; | ||
132 | unsigned long long RcvHdrTailAddr8; | ||
133 | unsigned long long ReservedRHTA[7]; | ||
134 | unsigned long long Sync; /* Software only */ | ||
135 | unsigned long long Dump; /* Software only */ | ||
136 | unsigned long long SimVer; /* Software only */ | ||
137 | unsigned long long ReservedSW[5]; | ||
138 | unsigned long long SerdesConfig0; | ||
139 | unsigned long long SerdesConfig1; | ||
140 | unsigned long long SerdesStatus; | ||
141 | unsigned long long XGXSConfig; | ||
142 | unsigned long long ReservedSW2[4]; | ||
143 | }; | ||
144 | |||
145 | #define IPATH_KREG_OFFSET(field) (offsetof(struct \ | ||
146 | _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) | ||
147 | #define IPATH_CREG_OFFSET(field) (offsetof( \ | ||
148 | struct infinipath_counters, field) / sizeof(u64)) | ||
149 | |||
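
The two macros above are the whole trick behind register addressing: the chip layout is declared as a struct of u64 fields, and offsetof() divided by sizeof(u64) turns a field name into a 64-bit register index. A standalone sketch with made-up struct and register names:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative layout only -- not the real HT-400 register map. */
struct example_chip_regs {
	uint64_t Revision;	/* register index 0 */
	uint64_t Control;	/* register index 1 */
	uint64_t Scratch;	/* register index 2 */
};

#define EXAMPLE_REG_INDEX(field) \
	(offsetof(struct example_chip_regs, field) / sizeof(uint64_t))

int main(void)
{
	printf("Control is register %zu\n", EXAMPLE_REG_INDEX(Control));
	printf("Scratch is register %zu\n", EXAMPLE_REG_INDEX(Scratch));
	return 0;
}
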
150 | static const struct ipath_kregs ipath_ht_kregs = { | ||
151 | .kr_control = IPATH_KREG_OFFSET(Control), | ||
152 | .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase), | ||
153 | .kr_debugport = IPATH_KREG_OFFSET(DebugPort), | ||
154 | .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect), | ||
155 | .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear), | ||
156 | .kr_errormask = IPATH_KREG_OFFSET(ErrorMask), | ||
157 | .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus), | ||
158 | .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl), | ||
159 | .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus), | ||
160 | .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear), | ||
161 | .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask), | ||
162 | .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut), | ||
163 | .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus), | ||
164 | .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl), | ||
165 | .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear), | ||
166 | .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask), | ||
167 | .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus), | ||
168 | .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl), | ||
169 | .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus), | ||
170 | .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked), | ||
171 | .kr_intclear = IPATH_KREG_OFFSET(IntClear), | ||
172 | .kr_interruptconfig = IPATH_KREG_OFFSET(InterruptConfig), | ||
173 | .kr_intmask = IPATH_KREG_OFFSET(IntMask), | ||
174 | .kr_intstatus = IPATH_KREG_OFFSET(IntStatus), | ||
175 | .kr_mdio = IPATH_KREG_OFFSET(MDIO), | ||
176 | .kr_pagealign = IPATH_KREG_OFFSET(PageAlign), | ||
177 | .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey), | ||
178 | .kr_portcnt = IPATH_KREG_OFFSET(PortCnt), | ||
179 | .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP), | ||
180 | .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase), | ||
181 | .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize), | ||
182 | .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl), | ||
183 | .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase), | ||
184 | .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt), | ||
185 | .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt), | ||
186 | .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize), | ||
187 | .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize), | ||
188 | .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase), | ||
189 | .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize), | ||
190 | .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase), | ||
191 | .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt), | ||
192 | .kr_revision = IPATH_KREG_OFFSET(Revision), | ||
193 | .kr_scratch = IPATH_KREG_OFFSET(Scratch), | ||
194 | .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError), | ||
195 | .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl), | ||
196 | .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr), | ||
197 | .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase), | ||
198 | .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt), | ||
199 | .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize), | ||
200 | .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase), | ||
201 | .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase), | ||
202 | .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize), | ||
203 | .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase), | ||
204 | .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0), | ||
205 | .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1), | ||
206 | .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus), | ||
207 | .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig), | ||
208 | /* | ||
209 | * These should not be used directly via ipath_read_kreg64(), | ||
210 | * use them with ipath_read_kreg64_port(), | ||
211 | */ | ||
212 | .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), | ||
213 | .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0) | ||
214 | }; | ||
215 | |||
216 | static const struct ipath_cregs ipath_ht_cregs = { | ||
217 | .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt), | ||
218 | .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt), | ||
219 | .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt), | ||
220 | .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt), | ||
221 | .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt), | ||
222 | .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt), | ||
223 | .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt), | ||
224 | .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt), | ||
225 | .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt), | ||
226 | .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt), | ||
227 | .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt), | ||
228 | .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt), | ||
229 | /* calc from Reg_CounterRegBase + offset */ | ||
230 | .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt), | ||
231 | .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt), | ||
232 | .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt), | ||
233 | .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt), | ||
234 | .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt), | ||
235 | .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt), | ||
236 | .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt), | ||
237 | .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt), | ||
238 | .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt), | ||
239 | .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt), | ||
240 | .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt), | ||
241 | .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt), | ||
242 | .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt), | ||
243 | .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt), | ||
244 | .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt), | ||
245 | .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt), | ||
246 | .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt), | ||
247 | .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt), | ||
248 | .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt), | ||
249 | .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt), | ||
250 | .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt) | ||
251 | }; | ||
252 | |||
253 | /* kr_intstatus, kr_intclear, kr_intmask bits */ | ||
254 | #define INFINIPATH_I_RCVURG_MASK 0x1FF | ||
255 | #define INFINIPATH_I_RCVAVAIL_MASK 0x1FF | ||
256 | |||
257 | /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus bits */ | ||
258 | #define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0 | ||
259 | #define INFINIPATH_HWE_HTCMEMPARITYERR_MASK 0x3FFFFFULL | ||
260 | #define INFINIPATH_HWE_HTCLNKABYTE0CRCERR 0x0000000000800000ULL | ||
261 | #define INFINIPATH_HWE_HTCLNKABYTE1CRCERR 0x0000000001000000ULL | ||
262 | #define INFINIPATH_HWE_HTCLNKBBYTE0CRCERR 0x0000000002000000ULL | ||
263 | #define INFINIPATH_HWE_HTCLNKBBYTE1CRCERR 0x0000000004000000ULL | ||
264 | #define INFINIPATH_HWE_HTCMISCERR4 0x0000000008000000ULL | ||
265 | #define INFINIPATH_HWE_HTCMISCERR5 0x0000000010000000ULL | ||
266 | #define INFINIPATH_HWE_HTCMISCERR6 0x0000000020000000ULL | ||
267 | #define INFINIPATH_HWE_HTCMISCERR7 0x0000000040000000ULL | ||
268 | #define INFINIPATH_HWE_HTCBUSTREQPARITYERR 0x0000000080000000ULL | ||
269 | #define INFINIPATH_HWE_HTCBUSTRESPPARITYERR 0x0000000100000000ULL | ||
270 | #define INFINIPATH_HWE_HTCBUSIREQPARITYERR 0x0000000200000000ULL | ||
271 | #define INFINIPATH_HWE_COREPLL_FBSLIP 0x0080000000000000ULL | ||
272 | #define INFINIPATH_HWE_COREPLL_RFSLIP 0x0100000000000000ULL | ||
273 | #define INFINIPATH_HWE_HTBPLL_FBSLIP 0x0200000000000000ULL | ||
274 | #define INFINIPATH_HWE_HTBPLL_RFSLIP 0x0400000000000000ULL | ||
275 | #define INFINIPATH_HWE_HTAPLL_FBSLIP 0x0800000000000000ULL | ||
276 | #define INFINIPATH_HWE_HTAPLL_RFSLIP 0x1000000000000000ULL | ||
277 | #define INFINIPATH_HWE_SERDESPLLFAILED 0x2000000000000000ULL | ||
278 | |||
279 | /* kr_extstatus bits */ | ||
280 | #define INFINIPATH_EXTS_FREQSEL 0x2 | ||
281 | #define INFINIPATH_EXTS_SERDESSEL 0x4 | ||
282 | #define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 | ||
283 | #define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000 | ||
284 | |||
285 | /* | ||
286 | * masks and bits that are different in different chips, or present only | ||
287 | * in one | ||
288 | */ | ||
289 | static const ipath_err_t infinipath_hwe_htcmemparityerr_mask = | ||
290 | INFINIPATH_HWE_HTCMEMPARITYERR_MASK; | ||
291 | static const ipath_err_t infinipath_hwe_htcmemparityerr_shift = | ||
292 | INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT; | ||
293 | |||
294 | static const ipath_err_t infinipath_hwe_htclnkabyte0crcerr = | ||
295 | INFINIPATH_HWE_HTCLNKABYTE0CRCERR; | ||
296 | static const ipath_err_t infinipath_hwe_htclnkabyte1crcerr = | ||
297 | INFINIPATH_HWE_HTCLNKABYTE1CRCERR; | ||
298 | static const ipath_err_t infinipath_hwe_htclnkbbyte0crcerr = | ||
299 | INFINIPATH_HWE_HTCLNKBBYTE0CRCERR; | ||
300 | static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr = | ||
301 | INFINIPATH_HWE_HTCLNKBBYTE1CRCERR; | ||
302 | |||
303 | #define _IPATH_GPIO_SDA_NUM 1 | ||
304 | #define _IPATH_GPIO_SCL_NUM 0 | ||
305 | |||
306 | #define IPATH_GPIO_SDA \ | ||
307 | (1ULL << (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) | ||
308 | #define IPATH_GPIO_SCL \ | ||
309 | (1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) | ||
310 | |||
311 | /* keep the code below somewhat more readable; not used elsewhere */ | ||
312 | #define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \ | ||
313 | infinipath_hwe_htclnkabyte1crcerr) | ||
314 | #define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr | \ | ||
315 | infinipath_hwe_htclnkbbyte1crcerr) | ||
316 | #define _IPATH_HTLANE0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \ | ||
317 | infinipath_hwe_htclnkbbyte0crcerr) | ||
318 | #define _IPATH_HTLANE1_CRCBITS (infinipath_hwe_htclnkabyte1crcerr | \ | ||
319 | infinipath_hwe_htclnkbbyte1crcerr) | ||
320 | |||
321 | static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs, | ||
322 | char *msg, size_t msgl) | ||
323 | { | ||
324 | char bitsmsg[64]; | ||
325 | ipath_err_t crcbits = hwerrs & | ||
326 | (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS); | ||
327 | /* don't check if 8bit HT */ | ||
328 | if (dd->ipath_flags & IPATH_8BIT_IN_HT0) | ||
329 | crcbits &= ~infinipath_hwe_htclnkabyte1crcerr; | ||
330 | /* don't check if 8bit HT */ | ||
331 | if (dd->ipath_flags & IPATH_8BIT_IN_HT1) | ||
332 | crcbits &= ~infinipath_hwe_htclnkbbyte1crcerr; | ||
333 | /* | ||
334 | * We'll want to ignore link errors on a link that is | ||
335 | * not in use, if any.  For now, complain about both. | ||
336 | */ | ||
337 | if (crcbits) { | ||
338 | u16 ctrl0, ctrl1; | ||
339 | snprintf(bitsmsg, sizeof bitsmsg, | ||
340 | "[HT%s lane %s CRC (%llx); ignore till reload]", | ||
341 | !(crcbits & _IPATH_HTLINK1_CRCBITS) ? | ||
342 | "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS) | ||
343 | ? "1 (B)" : "0+1 (A+B)"), | ||
344 | !(crcbits & _IPATH_HTLANE1_CRCBITS) ? "0" | ||
345 | : (!(crcbits & _IPATH_HTLANE0_CRCBITS) ? "1" : | ||
346 | "0+1"), (unsigned long long) crcbits); | ||
347 | strlcat(msg, bitsmsg, msgl); | ||
348 | |||
349 | /* | ||
350 | * print extra info for debugging. slave/primary | ||
351 | * config word 4, 8 (link control 0, 1) | ||
352 | */ | ||
353 | |||
354 | if (pci_read_config_word(dd->pcidev, | ||
355 | dd->ipath_ht_slave_off + 0x4, | ||
356 | &ctrl0)) | ||
357 | dev_info(&dd->pcidev->dev, "Couldn't read " | ||
358 | "linkctrl0 of slave/primary " | ||
359 | "config block\n"); | ||
360 | else if (!(ctrl0 & 1 << 6)) | ||
361 | /* not if EOC bit set */ | ||
362 | ipath_dbg("HT linkctrl0 0x%x%s%s\n", ctrl0, | ||
363 | ((ctrl0 >> 8) & 7) ? " CRC" : "", | ||
364 | ((ctrl0 >> 4) & 1) ? "linkfail" : | ||
365 | ""); | ||
366 | if (pci_read_config_word(dd->pcidev, | ||
367 | dd->ipath_ht_slave_off + 0x8, | ||
368 | &ctrl1)) | ||
369 | dev_info(&dd->pcidev->dev, "Couldn't read " | ||
370 | "linkctrl1 of slave/primary " | ||
371 | "config block\n"); | ||
372 | else if (!(ctrl1 & 1 << 6)) | ||
373 | /* not if EOC bit set */ | ||
374 | ipath_dbg("HT linkctrl1 0x%x%s%s\n", ctrl1, | ||
375 | ((ctrl1 >> 8) & 7) ? " CRC" : "", | ||
376 | ((ctrl1 >> 4) & 1) ? "linkfail" : | ||
377 | ""); | ||
378 | |||
379 | /* disable until driver reloaded */ | ||
380 | dd->ipath_hwerrmask &= ~crcbits; | ||
381 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, | ||
382 | dd->ipath_hwerrmask); | ||
383 | ipath_dbg("HT crc errs: %s\n", msg); | ||
384 | } else | ||
385 | ipath_dbg("ignoring HT crc errors 0x%llx, " | ||
386 | "not in use\n", (unsigned long long) | ||
387 | (hwerrs & (_IPATH_HTLINK0_CRCBITS | | ||
388 | _IPATH_HTLINK1_CRCBITS))); | ||
389 | } | ||
390 | |||
391 | /** | ||
392 | * ipath_ht_handle_hwerrors - display hardware errors | ||
393 | * @dd: the infinipath device | ||
394 | * @msg: the output buffer | ||
395 | * @msgl: the size of the output buffer | ||
396 | * | ||
397 | * Most hardware errors are catastrophic, but for right now we | ||
398 | * print them and continue.  We reuse the same message buffer as | ||
399 | * ipath_handle_errors() to avoid excessive stack usage. | ||
402 | */ | ||
403 | static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, | ||
404 | size_t msgl) | ||
405 | { | ||
406 | ipath_err_t hwerrs; | ||
407 | u32 bits, ctrl; | ||
408 | int isfatal = 0; | ||
409 | char bitsmsg[64]; | ||
410 | |||
411 | hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); | ||
412 | |||
413 | if (!hwerrs) { | ||
414 | ipath_cdbg(VERBOSE, "Called but no hardware errors set\n"); | ||
415 | /* | ||
416 | * Better than printing confusing messages; this seems to be | ||
417 | * related to clearing the CRC error, or the PLL error, during | ||
418 | * init. | ||
419 | */ | ||
420 | goto bail; | ||
421 | } else if (hwerrs == -1LL) { | ||
422 | ipath_dev_err(dd, "Read of hardware error status failed " | ||
423 | "(all bits set); ignoring\n"); | ||
424 | goto bail; | ||
425 | } | ||
426 | ipath_stats.sps_hwerrs++; | ||
427 | |||
428 | /* Always clear the error status register, except MEMBISTFAIL, | ||
429 | * regardless of whether we continue or stop using the chip. | ||
430 | * We want that set so we know it failed, even across driver reload. | ||
431 | * We'll still ignore it in the hwerrmask. We do this partly for | ||
432 | * diagnostics, but also for support */ | ||
433 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, | ||
434 | hwerrs&~INFINIPATH_HWE_MEMBISTFAILED); | ||
435 | |||
436 | hwerrs &= dd->ipath_hwerrmask; | ||
437 | |||
438 | /* | ||
439 | * make sure we get this much out, unless told to be quiet, | ||
440 | * or it's occurred within the last 5 seconds | ||
441 | */ | ||
442 | if ((hwerrs & ~dd->ipath_lasthwerror) || | ||
443 | (ipath_debug & __IPATH_VERBDBG)) | ||
444 | dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " | ||
445 | "(cleared)\n", (unsigned long long) hwerrs); | ||
446 | dd->ipath_lasthwerror |= hwerrs; | ||
447 | |||
448 | if (hwerrs & ~infinipath_hwe_bitsextant) | ||
449 | ipath_dev_err(dd, "hwerror interrupt with unknown errors " | ||
450 | "%llx set\n", (unsigned long long) | ||
451 | (hwerrs & ~infinipath_hwe_bitsextant)); | ||
452 | |||
453 | ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); | ||
454 | if (ctrl & INFINIPATH_C_FREEZEMODE) { | ||
455 | if (hwerrs) { | ||
456 | /* | ||
457 | * if any set that we aren't ignoring; only | ||
458 | * make the complaint once, in case it's stuck | ||
459 | * or recurring, and we get here multiple | ||
460 | * times. | ||
461 | */ | ||
462 | if (dd->ipath_flags & IPATH_INITTED) { | ||
463 | ipath_dev_err(dd, "Fatal Error (freeze " | ||
464 | "mode), no longer usable\n"); | ||
465 | isfatal = 1; | ||
466 | } | ||
467 | *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; | ||
468 | /* mark as having had error */ | ||
469 | *dd->ipath_statusp |= IPATH_STATUS_HWERROR; | ||
470 | /* | ||
471 | * mark as not usable, at a minimum until driver | ||
472 | * is reloaded, probably until reboot, since no | ||
473 | * other reset is possible. | ||
474 | */ | ||
475 | dd->ipath_flags &= ~IPATH_INITTED; | ||
476 | } else { | ||
477 | ipath_dbg("Clearing freezemode on ignored hardware " | ||
478 | "error\n"); | ||
479 | ctrl &= ~INFINIPATH_C_FREEZEMODE; | ||
480 | ipath_write_kreg(dd, dd->ipath_kregs->kr_control, | ||
481 | ctrl); | ||
482 | } | ||
483 | } | ||
484 | |||
485 | *msg = '\0'; | ||
486 | |||
487 | /* | ||
488 | * may someday want to decode into which bits are which | ||
489 | * functional area for parity errors, etc. | ||
490 | */ | ||
491 | if (hwerrs & (infinipath_hwe_htcmemparityerr_mask | ||
492 | << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT)) { | ||
493 | bits = (u32) ((hwerrs >> | ||
494 | INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) & | ||
495 | INFINIPATH_HWE_HTCMEMPARITYERR_MASK); | ||
496 | snprintf(bitsmsg, sizeof bitsmsg, "[HTC Parity Errs %x] ", | ||
497 | bits); | ||
498 | strlcat(msg, bitsmsg, msgl); | ||
499 | } | ||
500 | if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK | ||
501 | << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) { | ||
502 | bits = (u32) ((hwerrs >> | ||
503 | INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) & | ||
504 | INFINIPATH_HWE_RXEMEMPARITYERR_MASK); | ||
505 | snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ", | ||
506 | bits); | ||
507 | strlcat(msg, bitsmsg, msgl); | ||
508 | } | ||
509 | if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK | ||
510 | << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { | ||
511 | bits = (u32) ((hwerrs >> | ||
512 | INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) & | ||
513 | INFINIPATH_HWE_TXEMEMPARITYERR_MASK); | ||
514 | snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ", | ||
515 | bits); | ||
516 | strlcat(msg, bitsmsg, msgl); | ||
517 | } | ||
518 | if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR) | ||
519 | strlcat(msg, "[IB2IPATH Parity]", msgl); | ||
520 | if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR) | ||
521 | strlcat(msg, "[IPATH2IB Parity]", msgl); | ||
522 | if (hwerrs & INFINIPATH_HWE_HTCBUSIREQPARITYERR) | ||
523 | strlcat(msg, "[HTC Ireq Parity]", msgl); | ||
524 | if (hwerrs & INFINIPATH_HWE_HTCBUSTREQPARITYERR) | ||
525 | strlcat(msg, "[HTC Treq Parity]", msgl); | ||
526 | if (hwerrs & INFINIPATH_HWE_HTCBUSTRESPPARITYERR) | ||
527 | strlcat(msg, "[HTC Tresp Parity]", msgl); | ||
528 | |||
529 | if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS)) | ||
530 | hwerr_crcbits(dd, hwerrs, msg, msgl); | ||
531 | |||
532 | if (hwerrs & INFINIPATH_HWE_HTCMISCERR5) | ||
533 | strlcat(msg, "[HT core Misc5]", msgl); | ||
534 | if (hwerrs & INFINIPATH_HWE_HTCMISCERR6) | ||
535 | strlcat(msg, "[HT core Misc6]", msgl); | ||
536 | if (hwerrs & INFINIPATH_HWE_HTCMISCERR7) | ||
537 | strlcat(msg, "[HT core Misc7]", msgl); | ||
538 | if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { | ||
539 | strlcat(msg, "[Memory BIST test failed, HT-400 unusable]", | ||
540 | msgl); | ||
541 | /* ignore from now on, so disable until driver reloaded */ | ||
542 | dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED; | ||
543 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, | ||
544 | dd->ipath_hwerrmask); | ||
545 | } | ||
546 | #define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \ | ||
547 | INFINIPATH_HWE_COREPLL_RFSLIP | \ | ||
548 | INFINIPATH_HWE_HTBPLL_FBSLIP | \ | ||
549 | INFINIPATH_HWE_HTBPLL_RFSLIP | \ | ||
550 | INFINIPATH_HWE_HTAPLL_FBSLIP | \ | ||
551 | INFINIPATH_HWE_HTAPLL_RFSLIP) | ||
552 | |||
553 | if (hwerrs & _IPATH_PLL_FAIL) { | ||
554 | snprintf(bitsmsg, sizeof bitsmsg, | ||
555 | "[PLL failed (%llx), HT-400 unusable]", | ||
556 | (unsigned long long) (hwerrs & _IPATH_PLL_FAIL)); | ||
557 | strlcat(msg, bitsmsg, msgl); | ||
558 | /* ignore from now on, so disable until driver reloaded */ | ||
559 | dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL); | ||
560 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, | ||
561 | dd->ipath_hwerrmask); | ||
562 | } | ||
563 | |||
564 | if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) { | ||
565 | /* | ||
566 | * If it occurs, it is left masked since the external | ||
567 | * interface is unused. | ||
568 | */ | ||
569 | dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED; | ||
570 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, | ||
571 | dd->ipath_hwerrmask); | ||
572 | } | ||
573 | |||
574 | if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR) | ||
575 | strlcat(msg, "[Rx Dsync]", msgl); | ||
576 | if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) | ||
577 | strlcat(msg, "[SerDes PLL]", msgl); | ||
578 | |||
579 | ipath_dev_err(dd, "%s hardware error\n", msg); | ||
580 | if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) | ||
581 | /* | ||
582 | * for status file; if no trailing brace is copied, | ||
583 | * we'll know it was truncated. | ||
584 | */ | ||
585 | snprintf(dd->ipath_freezemsg, | ||
586 | dd->ipath_freezelen, "{%s}", msg); | ||
587 | |||
588 | bail:; | ||
589 | } | ||
590 | |||
591 | /** | ||
592 | * ipath_ht_boardname - fill in the board name | ||
593 | * @dd: the infinipath device | ||
594 | * @name: the output buffer | ||
595 | * @namelen: the size of the output buffer | ||
596 | * | ||
597 | * fill in the board name, based on the board revision register | ||
598 | */ | ||
599 | static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, | ||
600 | size_t namelen) | ||
601 | { | ||
602 | char *n = NULL; | ||
603 | u8 boardrev = dd->ipath_boardrev; | ||
604 | int ret; | ||
605 | |||
606 | switch (boardrev) { | ||
607 | case 4: /* Ponderosa is one of the bringup boards */ | ||
608 | n = "Ponderosa"; | ||
609 | break; | ||
610 | case 5: /* HT-460 original production board */ | ||
611 | n = "InfiniPath_HT-460"; | ||
612 | break; | ||
613 | case 6: | ||
614 | n = "OEM_Board_3"; | ||
615 | break; | ||
616 | case 7: | ||
617 | /* HT-460 small form factor production board */ | ||
618 | n = "InfiniPath_HT-465"; | ||
619 | break; | ||
620 | case 8: | ||
621 | n = "LS/X-1"; | ||
622 | break; | ||
623 | case 9: /* Comstock bringup test board */ | ||
624 | n = "Comstock"; | ||
625 | break; | ||
626 | case 10: | ||
627 | n = "OEM_Board_2"; | ||
628 | break; | ||
629 | case 11: | ||
630 | n = "InfiniPath_HT-470"; | ||
631 | break; | ||
632 | case 12: | ||
633 | n = "OEM_Board_4"; | ||
634 | break; | ||
635 | default: /* don't know, just print the number */ | ||
636 | ipath_dev_err(dd, "Don't yet know about board " | ||
637 | "with ID %u\n", boardrev); | ||
638 | snprintf(name, namelen, "Unknown_InfiniPath_HT-4xx_%u", | ||
639 | boardrev); | ||
640 | break; | ||
641 | } | ||
642 | if (n) | ||
643 | snprintf(name, namelen, "%s", n); | ||
644 | |||
645 | if (dd->ipath_majrev != 3 || dd->ipath_minrev != 2) { | ||
646 | /* | ||
647 | * This version of the driver only supports the HT-400 | ||
648 | * Rev 3.2 | ||
649 | */ | ||
650 | ipath_dev_err(dd, | ||
651 | "Unsupported HT-400 revision %u.%u!\n", | ||
652 | dd->ipath_majrev, dd->ipath_minrev); | ||
653 | ret = 1; | ||
654 | goto bail; | ||
655 | } | ||
656 | /* | ||
657 | * pkt/word counters are 32 bit, and therefore wrap fast enough | ||
658 | * that we snapshot them from a timer, and maintain 64 bit shadow | ||
659 | * copies | ||
660 | */ | ||
661 | dd->ipath_flags |= IPATH_32BITCOUNTERS; | ||
662 | if (dd->ipath_htspeed != 800) | ||
663 | ipath_dev_err(dd, | ||
664 | "Incorrectly configured for HT @ %uMHz\n", | ||
665 | dd->ipath_htspeed); | ||
666 | if (dd->ipath_boardrev == 7 || dd->ipath_boardrev == 11 || | ||
667 | dd->ipath_boardrev == 6) | ||
668 | dd->ipath_flags |= IPATH_GPIO_INTR; | ||
669 | else | ||
670 | dd->ipath_flags |= IPATH_POLL_RX_INTR; | ||
671 | if (dd->ipath_boardrev == 8) { /* LS/X-1 */ | ||
672 | u64 val; | ||
673 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); | ||
674 | if (val & INFINIPATH_EXTS_SERDESSEL) { | ||
675 | /* | ||
676 | * hardware disabled | ||
677 | * | ||
678 | * This means that the chip is hardware disabled, | ||
679 | * and will not be able to bring up the link, | ||
680 | * in any case. We special case this and abort | ||
681 | * early, to avoid later messages. We also set | ||
682 | * the DISABLED status bit | ||
683 | */ | ||
684 | ipath_dbg("Unit %u is hardware-disabled\n", | ||
685 | dd->ipath_unit); | ||
686 | *dd->ipath_statusp |= IPATH_STATUS_DISABLED; | ||
687 | /* this value is handled differently */ | ||
688 | ret = 2; | ||
689 | goto bail; | ||
690 | } | ||
691 | } | ||
692 | ret = 0; | ||
693 | |||
694 | bail: | ||
695 | return ret; | ||
696 | } | ||
697 | |||
698 | static void ipath_check_htlink(struct ipath_devdata *dd) | ||
699 | { | ||
700 | u8 linkerr, link_off, i; | ||
701 | |||
702 | for (i = 0; i < 2; i++) { | ||
703 | link_off = dd->ipath_ht_slave_off + i * 4 + 0xd; | ||
704 | if (pci_read_config_byte(dd->pcidev, link_off, &linkerr)) | ||
705 | dev_info(&dd->pcidev->dev, "Couldn't read " | ||
706 | "linkerror%d of HT slave/primary block\n", | ||
707 | i); | ||
708 | else if (linkerr & 0xf0) { | ||
709 | ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, " | ||
710 | "clearing\n", i, linkerr >> 4); | ||
711 | /* | ||
712 | * writing the linkerr bits that are set should | ||
713 | * clear them | ||
714 | */ | ||
715 | if (pci_write_config_byte(dd->pcidev, link_off, | ||
716 | linkerr)) | ||
717 | ipath_dbg("Failed write to clear HT " | ||
718 | "linkerror%d\n", i); | ||
719 | if (pci_read_config_byte(dd->pcidev, link_off, | ||
720 | &linkerr)) | ||
721 | dev_info(&dd->pcidev->dev, | ||
722 | "Couldn't reread linkerror%d of " | ||
723 | "HT slave/primary block\n", i); | ||
724 | else if (linkerr & 0xf0) | ||
725 | dev_info(&dd->pcidev->dev, | ||
726 | "HT linkerror%d bits 0x%x " | ||
727 | "couldn't be cleared\n", | ||
728 | i, linkerr >> 4); | ||
729 | } | ||
730 | } | ||
731 | } | ||
732 | |||
733 | static int ipath_setup_ht_reset(struct ipath_devdata *dd) | ||
734 | { | ||
735 | ipath_dbg("No reset possible for HT-400\n"); | ||
736 | return 0; | ||
737 | } | ||
738 | |||
739 | #define HT_CAPABILITY_ID 0x08 /* HT capabilities not defined in kernel */ | ||
740 | #define HT_INTR_DISC_CONFIG 0x80 /* HT interrupt and discovery cap */ | ||
741 | #define HT_INTR_REG_INDEX 2 /* intconfig requires indirect accesses */ | ||
742 | |||
743 | /* | ||
744 | * Bits 13-15 of command==0 identify the slave/primary block.  Clear any | ||
745 | * HT CRC errors.  We only bother to do this at load time, because it's OK | ||
746 | * if it happened before we were loaded (first time after boot/reset), | ||
747 | * but any time after that, it's fatal anyway.  Also, don't check for | ||
748 | * upper byte errors if we are in 8 bit mode, so figure out | ||
749 | * our width. For now, at least, also complain if it's 8 bit. | ||
750 | */ | ||
751 | static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev, | ||
752 | int pos, u8 cap_type) | ||
753 | { | ||
754 | u8 linkwidth = 0, linkerr, link_a_b_off, link_off; | ||
755 | u16 linkctrl = 0; | ||
756 | int i; | ||
757 | |||
758 | dd->ipath_ht_slave_off = pos; | ||
759 | /* command word, master_host bit */ | ||
760 | /* master host || slave */ | ||
761 | if ((cap_type >> 2) & 1) | ||
762 | link_a_b_off = 4; | ||
763 | else | ||
764 | link_a_b_off = 0; | ||
765 | ipath_cdbg(VERBOSE, "HT%u (Link %c) connected to processor\n", | ||
766 | link_a_b_off ? 1 : 0, | ||
767 | link_a_b_off ? 'B' : 'A'); | ||
768 | |||
769 | link_a_b_off += pos; | ||
770 | |||
771 | /* | ||
772 | * check both link control registers; clear both HT CRC sets if | ||
773 | * necessary. | ||
774 | */ | ||
775 | for (i = 0; i < 2; i++) { | ||
776 | link_off = pos + i * 4 + 0x4; | ||
777 | if (pci_read_config_word(pdev, link_off, &linkctrl)) | ||
778 | ipath_dev_err(dd, "Couldn't read HT link control%d " | ||
779 | "register\n", i); | ||
780 | else if (linkctrl & (0xf << 8)) { | ||
781 | ipath_cdbg(VERBOSE, "Clear linkctrl%d CRC Error " | ||
782 | "bits %x\n", i, linkctrl & (0xf << 8)); | ||
783 | /* | ||
784 | * now write them back to clear the error. | ||
785 | */ | ||
786 | pci_write_config_byte(pdev, link_off, | ||
787 | linkctrl & (0xf << 8)); | ||
788 | } | ||
789 | } | ||
790 | |||
791 | /* | ||
792 | * As with HT CRC bits, same for protocol errors that might occur | ||
793 | * during boot. | ||
794 | */ | ||
795 | for (i = 0; i < 2; i++) { | ||
796 | link_off = pos + i * 4 + 0xd; | ||
797 | if (pci_read_config_byte(pdev, link_off, &linkerr)) | ||
798 | dev_info(&pdev->dev, "Couldn't read linkerror%d " | ||
799 | "of HT slave/primary block\n", i); | ||
800 | else if (linkerr & 0xf0) { | ||
801 | ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, " | ||
802 | "clearing\n", i, linkerr >> 4); | ||
803 | /* | ||
804 | * writing the linkerr bits that are set will clear | ||
805 | * them | ||
806 | */ | ||
807 | if (pci_write_config_byte | ||
808 | (pdev, link_off, linkerr)) | ||
809 | ipath_dbg("Failed write to clear HT " | ||
810 | "linkerror%d\n", i); | ||
811 | if (pci_read_config_byte(pdev, link_off, &linkerr)) | ||
812 | dev_info(&pdev->dev, "Couldn't reread " | ||
813 | "linkerror%d of HT slave/primary " | ||
814 | "block\n", i); | ||
815 | else if (linkerr & 0xf0) | ||
816 | dev_info(&pdev->dev, "HT linkerror%d bits " | ||
817 | "0x%x couldn't be cleared\n", | ||
818 | i, linkerr >> 4); | ||
819 | } | ||
820 | } | ||
821 | |||
822 | /* | ||
823 | * this is just for our link to the host, not devices connected | ||
824 | * through tunnel. | ||
825 | */ | ||
826 | |||
827 | if (pci_read_config_byte(pdev, link_a_b_off + 7, &linkwidth)) | ||
828 | ipath_dev_err(dd, "Couldn't read HT link width " | ||
829 | "config register\n"); | ||
830 | else { | ||
831 | u32 width; | ||
832 | switch (linkwidth & 7) { | ||
833 | case 5: | ||
834 | width = 4; | ||
835 | break; | ||
836 | case 4: | ||
837 | width = 2; | ||
838 | break; | ||
839 | case 3: | ||
840 | width = 32; | ||
841 | break; | ||
842 | case 1: | ||
843 | width = 16; | ||
844 | break; | ||
845 | case 0: | ||
846 | default: /* if wrong, assume 8 bit */ | ||
847 | width = 8; | ||
848 | break; | ||
849 | } | ||
850 | |||
851 | dd->ipath_htwidth = width; | ||
852 | |||
853 | if (linkwidth != 0x11) { | ||
854 | ipath_dev_err(dd, "Not configured for 16 bit HT " | ||
855 | "(%x)\n", linkwidth); | ||
856 | if (!(linkwidth & 0xf)) { | ||
857 | ipath_dbg("Will ignore HT lane1 errors\n"); | ||
858 | dd->ipath_flags |= IPATH_8BIT_IN_HT0; | ||
859 | } | ||
860 | } | ||
861 | } | ||
862 | |||
863 | /* | ||
864 | * this is just for our link to the host, not devices connected | ||
865 | * through tunnel. | ||
866 | */ | ||
867 | if (pci_read_config_byte(pdev, link_a_b_off + 0xd, &linkwidth)) | ||
868 | ipath_dev_err(dd, "Couldn't read HT link frequency " | ||
869 | "config register\n"); | ||
870 | else { | ||
871 | u32 speed; | ||
872 | switch (linkwidth & 0xf) { | ||
873 | case 6: | ||
874 | speed = 1000; | ||
875 | break; | ||
876 | case 5: | ||
877 | speed = 800; | ||
878 | break; | ||
879 | case 4: | ||
880 | speed = 600; | ||
881 | break; | ||
882 | case 3: | ||
883 | speed = 500; | ||
884 | break; | ||
885 | case 2: | ||
886 | speed = 400; | ||
887 | break; | ||
888 | case 1: | ||
889 | speed = 300; | ||
890 | break; | ||
891 | default: | ||
892 | /* | ||
893 | * assume reserved and vendor-specific are 200... | ||
894 | */ | ||
895 | case 0: | ||
896 | speed = 200; | ||
897 | break; | ||
898 | } | ||
899 | dd->ipath_htspeed = speed; | ||
900 | } | ||
901 | } | ||
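
Editor's note: the width and frequency decoding above can be summarized as two small lookup tables. The sketch below is an illustrative userspace rendering of the same encodings, not driver code; the table contents follow the switch statements above.

#include <stdint.h>
#include <stdio.h>

/* HT LinkWidthIn encoding (low 3 bits) -> width in bits; unknown values
 * are treated as 8, matching the default case above. */
static const uint32_t ht_width_bits[8] = { 8, 16, 8, 32, 2, 4, 8, 8 };

/* HT LinkFreq encoding (low 4 bits) -> MHz; reserved and vendor-specific
 * encodings are treated as 200 MHz, as in the default case above. */
static const uint32_t ht_freq_mhz[16] = {
	200, 300, 400, 500, 600, 800, 1000,
	200, 200, 200, 200, 200, 200, 200, 200, 200
};

int main(void)
{
	uint8_t widthreg = 0x11;	/* 16 bits in each direction */
	uint8_t freqreg = 0x06;		/* 1000 MHz */

	printf("width %u bits, freq %u MHz\n",
	       ht_width_bits[widthreg & 7], ht_freq_mhz[freqreg & 0xf]);
	return 0;
}
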
902 | |||
903 | static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev, | ||
904 | int pos) | ||
905 | { | ||
906 | u32 int_handler_addr_lower; | ||
907 | u32 int_handler_addr_upper; | ||
908 | u64 ihandler; | ||
909 | u32 intvec; | ||
910 | |||
911 | /* use indirection register to get the intr handler */ | ||
912 | pci_write_config_byte(pdev, pos + HT_INTR_REG_INDEX, 0x10); | ||
913 | pci_read_config_dword(pdev, pos + 4, &int_handler_addr_lower); | ||
914 | pci_write_config_byte(pdev, pos + HT_INTR_REG_INDEX, 0x11); | ||
915 | pci_read_config_dword(pdev, pos + 4, &int_handler_addr_upper); | ||
916 | |||
917 | ihandler = (u64) int_handler_addr_lower | | ||
918 | ((u64) int_handler_addr_upper << 32); | ||
919 | |||
920 | /* | ||
921 | * kernels with CONFIG_PCI_MSI set the vector in the irq field of | ||
922 | * struct pci_dev, so we use that to program the HT-400 internal | ||
923 | * interrupt register (not config space) with that value. The BIOS | ||
924 | * must still have done the basic MSI setup. | ||
925 | */ | ||
926 | intvec = pdev->irq; | ||
927 | /* | ||
928 | * clear any vector bits there; normally not set but we'll overload | ||
929 | * this for some debug purposes (setting the HTC debug register | ||
930 | * value from software, rather than GPIOs), so it might be set on a | ||
931 | * driver reload. | ||
932 | */ | ||
933 | ihandler &= ~0xff0000; | ||
934 | /* x86 vector goes in intrinfo[23:16] */ | ||
935 | ihandler |= intvec << 16; | ||
936 | ipath_cdbg(VERBOSE, "ihandler lower %x, upper %x, intvec %x, " | ||
937 | "interruptconfig %llx\n", int_handler_addr_lower, | ||
938 | int_handler_addr_upper, intvec, | ||
939 | (unsigned long long) ihandler); | ||
940 | |||
941 | /* can't program yet, so save for interrupt setup */ | ||
942 | dd->ipath_intconfig = ihandler; | ||
943 | /* keep going, so we find link control stuff also */ | ||
944 | |||
945 | return ihandler != 0; | ||
946 | } | ||
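
As a hedged illustration of the vector packing described in the comments above, the helper below (hypothetical, not part of the driver) keeps the BIOS-programmed handler address but replaces intrinfo bits 23:16 with the MSI vector:

#include <stdint.h>

/* Pack an x86 interrupt vector into bits 23:16 of the interruptconfig
 * value, clearing any stale vector first (illustrative sketch only). */
static uint64_t pack_intconfig(uint64_t handler_addr, uint32_t intvec)
{
	uint64_t ihandler = handler_addr & ~0xff0000ULL;

	return ihandler | ((uint64_t)(intvec & 0xff) << 16);
}
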
947 | |||
948 | /** | ||
949 | * ipath_setup_ht_config - setup the interruptconfig register | ||
950 | * @dd: the infinipath device | ||
951 | * @pdev: the PCI device | ||
952 | * | ||
953 | * setup the interruptconfig register from the HT config info. | ||
954 | * Also clear CRC errors in HT linkcontrol, if necessary. | ||
955 | * This is done only for the real hardware. It is done before | ||
956 | * chip address space is initted, so can't touch infinipath registers | ||
957 | */ | ||
958 | static int ipath_setup_ht_config(struct ipath_devdata *dd, | ||
959 | struct pci_dev *pdev) | ||
960 | { | ||
961 | int pos, ret = 0; | ||
962 | int ihandler = 0; | ||
963 | |||
964 | /* | ||
965 | * Read the capability info to find the interrupt info, and also | ||
966 | * handle clearing CRC errors in linkctrl register if necessary. We | ||
967 | * do this early, before we ever enable errors or hardware errors, | ||
968 | * mostly to avoid causing the chip to enter freeze mode. | ||
969 | */ | ||
970 | pos = pci_find_capability(pdev, HT_CAPABILITY_ID); | ||
971 | if (!pos) { | ||
972 | ipath_dev_err(dd, "Couldn't find HyperTransport " | ||
973 | "capability; no interrupts\n"); | ||
974 | ret = -ENODEV; | ||
975 | goto bail; | ||
976 | } | ||
977 | do { | ||
978 | u8 cap_type; | ||
979 | |||
980 | /* the HT capability type byte is 3 bytes after the | ||
981 | * capability byte. | ||
982 | */ | ||
983 | if (pci_read_config_byte(pdev, pos + 3, &cap_type)) { | ||
984 | dev_info(&pdev->dev, "Couldn't read config " | ||
985 | "command @ %d\n", pos); | ||
986 | continue; | ||
987 | } | ||
988 | if (!(cap_type & 0xE0)) | ||
989 | slave_or_pri_blk(dd, pdev, pos, cap_type); | ||
990 | else if (cap_type == HT_INTR_DISC_CONFIG) | ||
991 | ihandler = set_int_handler(dd, pdev, pos); | ||
992 | } while ((pos = pci_find_next_capability(pdev, pos, | ||
993 | HT_CAPABILITY_ID))); | ||
994 | |||
995 | if (!ihandler) { | ||
996 | ipath_dev_err(dd, "Couldn't find interrupt handler in " | ||
997 | "config space\n"); | ||
998 | ret = -ENODEV; | ||
999 | } | ||
1000 | |||
1001 | bail: | ||
1002 | return ret; | ||
1003 | } | ||
1004 | |||
1005 | /** | ||
1006 | * ipath_setup_ht_cleanup - clean up any per-chip chip-specific stuff | ||
1007 | * @dd: the infinipath device | ||
1008 | * | ||
1009 | * Called during driver unload. | ||
1010 | * This is currently a nop for the HT-400; it is not a nop for all chips | ||
1011 | */ | ||
1012 | static void ipath_setup_ht_cleanup(struct ipath_devdata *dd) | ||
1013 | { | ||
1014 | } | ||
1015 | |||
1016 | /** | ||
1017 | * ipath_setup_ht_setextled - set the state of the two external LEDs | ||
1018 | * @dd: the infinipath device | ||
1019 | * @lst: the L state | ||
1020 | * @ltst: the LT state | ||
1021 | * | ||
1022 | * Set the state of the two external LEDs, to indicate physical and | ||
1023 | * logical state of IB link. For this chip (at least with recommended | ||
1024 | * board pinouts), LED1 is Green (physical state), and LED2 is Yellow | ||
1025 | * (logical state) | ||
1026 | * | ||
1027 | * Note: We try to match the Mellanox HCA LED behavior as best | ||
1028 | * we can. Green indicates physical link state is OK (something is | ||
1029 | * plugged in, and we can train). | ||
1030 | * Amber indicates the link is logically up (ACTIVE). | ||
1031 | * Mellanox further blinks the amber LED to indicate data packet | ||
1032 | * activity, but we have no hardware support for that, so it would | ||
1033 | * require waking up every 10-20 msecs and checking the counters | ||
1034 | * on the chip, and then turning the LED off if appropriate. That's | ||
1035 | * visible overhead, so not something we will do. | ||
1036 | * | ||
1037 | */ | ||
1038 | static void ipath_setup_ht_setextled(struct ipath_devdata *dd, | ||
1039 | u64 lst, u64 ltst) | ||
1040 | { | ||
1041 | u64 extctl; | ||
1042 | |||
1043 | /* the diags use the LED to indicate diag info, so we leave | ||
1044 | * the external LED alone when the diags are running */ | ||
1045 | if (ipath_diag_inuse) | ||
1046 | return; | ||
1047 | |||
1048 | /* | ||
1049 | * start by setting both LED control bits to off, then turn | ||
1050 | * on the appropriate bit(s). | ||
1051 | */ | ||
1052 | if (dd->ipath_boardrev == 8) { /* LS/X-1 uses different pins */ | ||
1053 | /* | ||
1054 | * major difference is that INFINIPATH_EXTC_LEDGBLERR_OFF | ||
1055 | * is inverted, because it is normally used to indicate | ||
1056 | * a hardware fault at reset, if there were errors | ||
1057 | */ | ||
1058 | extctl = (dd->ipath_extctrl & ~INFINIPATH_EXTC_LEDGBLOK_ON) | ||
1059 | | INFINIPATH_EXTC_LEDGBLERR_OFF; | ||
1060 | if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP) | ||
1061 | extctl &= ~INFINIPATH_EXTC_LEDGBLERR_OFF; | ||
1062 | if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE) | ||
1063 | extctl |= INFINIPATH_EXTC_LEDGBLOK_ON; | ||
1064 | } | ||
1065 | else { | ||
1066 | extctl = dd->ipath_extctrl & | ||
1067 | ~(INFINIPATH_EXTC_LED1PRIPORT_ON | | ||
1068 | INFINIPATH_EXTC_LED2PRIPORT_ON); | ||
1069 | if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP) | ||
1070 | extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON; | ||
1071 | if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE) | ||
1072 | extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON; | ||
1073 | } | ||
1074 | dd->ipath_extctrl = extctl; | ||
1075 | ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl); | ||
1076 | } | ||
1077 | |||
1078 | static void ipath_init_ht_variables(void) | ||
1079 | { | ||
1080 | ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; | ||
1081 | ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; | ||
1082 | ipath_gpio_sda = IPATH_GPIO_SDA; | ||
1083 | ipath_gpio_scl = IPATH_GPIO_SCL; | ||
1084 | |||
1085 | infinipath_i_bitsextant = | ||
1086 | (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) | | ||
1087 | (INFINIPATH_I_RCVAVAIL_MASK << | ||
1088 | INFINIPATH_I_RCVAVAIL_SHIFT) | | ||
1089 | INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT | | ||
1090 | INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO; | ||
1091 | |||
1092 | infinipath_e_bitsextant = | ||
1093 | INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC | | ||
1094 | INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN | | ||
1095 | INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN | | ||
1096 | INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR | | ||
1097 | INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP | | ||
1098 | INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION | | ||
1099 | INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | | ||
1100 | INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN | | ||
1101 | INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK | | ||
1102 | INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN | | ||
1103 | INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN | | ||
1104 | INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT | | ||
1105 | INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | | ||
1106 | INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED | | ||
1107 | INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET | | ||
1108 | INFINIPATH_E_HARDWARE; | ||
1109 | |||
1110 | infinipath_hwe_bitsextant = | ||
1111 | (INFINIPATH_HWE_HTCMEMPARITYERR_MASK << | ||
1112 | INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) | | ||
1113 | (INFINIPATH_HWE_TXEMEMPARITYERR_MASK << | ||
1114 | INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) | | ||
1115 | (INFINIPATH_HWE_RXEMEMPARITYERR_MASK << | ||
1116 | INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) | | ||
1117 | INFINIPATH_HWE_HTCLNKABYTE0CRCERR | | ||
1118 | INFINIPATH_HWE_HTCLNKABYTE1CRCERR | | ||
1119 | INFINIPATH_HWE_HTCLNKBBYTE0CRCERR | | ||
1120 | INFINIPATH_HWE_HTCLNKBBYTE1CRCERR | | ||
1121 | INFINIPATH_HWE_HTCMISCERR4 | | ||
1122 | INFINIPATH_HWE_HTCMISCERR5 | INFINIPATH_HWE_HTCMISCERR6 | | ||
1123 | INFINIPATH_HWE_HTCMISCERR7 | | ||
1124 | INFINIPATH_HWE_HTCBUSTREQPARITYERR | | ||
1125 | INFINIPATH_HWE_HTCBUSTRESPPARITYERR | | ||
1126 | INFINIPATH_HWE_HTCBUSIREQPARITYERR | | ||
1127 | INFINIPATH_HWE_RXDSYNCMEMPARITYERR | | ||
1128 | INFINIPATH_HWE_MEMBISTFAILED | | ||
1129 | INFINIPATH_HWE_COREPLL_FBSLIP | | ||
1130 | INFINIPATH_HWE_COREPLL_RFSLIP | | ||
1131 | INFINIPATH_HWE_HTBPLL_FBSLIP | | ||
1132 | INFINIPATH_HWE_HTBPLL_RFSLIP | | ||
1133 | INFINIPATH_HWE_HTAPLL_FBSLIP | | ||
1134 | INFINIPATH_HWE_HTAPLL_RFSLIP | | ||
1135 | INFINIPATH_HWE_SERDESPLLFAILED | | ||
1136 | INFINIPATH_HWE_IBCBUSTOSPCPARITYERR | | ||
1137 | INFINIPATH_HWE_IBCBUSFRSPCPARITYERR; | ||
1138 | |||
1139 | infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; | ||
1140 | infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; | ||
1141 | } | ||
1142 | |||
1143 | /** | ||
1144 | * ipath_ht_init_hwerrors - enable hardware errors | ||
1145 | * @dd: the infinipath device | ||
1146 | * | ||
1147 | * now that we have finished initializing everything that might reasonably | ||
1148 | * cause a hardware error, and cleared those error bits as they occurred, | ||
1149 | * we can enable hardware errors in the mask (potentially enabling | ||
1150 | * freeze mode), and enable hardware errors as errors (along with | ||
1151 | * everything else) in errormask | ||
1152 | */ | ||
1153 | static void ipath_ht_init_hwerrors(struct ipath_devdata *dd) | ||
1154 | { | ||
1155 | ipath_err_t val; | ||
1156 | u64 extsval; | ||
1157 | |||
1158 | extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); | ||
1159 | |||
1160 | if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) | ||
1161 | ipath_dev_err(dd, "MemBIST did not complete!\n"); | ||
1162 | |||
1163 | ipath_check_htlink(dd); | ||
1164 | |||
1165 | /* barring bugs, all hwerrors become interrupts */ | ||
1166 | val = -1LL; | ||
1167 | /* don't look at Link A crc lane1 if in 8 bit mode */ | ||
1168 | if (dd->ipath_flags & IPATH_8BIT_IN_HT0) | ||
1169 | val &= ~infinipath_hwe_htclnkabyte1crcerr; | ||
1170 | /* likewise, don't look at Link B crc lane1 if in 8 bit mode */ | ||
1171 | if (dd->ipath_flags & IPATH_8BIT_IN_HT1) | ||
1172 | val &= ~infinipath_hwe_htclnkbbyte1crcerr; | ||
1173 | |||
1174 | /* | ||
1175 | * disable RXDSYNCMEMPARITY because external serdes is unused, | ||
1176 | * and therefore the logic will never be used or initialized, | ||
1177 | * and uninitialized state will normally result in this error | ||
1178 | * being asserted. Similarly for the external serdes pll | ||
1179 | * lock signal. | ||
1180 | */ | ||
1181 | val &= ~(INFINIPATH_HWE_SERDESPLLFAILED | | ||
1182 | INFINIPATH_HWE_RXDSYNCMEMPARITYERR); | ||
1183 | |||
1184 | /* | ||
1185 | * Disable MISCERR4 because of an inversion in the HT core | ||
1186 | * logic checking for errors that cause this bit to be set. | ||
1187 | * The errata can also cause the protocol error bit to be set | ||
1188 | * in the HT config space linkerror register(s). | ||
1189 | */ | ||
1190 | val &= ~INFINIPATH_HWE_HTCMISCERR4; | ||
1191 | |||
1192 | /* | ||
1193 | * PLL ignored because MDIO interface has a logic problem | ||
1194 | * for reads, on Comstock and Ponderosa. BRINGUP | ||
1195 | */ | ||
1196 | if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9) | ||
1197 | val &= ~INFINIPATH_HWE_SERDESPLLFAILED; | ||
1198 | dd->ipath_hwerrmask = val; | ||
1199 | } | ||
1200 | |||
1201 | /** | ||
1202 | * ipath_ht_bringup_serdes - bring up the serdes | ||
1203 | * @dd: the infinipath device | ||
1204 | */ | ||
1205 | static int ipath_ht_bringup_serdes(struct ipath_devdata *dd) | ||
1206 | { | ||
1207 | u64 val, config1; | ||
1208 | int ret = 0, change = 0; | ||
1209 | |||
1210 | ipath_dbg("Trying to bringup serdes\n"); | ||
1211 | |||
1212 | if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) & | ||
1213 | INFINIPATH_HWE_SERDESPLLFAILED) | ||
1214 | { | ||
1215 | ipath_dbg("At start, serdes PLL failed bit set in " | ||
1216 | "hwerrstatus, clearing and continuing\n"); | ||
1217 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, | ||
1218 | INFINIPATH_HWE_SERDESPLLFAILED); | ||
1219 | } | ||
1220 | |||
1221 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); | ||
1222 | config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1); | ||
1223 | |||
1224 | ipath_cdbg(VERBOSE, "Initial serdes status is config0=%llx " | ||
1225 | "config1=%llx, sstatus=%llx xgxs %llx\n", | ||
1226 | (unsigned long long) val, (unsigned long long) config1, | ||
1227 | (unsigned long long) | ||
1228 | ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus), | ||
1229 | (unsigned long long) | ||
1230 | ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); | ||
1231 | |||
1232 | /* force reset on */ | ||
1233 | val |= INFINIPATH_SERDC0_RESET_PLL | ||
1234 | /* | INFINIPATH_SERDC0_RESET_MASK */ | ||
1235 | ; | ||
1236 | ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); | ||
1237 | udelay(15); /* need pll reset set at least for a bit */ | ||
1238 | |||
1239 | if (val & INFINIPATH_SERDC0_RESET_PLL) { | ||
1240 | u64 val2 = val &= ~INFINIPATH_SERDC0_RESET_PLL; | ||
1241 | /* set lane resets, and tx idle, during pll reset */ | ||
1242 | val2 |= INFINIPATH_SERDC0_RESET_MASK | | ||
1243 | INFINIPATH_SERDC0_TXIDLE; | ||
1244 | ipath_cdbg(VERBOSE, "Clearing serdes PLL reset (writing " | ||
1245 | "%llx)\n", (unsigned long long) val2); | ||
1246 | ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, | ||
1247 | val2); | ||
1248 | /* | ||
1249 | * be sure chip saw it | ||
1250 | */ | ||
1251 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); | ||
1252 | /* | ||
1253 | * need pll reset clear at least 11 usec before lane | ||
1254 | * resets cleared; give it a few more | ||
1255 | */ | ||
1256 | udelay(15); | ||
1257 | val = val2; /* for check below */ | ||
1258 | } | ||
1259 | |||
1260 | if (val & (INFINIPATH_SERDC0_RESET_PLL | | ||
1261 | INFINIPATH_SERDC0_RESET_MASK | | ||
1262 | INFINIPATH_SERDC0_TXIDLE)) { | ||
1263 | val &= ~(INFINIPATH_SERDC0_RESET_PLL | | ||
1264 | INFINIPATH_SERDC0_RESET_MASK | | ||
1265 | INFINIPATH_SERDC0_TXIDLE); | ||
1266 | /* clear them */ | ||
1267 | ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, | ||
1268 | val); | ||
1269 | } | ||
1270 | |||
1271 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); | ||
1272 | if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) & | ||
1273 | INFINIPATH_XGXS_MDIOADDR_MASK) != 3) { | ||
1274 | val &= ~(INFINIPATH_XGXS_MDIOADDR_MASK << | ||
1275 | INFINIPATH_XGXS_MDIOADDR_SHIFT); | ||
1276 | /* | ||
1277 | * we use address 3 | ||
1278 | */ | ||
1279 | val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT; | ||
1280 | change = 1; | ||
1281 | } | ||
1282 | if (val & INFINIPATH_XGXS_RESET) { | ||
1283 | /* normally true after boot */ | ||
1284 | val &= ~INFINIPATH_XGXS_RESET; | ||
1285 | change = 1; | ||
1286 | } | ||
1287 | if (change) | ||
1288 | ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); | ||
1289 | |||
1290 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); | ||
1291 | |||
1292 | /* clear current and de-emphasis bits */ | ||
1293 | config1 &= ~0x0ffffffff00ULL; | ||
1294 | /* set current to 20ma */ | ||
1295 | config1 |= 0x00000000000ULL; | ||
1296 | /* set de-emphasis to -5.68dB */ | ||
1297 | config1 |= 0x0cccc000000ULL; | ||
1298 | ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1); | ||
1299 | |||
1300 | ipath_cdbg(VERBOSE, "After setup: serdes status is config0=%llx " | ||
1301 | "config1=%llx, sstatus=%llx xgxs %llx\n", | ||
1302 | (unsigned long long) val, (unsigned long long) config1, | ||
1303 | (unsigned long long) | ||
1304 | ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus), | ||
1305 | (unsigned long long) | ||
1306 | ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); | ||
1307 | |||
1308 | if (!ipath_waitfor_mdio_cmdready(dd)) { | ||
1309 | ipath_write_kreg(dd, dd->ipath_kregs->kr_mdio, | ||
1310 | ipath_mdio_req(IPATH_MDIO_CMD_READ, 31, | ||
1311 | IPATH_MDIO_CTRL_XGXS_REG_8, | ||
1312 | 0)); | ||
1313 | if (ipath_waitfor_complete(dd, dd->ipath_kregs->kr_mdio, | ||
1314 | IPATH_MDIO_DATAVALID, &val)) | ||
1315 | ipath_dbg("Never got MDIO data for XGXS status " | ||
1316 | "read\n"); | ||
1317 | else | ||
1318 | ipath_cdbg(VERBOSE, "MDIO Read reg8, " | ||
1319 | "'bank' 31 %x\n", (u32) val); | ||
1320 | } else | ||
1321 | ipath_dbg("Never got MDIO cmdready for XGXS status read\n"); | ||
1322 | |||
1323 | return ret; /* for now, say we always succeeded */ | ||
1324 | } | ||
1325 | |||
1326 | /** | ||
1327 | * ipath_ht_quiet_serdes - set serdes to txidle | ||
1328 | * @dd: the infinipath device | ||
1329 | * Called when the driver is being unloaded. | ||
1330 | */ | ||
1331 | static void ipath_ht_quiet_serdes(struct ipath_devdata *dd) | ||
1332 | { | ||
1333 | u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); | ||
1334 | |||
1335 | val |= INFINIPATH_SERDC0_TXIDLE; | ||
1336 | ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n", | ||
1337 | (unsigned long long) val); | ||
1338 | ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); | ||
1339 | } | ||
1340 | |||
1341 | static int ipath_ht_intconfig(struct ipath_devdata *dd) | ||
1342 | { | ||
1343 | int ret; | ||
1344 | |||
1345 | if (!dd->ipath_intconfig) { | ||
1346 | ipath_dev_err(dd, "No interrupts enabled, couldn't setup " | ||
1347 | "interrupt address\n"); | ||
1348 | ret = 1; | ||
1349 | goto bail; | ||
1350 | } | ||
1351 | |||
1352 | ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig, | ||
1353 | dd->ipath_intconfig); /* interrupt address */ | ||
1354 | ret = 0; | ||
1355 | |||
1356 | bail: | ||
1357 | return ret; | ||
1358 | } | ||
1359 | |||
1360 | /** | ||
1361 | * ipath_ht_put_tid - write a TID to the chip | ||
1362 | * @dd: the infinipath device | ||
1363 | * @tidptr: pointer to the expected TID (in chip) to update | ||
1364 | * @type: 0 for eager, 1 for expected | ||
1365 | * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing | ||
1366 | * | ||
1367 | * This exists as a separate routine to allow for special locking etc. | ||
1368 | * It's used for both the full cleanup on exit, as well as the normal | ||
1369 | * setup and teardown. | ||
1370 | */ | ||
1371 | static void ipath_ht_put_tid(struct ipath_devdata *dd, | ||
1372 | u64 __iomem *tidptr, u32 type, | ||
1373 | unsigned long pa) | ||
1374 | { | ||
1375 | if (pa != dd->ipath_tidinvalid) { | ||
1376 | if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) { | ||
1377 | dev_info(&dd->pcidev->dev, | ||
1378 | "physaddr %lx has more than " | ||
1379 | "40 bits, using only 40!!!\n", pa); | ||
1380 | pa &= INFINIPATH_RT_ADDR_MASK; | ||
1381 | } | ||
1382 | if (type == 0) | ||
1383 | pa |= dd->ipath_tidtemplate; | ||
1384 | else { | ||
1385 | /* in words (fixed, full page). */ | ||
1386 | u64 lenvalid = PAGE_SIZE >> 2; | ||
1387 | lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT; | ||
1388 | pa |= lenvalid | INFINIPATH_RT_VALID; | ||
1389 | } | ||
1390 | } | ||
1391 | if (dd->ipath_kregbase) | ||
1392 | writeq(pa, tidptr); | ||
1393 | } | ||
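
For readers following the TID packing above, here is a hedged, self-contained sketch of the expected-TID word layout: a 40-bit physical address, a length in 32-bit words, and a valid bit. The shift and mask values are assumptions chosen for the example, not quoted from the driver's register definitions.

#include <stdint.h>

#define EX_RT_ADDR_MASK		0xFFFFFFFFFFULL	/* 40-bit address (assumed) */
#define EX_RT_BUFSIZE_SHIFT	48		/* length field shift (assumed) */
#define EX_RT_VALID		(1ULL << 63)	/* valid bit (assumed) */

/* Build an expected-TID entry from a page-sized buffer's physical address. */
static uint64_t pack_expected_tid(uint64_t pa, unsigned long page_size)
{
	uint64_t lenvalid = (uint64_t)(page_size >> 2) << EX_RT_BUFSIZE_SHIFT;

	return (pa & EX_RT_ADDR_MASK) | lenvalid | EX_RT_VALID;
}
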
1394 | |||
1395 | /** | ||
1396 | * ipath_ht_clear_tids - clear all TID entries for a port, expected and eager | ||
1397 | * @dd: the infinipath device | ||
1398 | * @port: the port | ||
1399 | * | ||
1400 | * Used from ipath_close(), and at chip initialization. | ||
1401 | */ | ||
1402 | static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port) | ||
1403 | { | ||
1404 | u64 __iomem *tidbase; | ||
1405 | int i; | ||
1406 | |||
1407 | if (!dd->ipath_kregbase) | ||
1408 | return; | ||
1409 | |||
1410 | ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port); | ||
1411 | |||
1412 | /* | ||
1413 | * need to invalidate all of the expected TID entries for this | ||
1414 | * port, so we don't have valid entries that might somehow get | ||
1415 | * used (early in next use of this port, or through some bug) | ||
1416 | */ | ||
1417 | tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + | ||
1418 | dd->ipath_rcvtidbase + | ||
1419 | port * dd->ipath_rcvtidcnt * | ||
1420 | sizeof(*tidbase)); | ||
1421 | for (i = 0; i < dd->ipath_rcvtidcnt; i++) | ||
1422 | ipath_ht_put_tid(dd, &tidbase[i], 1, dd->ipath_tidinvalid); | ||
1423 | |||
1424 | tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + | ||
1425 | dd->ipath_rcvegrbase + | ||
1426 | port * dd->ipath_rcvegrcnt * | ||
1427 | sizeof(*tidbase)); | ||
1428 | |||
1429 | for (i = 0; i < dd->ipath_rcvegrcnt; i++) | ||
1430 | ipath_ht_put_tid(dd, &tidbase[i], 0, dd->ipath_tidinvalid); | ||
1431 | } | ||
1432 | |||
1433 | /** | ||
1434 | * ipath_ht_tidtemplate - setup constants for TID updates | ||
1435 | * @dd: the infinipath device | ||
1436 | * | ||
1437 | * We setup stuff that we use a lot, to avoid calculating each time | ||
1438 | */ | ||
1439 | static void ipath_ht_tidtemplate(struct ipath_devdata *dd) | ||
1440 | { | ||
1441 | dd->ipath_tidtemplate = dd->ipath_ibmaxlen >> 2; | ||
1442 | dd->ipath_tidtemplate <<= INFINIPATH_RT_BUFSIZE_SHIFT; | ||
1443 | dd->ipath_tidtemplate |= INFINIPATH_RT_VALID; | ||
1444 | |||
1445 | /* | ||
1446 | * work around chip errata bug 7358, by marking invalid tids | ||
1447 | * as having max length | ||
1448 | */ | ||
1449 | dd->ipath_tidinvalid = (-1LL & INFINIPATH_RT_BUFSIZE_MASK) << | ||
1450 | INFINIPATH_RT_BUFSIZE_SHIFT; | ||
1451 | } | ||
1452 | |||
1453 | static int ipath_ht_early_init(struct ipath_devdata *dd) | ||
1454 | { | ||
1455 | u32 __iomem *piobuf; | ||
1456 | u32 pioincr, val32, egrsize; | ||
1457 | int i; | ||
1458 | |||
1459 | /* | ||
1460 | * one cache line; long IB headers will spill over into received | ||
1461 | * buffer | ||
1462 | */ | ||
1463 | dd->ipath_rcvhdrentsize = 16; | ||
1464 | dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; | ||
1465 | |||
1466 | /* | ||
1467 | * For HT-400, we allocate a somewhat overly large eager buffer, | ||
1468 | * such that we can guarantee that we can receive the largest | ||
1469 | * packet that we can send out. To truly support a 4KB MTU, | ||
1470 | * we need to bump this to a large value. To date, other than | ||
1471 | * testing, we have never encountered an HCA that can really | ||
1472 | * send 4KB MTU packets, so we do not handle that (we'll get | ||
1473 | * error interrupts if we ever see one). | ||
1474 | */ | ||
1475 | dd->ipath_rcvegrbufsize = dd->ipath_piosize2k; | ||
1476 | egrsize = dd->ipath_rcvegrbufsize; | ||
1477 | |||
1478 | /* | ||
1479 | * the min() check here is currently a nop, but it may not | ||
1480 | * always be, depending on just how we do ipath_rcvegrbufsize | ||
1481 | */ | ||
1482 | dd->ipath_ibmaxlen = min(dd->ipath_piosize2k, | ||
1483 | dd->ipath_rcvegrbufsize); | ||
1484 | dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen; | ||
1485 | ipath_ht_tidtemplate(dd); | ||
1486 | |||
1487 | /* | ||
1488 | * zero all the TID entries at startup. We do this for sanity, | ||
1489 | * in case of a previous driver crash of some kind, and also | ||
1490 | * because the chip powers up with these memories in an unknown | ||
1491 | * state. Use portcnt, not cfgports, since this is for the | ||
1492 | * full chip, not for current (possibly different) configuration | ||
1493 | * value. | ||
1494 | * Chip Errata bug 6447 | ||
1495 | */ | ||
1496 | for (val32 = 0; val32 < dd->ipath_portcnt; val32++) | ||
1497 | ipath_ht_clear_tids(dd, val32); | ||
1498 | |||
1499 | /* | ||
1500 | * write the pbc of each buffer, to be sure it's initialized, then | ||
1501 | * cancel all the buffers, and also abort any packets that might | ||
1502 | * have been in flight for some reason (the latter is for driver | ||
1503 | * unload/reload, but isn't a bad idea at first init). PIO send | ||
1504 | * isn't enabled at this point, so there is no danger of sending | ||
1505 | * these out on the wire. | ||
1506 | * Chip Errata bug 6610 | ||
1507 | */ | ||
1508 | piobuf = (u32 __iomem *) (((char __iomem *)(dd->ipath_kregbase)) + | ||
1509 | dd->ipath_piobufbase); | ||
1510 | pioincr = dd->ipath_palign / sizeof(*piobuf); | ||
1511 | for (i = 0; i < dd->ipath_piobcnt2k; i++) { | ||
1512 | /* | ||
1513 | * reasonable word count, just to init pbc | ||
1514 | */ | ||
1515 | writel(16, piobuf); | ||
1516 | piobuf += pioincr; | ||
1517 | } | ||
1518 | /* | ||
1519 | * self-clearing | ||
1520 | */ | ||
1521 | ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, | ||
1522 | INFINIPATH_S_ABORT); | ||
1523 | return 0; | ||
1524 | } | ||
1525 | |||
1526 | /** | ||
1527 | * ipath_ht_get_base_info - set chip-specific flags for user code | ||
1528 | * @pd: the infinipath port data | ||
1529 | * @kbase: ipath_base_info pointer | ||
1530 | * | ||
1531 | * We set the HT flag so user code knows the link is HyperTransport; | ||
1532 | * relative link bandwidth can affect some user packet algorithms. | ||
1533 | */ | ||
1534 | static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase) | ||
1535 | { | ||
1536 | struct ipath_base_info *kinfo = kbase; | ||
1537 | |||
1538 | kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT | | ||
1539 | IPATH_RUNTIME_RCVHDR_COPY; | ||
1540 | |||
1541 | return 0; | ||
1542 | } | ||
1543 | |||
1544 | /** | ||
1545 | * ipath_init_ht400_funcs - set up the chip-specific function pointers | ||
1546 | * @dd: the infinipath device | ||
1547 | * | ||
1548 | * This is global, and is called directly at init to set up the | ||
1549 | * chip-specific function pointers for later use. | ||
1550 | */ | ||
1551 | void ipath_init_ht400_funcs(struct ipath_devdata *dd) | ||
1552 | { | ||
1553 | dd->ipath_f_intrsetup = ipath_ht_intconfig; | ||
1554 | dd->ipath_f_bus = ipath_setup_ht_config; | ||
1555 | dd->ipath_f_reset = ipath_setup_ht_reset; | ||
1556 | dd->ipath_f_get_boardname = ipath_ht_boardname; | ||
1557 | dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors; | ||
1559 | dd->ipath_f_early_init = ipath_ht_early_init; | ||
1560 | dd->ipath_f_handle_hwerrors = ipath_ht_handle_hwerrors; | ||
1561 | dd->ipath_f_quiet_serdes = ipath_ht_quiet_serdes; | ||
1562 | dd->ipath_f_bringup_serdes = ipath_ht_bringup_serdes; | ||
1563 | dd->ipath_f_clear_tids = ipath_ht_clear_tids; | ||
1564 | dd->ipath_f_put_tid = ipath_ht_put_tid; | ||
1565 | dd->ipath_f_cleanup = ipath_setup_ht_cleanup; | ||
1566 | dd->ipath_f_setextled = ipath_setup_ht_setextled; | ||
1567 | dd->ipath_f_get_base_info = ipath_ht_get_base_info; | ||
1568 | |||
1569 | /* | ||
1570 | * initialize chip-specific variables | ||
1571 | */ | ||
1572 | dd->ipath_f_tidtemplate = ipath_ht_tidtemplate; | ||
1573 | |||
1574 | /* | ||
1575 | * setup the register offsets, since they are different for each | ||
1576 | * chip | ||
1577 | */ | ||
1578 | dd->ipath_kregs = &ipath_ht_kregs; | ||
1579 | dd->ipath_cregs = &ipath_ht_cregs; | ||
1580 | |||
1581 | /* | ||
1582 | * do very early init that is needed before ipath_f_bus is | ||
1583 | * called | ||
1584 | */ | ||
1585 | ipath_init_ht_variables(); | ||
1586 | } | ||
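
The function-pointer table filled in above is how the common ipath code stays chip-agnostic. Below is a stripped-down, illustrative sketch of the same dispatch pattern with invented names; it is not the driver's actual ops structure.

#include <stdio.h>

struct chip_ops {
	int (*bringup_serdes)(void *dd);
	void (*quiet_serdes)(void *dd);
};

static int ht_bringup(void *dd)  { puts("HT serdes up");   return 0; }
static void ht_quiet(void *dd)   { puts("HT serdes idle"); }

/* Chip-specific init fills in the ops; core code only calls through them. */
static void init_ht_ops(struct chip_ops *ops)
{
	ops->bringup_serdes = ht_bringup;
	ops->quiet_serdes = ht_quiet;
}

int main(void)
{
	struct chip_ops ops;

	init_ht_ops(&ops);
	ops.bringup_serdes(NULL);
	ops.quiet_serdes(NULL);
	return 0;
}
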
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c new file mode 100644 index 000000000000..2823ff9c0c62 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c | |||
@@ -0,0 +1,951 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/pci.h> | ||
34 | #include <linux/netdevice.h> | ||
35 | #include <linux/vmalloc.h> | ||
36 | |||
37 | #include "ipath_kernel.h" | ||
38 | #include "ips_common.h" | ||
39 | |||
40 | /* | ||
41 | * min buffers we want to have per port, after the driver's buffers are reserved | ||
42 | */ | ||
43 | #define IPATH_MIN_USER_PORT_BUFCNT 8 | ||
44 | |||
45 | /* | ||
46 | * Number of ports we are configured to use (to allow for more pio | ||
47 | * buffers per port, etc.) Zero means use chip value. | ||
48 | */ | ||
49 | static ushort ipath_cfgports; | ||
50 | |||
51 | module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO); | ||
52 | MODULE_PARM_DESC(cfgports, "Set max number of ports to use"); | ||
53 | |||
54 | /* | ||
55 | * Number of buffers reserved for driver (layered drivers and SMA | ||
56 | * send). Reserved at end of buffer list. | ||
57 | */ | ||
58 | static ushort ipath_kpiobufs = 32; | ||
59 | |||
60 | static int ipath_set_kpiobufs(const char *val, struct kernel_param *kp); | ||
61 | |||
62 | module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_uint, | ||
63 | &ipath_kpiobufs, S_IWUSR | S_IRUGO); | ||
64 | MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver"); | ||
65 | |||
66 | /** | ||
67 | * create_port0_egr - allocate the eager TID buffers | ||
68 | * @dd: the infinipath device | ||
69 | * | ||
70 | * This code is now quite different for user and kernel, because | ||
71 | * the kernel uses skb's, for the accelerated network performance. | ||
72 | * This is the kernel (port0) version. | ||
73 | * | ||
74 | * Allocate the eager TID buffers and program them into infinipath. | ||
75 | * We use the network layer alloc_skb() allocator to allocate the | ||
76 | * memory, and either use the buffers as is for things like SMA | ||
77 | * packets, or pass the buffers up to the ipath layered driver and | ||
78 | * thence the network layer, replacing them as we do so (see | ||
79 | * ipath_rcv_layer()). | ||
80 | */ | ||
81 | static int create_port0_egr(struct ipath_devdata *dd) | ||
82 | { | ||
83 | unsigned e, egrcnt; | ||
84 | struct sk_buff **skbs; | ||
85 | int ret; | ||
86 | |||
87 | egrcnt = dd->ipath_rcvegrcnt; | ||
88 | |||
89 | skbs = vmalloc(sizeof(*dd->ipath_port0_skbs) * egrcnt); | ||
90 | if (skbs == NULL) { | ||
91 | ipath_dev_err(dd, "allocation error for eager TID " | ||
92 | "skb array\n"); | ||
93 | ret = -ENOMEM; | ||
94 | goto bail; | ||
95 | } | ||
96 | for (e = 0; e < egrcnt; e++) { | ||
97 | /* | ||
98 | * This is a bit tricky in that we allocate extra | ||
99 | * space for 2 bytes of the 14 byte ethernet header. | ||
100 | * These two bytes are passed in the ipath header so | ||
101 | * the rest of the data is word aligned. We allocate | ||
102 | * 4 bytes so that the data buffer stays word aligned. | ||
103 | * See ipath_kreceive() for more details. | ||
104 | */ | ||
105 | skbs[e] = ipath_alloc_skb(dd, GFP_KERNEL); | ||
106 | if (!skbs[e]) { | ||
107 | ipath_dev_err(dd, "SKB allocation error for " | ||
108 | "eager TID %u\n", e); | ||
109 | while (e != 0) | ||
110 | dev_kfree_skb(skbs[--e]); | ||
111 | ret = -ENOMEM; | ||
112 | goto bail; | ||
113 | } | ||
114 | } | ||
115 | /* | ||
116 | * After loop above, so we can test non-NULL to see if ready | ||
117 | * to use at receive, etc. | ||
118 | */ | ||
119 | dd->ipath_port0_skbs = skbs; | ||
120 | |||
121 | for (e = 0; e < egrcnt; e++) { | ||
122 | unsigned long phys = | ||
123 | virt_to_phys(dd->ipath_port0_skbs[e]->data); | ||
124 | dd->ipath_f_put_tid(dd, e + (u64 __iomem *) | ||
125 | ((char __iomem *) dd->ipath_kregbase + | ||
126 | dd->ipath_rcvegrbase), 0, phys); | ||
127 | } | ||
128 | |||
129 | ret = 0; | ||
130 | |||
131 | bail: | ||
132 | return ret; | ||
133 | } | ||
134 | |||
135 | static int bringup_link(struct ipath_devdata *dd) | ||
136 | { | ||
137 | u64 val, ibc; | ||
138 | int ret = 0; | ||
139 | |||
140 | /* hold IBC in reset */ | ||
141 | dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; | ||
142 | ipath_write_kreg(dd, dd->ipath_kregs->kr_control, | ||
143 | dd->ipath_control); | ||
144 | |||
145 | /* | ||
146 | * Note that prior to try 14 or 15 of IB, the credit scaling | ||
147 | * wasn't working, because it was swapped for writes with the | ||
148 | * 1 bit default linkstate field | ||
149 | */ | ||
150 | |||
151 | /* ignore pbc and align word */ | ||
152 | val = dd->ipath_piosize2k - 2 * sizeof(u32); | ||
153 | /* | ||
154 | * for ICRC, which we only send in diag test pkt mode, and we | ||
155 | * don't need to worry about that for mtu | ||
156 | */ | ||
157 | val += 1; | ||
158 | /* | ||
159 | * Set the IBC maxpktlength to the size of our pio buffers; the | ||
160 | * maxpktlength is in words. This is *not* the IB data MTU. | ||
161 | */ | ||
162 | ibc = (val / sizeof(u32)) << INFINIPATH_IBCC_MAXPKTLEN_SHIFT; | ||
163 | /* in KB */ | ||
164 | ibc |= 0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT; | ||
165 | /* | ||
166 | * How often flowctrl is sent. More or less in usecs; balance against | ||
167 | * watermark value, so that in theory senders always get a flow | ||
168 | * control update in time to not let the IB link go idle. | ||
169 | */ | ||
170 | ibc |= 0x3ULL << INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT; | ||
171 | /* max error tolerance */ | ||
172 | ibc |= 0xfULL << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT; | ||
173 | /* use "real" buffer space for */ | ||
174 | ibc |= 4ULL << INFINIPATH_IBCC_CREDITSCALE_SHIFT; | ||
175 | /* IB credit flow control. */ | ||
176 | ibc |= 0xfULL << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT; | ||
177 | /* initially come up waiting for TS1, without sending anything. */ | ||
178 | dd->ipath_ibcctrl = ibc; | ||
179 | /* | ||
180 | * Want to start out with both LINKCMD and LINKINITCMD in NOP | ||
181 | * (0 and 0). Don't put linkinitcmd in ipath_ibcctrl, want that | ||
182 | * to stay a NOP | ||
183 | */ | ||
184 | ibc |= INFINIPATH_IBCC_LINKINITCMD_DISABLE << | ||
185 | INFINIPATH_IBCC_LINKINITCMD_SHIFT; | ||
186 | ipath_cdbg(VERBOSE, "Writing 0x%llx to ibcctrl\n", | ||
187 | (unsigned long long) ibc); | ||
188 | ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, ibc); | ||
189 | |||
190 | /* be sure chip saw it */ | ||
191 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); | ||
192 | |||
193 | ret = dd->ipath_f_bringup_serdes(dd); | ||
194 | |||
195 | if (ret) | ||
196 | dev_info(&dd->pcidev->dev, "Could not initialize SerDes, " | ||
197 | "not usable\n"); | ||
198 | else { | ||
199 | /* enable IBC */ | ||
200 | dd->ipath_control |= INFINIPATH_C_LINKENABLE; | ||
201 | ipath_write_kreg(dd, dd->ipath_kregs->kr_control, | ||
202 | dd->ipath_control); | ||
203 | } | ||
204 | |||
205 | return ret; | ||
206 | } | ||
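
A quick worked example of the IBC maxpktlength arithmetic above (a hedged userspace sketch with an assumed 2 KB PIO buffer): drop the PBC and the alignment word, allow one extra byte for the ICRC case, and convert to 32-bit words.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t piosize2k = 2048;	/* assumed 2 KB PIO buffer */
	uint32_t bytes = piosize2k - 2 * sizeof(uint32_t) + 1;
	uint32_t words = bytes / sizeof(uint32_t);

	/* 2048 - 8 + 1 = 2041 bytes -> 510 words programmed as maxpktlen */
	printf("maxpktlen = %u words\n", words);
	return 0;
}
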
207 | |||
208 | static int init_chip_first(struct ipath_devdata *dd, | ||
209 | struct ipath_portdata **pdp) | ||
210 | { | ||
211 | struct ipath_portdata *pd = NULL; | ||
212 | int ret = 0; | ||
213 | u64 val; | ||
214 | |||
215 | /* | ||
216 | * skip cfgports stuff because we are not allocating memory, | ||
217 | * and we don't want problems if the portcnt changed due to | ||
218 | * cfgports. We do still check and report a difference, if | ||
219 | * not same (should be impossible). | ||
220 | */ | ||
221 | dd->ipath_portcnt = | ||
222 | ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt); | ||
223 | if (!ipath_cfgports) | ||
224 | dd->ipath_cfgports = dd->ipath_portcnt; | ||
225 | else if (ipath_cfgports <= dd->ipath_portcnt) { | ||
226 | dd->ipath_cfgports = ipath_cfgports; | ||
227 | ipath_dbg("Configured to use %u ports out of %u in chip\n", | ||
228 | dd->ipath_cfgports, dd->ipath_portcnt); | ||
229 | } else { | ||
230 | dd->ipath_cfgports = dd->ipath_portcnt; | ||
231 | ipath_dbg("Tried to configure %u ports; chip " | ||
232 | "only supports %u\n", ipath_cfgports, | ||
233 | dd->ipath_portcnt); | ||
234 | } | ||
235 | dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_cfgports, | ||
236 | GFP_KERNEL); | ||
237 | |||
238 | if (!dd->ipath_pd) { | ||
239 | ipath_dev_err(dd, "Unable to allocate portdata array, " | ||
240 | "failing\n"); | ||
241 | ret = -ENOMEM; | ||
242 | goto done; | ||
243 | } | ||
244 | |||
245 | dd->ipath_lastegrheads = kzalloc(sizeof(*dd->ipath_lastegrheads) | ||
246 | * dd->ipath_cfgports, | ||
247 | GFP_KERNEL); | ||
248 | dd->ipath_lastrcvhdrqtails = | ||
249 | kzalloc(sizeof(*dd->ipath_lastrcvhdrqtails) | ||
250 | * dd->ipath_cfgports, GFP_KERNEL); | ||
251 | |||
252 | if (!dd->ipath_lastegrheads || !dd->ipath_lastrcvhdrqtails) { | ||
253 | ipath_dev_err(dd, "Unable to allocate head arrays, " | ||
254 | "failing\n"); | ||
255 | ret = -ENOMEM; | ||
256 | goto done; | ||
257 | } | ||
258 | |||
259 | dd->ipath_pd[0] = kzalloc(sizeof(*pd), GFP_KERNEL); | ||
260 | |||
261 | if (!dd->ipath_pd[0]) { | ||
262 | ipath_dev_err(dd, "Unable to allocate portdata for port " | ||
263 | "0, failing\n"); | ||
264 | ret = -ENOMEM; | ||
265 | goto done; | ||
266 | } | ||
267 | pd = dd->ipath_pd[0]; | ||
268 | pd->port_dd = dd; | ||
269 | pd->port_port = 0; | ||
270 | pd->port_cnt = 1; | ||
271 | /* The port 0 pkey table is used by the layer interface. */ | ||
272 | pd->port_pkeys[0] = IPS_DEFAULT_P_KEY; | ||
273 | dd->ipath_rcvtidcnt = | ||
274 | ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt); | ||
275 | dd->ipath_rcvtidbase = | ||
276 | ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase); | ||
277 | dd->ipath_rcvegrcnt = | ||
278 | ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt); | ||
279 | dd->ipath_rcvegrbase = | ||
280 | ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase); | ||
281 | dd->ipath_palign = | ||
282 | ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign); | ||
283 | dd->ipath_piobufbase = | ||
284 | ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufbase); | ||
285 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize); | ||
286 | dd->ipath_piosize2k = val & ~0U; | ||
287 | dd->ipath_piosize4k = val >> 32; | ||
288 | dd->ipath_ibmtu = 4096; /* default to largest legal MTU */ | ||
289 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt); | ||
290 | dd->ipath_piobcnt2k = val & ~0U; | ||
291 | dd->ipath_piobcnt4k = val >> 32; | ||
292 | dd->ipath_pio2kbase = | ||
293 | (u32 __iomem *) (((char __iomem *) dd->ipath_kregbase) + | ||
294 | (dd->ipath_piobufbase & 0xffffffff)); | ||
295 | if (dd->ipath_piobcnt4k) { | ||
296 | dd->ipath_pio4kbase = (u32 __iomem *) | ||
297 | (((char __iomem *) dd->ipath_kregbase) + | ||
298 | (dd->ipath_piobufbase >> 32)); | ||
299 | /* | ||
300 | * 4K buffers take 2 pages; we use roundup just to be | ||
301 | * paranoid; we calculate it once here, rather than on | ||
302 | * every buf allocate | ||
303 | */ | ||
304 | dd->ipath_4kalign = ALIGN(dd->ipath_piosize4k, | ||
305 | dd->ipath_palign); | ||
306 | ipath_dbg("%u 2k(%x) piobufs @ %p, %u 4k(%x) @ %p " | ||
307 | "(%x aligned)\n", | ||
308 | dd->ipath_piobcnt2k, dd->ipath_piosize2k, | ||
309 | dd->ipath_pio2kbase, dd->ipath_piobcnt4k, | ||
310 | dd->ipath_piosize4k, dd->ipath_pio4kbase, | ||
311 | dd->ipath_4kalign); | ||
312 | } | ||
313 | else ipath_dbg("%u 2k piobufs @ %p\n", | ||
314 | dd->ipath_piobcnt2k, dd->ipath_pio2kbase); | ||
315 | |||
316 | spin_lock_init(&dd->ipath_tid_lock); | ||
317 | |||
318 | done: | ||
319 | *pdp = pd; | ||
320 | return ret; | ||
321 | } | ||
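
Several of the reads above unpack one 64-bit register into a 2k value in the low half and a 4k value in the high half (sendpiosize, sendpiobufcnt, sendpiobufbase). The following hedged sketch shows the same unpacking on a made-up register value.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical register value: 4k size in the high half, 2k in the low */
	uint64_t sendpiosize = ((uint64_t)4096 << 32) | 2048;

	uint32_t piosize2k = (uint32_t)(sendpiosize & 0xffffffffULL);
	uint32_t piosize4k = (uint32_t)(sendpiosize >> 32);

	printf("2k buffers: %u bytes, 4k buffers: %u bytes\n",
	       piosize2k, piosize4k);
	return 0;
}
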
322 | |||
323 | /** | ||
324 | * init_chip_reset - re-initialize after a reset, or enable | ||
325 | * @dd: the infinipath device | ||
326 | * @pdp: output for port data | ||
327 | * | ||
328 | * sanity check at least some of the values after reset, and | ||
329 | * ensure no receive or transmit (explictly, in case reset | ||
330 | * failed | ||
331 | */ | ||
332 | static int init_chip_reset(struct ipath_devdata *dd, | ||
333 | struct ipath_portdata **pdp) | ||
334 | { | ||
335 | struct ipath_portdata *pd; | ||
336 | u32 rtmp; | ||
337 | |||
338 | *pdp = pd = dd->ipath_pd[0]; | ||
339 | /* ensure chip does no sends or receives while we re-initialize */ | ||
340 | dd->ipath_control = dd->ipath_sendctrl = dd->ipath_rcvctrl = 0U; | ||
341 | ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 0); | ||
342 | ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0); | ||
343 | ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0); | ||
344 | |||
345 | rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt); | ||
346 | if (dd->ipath_portcnt != rtmp) | ||
347 | dev_info(&dd->pcidev->dev, "portcnt was %u before " | ||
348 | "reset, now %u, using original\n", | ||
349 | dd->ipath_portcnt, rtmp); | ||
350 | rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt); | ||
351 | if (rtmp != dd->ipath_rcvtidcnt) | ||
352 | dev_info(&dd->pcidev->dev, "tidcnt was %u before " | ||
353 | "reset, now %u, using original\n", | ||
354 | dd->ipath_rcvtidcnt, rtmp); | ||
355 | rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase); | ||
356 | if (rtmp != dd->ipath_rcvtidbase) | ||
357 | dev_info(&dd->pcidev->dev, "tidbase was %u before " | ||
358 | "reset, now %u, using original\n", | ||
359 | dd->ipath_rcvtidbase, rtmp); | ||
360 | rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt); | ||
361 | if (rtmp != dd->ipath_rcvegrcnt) | ||
362 | dev_info(&dd->pcidev->dev, "egrcnt was %u before " | ||
363 | "reset, now %u, using original\n", | ||
364 | dd->ipath_rcvegrcnt, rtmp); | ||
365 | rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase); | ||
366 | if (rtmp != dd->ipath_rcvegrbase) | ||
367 | dev_info(&dd->pcidev->dev, "egrbase was %u before " | ||
368 | "reset, now %u, using original\n", | ||
369 | dd->ipath_rcvegrbase, rtmp); | ||
370 | |||
371 | return 0; | ||
372 | } | ||
373 | |||
374 | static int init_pioavailregs(struct ipath_devdata *dd) | ||
375 | { | ||
376 | int ret; | ||
377 | |||
378 | dd->ipath_pioavailregs_dma = dma_alloc_coherent( | ||
379 | &dd->pcidev->dev, PAGE_SIZE, &dd->ipath_pioavailregs_phys, | ||
380 | GFP_KERNEL); | ||
381 | if (!dd->ipath_pioavailregs_dma) { | ||
382 | ipath_dev_err(dd, "failed to allocate PIOavail reg area " | ||
383 | "in memory\n"); | ||
384 | ret = -ENOMEM; | ||
385 | goto done; | ||
386 | } | ||
387 | |||
388 | /* | ||
389 | * we really want L2 cache aligned, but for current CPUs of | ||
390 | * interest, they are the same. | ||
391 | */ | ||
392 | dd->ipath_statusp = (u64 *) | ||
393 | ((char *)dd->ipath_pioavailregs_dma + | ||
394 | ((2 * L1_CACHE_BYTES + | ||
395 | dd->ipath_pioavregs * sizeof(u64)) & ~(L1_CACHE_BYTES - 1))); | ||
396 | /* copy the current value now that it's really allocated */ | ||
397 | *dd->ipath_statusp = dd->_ipath_status; | ||
398 | /* | ||
399 | * setup buffer to hold freeze msg, accessible to apps, | ||
400 | * following statusp | ||
401 | */ | ||
402 | dd->ipath_freezemsg = (char *)&dd->ipath_statusp[1]; | ||
403 | /* and its length */ | ||
404 | dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]); | ||
405 | |||
406 | if (dd->ipath_unit * 64 > (IPATH_PORT0_RCVHDRTAIL_SIZE - 64)) { | ||
407 | ipath_dev_err(dd, "unit %u too large for port 0 " | ||
408 | "rcvhdrtail buffer size\n", dd->ipath_unit); | ||
409 | ret = -ENODEV; | ||
410 | } | ||
411 | else | ||
412 | ret = 0; | ||
413 | |||
414 | /* so we can get current tail in ipath_kreceive(), per chip */ | ||
415 | dd->ipath_hdrqtailptr = &ipath_port0_rcvhdrtail[ | ||
416 | dd->ipath_unit * (64 / sizeof(*ipath_port0_rcvhdrtail))]; | ||
417 | done: | ||
418 | return ret; | ||
419 | } | ||
420 | |||
421 | /** | ||
422 | * init_shadow_tids - allocate the shadow TID array | ||
423 | * @dd: the infinipath device | ||
424 | * | ||
425 | * allocate the shadow TID array, so we can ipath_munlock previous | ||
426 | * entries. It may make more sense to move the pageshadow to the | ||
427 | * port data structure, so we only allocate memory for ports actually | ||
428 | * in use, since we are now at 8k per port. | ||
429 | */ | ||
430 | static void init_shadow_tids(struct ipath_devdata *dd) | ||
431 | { | ||
432 | dd->ipath_pageshadow = (struct page **) | ||
433 | vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * | ||
434 | sizeof(struct page *)); | ||
435 | if (!dd->ipath_pageshadow) | ||
436 | ipath_dev_err(dd, "failed to allocate shadow page * " | ||
437 | "array, no expected sends!\n"); | ||
438 | else | ||
439 | memset(dd->ipath_pageshadow, 0, | ||
440 | dd->ipath_cfgports * dd->ipath_rcvtidcnt * | ||
441 | sizeof(struct page *)); | ||
442 | } | ||
443 | |||
444 | static void enable_chip(struct ipath_devdata *dd, | ||
445 | struct ipath_portdata *pd, int reinit) | ||
446 | { | ||
447 | u32 val; | ||
448 | int i; | ||
449 | |||
450 | if (!reinit) { | ||
451 | init_waitqueue_head(&ipath_sma_state_wait); | ||
452 | } | ||
453 | ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, | ||
454 | dd->ipath_rcvctrl); | ||
455 | |||
456 | /* Enable PIO send, and update of PIOavail regs to memory. */ | ||
457 | dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE | | ||
458 | INFINIPATH_S_PIOBUFAVAILUPD; | ||
459 | ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, | ||
460 | dd->ipath_sendctrl); | ||
461 | |||
462 | /* | ||
463 | * enable port 0 receive, and receive interrupt. other ports | ||
464 | * done as user opens and inits them. | ||
465 | */ | ||
466 | dd->ipath_rcvctrl = INFINIPATH_R_TAILUPD | | ||
467 | (1ULL << INFINIPATH_R_PORTENABLE_SHIFT) | | ||
468 | (1ULL << INFINIPATH_R_INTRAVAIL_SHIFT); | ||
469 | ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, | ||
470 | dd->ipath_rcvctrl); | ||
471 | |||
472 | /* | ||
473 | * now ready for use. this should be cleared whenever we | ||
474 | * detect a reset, or initiate one. | ||
475 | */ | ||
476 | dd->ipath_flags |= IPATH_INITTED; | ||
477 | |||
478 | /* | ||
479 | * init our shadow copies of head from tail values, and write | ||
480 | * head values to match. | ||
481 | */ | ||
482 | val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0); | ||
483 | (void)ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0); | ||
484 | dd->ipath_port0head = ipath_read_ureg32(dd, ur_rcvhdrtail, 0); | ||
485 | |||
486 | /* Initialize so we interrupt on next packet received */ | ||
487 | (void)ipath_write_ureg(dd, ur_rcvhdrhead, | ||
488 | dd->ipath_rhdrhead_intr_off | | ||
489 | dd->ipath_port0head, 0); | ||
490 | |||
491 | /* | ||
492 | * by now pioavail updates to memory should have occurred, so | ||
493 | * copy them into our working/shadow registers; this is in | ||
494 | * case something went wrong with abort, but mostly to get the | ||
495 | * initial values of the generation bit correct. | ||
496 | */ | ||
497 | for (i = 0; i < dd->ipath_pioavregs; i++) { | ||
498 | __le64 val; | ||
499 | |||
500 | /* | ||
501 | * Chip Errata bug 6641; even and odd qwords>3 are swapped. | ||
502 | */ | ||
503 | if (i > 3) { | ||
504 | if (i & 1) | ||
505 | val = dd->ipath_pioavailregs_dma[i - 1]; | ||
506 | else | ||
507 | val = dd->ipath_pioavailregs_dma[i + 1]; | ||
508 | } | ||
509 | else | ||
510 | val = dd->ipath_pioavailregs_dma[i]; | ||
511 | dd->ipath_pioavailshadow[i] = le64_to_cpu(val); | ||
512 | } | ||
513 | /* can get counters, stats, etc. */ | ||
514 | dd->ipath_flags |= IPATH_PRESENT; | ||
515 | } | ||
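
The errata-6641 workaround above reads each shadow entry from a swapped neighbour once the qword index exceeds 3. A small hedged sketch of that index mapping:

#include <stdio.h>

/* For qword indices > 3, even and odd entries in the DMA'ed pioavail copy
 * are swapped, so the shadow copy reads its pair neighbour instead. */
static unsigned pioavail_dma_index(unsigned i)
{
	if (i > 3)
		return (i & 1) ? i - 1 : i + 1;
	return i;
}

int main(void)
{
	unsigned i;

	for (i = 0; i < 8; i++)
		printf("shadow[%u] <- dma[%u]\n", i, pioavail_dma_index(i));
	return 0;
}
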
516 | |||
517 | static int init_housekeeping(struct ipath_devdata *dd, | ||
518 | struct ipath_portdata **pdp, int reinit) | ||
519 | { | ||
520 | char boardn[32]; | ||
521 | int ret = 0; | ||
522 | |||
523 | /* | ||
524 | * have to clear shadow copies of registers at init that are | ||
525 | * not otherwise set here, or all kinds of bizarre things | ||
526 | * happen with driver on chip reset | ||
527 | */ | ||
528 | dd->ipath_rcvhdrsize = 0; | ||
529 | |||
530 | /* | ||
531 | * Don't clear ipath_flags as 8bit mode was set before | ||
532 | * entering this func. However, we do set the linkstate to | ||
533 | * unknown, so we can watch for a transition. | ||
534 | */ | ||
535 | dd->ipath_flags |= IPATH_LINKUNK; | ||
536 | dd->ipath_flags &= ~(IPATH_LINKACTIVE | IPATH_LINKARMED | | ||
537 | IPATH_LINKDOWN | IPATH_LINKINIT); | ||
538 | |||
539 | ipath_cdbg(VERBOSE, "Try to read spc chip revision\n"); | ||
540 | dd->ipath_revision = | ||
541 | ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision); | ||
542 | |||
543 | /* | ||
544 | * set up fundamental info we need to use the chip; we assume | ||
545 | * if the revision reg and these regs are OK, we don't need to | ||
546 | * special case the rest | ||
547 | */ | ||
548 | dd->ipath_sregbase = | ||
549 | ipath_read_kreg32(dd, dd->ipath_kregs->kr_sendregbase); | ||
550 | dd->ipath_cregbase = | ||
551 | ipath_read_kreg32(dd, dd->ipath_kregs->kr_counterregbase); | ||
552 | dd->ipath_uregbase = | ||
553 | ipath_read_kreg32(dd, dd->ipath_kregs->kr_userregbase); | ||
554 | ipath_cdbg(VERBOSE, "ipath_kregbase %p, sendbase %x usrbase %x, " | ||
555 | "cntrbase %x\n", dd->ipath_kregbase, dd->ipath_sregbase, | ||
556 | dd->ipath_uregbase, dd->ipath_cregbase); | ||
557 | if ((dd->ipath_revision & 0xffffffff) == 0xffffffff | ||
558 | || (dd->ipath_sregbase & 0xffffffff) == 0xffffffff | ||
559 | || (dd->ipath_cregbase & 0xffffffff) == 0xffffffff | ||
560 | || (dd->ipath_uregbase & 0xffffffff) == 0xffffffff) { | ||
561 | ipath_dev_err(dd, "Register read failures from chip, " | ||
562 | "giving up initialization\n"); | ||
563 | ret = -ENODEV; | ||
564 | goto done; | ||
565 | } | ||
566 | |||
567 | /* clear the initial reset flag, in case first driver load */ | ||
568 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, | ||
569 | INFINIPATH_E_RESET); | ||
570 | |||
571 | if (reinit) | ||
572 | ret = init_chip_reset(dd, pdp); | ||
573 | else | ||
574 | ret = init_chip_first(dd, pdp); | ||
575 | |||
576 | if (ret) | ||
577 | goto done; | ||
578 | |||
579 | ipath_cdbg(VERBOSE, "Revision %llx (PCI %x), %u ports, %u tids, " | ||
580 | "%u egrtids\n", (unsigned long long) dd->ipath_revision, | ||
581 | dd->ipath_pcirev, dd->ipath_portcnt, dd->ipath_rcvtidcnt, | ||
582 | dd->ipath_rcvegrcnt); | ||
583 | |||
584 | if (((dd->ipath_revision >> INFINIPATH_R_SOFTWARE_SHIFT) & | ||
585 | INFINIPATH_R_SOFTWARE_MASK) != IPATH_CHIP_SWVERSION) { | ||
586 | ipath_dev_err(dd, "Driver only handles version %d, " | ||
587 | "chip swversion is %d (%llx), failing\n", | ||
588 | IPATH_CHIP_SWVERSION, | ||
589 | (int)(dd->ipath_revision >> | ||
590 | INFINIPATH_R_SOFTWARE_SHIFT) & | ||
591 | INFINIPATH_R_SOFTWARE_MASK, | ||
592 | (unsigned long long) dd->ipath_revision); | ||
593 | ret = -ENOSYS; | ||
594 | goto done; | ||
595 | } | ||
596 | dd->ipath_majrev = (u8) ((dd->ipath_revision >> | ||
597 | INFINIPATH_R_CHIPREVMAJOR_SHIFT) & | ||
598 | INFINIPATH_R_CHIPREVMAJOR_MASK); | ||
599 | dd->ipath_minrev = (u8) ((dd->ipath_revision >> | ||
600 | INFINIPATH_R_CHIPREVMINOR_SHIFT) & | ||
601 | INFINIPATH_R_CHIPREVMINOR_MASK); | ||
602 | dd->ipath_boardrev = (u8) ((dd->ipath_revision >> | ||
603 | INFINIPATH_R_BOARDID_SHIFT) & | ||
604 | INFINIPATH_R_BOARDID_MASK); | ||
605 | |||
606 | ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn); | ||
607 | |||
608 | snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion), | ||
609 | "Driver %u.%u, %s, InfiniPath%u %u.%u, PCI %u, " | ||
610 | "SW Compat %u\n", | ||
611 | IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn, | ||
612 | (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) & | ||
613 | INFINIPATH_R_ARCH_MASK, | ||
614 | dd->ipath_majrev, dd->ipath_minrev, dd->ipath_pcirev, | ||
615 | (unsigned)(dd->ipath_revision >> | ||
616 | INFINIPATH_R_SOFTWARE_SHIFT) & | ||
617 | INFINIPATH_R_SOFTWARE_MASK); | ||
618 | |||
619 | ipath_dbg("%s", dd->ipath_boardversion); | ||
620 | |||
621 | done: | ||
622 | return ret; | ||
623 | } | ||
624 | |||
625 | |||
626 | /** | ||
627 | * ipath_init_chip - do the actual initialization sequence on the chip | ||
628 | * @dd: the infinipath device | ||
629 | * @reinit: reinitializing, so don't allocate new memory | ||
630 | * | ||
631 | * Do the actual initialization sequence on the chip. This is done | ||
632 | * both from the init routine called from the PCI infrastructure, and | ||
633 | * when we reset the chip, or detect that it was reset internally, | ||
634 | * or it's administratively re-enabled. | ||
635 | * | ||
636 | * Memory allocation here and in called routines is only done in | ||
637 | * the first case (reinit == 0). We have to be careful, because even | ||
638 | * without memory allocation, we need to re-write all the chip registers, | ||
639 | * TIDs, etc. after the reset or enable has completed. | ||
640 | */ | ||
641 | int ipath_init_chip(struct ipath_devdata *dd, int reinit) | ||
642 | { | ||
643 | int ret = 0, i; | ||
644 | u32 val32, kpiobufs; | ||
645 | u64 val, atmp; | ||
646 | struct ipath_portdata *pd = NULL; /* keep gcc4 happy */ | ||
647 | |||
648 | ret = init_housekeeping(dd, &pd, reinit); | ||
649 | |||
650 | /* | ||
651 | * we ignore most issues after reporting them, but have to handle | ||
652 | * hardware-disabled chips before the generic error exit. | ||
653 | */ | ||
654 | if (ret == 2) { | ||
655 | /* unique error, known to ipath_init_one */ | ||
656 | ret = -EPERM; | ||
657 | goto done; | ||
658 | } | ||
659 | if (ret) | ||
660 | goto done; | ||
661 | |||
662 | /* | ||
663 | * We could bump this to allow for full rcvegrcnt + rcvtidcnt, | ||
664 | * but then it no longer nicely fits power of two, and since | ||
665 | * we now use routines that are backed by __get_free_pages, the | ||
666 | * rest would be wasted. | ||
667 | */ | ||
668 | dd->ipath_rcvhdrcnt = dd->ipath_rcvegrcnt; | ||
669 | ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrcnt, | ||
670 | dd->ipath_rcvhdrcnt); | ||
671 | |||
672 | /* | ||
673 | * Set up the shadow copies of the piobufavail registers, | ||
674 | * which we compare against the chip registers for now, and | ||
675 | * the in memory DMA'ed copies of the registers. This has to | ||
676 | * be done early, before we calculate lastport, etc. | ||
677 | */ | ||
678 | val = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; | ||
679 | /* | ||
680 | * calc number of pioavail registers, and save it; we have 2 | ||
681 | * bits per buffer. | ||
682 | */ | ||
683 | dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2) | ||
684 | / (sizeof(u64) * BITS_PER_BYTE / 2); | ||
685 | if (!ipath_kpiobufs) /* have to have at least 1, for SMA */ | ||
686 | kpiobufs = ipath_kpiobufs = 1; | ||
687 | else if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) < | ||
688 | (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT)) { | ||
689 | dev_info(&dd->pcidev->dev, "Too few PIO buffers (%u) " | ||
690 | "for %u ports to have %u each!\n", | ||
691 | dd->ipath_piobcnt2k + dd->ipath_piobcnt4k, | ||
692 | dd->ipath_cfgports, IPATH_MIN_USER_PORT_BUFCNT); | ||
693 | kpiobufs = 1; /* reserve just the minimum for SMA/ether */ | ||
694 | } else | ||
695 | kpiobufs = ipath_kpiobufs; | ||
696 | |||
697 | if (kpiobufs > | ||
698 | (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - | ||
699 | (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT))) { | ||
700 | i = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - | ||
701 | (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT); | ||
702 | if (i < 0) | ||
703 | i = 0; | ||
704 | dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs for " | ||
705 | "kernel leaves too few for %d user ports " | ||
706 | "(%d each); using %u\n", kpiobufs, | ||
707 | dd->ipath_cfgports - 1, | ||
708 | IPATH_MIN_USER_PORT_BUFCNT, i); | ||
709 | /* | ||
710 | * shouldn't change ipath_kpiobufs, because it could be | ||
711 | * different for different devices... | ||
712 | */ | ||
713 | kpiobufs = i; | ||
714 | } | ||
715 | dd->ipath_lastport_piobuf = | ||
716 | dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - kpiobufs; | ||
717 | dd->ipath_pbufsport = dd->ipath_cfgports > 1 | ||
718 | ? dd->ipath_lastport_piobuf / (dd->ipath_cfgports - 1) | ||
719 | : 0; | ||
720 | val32 = dd->ipath_lastport_piobuf - | ||
721 | (dd->ipath_pbufsport * (dd->ipath_cfgports - 1)); | ||
722 | if (val32 > 0) { | ||
723 | ipath_dbg("allocating %u pbufs/port leaves %u unused, " | ||
724 | "add to kernel\n", dd->ipath_pbufsport, val32); | ||
725 | /* give the buffers that don't divide evenly among the user | ||
726 | * ports back to the kernel's share */ | ||
727 | dd->ipath_lastport_piobuf -= val32; | ||
728 | } | ||
729 | dd->ipath_lastpioindex = dd->ipath_lastport_piobuf; | ||
730 | ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " | ||
731 | "each for %u user ports\n", kpiobufs, | ||
732 | dd->ipath_piobcnt2k + dd->ipath_piobcnt4k, | ||
733 | dd->ipath_pbufsport, dd->ipath_cfgports - 1); | ||
734 | |||
735 | dd->ipath_f_early_init(dd); | ||
736 | |||
737 | /* early_init sets rcvhdrentsize and rcvhdrsize, so this must be | ||
738 | * done after early_init */ | ||
739 | dd->ipath_hdrqlast = | ||
740 | dd->ipath_rcvhdrentsize * (dd->ipath_rcvhdrcnt - 1); | ||
741 | ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrentsize, | ||
742 | dd->ipath_rcvhdrentsize); | ||
743 | ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize, | ||
744 | dd->ipath_rcvhdrsize); | ||
745 | |||
746 | if (!reinit) { | ||
747 | ret = init_pioavailregs(dd); | ||
748 | init_shadow_tids(dd); | ||
749 | if (ret) | ||
750 | goto done; | ||
751 | } | ||
752 | |||
753 | (void)ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr, | ||
754 | dd->ipath_pioavailregs_phys); | ||
755 | /* | ||
756 | * this is to detect s/w errors, which the h/w works around by | ||
757 | * ignoring the low 6 bits of address, if it wasn't aligned. | ||
758 | */ | ||
759 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpioavailaddr); | ||
760 | if (val != dd->ipath_pioavailregs_phys) { | ||
761 | ipath_dev_err(dd, "Catastrophic software error, " | ||
762 | "SendPIOAvailAddr written as %lx, " | ||
763 | "read back as %llx\n", | ||
764 | (unsigned long) dd->ipath_pioavailregs_phys, | ||
765 | (unsigned long long) val); | ||
766 | ret = -EINVAL; | ||
767 | goto done; | ||
768 | } | ||
769 | |||
770 | val = ipath_port0_rcvhdrtail_dma + dd->ipath_unit * 64; | ||
771 | |||
772 | /* verify that the alignment requirement was met */ | ||
773 | ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr, | ||
774 | 0, val); | ||
775 | atmp = ipath_read_kreg64_port( | ||
776 | dd, dd->ipath_kregs->kr_rcvhdrtailaddr, 0); | ||
777 | if (val != atmp) { | ||
778 | ipath_dev_err(dd, "Catastrophic software error, " | ||
779 | "RcvHdrTailAddr0 written as %llx, " | ||
780 | "read back as %llx from %x\n", | ||
781 | (unsigned long long) val, | ||
782 | (unsigned long long) atmp, | ||
783 | dd->ipath_kregs->kr_rcvhdrtailaddr); | ||
784 | ret = -EINVAL; | ||
785 | goto done; | ||
786 | } | ||
787 | |||
788 | ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP); | ||
789 | |||
790 | /* | ||
791 | * make sure we are not in freeze, and that PIO send is enabled, so | ||
792 | * writes to pbc happen | ||
793 | */ | ||
794 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, 0ULL); | ||
795 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, | ||
796 | ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED); | ||
797 | ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL); | ||
798 | ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, | ||
799 | INFINIPATH_S_PIOENABLE); | ||
800 | |||
801 | /* | ||
802 | * bring up the link before clearing errors, since we expect serdes | ||
803 | * pll errors during this, the first time after reset | ||
804 | */ | ||
805 | if (bringup_link(dd)) { | ||
806 | dev_info(&dd->pcidev->dev, "Failed to bringup IB link\n"); | ||
807 | ret = -ENETDOWN; | ||
808 | goto done; | ||
809 | } | ||
810 | |||
811 | /* | ||
812 | * clear any "expected" hwerrs from reset and/or initialization; | ||
813 | * clear any that aren't enabled (at least this once), and then | ||
814 | * set the enable mask | ||
815 | */ | ||
816 | dd->ipath_f_init_hwerrors(dd); | ||
817 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, | ||
818 | ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED); | ||
819 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, | ||
820 | dd->ipath_hwerrmask); | ||
821 | |||
822 | dd->ipath_maskederrs = dd->ipath_ignorederrs; | ||
823 | /* clear all */ | ||
824 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); | ||
825 | /* enable errors that are masked, at least this first time. */ | ||
826 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, | ||
827 | ~dd->ipath_maskederrs); | ||
828 | /* clear any interrupts up to this point (ints still not enabled) */ | ||
829 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); | ||
830 | |||
831 | ipath_stats.sps_lid[dd->ipath_unit] = dd->ipath_lid; | ||
832 | |||
833 | /* | ||
834 | * Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing | ||
835 | * re-init, the simplest way to handle this is to free | ||
836 | * existing, and re-allocate. | ||
837 | */ | ||
838 | if (reinit) | ||
839 | ipath_free_pddata(dd, 0, 0); | ||
840 | dd->ipath_f_tidtemplate(dd); | ||
841 | ret = ipath_create_rcvhdrq(dd, pd); | ||
842 | if (!ret) | ||
843 | ret = create_port0_egr(dd); | ||
844 | if (ret) | ||
845 | ipath_dev_err(dd, "failed to allocate port 0 (kernel) " | ||
846 | "rcvhdrq and/or egr bufs\n"); | ||
847 | else | ||
848 | enable_chip(dd, pd, reinit); | ||
849 | |||
850 | /* | ||
851 | * cause retrigger of pending interrupts ignored during init, | ||
852 | * even if we had errors | ||
853 | */ | ||
854 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); | ||
855 | |||
856 | if (!dd->ipath_stats_timer_active) { | ||
857 | /* | ||
858 | * first init, or after an admin disable/enable: set up the stats | ||
859 | * retrieval timer, even if we had errors in the last portion of | ||
860 | * setup | ||
861 | */ | ||
862 | init_timer(&dd->ipath_stats_timer); | ||
863 | dd->ipath_stats_timer.function = ipath_get_faststats; | ||
864 | dd->ipath_stats_timer.data = (unsigned long) dd; | ||
865 | /* every 5 seconds; */ | ||
866 | dd->ipath_stats_timer.expires = jiffies + 5 * HZ; | ||
867 | /* takes ~16 seconds to overflow at full IB 4x bandwidth */ | ||
868 | add_timer(&dd->ipath_stats_timer); | ||
869 | dd->ipath_stats_timer_active = 1; | ||
870 | } | ||
871 | |||
872 | done: | ||
873 | if (!ret) { | ||
874 | ipath_get_guid(dd); | ||
875 | *dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT; | ||
876 | if (!dd->ipath_f_intrsetup(dd)) { | ||
877 | /* now we can enable all interrupts from the chip */ | ||
878 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, | ||
879 | -1LL); | ||
880 | /* force re-interrupt of any pending interrupts. */ | ||
881 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, | ||
882 | 0ULL); | ||
883 | /* chip is usable; mark it as initialized */ | ||
884 | *dd->ipath_statusp |= IPATH_STATUS_INITTED; | ||
885 | } else | ||
886 | ipath_dev_err(dd, "No interrupts enabled, couldn't " | ||
887 | "setup interrupt address\n"); | ||
888 | |||
889 | if (dd->ipath_cfgports > ipath_stats.sps_nports) | ||
890 | /* | ||
891 | * sps_nports is a global, so, we set it to | ||
892 | * the highest number of ports of any of the | ||
893 | * chips we find; we never decrement it, at | ||
894 | * least for now. Since this might have changed | ||
895 | * over disable/enable or prior to reset, always | ||
896 | * do the check and potentially adjust. | ||
897 | */ | ||
898 | ipath_stats.sps_nports = dd->ipath_cfgports; | ||
899 | } else | ||
900 | ipath_dbg("Failed (%d) to initialize chip\n", ret); | ||
901 | |||
902 | /* if ret is non-zero, we probably should do some cleanup | ||
903 | here... */ | ||
904 | return ret; | ||
905 | } | ||
906 | |||
907 | static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp) | ||
908 | { | ||
909 | struct ipath_devdata *dd; | ||
910 | unsigned long flags; | ||
911 | unsigned short val; | ||
912 | int ret; | ||
913 | |||
914 | ret = ipath_parse_ushort(str, &val); | ||
915 | |||
916 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
917 | |||
918 | if (ret < 0) | ||
919 | goto bail; | ||
920 | |||
921 | if (val == 0) { | ||
922 | ret = -EINVAL; | ||
923 | goto bail; | ||
924 | } | ||
925 | |||
926 | list_for_each_entry(dd, &ipath_dev_list, ipath_list) { | ||
927 | if (!dd->ipath_kregbase) | ||
928 | continue; | ||
929 | if (val > (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - | ||
930 | (dd->ipath_cfgports * | ||
931 | IPATH_MIN_USER_PORT_BUFCNT))) | ||
932 | { | ||
933 | ipath_dev_err( | ||
934 | dd, | ||
935 | "Allocating %d PIO bufs for kernel leaves " | ||
936 | "too few for %d user ports (%d each)\n", | ||
937 | val, dd->ipath_cfgports - 1, | ||
938 | IPATH_MIN_USER_PORT_BUFCNT); | ||
939 | ret = -EINVAL; | ||
940 | goto bail; | ||
941 | } | ||
942 | dd->ipath_lastport_piobuf = | ||
943 | dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val; | ||
944 | } | ||
945 | |||
946 | ret = 0; | ||
947 | bail: | ||
948 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
949 | |||
950 | return ret; | ||
951 | } | ||
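
The PIO bookkeeping in ipath_init_chip() above packs two status bits per send buffer into 64-bit pioavail registers, then splits the non-kernel buffers evenly across the user ports and folds any remainder back into the kernel's share. The following is a minimal userspace sketch of that arithmetic only; the buffer and port counts (128 buffers, 5 configured ports, 32 kernel buffers) and the ALIGN_UP helper are made-up illustrative values, not data from any real board and not the driver's own code.

/*
 * Standalone sketch of the PIO-buffer bookkeeping done in ipath_init_chip().
 * All counts are assumptions chosen purely for illustration.
 */
#include <stdio.h>
#include <stdint.h>

#define BITS_PER_BYTE	8
/* round x up to a multiple of a (same result as the kernel's ALIGN here) */
#define ALIGN_UP(x, a)	((((x) + (a) - 1) / (a)) * (a))

int main(void)
{
	uint32_t piobcnt2k = 128, piobcnt4k = 0;	/* assumed buffer counts */
	uint32_t cfgports = 5, kpiobufs = 32;		/* assumed configuration */
	uint32_t nbufs = piobcnt2k + piobcnt4k;

	/*
	 * 2 status bits per buffer, packed into 64-bit pioavail registers,
	 * so each register covers 32 buffers
	 */
	uint32_t bufs_per_reg = sizeof(uint64_t) * BITS_PER_BYTE / 2;
	uint32_t pioavregs = ALIGN_UP(nbufs, bufs_per_reg) / bufs_per_reg;

	/*
	 * the kernel keeps the last kpiobufs buffers; the rest are split
	 * evenly across the user ports (port 0 is the kernel port)
	 */
	uint32_t lastport_piobuf = nbufs - kpiobufs;
	uint32_t pbufsport = cfgports > 1 ?
		lastport_piobuf / (cfgports - 1) : 0;
	uint32_t leftover = lastport_piobuf - pbufsport * (cfgports - 1);

	/* buffers that don't divide evenly go back to the kernel's share */
	lastport_piobuf -= leftover;

	printf("%u pioavail regs, %u bufs/port for %u user ports, "
	       "%u kernel bufs\n", pioavregs, pbufsport, cfgports - 1,
	       nbufs - lastport_piobuf);
	return 0;
}

With those assumed numbers the sketch reports 4 pioavail registers and 24 buffers for each of the 4 user ports, leaving 32 buffers for the kernel, which mirrors the split that the driver stores in ipath_pioavregs, ipath_pbufsport and ipath_lastport_piobuf.
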
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c new file mode 100644 index 000000000000..0bcb428041f3 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_intr.c | |||
@@ -0,0 +1,841 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/pci.h> | ||
34 | |||
35 | #include "ipath_kernel.h" | ||
36 | #include "ips_common.h" | ||
37 | #include "ipath_layer.h" | ||
38 | |||
39 | #define E_SUM_PKTERRS \ | ||
40 | (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \ | ||
41 | INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \ | ||
42 | INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \ | ||
43 | INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \ | ||
44 | INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \ | ||
45 | INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP) | ||
46 | |||
47 | #define E_SUM_ERRS \ | ||
48 | (INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \ | ||
49 | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \ | ||
50 | INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \ | ||
51 | INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \ | ||
52 | INFINIPATH_E_INVALIDADDR) | ||
53 | |||
54 | static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) | ||
55 | { | ||
56 | unsigned long sbuf[4]; | ||
57 | u64 ignore_this_time = 0; | ||
58 | u32 piobcnt; | ||
59 | |||
60 | /* it's possible that sendbuffererror could be valid */ | ||
61 | piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; | ||
62 | /* read these before writing errorclear */ | ||
63 | sbuf[0] = ipath_read_kreg64( | ||
64 | dd, dd->ipath_kregs->kr_sendbuffererror); | ||
65 | sbuf[1] = ipath_read_kreg64( | ||
66 | dd, dd->ipath_kregs->kr_sendbuffererror + 1); | ||
67 | if (piobcnt > 128) { | ||
68 | sbuf[2] = ipath_read_kreg64( | ||
69 | dd, dd->ipath_kregs->kr_sendbuffererror + 2); | ||
70 | sbuf[3] = ipath_read_kreg64( | ||
71 | dd, dd->ipath_kregs->kr_sendbuffererror + 3); | ||
72 | } | ||
73 | |||
74 | if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { | ||
75 | int i; | ||
76 | |||
77 | ipath_cdbg(PKT, "SendbufErrs %lx %lx ", sbuf[0], sbuf[1]); | ||
78 | if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128) | ||
79 | printk("%lx %lx ", sbuf[2], sbuf[3]); | ||
80 | for (i = 0; i < piobcnt; i++) { | ||
81 | if (test_bit(i, sbuf)) { | ||
82 | u32 __iomem *piobuf; | ||
83 | if (i < dd->ipath_piobcnt2k) | ||
84 | piobuf = (u32 __iomem *) | ||
85 | (dd->ipath_pio2kbase + | ||
86 | i * dd->ipath_palign); | ||
87 | else | ||
88 | piobuf = (u32 __iomem *) | ||
89 | (dd->ipath_pio4kbase + | ||
90 | (i - dd->ipath_piobcnt2k) * | ||
91 | dd->ipath_4kalign); | ||
92 | |||
93 | ipath_cdbg(PKT, | ||
94 | "PIObuf[%u] @%p pbc is %x; ", | ||
95 | i, piobuf, readl(piobuf)); | ||
96 | |||
97 | ipath_disarm_piobufs(dd, i, 1); | ||
98 | } | ||
99 | } | ||
100 | if (ipath_debug & __IPATH_PKTDBG) | ||
101 | printk("\n"); | ||
102 | } | ||
103 | if ((errs & (INFINIPATH_E_SDROPPEDDATAPKT | | ||
104 | INFINIPATH_E_SDROPPEDSMPPKT | | ||
105 | INFINIPATH_E_SMINPKTLEN)) && | ||
106 | !(dd->ipath_flags & IPATH_LINKACTIVE)) { | ||
107 | /* | ||
108 | * This can happen when SMA is trying to bring the link | ||
109 | * up, but the IB link changes state at the "wrong" time. | ||
110 | * The IB logic then complains that the packet isn't | ||
111 | * valid. We don't want to confuse people, so we just | ||
112 | * don't print them, except at debug | ||
113 | */ | ||
114 | ipath_dbg("Ignoring pktsend errors %llx, because not " | ||
115 | "yet active\n", (unsigned long long) errs); | ||
116 | ignore_this_time = INFINIPATH_E_SDROPPEDDATAPKT | | ||
117 | INFINIPATH_E_SDROPPEDSMPPKT | | ||
118 | INFINIPATH_E_SMINPKTLEN; | ||
119 | } | ||
120 | |||
121 | return ignore_this_time; | ||
122 | } | ||
123 | |||
124 | /* return the strings for the most common link states */ | ||
125 | static char *ib_linkstate(u32 linkstate) | ||
126 | { | ||
127 | char *ret; | ||
128 | |||
129 | switch (linkstate) { | ||
130 | case IPATH_IBSTATE_INIT: | ||
131 | ret = "Init"; | ||
132 | break; | ||
133 | case IPATH_IBSTATE_ARM: | ||
134 | ret = "Arm"; | ||
135 | break; | ||
136 | case IPATH_IBSTATE_ACTIVE: | ||
137 | ret = "Active"; | ||
138 | break; | ||
139 | default: | ||
140 | ret = "Down"; | ||
141 | } | ||
142 | |||
143 | return ret; | ||
144 | } | ||
145 | |||
146 | static void handle_e_ibstatuschanged(struct ipath_devdata *dd, | ||
147 | ipath_err_t errs, int noprint) | ||
148 | { | ||
149 | u64 val; | ||
150 | u32 ltstate, lstate; | ||
151 | |||
152 | /* | ||
153 | * even if diags are enabled, we want to notice LINKINIT, etc. | ||
154 | * We just don't want to change the LED state, or | ||
155 | * dd->ipath_kregs->kr_ibcctrl | ||
156 | */ | ||
157 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); | ||
158 | lstate = val & IPATH_IBSTATE_MASK; | ||
159 | if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM || | ||
160 | lstate == IPATH_IBSTATE_ACTIVE) { | ||
161 | /* | ||
162 | * only print at SMA if there is a change, debug if not | ||
163 | * (sometimes we want to know that, usually not). | ||
164 | */ | ||
165 | if (lstate == ((unsigned) dd->ipath_lastibcstat | ||
166 | & IPATH_IBSTATE_MASK)) { | ||
167 | ipath_dbg("Status change intr but no change (%s)\n", | ||
168 | ib_linkstate(lstate)); | ||
169 | } | ||
170 | else | ||
171 | ipath_cdbg(SMA, "Unit %u link state %s, last " | ||
172 | "was %s\n", dd->ipath_unit, | ||
173 | ib_linkstate(lstate), | ||
174 | ib_linkstate((unsigned) | ||
175 | dd->ipath_lastibcstat | ||
176 | & IPATH_IBSTATE_MASK)); | ||
177 | } | ||
178 | else { | ||
179 | lstate = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK; | ||
180 | if (lstate == IPATH_IBSTATE_INIT || | ||
181 | lstate == IPATH_IBSTATE_ARM || | ||
182 | lstate == IPATH_IBSTATE_ACTIVE) | ||
183 | ipath_cdbg(SMA, "Unit %u link state down" | ||
184 | " (state 0x%x), from %s\n", | ||
185 | dd->ipath_unit, | ||
186 | (u32)val & IPATH_IBSTATE_MASK, | ||
187 | ib_linkstate(lstate)); | ||
188 | else | ||
189 | ipath_cdbg(VERBOSE, "Unit %u link state changed " | ||
190 | "to 0x%x from down (%x)\n", | ||
191 | dd->ipath_unit, (u32) val, lstate); | ||
192 | } | ||
193 | ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & | ||
194 | INFINIPATH_IBCS_LINKTRAININGSTATE_MASK; | ||
195 | lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) & | ||
196 | INFINIPATH_IBCS_LINKSTATE_MASK; | ||
197 | |||
198 | if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE || | ||
199 | ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) { | ||
200 | u32 last_ltstate; | ||
201 | |||
202 | /* | ||
203 | * Ignore cycling back and forth from Polling.Active | ||
204 | * to Polling.Quiet while waiting for the other end of | ||
205 | * the link to come up. We will cycle back and forth | ||
206 | * between them if no cable is plugged in, | ||
207 | * the other device is powered off or disabled, etc. | ||
208 | */ | ||
209 | last_ltstate = (dd->ipath_lastibcstat >> | ||
210 | INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | ||
211 | & INFINIPATH_IBCS_LINKTRAININGSTATE_MASK; | ||
212 | if (last_ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE | ||
213 | || last_ltstate == | ||
214 | INFINIPATH_IBCS_LT_STATE_POLLQUIET) { | ||
215 | if (dd->ipath_ibpollcnt > 40) { | ||
216 | dd->ipath_flags |= IPATH_NOCABLE; | ||
217 | *dd->ipath_statusp |= | ||
218 | IPATH_STATUS_IB_NOCABLE; | ||
219 | } else | ||
220 | dd->ipath_ibpollcnt++; | ||
221 | goto skip_ibchange; | ||
222 | } | ||
223 | } | ||
224 | dd->ipath_ibpollcnt = 0; /* some state other than 2 or 3 */ | ||
225 | ipath_stats.sps_iblink++; | ||
226 | if (ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) { | ||
227 | dd->ipath_flags |= IPATH_LINKDOWN; | ||
228 | dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT | ||
229 | | IPATH_LINKACTIVE | | ||
230 | IPATH_LINKARMED); | ||
231 | *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; | ||
232 | if (!noprint) { | ||
233 | if (((dd->ipath_lastibcstat >> | ||
234 | INFINIPATH_IBCS_LINKSTATE_SHIFT) & | ||
235 | INFINIPATH_IBCS_LINKSTATE_MASK) | ||
236 | == INFINIPATH_IBCS_L_STATE_ACTIVE) | ||
237 | /* if going from up to down, be more vocal */ | ||
238 | ipath_cdbg(SMA, | ||
239 | "Unit %u link now down (%s)\n", | ||
240 | dd->ipath_unit, | ||
241 | ipath_ibcstatus_str[ltstate]); | ||
242 | else | ||
243 | ipath_cdbg(VERBOSE, "Unit %u link is " | ||
244 | "down (%s)\n", dd->ipath_unit, | ||
245 | ipath_ibcstatus_str[ltstate]); | ||
246 | } | ||
247 | |||
248 | dd->ipath_f_setextled(dd, lstate, ltstate); | ||
249 | } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ACTIVE) { | ||
250 | dd->ipath_flags |= IPATH_LINKACTIVE; | ||
251 | dd->ipath_flags &= | ||
252 | ~(IPATH_LINKUNK | IPATH_LINKINIT | IPATH_LINKDOWN | | ||
253 | IPATH_LINKARMED | IPATH_NOCABLE); | ||
254 | *dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE; | ||
255 | *dd->ipath_statusp |= | ||
256 | IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF; | ||
257 | dd->ipath_f_setextled(dd, lstate, ltstate); | ||
258 | |||
259 | __ipath_layer_intr(dd, IPATH_LAYER_INT_IF_UP); | ||
260 | } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) { | ||
261 | /* | ||
262 | * set INIT and DOWN. Down is checked by most of the other | ||
263 | * code, but INIT is useful to know in a few places. | ||
264 | */ | ||
265 | dd->ipath_flags |= IPATH_LINKINIT | IPATH_LINKDOWN; | ||
266 | dd->ipath_flags &= | ||
267 | ~(IPATH_LINKUNK | IPATH_LINKACTIVE | IPATH_LINKARMED | ||
268 | | IPATH_NOCABLE); | ||
269 | *dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE | ||
270 | | IPATH_STATUS_IB_READY); | ||
271 | dd->ipath_f_setextled(dd, lstate, ltstate); | ||
272 | } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ARM) { | ||
273 | dd->ipath_flags |= IPATH_LINKARMED; | ||
274 | dd->ipath_flags &= | ||
275 | ~(IPATH_LINKUNK | IPATH_LINKDOWN | IPATH_LINKINIT | | ||
276 | IPATH_LINKACTIVE | IPATH_NOCABLE); | ||
277 | *dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE | ||
278 | | IPATH_STATUS_IB_READY); | ||
279 | dd->ipath_f_setextled(dd, lstate, ltstate); | ||
280 | } else { | ||
281 | if (!noprint) | ||
282 | ipath_dbg("IBstatuschange unit %u: %s (%x)\n", | ||
283 | dd->ipath_unit, | ||
284 | ipath_ibcstatus_str[ltstate], ltstate); | ||
285 | } | ||
286 | skip_ibchange: | ||
287 | dd->ipath_lastibcstat = val; | ||
288 | } | ||
289 | |||
290 | static void handle_supp_msgs(struct ipath_devdata *dd, | ||
291 | unsigned supp_msgs, char msg[512]) | ||
292 | { | ||
293 | /* | ||
294 | * Print the message unless it's ibc status change only, which | ||
295 | * happens so often we never want to count it. | ||
296 | */ | ||
297 | if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) { | ||
298 | ipath_decode_err(msg, 512, dd->ipath_lasterror & | ||
299 | ~INFINIPATH_E_IBSTATUSCHANGED); | ||
300 | if (dd->ipath_lasterror & | ||
301 | ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) | ||
302 | ipath_dev_err(dd, "Suppressed %u messages for " | ||
303 | "fast-repeating errors (%s) (%llx)\n", | ||
304 | supp_msgs, msg, | ||
305 | (unsigned long long) | ||
306 | dd->ipath_lasterror); | ||
307 | else { | ||
308 | /* | ||
309 | * rcvegrfull and rcvhdrqfull are "normal", for some | ||
310 | * types of processes (mostly benchmarks) that send | ||
311 | * huge numbers of messages, while not processing | ||
312 | * them. So only complain about these at debug | ||
313 | * level. | ||
314 | */ | ||
315 | ipath_dbg("Suppressed %u messages for %s\n", | ||
316 | supp_msgs, msg); | ||
317 | } | ||
318 | } | ||
319 | } | ||
320 | |||
321 | static unsigned handle_frequent_errors(struct ipath_devdata *dd, | ||
322 | ipath_err_t errs, char msg[512], | ||
323 | int *noprint) | ||
324 | { | ||
325 | unsigned long nc; | ||
326 | static unsigned long nextmsg_time; | ||
327 | static unsigned nmsgs, supp_msgs; | ||
328 | |||
329 | /* | ||
330 | * Throttle back "fast" messages to no more than 10 per 5 seconds. | ||
331 | * This isn't perfect, but it's a reasonable heuristic. If we get | ||
332 | * more than 10, give a 6x longer delay. | ||
333 | */ | ||
334 | nc = jiffies; | ||
335 | if (nmsgs > 10) { | ||
336 | if (time_before(nc, nextmsg_time)) { | ||
337 | *noprint = 1; | ||
338 | if (!supp_msgs++) | ||
339 | nextmsg_time = nc + HZ * 3; | ||
340 | } | ||
341 | else if (supp_msgs) { | ||
342 | handle_supp_msgs(dd, supp_msgs, msg); | ||
343 | supp_msgs = 0; | ||
344 | nmsgs = 0; | ||
345 | } | ||
346 | } | ||
347 | else if (!nmsgs++ || time_after(nc, nextmsg_time)) | ||
348 | nextmsg_time = nc + HZ / 2; | ||
349 | |||
350 | return supp_msgs; | ||
351 | } | ||
352 | |||
353 | static void handle_errors(struct ipath_devdata *dd, ipath_err_t errs) | ||
354 | { | ||
355 | char msg[512]; | ||
356 | u64 ignore_this_time = 0; | ||
357 | int i; | ||
358 | int chkerrpkts = 0, noprint = 0; | ||
359 | unsigned supp_msgs; | ||
360 | |||
361 | supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint); | ||
362 | |||
363 | /* | ||
364 | * don't report errors that are masked (includes those always | ||
365 | * ignored) | ||
366 | */ | ||
367 | errs &= ~dd->ipath_maskederrs; | ||
368 | |||
369 | /* do these first, they are most important */ | ||
370 | if (errs & INFINIPATH_E_HARDWARE) { | ||
371 | /* reuse same msg buf */ | ||
372 | dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg); | ||
373 | } | ||
374 | |||
375 | if (!noprint && (errs & ~infinipath_e_bitsextant)) | ||
376 | ipath_dev_err(dd, "error interrupt with unknown errors " | ||
377 | "%llx set\n", (unsigned long long) | ||
378 | (errs & ~infinipath_e_bitsextant)); | ||
379 | |||
380 | if (errs & E_SUM_ERRS) | ||
381 | ignore_this_time = handle_e_sum_errs(dd, errs); | ||
382 | |||
383 | if (supp_msgs == 250000) { | ||
384 | /* | ||
385 | * It's not entirely reasonable to assume that the errors set | ||
386 | * in the last clear period are all responsible for the | ||
387 | * problem, but the alternative is to assume it's the only | ||
388 | * ones on this particular interrupt, which also isn't great | ||
389 | */ | ||
390 | dd->ipath_maskederrs |= dd->ipath_lasterror | errs; | ||
391 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, | ||
392 | ~dd->ipath_maskederrs); | ||
393 | ipath_decode_err(msg, sizeof msg, | ||
394 | (dd->ipath_maskederrs & ~dd-> | ||
395 | ipath_ignorederrs)); | ||
396 | |||
397 | if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & | ||
398 | ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) | ||
399 | ipath_dev_err(dd, "Disabling error(s) %llx because " | ||
400 | "occuring too frequently (%s)\n", | ||
401 | (unsigned long long) | ||
402 | (dd->ipath_maskederrs & | ||
403 | ~dd->ipath_ignorederrs), msg); | ||
404 | else { | ||
405 | /* | ||
406 | * rcvegrfull and rcvhdrqfull are "normal", | ||
407 | * for some types of processes (mostly benchmarks) | ||
408 | * that send huge numbers of messages, while not | ||
409 | * processing them. So only complain about | ||
410 | * these at debug level. | ||
411 | */ | ||
412 | ipath_dbg("Disabling frequent queue full errors " | ||
413 | "(%s)\n", msg); | ||
414 | } | ||
415 | |||
416 | /* | ||
417 | * Re-enable the masked errors after around 3 minutes, in | ||
418 | * ipath_get_faststats(). If we have a series of fast | ||
419 | * repeating but different errors, the interval will keep | ||
420 | * stretching out, but that's OK, as that's pretty | ||
421 | * catastrophic. | ||
422 | */ | ||
423 | dd->ipath_unmasktime = jiffies + HZ * 180; | ||
424 | } | ||
425 | |||
426 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs); | ||
427 | if (ignore_this_time) | ||
428 | errs &= ~ignore_this_time; | ||
429 | if (errs & ~dd->ipath_lasterror) { | ||
430 | errs &= ~dd->ipath_lasterror; | ||
431 | /* never suppress duplicate hwerrors or ibstatuschange */ | ||
432 | dd->ipath_lasterror |= errs & | ||
433 | ~(INFINIPATH_E_HARDWARE | | ||
434 | INFINIPATH_E_IBSTATUSCHANGED); | ||
435 | } | ||
436 | if (!errs) | ||
437 | return; | ||
438 | |||
439 | if (!noprint) | ||
440 | /* | ||
441 | * the ones we mask off are handled specially below or above | ||
442 | */ | ||
443 | ipath_decode_err(msg, sizeof msg, | ||
444 | errs & ~(INFINIPATH_E_IBSTATUSCHANGED | | ||
445 | INFINIPATH_E_RRCVEGRFULL | | ||
446 | INFINIPATH_E_RRCVHDRFULL | | ||
447 | INFINIPATH_E_HARDWARE)); | ||
448 | else | ||
449 | /* so we don't need if (!noprint) at strlcat's below */ | ||
450 | *msg = 0; | ||
451 | |||
452 | if (errs & E_SUM_PKTERRS) { | ||
453 | ipath_stats.sps_pkterrs++; | ||
454 | chkerrpkts = 1; | ||
455 | } | ||
456 | if (errs & E_SUM_ERRS) | ||
457 | ipath_stats.sps_errs++; | ||
458 | |||
459 | if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) { | ||
460 | ipath_stats.sps_crcerrs++; | ||
461 | chkerrpkts = 1; | ||
462 | } | ||
463 | |||
464 | /* | ||
465 | * We don't want to print these two as they happen, or we can make | ||
466 | * the situation even worse, because it takes so long to print | ||
467 | * messages to serial consoles. Kernel ports get printed from | ||
468 | * fast_stats, no more than every 5 seconds, user ports get printed | ||
469 | * on close | ||
470 | */ | ||
471 | if (errs & INFINIPATH_E_RRCVHDRFULL) { | ||
472 | int any; | ||
473 | u32 hd, tl; | ||
474 | ipath_stats.sps_hdrqfull++; | ||
475 | for (any = i = 0; i < dd->ipath_cfgports; i++) { | ||
476 | struct ipath_portdata *pd = dd->ipath_pd[i]; | ||
477 | if (i == 0) { | ||
478 | hd = dd->ipath_port0head; | ||
479 | tl = (u32) le64_to_cpu( | ||
480 | *dd->ipath_hdrqtailptr); | ||
481 | } else if (pd && pd->port_cnt && | ||
482 | pd->port_rcvhdrtail_kvaddr) { | ||
483 | /* | ||
484 | * don't report same point multiple times, | ||
485 | * except kernel | ||
486 | */ | ||
487 | tl = (u32) *pd->port_rcvhdrtail_kvaddr; | ||
488 | if (tl == dd->ipath_lastrcvhdrqtails[i]) | ||
489 | continue; | ||
490 | hd = ipath_read_ureg32(dd, ur_rcvhdrhead, | ||
491 | i); | ||
492 | } else | ||
493 | continue; | ||
494 | if (hd == (tl + 1) || | ||
495 | (!hd && tl == dd->ipath_hdrqlast)) { | ||
496 | dd->ipath_lastrcvhdrqtails[i] = tl; | ||
497 | pd->port_hdrqfull++; | ||
498 | if (i == 0) | ||
499 | chkerrpkts = 1; | ||
500 | } | ||
501 | } | ||
502 | } | ||
503 | if (errs & INFINIPATH_E_RRCVEGRFULL) { | ||
504 | /* | ||
505 | * since this is of less importance and not likely to | ||
506 | * happen without also getting hdrfull, only count | ||
507 | * occurrences; don't check each port (or even the kernel | ||
508 | * vs user) | ||
509 | */ | ||
510 | ipath_stats.sps_etidfull++; | ||
511 | if (dd->ipath_port0head != | ||
512 | (u32) le64_to_cpu(*dd->ipath_hdrqtailptr)) | ||
513 | chkerrpkts = 1; | ||
514 | } | ||
515 | |||
516 | /* | ||
517 | * do this before IBSTATUSCHANGED, in case both bits set in a single | ||
518 | * interrupt; we want the STATUSCHANGE to "win", so our internal | ||
519 | * copy of the state machine is updated correctly | ||
520 | */ | ||
521 | if (errs & INFINIPATH_E_RIBLOSTLINK) { | ||
522 | /* | ||
523 | * force through block below | ||
524 | */ | ||
525 | errs |= INFINIPATH_E_IBSTATUSCHANGED; | ||
526 | ipath_stats.sps_iblink++; | ||
527 | dd->ipath_flags |= IPATH_LINKDOWN; | ||
528 | dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT | ||
529 | | IPATH_LINKARMED | IPATH_LINKACTIVE); | ||
530 | *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; | ||
531 | if (!noprint) { | ||
532 | u64 st = ipath_read_kreg64( | ||
533 | dd, dd->ipath_kregs->kr_ibcstatus); | ||
534 | |||
535 | ipath_dbg("Lost link, link now down (%s)\n", | ||
536 | ipath_ibcstatus_str[st & 0xf]); | ||
537 | } | ||
538 | } | ||
539 | if (errs & INFINIPATH_E_IBSTATUSCHANGED) | ||
540 | handle_e_ibstatuschanged(dd, errs, noprint); | ||
541 | |||
542 | if (errs & INFINIPATH_E_RESET) { | ||
543 | if (!noprint) | ||
544 | ipath_dev_err(dd, "Got reset, requires re-init " | ||
545 | "(unload and reload driver)\n"); | ||
546 | dd->ipath_flags &= ~IPATH_INITTED; /* needs re-init */ | ||
547 | /* mark as having had error */ | ||
548 | *dd->ipath_statusp |= IPATH_STATUS_HWERROR; | ||
549 | *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF; | ||
550 | } | ||
551 | |||
552 | if (!noprint && *msg) | ||
553 | ipath_dev_err(dd, "%s error\n", msg); | ||
554 | if (dd->ipath_sma_state_wanted & dd->ipath_flags) { | ||
555 | ipath_cdbg(VERBOSE, "sma wanted state %x, iflags now %x, " | ||
556 | "waking\n", dd->ipath_sma_state_wanted, | ||
557 | dd->ipath_flags); | ||
558 | wake_up_interruptible(&ipath_sma_state_wait); | ||
559 | } | ||
560 | |||
561 | if (chkerrpkts) | ||
562 | /* process possible error packets in hdrq */ | ||
563 | ipath_kreceive(dd); | ||
564 | } | ||
565 | |||
566 | /* this is separate to allow for better optimization of ipath_intr() */ | ||
567 | |||
568 | static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp) | ||
569 | { | ||
570 | /* | ||
571 | * these sometimes happen during driver init and unload; don't want | ||
572 | * to process any interrupts at that point | ||
573 | */ | ||
574 | |||
575 | /* this is just a bandaid, not a fix, if something goes badly | ||
576 | * wrong */ | ||
577 | if (++*unexpectp > 100) { | ||
578 | if (++*unexpectp > 105) { | ||
579 | /* | ||
580 | * ok, we must be taking somebody else's interrupts, | ||
581 | * due to a messed up mptable and/or PIRQ table, so | ||
582 | * unregister the interrupt. We've seen this during | ||
583 | * linuxbios development work, and it may happen in | ||
584 | * the future again. | ||
585 | */ | ||
586 | if (dd->pcidev && dd->pcidev->irq) { | ||
587 | ipath_dev_err(dd, "Now %u unexpected " | ||
588 | "interrupts, unregistering " | ||
589 | "interrupt handler\n", | ||
590 | *unexpectp); | ||
591 | ipath_dbg("free_irq of irq %x\n", | ||
592 | dd->pcidev->irq); | ||
593 | free_irq(dd->pcidev->irq, dd); | ||
594 | } | ||
595 | } | ||
596 | if (ipath_read_kreg32(dd, dd->ipath_kregs->kr_intmask)) { | ||
597 | ipath_dev_err(dd, "%u unexpected interrupts, " | ||
598 | "disabling interrupts completely\n", | ||
599 | *unexpectp); | ||
600 | /* | ||
601 | * disable all interrupts, something is very wrong | ||
602 | */ | ||
603 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, | ||
604 | 0ULL); | ||
605 | } | ||
606 | } else if (*unexpectp > 1) | ||
607 | ipath_dbg("Interrupt when not ready, should not happen, " | ||
608 | "ignoring\n"); | ||
609 | } | ||
610 | |||
611 | static void ipath_bad_regread(struct ipath_devdata *dd) | ||
612 | { | ||
613 | static int allbits; | ||
614 | |||
615 | /* separate routine, for better optimization of ipath_intr() */ | ||
616 | |||
617 | /* | ||
618 | * We print the message and disable interrupts, in hope of | ||
619 | * having a better chance of debugging the problem. | ||
620 | */ | ||
621 | ipath_dev_err(dd, | ||
622 | "Read of interrupt status failed (all bits set)\n"); | ||
623 | if (allbits++) { | ||
624 | /* disable all interrupts, something is very wrong */ | ||
625 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); | ||
626 | if (allbits == 2) { | ||
627 | ipath_dev_err(dd, "Still bad interrupt status, " | ||
628 | "unregistering interrupt\n"); | ||
629 | free_irq(dd->pcidev->irq, dd); | ||
630 | } else if (allbits > 2) { | ||
631 | if ((allbits % 10000) == 0) | ||
632 | printk("."); | ||
633 | } else | ||
634 | ipath_dev_err(dd, "Disabling interrupts, " | ||
635 | "multiple errors\n"); | ||
636 | } | ||
637 | } | ||
638 | |||
639 | static void handle_port_pioavail(struct ipath_devdata *dd) | ||
640 | { | ||
641 | u32 i; | ||
642 | /* | ||
643 | * start from port 1, since for now port 0 is never using | ||
644 | * wait_event for PIO | ||
645 | */ | ||
646 | for (i = 1; dd->ipath_portpiowait && i < dd->ipath_cfgports; i++) { | ||
647 | struct ipath_portdata *pd = dd->ipath_pd[i]; | ||
648 | |||
649 | if (pd && pd->port_cnt && | ||
650 | dd->ipath_portpiowait & (1U << i)) { | ||
651 | clear_bit(i, &dd->ipath_portpiowait); | ||
652 | if (test_bit(IPATH_PORT_WAITING_PIO, | ||
653 | &pd->port_flag)) { | ||
654 | clear_bit(IPATH_PORT_WAITING_PIO, | ||
655 | &pd->port_flag); | ||
656 | wake_up_interruptible(&pd->port_wait); | ||
657 | } | ||
658 | } | ||
659 | } | ||
660 | } | ||
661 | |||
662 | static void handle_layer_pioavail(struct ipath_devdata *dd) | ||
663 | { | ||
664 | int ret; | ||
665 | |||
666 | ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE); | ||
667 | if (ret > 0) | ||
668 | goto clear; | ||
669 | |||
670 | ret = __ipath_verbs_piobufavail(dd); | ||
671 | if (ret > 0) | ||
672 | goto clear; | ||
673 | |||
674 | return; | ||
675 | clear: | ||
676 | set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); | ||
677 | ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, | ||
678 | dd->ipath_sendctrl); | ||
679 | } | ||
680 | |||
681 | static void handle_rcv(struct ipath_devdata *dd, u32 istat) | ||
682 | { | ||
683 | u64 portr; | ||
684 | int i; | ||
685 | int rcvdint = 0; | ||
686 | |||
687 | portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) & | ||
688 | infinipath_i_rcvavail_mask) | ||
689 | | ((istat >> INFINIPATH_I_RCVURG_SHIFT) & | ||
690 | infinipath_i_rcvurg_mask); | ||
691 | for (i = 0; i < dd->ipath_cfgports; i++) { | ||
692 | struct ipath_portdata *pd = dd->ipath_pd[i]; | ||
693 | if (portr & (1 << i) && pd && | ||
694 | pd->port_cnt) { | ||
695 | if (i == 0) | ||
696 | ipath_kreceive(dd); | ||
697 | else if (test_bit(IPATH_PORT_WAITING_RCV, | ||
698 | &pd->port_flag)) { | ||
699 | int rcbit; | ||
700 | clear_bit(IPATH_PORT_WAITING_RCV, | ||
701 | &pd->port_flag); | ||
702 | rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT; | ||
703 | clear_bit(rcbit, &dd->ipath_rcvctrl); | ||
704 | wake_up_interruptible(&pd->port_wait); | ||
705 | rcvdint = 1; | ||
706 | } | ||
707 | } | ||
708 | } | ||
709 | if (rcvdint) { | ||
710 | /* only want to take one interrupt, so turn off the rcv | ||
711 | * interrupt for all the ports that we did the wakeup on | ||
712 | * (but never for kernel port) | ||
713 | */ | ||
714 | ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, | ||
715 | dd->ipath_rcvctrl); | ||
716 | } | ||
717 | } | ||
718 | |||
719 | irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs) | ||
720 | { | ||
721 | struct ipath_devdata *dd = data; | ||
722 | u32 istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus); | ||
723 | ipath_err_t estat = 0; | ||
724 | static unsigned unexpected = 0; | ||
725 | irqreturn_t ret; | ||
726 | |||
727 | if (unlikely(!istat)) { | ||
728 | ipath_stats.sps_nullintr++; | ||
729 | ret = IRQ_NONE; /* not our interrupt, or already handled */ | ||
730 | goto bail; | ||
731 | } | ||
732 | if (unlikely(istat == -1)) { | ||
733 | ipath_bad_regread(dd); | ||
734 | /* don't know if it was our interrupt or not */ | ||
735 | ret = IRQ_NONE; | ||
736 | goto bail; | ||
737 | } | ||
738 | |||
739 | ipath_stats.sps_ints++; | ||
740 | |||
741 | /* | ||
742 | * this needs to be flags&initted, not statusp, so we keep | ||
743 | * taking interrupts even after link goes down, etc. | ||
744 | * Also, we *must* clear the interrupt at some point, or we won't | ||
745 | * take it again, which can be real bad for errors, etc... | ||
746 | */ | ||
747 | |||
748 | if (!(dd->ipath_flags & IPATH_INITTED)) { | ||
749 | ipath_bad_intr(dd, &unexpected); | ||
750 | ret = IRQ_NONE; | ||
751 | goto bail; | ||
752 | } | ||
753 | if (unexpected) | ||
754 | unexpected = 0; | ||
755 | |||
756 | ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat); | ||
757 | |||
758 | if (istat & ~infinipath_i_bitsextant) | ||
759 | ipath_dev_err(dd, | ||
760 | "interrupt with unknown interrupts %x set\n", | ||
761 | istat & (u32) ~ infinipath_i_bitsextant); | ||
762 | |||
763 | if (istat & INFINIPATH_I_ERROR) { | ||
764 | ipath_stats.sps_errints++; | ||
765 | estat = ipath_read_kreg64(dd, | ||
766 | dd->ipath_kregs->kr_errorstatus); | ||
767 | if (!estat) | ||
768 | dev_info(&dd->pcidev->dev, "error interrupt (%x), " | ||
769 | "but no error bits set!\n", istat); | ||
770 | else if (estat == -1LL) | ||
771 | /* | ||
772 | * should we try clearing all, or hope next read | ||
773 | * works? | ||
774 | */ | ||
775 | ipath_dev_err(dd, "Read of error status failed " | ||
776 | "(all bits set); ignoring\n"); | ||
777 | else | ||
778 | handle_errors(dd, estat); | ||
779 | } | ||
780 | |||
781 | if (istat & INFINIPATH_I_GPIO) { | ||
782 | if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) { | ||
783 | u32 gpiostatus; | ||
784 | gpiostatus = ipath_read_kreg32( | ||
785 | dd, dd->ipath_kregs->kr_gpio_status); | ||
786 | ipath_dbg("Unexpected GPIO interrupt bits %x\n", | ||
787 | gpiostatus); | ||
788 | ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, | ||
789 | gpiostatus); | ||
790 | } | ||
791 | else { | ||
792 | /* Clear GPIO status bit 2 */ | ||
793 | ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear, | ||
794 | (u64) (1 << 2)); | ||
795 | |||
796 | /* | ||
797 | * Packets are available in the port 0 rcv queue. | ||
798 | * Eventually this needs to be generalized to check | ||
799 | * IPATH_GPIO_INTR, and the specific GPIO bit, if | ||
800 | * GPIO interrupts are used for anything else. | ||
801 | */ | ||
802 | ipath_kreceive(dd); | ||
803 | } | ||
804 | } | ||
805 | |||
806 | /* | ||
807 | * clear the ones we will deal with on this round | ||
808 | * We clear it early, mostly for receive interrupts, so we | ||
809 | * know the chip will have seen this by the time we process | ||
810 | * the queue, and will re-interrupt if necessary. The processor | ||
811 | * itself won't take the interrupt again until we return. | ||
812 | */ | ||
813 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat); | ||
814 | |||
815 | if (istat & INFINIPATH_I_SPIOBUFAVAIL) { | ||
816 | clear_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); | ||
817 | ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, | ||
818 | dd->ipath_sendctrl); | ||
819 | |||
820 | if (dd->ipath_portpiowait) | ||
821 | handle_port_pioavail(dd); | ||
822 | |||
823 | handle_layer_pioavail(dd); | ||
824 | } | ||
825 | |||
826 | /* | ||
827 | * we check for both transition from empty to non-empty, and urgent | ||
828 | * packets (those with the interrupt bit set in the header) | ||
829 | */ | ||
830 | |||
831 | if (istat & ((infinipath_i_rcvavail_mask << | ||
832 | INFINIPATH_I_RCVAVAIL_SHIFT) | ||
833 | | (infinipath_i_rcvurg_mask << | ||
834 | INFINIPATH_I_RCVURG_SHIFT))) | ||
835 | handle_rcv(dd, istat); | ||
836 | |||
837 | ret = IRQ_HANDLED; | ||
838 | |||
839 | bail: | ||
840 | return ret; | ||
841 | } | ||
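
handle_frequent_errors() above throttles error reporting: once more than ten messages arrive in quick succession, further ones are suppressed and only a summary count is printed after the quiet period expires. The sketch below is a standalone userspace rendering of that heuristic, with plain seconds in place of jiffies; the names (should_print, BURST_LIMIT) and the simulated burst in main() are invented for illustration and are not the driver's code.

/*
 * Standalone sketch of the message-throttling heuristic used by
 * handle_frequent_errors(): after a burst, further messages are suppressed
 * and a summary count is reported once the quiet period expires.
 */
#include <stdio.h>

#define BURST_LIMIT	10
#define SHORT_DELAY	1	/* stands in for the driver's HZ / 2 */
#define LONG_DELAY	3	/* stands in for the driver's HZ * 3 */

static long nextmsg_time;
static unsigned nmsgs, supp_msgs;

/* returns 1 if the message arriving at time 'now' should be printed */
static int should_print(long now)
{
	if (nmsgs > BURST_LIMIT) {
		if (now < nextmsg_time) {
			if (!supp_msgs++)
				nextmsg_time = now + LONG_DELAY;
			return 0;
		}
		if (supp_msgs) {
			printf("(suppressed %u messages)\n", supp_msgs);
			supp_msgs = 0;
			nmsgs = 0;
		}
	} else if (!nmsgs++ || now > nextmsg_time)
		nextmsg_time = now + SHORT_DELAY;

	return 1;
}

int main(void)
{
	int i;

	/* a burst of 20 back-to-back errors at t=0, then one more at t=10 */
	for (i = 0; i < 20; i++)
		if (should_print(0))
			printf("error #%d at t=0\n", i + 1);
	if (should_print(10))
		printf("error at t=10\n");
	return 0;
}

Run standalone, this prints the first eleven burst messages, silently drops the remaining nine, and emits the suppression summary only when the later message arrives after the quiet window, which is essentially what the driver does with its noprint flag and handle_supp_msgs().
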
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h new file mode 100644 index 000000000000..0ce5f19c9d62 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h | |||
@@ -0,0 +1,883 @@ | |||
1 | #ifndef _IPATH_KERNEL_H | ||
2 | #define _IPATH_KERNEL_H | ||
3 | /* | ||
4 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
5 | * | ||
6 | * This software is available to you under a choice of one of two | ||
7 | * licenses. You may choose to be licensed under the terms of the GNU | ||
8 | * General Public License (GPL) Version 2, available from the file | ||
9 | * COPYING in the main directory of this source tree, or the | ||
10 | * OpenIB.org BSD license below: | ||
11 | * | ||
12 | * Redistribution and use in source and binary forms, with or | ||
13 | * without modification, are permitted provided that the following | ||
14 | * conditions are met: | ||
15 | * | ||
16 | * - Redistributions of source code must retain the above | ||
17 | * copyright notice, this list of conditions and the following | ||
18 | * disclaimer. | ||
19 | * | ||
20 | * - Redistributions in binary form must reproduce the above | ||
21 | * copyright notice, this list of conditions and the following | ||
22 | * disclaimer in the documentation and/or other materials | ||
23 | * provided with the distribution. | ||
24 | * | ||
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
26 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
27 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
28 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
29 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
30 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
31 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
32 | * SOFTWARE. | ||
33 | */ | ||
34 | |||
35 | /* | ||
36 | * This header file is the base header file for infinipath kernel code. | ||
37 | * ipath_user.h serves a similar purpose for user code. | ||
38 | */ | ||
39 | |||
40 | #include <linux/interrupt.h> | ||
41 | #include <asm/io.h> | ||
42 | |||
43 | #include "ipath_common.h" | ||
44 | #include "ipath_debug.h" | ||
45 | #include "ipath_registers.h" | ||
46 | |||
47 | /* only s/w major version of InfiniPath we can handle */ | ||
48 | #define IPATH_CHIP_VERS_MAJ 2U | ||
49 | |||
50 | /* don't care about this except printing */ | ||
51 | #define IPATH_CHIP_VERS_MIN 0U | ||
52 | |||
53 | /* temporary, maybe always */ | ||
54 | extern struct infinipath_stats ipath_stats; | ||
55 | |||
56 | #define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ | ||
57 | |||
58 | struct ipath_portdata { | ||
59 | void **port_rcvegrbuf; | ||
60 | dma_addr_t *port_rcvegrbuf_phys; | ||
61 | /* rcvhdrq base, needs mmap before useful */ | ||
62 | void *port_rcvhdrq; | ||
63 | /* kernel virtual address where hdrqtail is updated */ | ||
64 | u64 *port_rcvhdrtail_kvaddr; | ||
65 | /* page * used for uaddr */ | ||
66 | struct page *port_rcvhdrtail_pagep; | ||
67 | /* | ||
68 | * temp buffer for expected send setup, allocated at open, instead | ||
69 | * of each setup call | ||
70 | */ | ||
71 | void *port_tid_pg_list; | ||
72 | /* when waiting for rcv or pioavail */ | ||
73 | wait_queue_head_t port_wait; | ||
74 | /* | ||
75 | * rcvegr bufs base, physical, must fit | ||
76 | * in 44 bits, so that mmap64 from 32 bit programs (44 bit) works | ||
77 | */ | ||
78 | dma_addr_t port_rcvegr_phys; | ||
79 | /* mmap of hdrq, must fit in 44 bits */ | ||
80 | dma_addr_t port_rcvhdrq_phys; | ||
81 | /* | ||
82 | * the actual user address that we ipath_mlock'ed, so we can | ||
83 | * ipath_munlock it at close | ||
84 | */ | ||
85 | unsigned long port_rcvhdrtail_uaddr; | ||
86 | /* | ||
87 | * number of opens on this instance (0 or 1; ignoring forks, dup, | ||
88 | * etc. for now) | ||
89 | */ | ||
90 | int port_cnt; | ||
91 | /* | ||
92 | * how much space to leave at start of eager TID entries for | ||
93 | * protocol use, on each TID | ||
94 | */ | ||
95 | /* port number of this port, stored instead of calculating it */ | ||
96 | unsigned port_port; | ||
97 | /* chip offset of PIO buffers for this port */ | ||
98 | u32 port_piobufs; | ||
99 | /* how many alloc_pages() chunks in port_rcvegrbuf_pages */ | ||
100 | u32 port_rcvegrbuf_chunks; | ||
101 | /* how many egrbufs per chunk */ | ||
102 | u32 port_rcvegrbufs_perchunk; | ||
103 | /* order for port_rcvegrbuf_pages */ | ||
104 | size_t port_rcvegrbuf_size; | ||
105 | /* rcvhdrq size (for freeing) */ | ||
106 | size_t port_rcvhdrq_size; | ||
107 | /* next expected TID to check when looking for free */ | ||
108 | u32 port_tidcursor; | ||
110 | /* port state flags (IPATH_PORT_WAITING_*, etc.) */ | ||
110 | unsigned long port_flag; | ||
111 | /* WAIT_RCV that timed out, no interrupt */ | ||
112 | u32 port_rcvwait_to; | ||
113 | /* WAIT_PIO that timed out, no interrupt */ | ||
114 | u32 port_piowait_to; | ||
115 | /* WAIT_RCV already happened, no wait */ | ||
116 | u32 port_rcvnowait; | ||
117 | /* WAIT_PIO already happened, no wait */ | ||
118 | u32 port_pionowait; | ||
119 | /* total number of rcvhdrqfull errors */ | ||
120 | u32 port_hdrqfull; | ||
121 | /* pid of process using this port */ | ||
122 | pid_t port_pid; | ||
123 | /* same size as task_struct .comm[] */ | ||
124 | char port_comm[16]; | ||
125 | /* pkeys set by this use of this port */ | ||
126 | u16 port_pkeys[4]; | ||
127 | /* so file ops can get at unit */ | ||
128 | struct ipath_devdata *port_dd; | ||
129 | }; | ||
130 | |||
131 | struct sk_buff; | ||
132 | |||
133 | /* | ||
134 | * control information for layered drivers | ||
135 | */ | ||
136 | struct _ipath_layer { | ||
137 | void *l_arg; | ||
138 | }; | ||
139 | |||
140 | /* Verbs layer interface */ | ||
141 | struct _verbs_layer { | ||
142 | void *l_arg; | ||
143 | struct timer_list l_timer; | ||
144 | }; | ||
145 | |||
146 | struct ipath_devdata { | ||
147 | struct list_head ipath_list; | ||
148 | |||
149 | struct ipath_kregs const *ipath_kregs; | ||
150 | struct ipath_cregs const *ipath_cregs; | ||
151 | |||
152 | /* mem-mapped pointer to base of chip regs */ | ||
153 | u64 __iomem *ipath_kregbase; | ||
154 | /* end of mem-mapped chip space; range checking */ | ||
155 | u64 __iomem *ipath_kregend; | ||
156 | /* physical address of chip for io_remap, etc. */ | ||
157 | unsigned long ipath_physaddr; | ||
158 | /* base of memory alloced for ipath_kregbase, for free */ | ||
159 | u64 *ipath_kregalloc; | ||
160 | /* | ||
161 | * version of kregbase that doesn't have high bits set (for 32 bit | ||
162 | * programs, so mmap64 44 bit works) | ||
163 | */ | ||
164 | u64 __iomem *ipath_kregvirt; | ||
165 | /* | ||
166 | * virtual address where port0 rcvhdrqtail updated for this unit. | ||
167 | * only written to by the chip, not the driver. | ||
168 | */ | ||
169 | volatile __le64 *ipath_hdrqtailptr; | ||
170 | dma_addr_t ipath_dma_addr; | ||
171 | /* ipath_cfgports pointers */ | ||
172 | struct ipath_portdata **ipath_pd; | ||
173 | /* sk_buffs used by port 0 eager receive queue */ | ||
174 | struct sk_buff **ipath_port0_skbs; | ||
175 | /* kvirt address of 1st 2k pio buffer */ | ||
176 | void __iomem *ipath_pio2kbase; | ||
177 | /* kvirt address of 1st 4k pio buffer */ | ||
178 | void __iomem *ipath_pio4kbase; | ||
179 | /* | ||
180 | * points to area where PIOavail registers will be DMA'ed. | ||
181 | * Has to be on a page of its own, because the page will be | ||
182 | * mapped into user program space. This copy is *ONLY* ever | ||
183 | * written by DMA, not by the driver! Need a copy per device | ||
184 | * when we get to multiple devices | ||
185 | */ | ||
186 | volatile __le64 *ipath_pioavailregs_dma; | ||
187 | /* physical address where updates occur */ | ||
188 | dma_addr_t ipath_pioavailregs_phys; | ||
189 | struct _ipath_layer ipath_layer; | ||
190 | /* setup intr */ | ||
191 | int (*ipath_f_intrsetup)(struct ipath_devdata *); | ||
192 | /* setup on-chip bus config */ | ||
193 | int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *); | ||
194 | /* hard reset chip */ | ||
195 | int (*ipath_f_reset)(struct ipath_devdata *); | ||
196 | int (*ipath_f_get_boardname)(struct ipath_devdata *, char *, | ||
197 | size_t); | ||
198 | void (*ipath_f_init_hwerrors)(struct ipath_devdata *); | ||
199 | void (*ipath_f_handle_hwerrors)(struct ipath_devdata *, char *, | ||
200 | size_t); | ||
201 | void (*ipath_f_quiet_serdes)(struct ipath_devdata *); | ||
202 | int (*ipath_f_bringup_serdes)(struct ipath_devdata *); | ||
203 | int (*ipath_f_early_init)(struct ipath_devdata *); | ||
204 | void (*ipath_f_clear_tids)(struct ipath_devdata *, unsigned); | ||
205 | void (*ipath_f_put_tid)(struct ipath_devdata *, u64 __iomem*, | ||
206 | u32, unsigned long); | ||
207 | void (*ipath_f_tidtemplate)(struct ipath_devdata *); | ||
208 | void (*ipath_f_cleanup)(struct ipath_devdata *); | ||
209 | void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64); | ||
210 | /* fill out chip-specific fields */ | ||
211 | int (*ipath_f_get_base_info)(struct ipath_portdata *, void *); | ||
212 | struct _verbs_layer verbs_layer; | ||
213 | /* total dwords sent (summed from counter) */ | ||
214 | u64 ipath_sword; | ||
215 | /* total dwords rcvd (summed from counter) */ | ||
216 | u64 ipath_rword; | ||
217 | /* total packets sent (summed from counter) */ | ||
218 | u64 ipath_spkts; | ||
219 | /* total packets rcvd (summed from counter) */ | ||
220 | u64 ipath_rpkts; | ||
221 | /* ipath_statusp initially points to this. */ | ||
222 | u64 _ipath_status; | ||
223 | /* GUID for this interface, in network order */ | ||
224 | __be64 ipath_guid; | ||
225 | /* | ||
226 | * aggregate of error bits reported since last cleared, for | ||
227 | * limiting of error reporting | ||
228 | */ | ||
229 | ipath_err_t ipath_lasterror; | ||
230 | /* | ||
231 | * aggregate of error bits reported since last cleared, for | ||
232 | * limiting of hwerror reporting | ||
233 | */ | ||
234 | ipath_err_t ipath_lasthwerror; | ||
235 | /* | ||
236 | * errors masked because they occur too fast, also includes errors | ||
237 | * that are always ignored (ipath_ignorederrs) | ||
238 | */ | ||
239 | ipath_err_t ipath_maskederrs; | ||
240 | /* time in jiffies at which to re-enable maskederrs */ | ||
241 | unsigned long ipath_unmasktime; | ||
242 | /* | ||
243 | * errors always ignored (masked), at least for a given | ||
244 | * chip/device, because they are wrong or not useful | ||
245 | */ | ||
246 | ipath_err_t ipath_ignorederrs; | ||
247 | /* count of egrfull errors, combined for all ports */ | ||
248 | u64 ipath_last_tidfull; | ||
249 | /* for ipath_qcheck() */ | ||
250 | u64 ipath_lastport0rcv_cnt; | ||
251 | /* template for writing TIDs */ | ||
252 | u64 ipath_tidtemplate; | ||
253 | /* value to write to free TIDs */ | ||
254 | u64 ipath_tidinvalid; | ||
255 | /* PE-800 rcv interrupt setup */ | ||
256 | u64 ipath_rhdrhead_intr_off; | ||
257 | |||
258 | /* size of memory at ipath_kregbase */ | ||
259 | u32 ipath_kregsize; | ||
260 | /* number of registers used for pioavail */ | ||
261 | u32 ipath_pioavregs; | ||
262 | /* IPATH_POLL, etc. */ | ||
263 | u32 ipath_flags; | ||
264 | /* ipath_flags sma is waiting for */ | ||
265 | u32 ipath_sma_state_wanted; | ||
266 | /* last buffer for user use, first buf for kernel use is this | ||
267 | * index. */ | ||
268 | u32 ipath_lastport_piobuf; | ||
269 | /* is a stats timer active */ | ||
270 | u32 ipath_stats_timer_active; | ||
271 | /* dwords sent read from counter */ | ||
272 | u32 ipath_lastsword; | ||
273 | /* dwords received read from counter */ | ||
274 | u32 ipath_lastrword; | ||
275 | /* sent packets read from counter */ | ||
276 | u32 ipath_lastspkts; | ||
277 | /* received packets read from counter */ | ||
278 | u32 ipath_lastrpkts; | ||
279 | /* pio bufs allocated per port */ | ||
280 | u32 ipath_pbufsport; | ||
281 | /* | ||
282 | * number of ports configured as max; zero is set to number chip | ||
283 | * supports, less gives more pio bufs/port, etc. | ||
284 | */ | ||
285 | u32 ipath_cfgports; | ||
286 | /* port0 rcvhdrq head offset */ | ||
287 | u32 ipath_port0head; | ||
288 | /* count of port 0 hdrqfull errors */ | ||
289 | u32 ipath_p0_hdrqfull; | ||
290 | |||
291 | /* | ||
292 | * (*cfgports) used to suppress multiple instances of same | ||
293 | * port staying stuck at same point | ||
294 | */ | ||
295 | u32 *ipath_lastrcvhdrqtails; | ||
296 | /* | ||
297 | * (*cfgports) used to suppress multiple instances of same | ||
298 | * port staying stuck at same point | ||
299 | */ | ||
300 | u32 *ipath_lastegrheads; | ||
301 | /* | ||
302 | * index of last piobuffer we used. Speeds up searching, by | ||
303 | * starting at this point. Doesn't matter if multiple CPUs use and | ||
304 | * update it; the last updater's write is all that matters. Whenever it | ||
305 | * wraps, we update shadow copies. Need a copy per device when we | ||
306 | * get to multiple devices | ||
307 | */ | ||
308 | u32 ipath_lastpioindex; | ||
309 | /* max length of freezemsg */ | ||
310 | u32 ipath_freezelen; | ||
311 | /* | ||
312 | * consecutive times we wanted a PIO buffer but were unable to | ||
313 | * get one | ||
314 | */ | ||
315 | u32 ipath_consec_nopiobuf; | ||
316 | /* | ||
317 | * hint that we should update ipath_pioavailshadow before | ||
318 | * looking for a PIO buffer | ||
319 | */ | ||
320 | u32 ipath_upd_pio_shadow; | ||
321 | /* so we can rewrite it after a chip reset */ | ||
322 | u32 ipath_pcibar0; | ||
323 | /* so we can rewrite it after a chip reset */ | ||
324 | u32 ipath_pcibar1; | ||
325 | /* sequential tries for SMA send and no bufs */ | ||
326 | u32 ipath_nosma_bufs; | ||
327 | /* duration (seconds) ipath_nosma_bufs set */ | ||
328 | u32 ipath_nosma_secs; | ||
329 | |||
330 | /* HT/PCI Vendor ID (here for NodeInfo) */ | ||
331 | u16 ipath_vendorid; | ||
332 | /* HT/PCI Device ID (here for NodeInfo) */ | ||
333 | u16 ipath_deviceid; | ||
334 | /* offset in HT config space of slave/primary interface block */ | ||
335 | u8 ipath_ht_slave_off; | ||
336 | /* for write combining settings */ | ||
337 | unsigned long ipath_wc_cookie; | ||
338 | /* ref count for each pkey */ | ||
339 | atomic_t ipath_pkeyrefs[4]; | ||
340 | /* shadow copy of all exptids physaddr; used only by funcsim */ | ||
341 | u64 *ipath_tidsimshadow; | ||
342 | /* shadow copy of struct page *'s for exp tid pages */ | ||
343 | struct page **ipath_pageshadow; | ||
344 | /* lock to workaround chip bug 9437 */ | ||
345 | spinlock_t ipath_tid_lock; | ||
346 | |||
347 | /* | ||
348 | * IPATH_STATUS_*, | ||
349 | * this address is mapped readonly into user processes so they can | ||
350 | * get status cheaply, whenever they want. | ||
351 | */ | ||
352 | u64 *ipath_statusp; | ||
353 | /* freeze msg if hw error put chip in freeze */ | ||
354 | char *ipath_freezemsg; | ||
355 | /* pci access data structure */ | ||
356 | struct pci_dev *pcidev; | ||
357 | struct cdev *cdev; | ||
358 | struct class_device *class_dev; | ||
359 | /* timer used to prevent stats overflow, error throttling, etc. */ | ||
360 | struct timer_list ipath_stats_timer; | ||
361 | /* check for stale messages in rcv queue */ | ||
362 | /* only allow one intr at a time. */ | ||
363 | unsigned long ipath_rcv_pending; | ||
364 | |||
365 | /* | ||
366 | * Shadow copies of registers; size indicates read access size. | ||
367 | * Most of them are readonly, but some are write-only registers, | ||
368 | * where we manipulate the bits in the shadow copy, and then write | ||
369 | * the shadow copy to infinipath. | ||
370 | * | ||
371 | * We deliberately make most of these 32 bits, since they have | ||
372 | * restricted range. For any that we read, we want to generate 32 | ||
373 | * bit accesses, since Opteron will generate 2 separate 32 bit HT | ||
374 | * transactions for a 64 bit read, and we want to avoid unnecessary | ||
375 | * HT transactions. | ||
376 | */ | ||
377 | |||
378 | /* This is the 64 bit group */ | ||
379 | |||
380 | /* | ||
381 | * shadow of pioavail, check to be sure it's large enough at | ||
382 | * init time. | ||
383 | */ | ||
384 | unsigned long ipath_pioavailshadow[8]; | ||
385 | /* shadow of kr_gpio_out, for rmw ops */ | ||
386 | u64 ipath_gpio_out; | ||
387 | /* kr_revision shadow */ | ||
388 | u64 ipath_revision; | ||
389 | /* | ||
390 | * shadow of ibcctrl, for interrupt handling of link changes, | ||
391 | * etc. | ||
392 | */ | ||
393 | u64 ipath_ibcctrl; | ||
394 | /* | ||
395 | * last ibcstatus, to suppress "duplicate" status change messages, | ||
396 | * mostly from 2 to 3 | ||
397 | */ | ||
398 | u64 ipath_lastibcstat; | ||
399 | /* hwerrmask shadow */ | ||
400 | ipath_err_t ipath_hwerrmask; | ||
401 | /* interrupt config reg shadow */ | ||
402 | u64 ipath_intconfig; | ||
403 | /* kr_sendpiobufbase value */ | ||
404 | u64 ipath_piobufbase; | ||
405 | |||
406 | /* these are the "32 bit" regs */ | ||
407 | |||
408 | /* | ||
409 | * number of GUIDs in the flash for this interface; may need some | ||
410 | * rethinking for setting on other ifaces | ||
411 | */ | ||
412 | u32 ipath_nguid; | ||
413 | /* | ||
414 | * the following two are 32-bit bitmasks, but {test,clear,set}_bit | ||
415 | * all expect bit fields to be "unsigned long" | ||
416 | */ | ||
417 | /* shadow kr_rcvctrl */ | ||
418 | unsigned long ipath_rcvctrl; | ||
419 | /* shadow kr_sendctrl */ | ||
420 | unsigned long ipath_sendctrl; | ||
421 | |||
422 | /* value we put in kr_rcvhdrcnt */ | ||
423 | u32 ipath_rcvhdrcnt; | ||
424 | /* value we put in kr_rcvhdrsize */ | ||
425 | u32 ipath_rcvhdrsize; | ||
426 | /* value we put in kr_rcvhdrentsize */ | ||
427 | u32 ipath_rcvhdrentsize; | ||
428 | /* offset of last entry in rcvhdrq */ | ||
429 | u32 ipath_hdrqlast; | ||
430 | /* kr_portcnt value */ | ||
431 | u32 ipath_portcnt; | ||
432 | /* kr_pagealign value */ | ||
433 | u32 ipath_palign; | ||
434 | /* number of "2KB" PIO buffers */ | ||
435 | u32 ipath_piobcnt2k; | ||
436 | /* size in bytes of "2KB" PIO buffers */ | ||
437 | u32 ipath_piosize2k; | ||
438 | /* number of "4KB" PIO buffers */ | ||
439 | u32 ipath_piobcnt4k; | ||
440 | /* size in bytes of "4KB" PIO buffers */ | ||
441 | u32 ipath_piosize4k; | ||
442 | /* kr_rcvegrbase value */ | ||
443 | u32 ipath_rcvegrbase; | ||
444 | /* kr_rcvegrcnt value */ | ||
445 | u32 ipath_rcvegrcnt; | ||
446 | /* kr_rcvtidbase value */ | ||
447 | u32 ipath_rcvtidbase; | ||
448 | /* kr_rcvtidcnt value */ | ||
449 | u32 ipath_rcvtidcnt; | ||
450 | /* kr_sendregbase */ | ||
451 | u32 ipath_sregbase; | ||
452 | /* kr_userregbase */ | ||
453 | u32 ipath_uregbase; | ||
454 | /* kr_counterregbase */ | ||
455 | u32 ipath_cregbase; | ||
456 | /* shadow the control register contents */ | ||
457 | u32 ipath_control; | ||
458 | /* shadow the gpio output contents */ | ||
459 | u32 ipath_extctrl; | ||
460 | /* PCI revision register (HTC rev on FPGA) */ | ||
461 | u32 ipath_pcirev; | ||
462 | |||
463 | /* chip address space used by 4k pio buffers */ | ||
464 | u32 ipath_4kalign; | ||
465 | /* The MTU programmed for this unit */ | ||
466 | u32 ipath_ibmtu; | ||
467 | /* | ||
468 | * The max size IB packet, including IB headers, that we can send. | ||
469 | * Starts same as ipath_piosize, but is affected when ibmtu is | ||
470 | * changed, or by size of eager buffers | ||
471 | */ | ||
472 | u32 ipath_ibmaxlen; | ||
473 | /* | ||
474 | * ibmaxlen at init time, limited by chip and by receive buffer | ||
475 | * size. Not changed after init. | ||
476 | */ | ||
477 | u32 ipath_init_ibmaxlen; | ||
478 | /* size of each rcvegrbuffer */ | ||
479 | u32 ipath_rcvegrbufsize; | ||
480 | /* width (2,4,8,16,32) from HT config reg */ | ||
481 | u32 ipath_htwidth; | ||
482 | /* HT speed (200,400,800,1000) from HT config */ | ||
483 | u32 ipath_htspeed; | ||
484 | /* ports waiting for PIOavail intr */ | ||
485 | unsigned long ipath_portpiowait; | ||
486 | /* | ||
487 | * number of sequential ibcstatus change for polling active/quiet | ||
488 | * (i.e., link not coming up). | ||
489 | */ | ||
490 | u32 ipath_ibpollcnt; | ||
491 | /* low and high portions of MSI capability/vector */ | ||
492 | u32 ipath_msi_lo; | ||
493 | /* saved after PCIe init for restore after reset */ | ||
494 | u32 ipath_msi_hi; | ||
495 | /* MSI data (vector) saved for restore */ | ||
496 | u16 ipath_msi_data; | ||
497 | /* MLID programmed for this instance */ | ||
498 | u16 ipath_mlid; | ||
499 | /* LID programmed for this instance */ | ||
500 | u16 ipath_lid; | ||
501 | /* list of pkeys programmed; 0 if not set */ | ||
502 | u16 ipath_pkeys[4]; | ||
503 | /* ASCII serial number, from flash */ | ||
504 | u8 ipath_serial[12]; | ||
505 | /* human readable board version */ | ||
506 | u8 ipath_boardversion[80]; | ||
507 | /* chip major rev, from ipath_revision */ | ||
508 | u8 ipath_majrev; | ||
509 | /* chip minor rev, from ipath_revision */ | ||
510 | u8 ipath_minrev; | ||
511 | /* board rev, from ipath_revision */ | ||
512 | u8 ipath_boardrev; | ||
513 | /* unit # of this chip, if present */ | ||
514 | int ipath_unit; | ||
515 | /* saved for restore after reset */ | ||
516 | u8 ipath_pci_cacheline; | ||
517 | /* LID mask control */ | ||
518 | u8 ipath_lmc; | ||
519 | }; | ||
520 | |||
521 | extern volatile __le64 *ipath_port0_rcvhdrtail; | ||
522 | extern dma_addr_t ipath_port0_rcvhdrtail_dma; | ||
523 | |||
524 | #define IPATH_PORT0_RCVHDRTAIL_SIZE PAGE_SIZE | ||
525 | |||
526 | extern struct list_head ipath_dev_list; | ||
527 | extern spinlock_t ipath_devs_lock; | ||
528 | extern struct ipath_devdata *ipath_lookup(int unit); | ||
529 | |||
530 | extern u16 ipath_layer_rcv_opcode; | ||
531 | extern int __ipath_layer_intr(struct ipath_devdata *, u32); | ||
532 | extern int ipath_layer_intr(struct ipath_devdata *, u32); | ||
533 | extern int __ipath_layer_rcv(struct ipath_devdata *, void *, | ||
534 | struct sk_buff *); | ||
535 | extern int __ipath_layer_rcv_lid(struct ipath_devdata *, void *); | ||
536 | extern int __ipath_verbs_piobufavail(struct ipath_devdata *); | ||
537 | extern int __ipath_verbs_rcv(struct ipath_devdata *, void *, void *, u32); | ||
538 | |||
539 | void ipath_layer_add(struct ipath_devdata *); | ||
540 | void ipath_layer_del(struct ipath_devdata *); | ||
541 | |||
542 | int ipath_init_chip(struct ipath_devdata *, int); | ||
543 | int ipath_enable_wc(struct ipath_devdata *dd); | ||
544 | void ipath_disable_wc(struct ipath_devdata *dd); | ||
545 | int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp); | ||
546 | void ipath_shutdown_device(struct ipath_devdata *); | ||
547 | |||
548 | struct file_operations; | ||
549 | int ipath_cdev_init(int minor, char *name, struct file_operations *fops, | ||
550 | struct cdev **cdevp, struct class_device **class_devp); | ||
551 | void ipath_cdev_cleanup(struct cdev **cdevp, | ||
552 | struct class_device **class_devp); | ||
553 | |||
554 | int ipath_diag_init(void); | ||
555 | void ipath_diag_cleanup(void); | ||
556 | void ipath_diag_bringup_link(struct ipath_devdata *); | ||
557 | |||
558 | extern wait_queue_head_t ipath_sma_state_wait; | ||
559 | |||
560 | int ipath_user_add(struct ipath_devdata *dd); | ||
561 | void ipath_user_del(struct ipath_devdata *dd); | ||
562 | |||
563 | struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t); | ||
564 | |||
565 | extern int ipath_diag_inuse; | ||
566 | |||
567 | irqreturn_t ipath_intr(int irq, void *devid, struct pt_regs *regs); | ||
568 | void ipath_decode_err(char *buf, size_t blen, ipath_err_t err); | ||
569 | #if __IPATH_INFO || __IPATH_DBG | ||
570 | extern const char *ipath_ibcstatus_str[]; | ||
571 | #endif | ||
572 | |||
573 | /* clean up any per-chip chip-specific stuff */ | ||
574 | void ipath_chip_cleanup(struct ipath_devdata *); | ||
575 | /* clean up any chip type-specific stuff */ | ||
576 | void ipath_chip_done(void); | ||
577 | |||
578 | /* check to see if we have to force ordering for write combining */ | ||
579 | int ipath_unordered_wc(void); | ||
580 | |||
581 | void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first, | ||
582 | unsigned cnt); | ||
583 | |||
584 | int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *); | ||
585 | void ipath_free_pddata(struct ipath_devdata *, u32, int); | ||
586 | |||
587 | int ipath_parse_ushort(const char *str, unsigned short *valp); | ||
588 | |||
589 | int ipath_wait_linkstate(struct ipath_devdata *, u32, int); | ||
590 | void ipath_set_ib_lstate(struct ipath_devdata *, int); | ||
591 | void ipath_kreceive(struct ipath_devdata *); | ||
592 | int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned); | ||
593 | int ipath_reset_device(int); | ||
594 | void ipath_get_faststats(unsigned long); | ||
595 | |||
596 | /* for use in system calls, where we want to know device type, etc. */ | ||
597 | #define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data) | ||
598 | |||
599 | /* | ||
600 | * values for ipath_flags | ||
601 | */ | ||
602 | /* The chip is up and initted */ | ||
603 | #define IPATH_INITTED 0x2 | ||
604 | /* set if any user code has set kr_rcvhdrsize */ | ||
605 | #define IPATH_RCVHDRSZ_SET 0x4 | ||
606 | /* The chip is present and valid for accesses */ | ||
607 | #define IPATH_PRESENT 0x8 | ||
608 | /* HT link0 is only 8 bits wide, ignore upper byte crc | ||
609 | * errors, etc. */ | ||
610 | #define IPATH_8BIT_IN_HT0 0x10 | ||
611 | /* HT link1 is only 8 bits wide, ignore upper byte crc | ||
612 | * errors, etc. */ | ||
613 | #define IPATH_8BIT_IN_HT1 0x20 | ||
614 | /* The link is down */ | ||
615 | #define IPATH_LINKDOWN 0x40 | ||
616 | /* The link level is up (0x11) */ | ||
617 | #define IPATH_LINKINIT 0x80 | ||
618 | /* The link is in the armed (0x21) state */ | ||
619 | #define IPATH_LINKARMED 0x100 | ||
620 | /* The link is in the active (0x31) state */ | ||
621 | #define IPATH_LINKACTIVE 0x200 | ||
622 | /* link current state is unknown */ | ||
623 | #define IPATH_LINKUNK 0x400 | ||
624 | /* no IB cable, or no device on IB cable */ | ||
625 | #define IPATH_NOCABLE 0x4000 | ||
626 | /* Supports port zero per packet receive interrupts via | ||
627 | * GPIO */ | ||
628 | #define IPATH_GPIO_INTR 0x8000 | ||
629 | /* uses the coded 4byte TID, not 8 byte */ | ||
630 | #define IPATH_4BYTE_TID 0x10000 | ||
631 | /* packet/word counters are 32 bit, else those 4 counters | ||
632 | * are 64bit */ | ||
633 | #define IPATH_32BITCOUNTERS 0x20000 | ||
634 | /* can miss port0 rx interrupts */ | ||
635 | #define IPATH_POLL_RX_INTR 0x40000 | ||
636 | #define IPATH_DISABLED 0x80000 /* administratively disabled */ | ||
637 | |||
638 | /* portdata flag bit offsets */ | ||
639 | /* waiting for a packet to arrive */ | ||
640 | #define IPATH_PORT_WAITING_RCV 2 | ||
641 | /* waiting for a PIO buffer to be available */ | ||
642 | #define IPATH_PORT_WAITING_PIO 3 | ||
643 | |||
644 | /* free up any allocated data at closes */ | ||
645 | void ipath_free_data(struct ipath_portdata *dd); | ||
646 | int ipath_waitfor_mdio_cmdready(struct ipath_devdata *); | ||
647 | int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *); | ||
648 | u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *); | ||
649 | /* init PE-800-specific func */ | ||
650 | void ipath_init_pe800_funcs(struct ipath_devdata *); | ||
651 | /* init HT-400-specific func */ | ||
652 | void ipath_init_ht400_funcs(struct ipath_devdata *); | ||
653 | void ipath_get_guid(struct ipath_devdata *); | ||
654 | u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); | ||
655 | |||
656 | /* | ||
657 | * number of words used for protocol header if not set by ipath_userinit(); | ||
658 | */ | ||
659 | #define IPATH_DFLT_RCVHDRSIZE 9 | ||
660 | |||
661 | #define IPATH_MDIO_CMD_WRITE 1 | ||
662 | #define IPATH_MDIO_CMD_READ 2 | ||
663 | #define IPATH_MDIO_CLD_DIV 25 /* to get 2.5 MHz mdio clock */ | ||
664 | #define IPATH_MDIO_CMDVALID 0x40000000 /* bit 30 */ | ||
665 | #define IPATH_MDIO_DATAVALID 0x80000000 /* bit 31 */ | ||
666 | #define IPATH_MDIO_CTRL_STD 0x0 | ||
667 | |||
668 | static inline u64 ipath_mdio_req(int cmd, int dev, int reg, int data) | ||
669 | { | ||
670 | return (((u64) IPATH_MDIO_CLD_DIV) << 32) | | ||
671 | (cmd << 26) | | ||
672 | (dev << 21) | | ||
673 | (reg << 16) | | ||
674 | (data & 0xFFFF); | ||
675 | } | ||
676 | |||
677 | /* signal and fifo status, in bank 31 */ | ||
678 | #define IPATH_MDIO_CTRL_XGXS_REG_8 0x8 | ||
679 | /* controls loopback, redundancy */ | ||
680 | #define IPATH_MDIO_CTRL_8355_REG_1 0x10 | ||
681 | /* premph, encdec, etc. */ | ||
682 | #define IPATH_MDIO_CTRL_8355_REG_2 0x11 | ||
683 | /* Kchars, etc. */ | ||
684 | #define IPATH_MDIO_CTRL_8355_REG_6 0x15 | ||
685 | #define IPATH_MDIO_CTRL_8355_REG_9 0x18 | ||
686 | #define IPATH_MDIO_CTRL_8355_REG_10 0x1D | ||
687 | |||
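The helper and constants above combine into a single 64-bit MDIO request word. A minimal, hedged sketch follows; the device address 31 and the idea of OR-ing in IPATH_MDIO_CMDVALID before handing the word to the chip are assumptions made for illustration, not a statement of what the driver actually does.

static inline u64 example_mdio_read_xgxs(void)
{
	/* read bank-31 register 8 (signal/fifo status); device address
	 * 31 is an assumption for this sketch */
	return ipath_mdio_req(IPATH_MDIO_CMD_READ, 31,
			      IPATH_MDIO_CTRL_XGXS_REG_8, 0) |
		IPATH_MDIO_CMDVALID;
}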
688 | int ipath_get_user_pages(unsigned long, size_t, struct page **); | ||
689 | int ipath_get_user_pages_nocopy(unsigned long, struct page **); | ||
690 | void ipath_release_user_pages(struct page **, size_t); | ||
691 | void ipath_release_user_pages_on_close(struct page **, size_t); | ||
692 | int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int); | ||
693 | int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int); | ||
694 | |||
695 | /* these are used for the registers that vary with port */ | ||
696 | void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg, | ||
697 | unsigned, u64); | ||
698 | u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg, | ||
699 | unsigned); | ||
700 | |||
701 | /* | ||
702 | * We could have a single register get/put routine, that takes a group type, | ||
703 | * but this is somewhat clearer and cleaner. It also gives us some error | ||
704 | * checking. 64 bit register reads should always work, but are inefficient | ||
705 | * on opteron (the northbridge always generates 2 separate HT 32 bit reads), | ||
706 | * so we use kreg32 wherever possible. User register and counter register | ||
707 | * reads are always 32 bit reads, so only one form of those routines. | ||
708 | */ | ||
709 | |||
710 | /* | ||
711 | * At the moment, none of the s-registers are writable, so no | ||
712 | * ipath_write_sreg(), and none of the c-registers are writable, so no | ||
713 | * ipath_write_creg(). | ||
714 | */ | ||
715 | |||
716 | /** | ||
717 | * ipath_read_ureg32 - read 32-bit virtualized per-port register | ||
718 | * @dd: device | ||
719 | * @regno: register number | ||
720 | * @port: port number | ||
721 | * | ||
722 | * Return the contents of a register that is virtualized to be per port. | ||
723 | * Returns 0 on errors (not distinguishable from | ||
724 | * valid contents at runtime; we may add a separate error variable at some | ||
725 | * point). | ||
726 | * | ||
727 | * This is normally not used by the kernel, but may be for debugging, and | ||
728 | * has a different implementation than user mode, which is why it's not in | ||
729 | * _common.h. | ||
730 | */ | ||
731 | static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd, | ||
732 | ipath_ureg regno, int port) | ||
733 | { | ||
734 | if (!dd->ipath_kregbase) | ||
735 | return 0; | ||
736 | |||
737 | return readl(regno + (u64 __iomem *) | ||
738 | (dd->ipath_uregbase + | ||
739 | (char __iomem *)dd->ipath_kregbase + | ||
740 | dd->ipath_palign * port)); | ||
741 | } | ||
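As a usage sketch of the accessor above, a caller could poll a per-port user register such as the receive header queue head. The register index name ur_rcvhdrhead is hypothetical here and only illustrates the calling convention.

static inline u32 example_port0_rcvhdr_head(const struct ipath_devdata *dd)
{
	/* "ur_rcvhdrhead" is a placeholder ipath_ureg index for this sketch */
	return ipath_read_ureg32(dd, ur_rcvhdrhead, 0);
}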
742 | |||
743 | /** | ||
744 | * ipath_write_ureg - write 32-bit virtualized per-port register | ||
745 | * @dd: device | ||
746 | * @regno: register number | ||
747 | * @value: value | ||
748 | * @port: port | ||
749 | * | ||
750 | * Write the contents of a register that is virtualized to be per port. | ||
751 | */ | ||
752 | static inline void ipath_write_ureg(const struct ipath_devdata *dd, | ||
753 | ipath_ureg regno, u64 value, int port) | ||
754 | { | ||
755 | u64 __iomem *ubase = (u64 __iomem *) | ||
756 | (dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase + | ||
757 | dd->ipath_palign * port); | ||
758 | if (dd->ipath_kregbase) | ||
759 | writeq(value, &ubase[regno]); | ||
760 | } | ||
761 | |||
762 | static inline u32 ipath_read_kreg32(const struct ipath_devdata *dd, | ||
763 | ipath_kreg regno) | ||
764 | { | ||
765 | if (!dd->ipath_kregbase) | ||
766 | return -1; | ||
767 | return readl((u32 __iomem *) & dd->ipath_kregbase[regno]); | ||
768 | } | ||
769 | |||
770 | static inline u64 ipath_read_kreg64(const struct ipath_devdata *dd, | ||
771 | ipath_kreg regno) | ||
772 | { | ||
773 | if (!dd->ipath_kregbase) | ||
774 | return -1; | ||
775 | |||
776 | return readq(&dd->ipath_kregbase[regno]); | ||
777 | } | ||
778 | |||
779 | static inline void ipath_write_kreg(const struct ipath_devdata *dd, | ||
780 | ipath_kreg regno, u64 value) | ||
781 | { | ||
782 | if (dd->ipath_kregbase) | ||
783 | writeq(value, &dd->ipath_kregbase[regno]); | ||
784 | } | ||
785 | |||
786 | static inline u64 ipath_read_creg(const struct ipath_devdata *dd, | ||
787 | ipath_sreg regno) | ||
788 | { | ||
789 | if (!dd->ipath_kregbase) | ||
790 | return 0; | ||
791 | |||
792 | return readq(regno + (u64 __iomem *) | ||
793 | (dd->ipath_cregbase + | ||
794 | (char __iomem *)dd->ipath_kregbase)); | ||
795 | } | ||
796 | |||
797 | static inline u32 ipath_read_creg32(const struct ipath_devdata *dd, | ||
798 | ipath_sreg regno) | ||
799 | { | ||
800 | if (!dd->ipath_kregbase) | ||
801 | return 0; | ||
802 | return readl(regno + (u64 __iomem *) | ||
803 | (dd->ipath_cregbase + | ||
804 | (char __iomem *)dd->ipath_kregbase)); | ||
805 | } | ||
806 | |||
807 | /* | ||
808 | * sysfs interface. | ||
809 | */ | ||
810 | |||
811 | struct device_driver; | ||
812 | |||
813 | extern const char ipath_core_version[]; | ||
814 | |||
815 | int ipath_driver_create_group(struct device_driver *); | ||
816 | void ipath_driver_remove_group(struct device_driver *); | ||
817 | |||
818 | int ipath_device_create_group(struct device *, struct ipath_devdata *); | ||
819 | void ipath_device_remove_group(struct device *, struct ipath_devdata *); | ||
820 | int ipath_expose_reset(struct device *); | ||
821 | |||
822 | int ipath_init_ipathfs(void); | ||
823 | void ipath_exit_ipathfs(void); | ||
824 | int ipathfs_add_device(struct ipath_devdata *); | ||
825 | int ipathfs_remove_device(struct ipath_devdata *); | ||
826 | |||
827 | /* | ||
828 | * Flush write combining store buffers (if present) and perform a write | ||
829 | * barrier. | ||
830 | */ | ||
831 | #if defined(CONFIG_X86_64) | ||
832 | #define ipath_flush_wc() asm volatile("sfence" ::: "memory") | ||
833 | #else | ||
834 | #define ipath_flush_wc() wmb() | ||
835 | #endif | ||
836 | |||
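A hedged sketch of where ipath_flush_wc() fits: after filling a write-combining PIO buffer with __raw_writel() stores, the store buffers are flushed so the chip observes the words in order. The copy loop below is illustrative only.

static inline void example_pio_copy(u32 __iomem *piobuf,
				    const u32 *src, unsigned nwords)
{
	unsigned i;

	for (i = 0; i < nwords; i++)
		__raw_writel(src[i], piobuf + i);
	ipath_flush_wc();	/* drain WC buffers before any later trigger write */
}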
837 | extern unsigned ipath_debug; /* debugging bit mask */ | ||
838 | |||
839 | const char *ipath_get_unit_name(int unit); | ||
840 | |||
841 | extern struct mutex ipath_mutex; | ||
842 | |||
843 | #define IPATH_DRV_NAME "ipath_core" | ||
844 | #define IPATH_MAJOR 233 | ||
845 | #define IPATH_SMA_MINOR 128 | ||
846 | #define IPATH_DIAG_MINOR 129 | ||
847 | #define IPATH_NMINORS 130 | ||
848 | |||
849 | #define ipath_dev_err(dd,fmt,...) \ | ||
850 | do { \ | ||
851 | const struct ipath_devdata *__dd = (dd); \ | ||
852 | if (__dd->pcidev) \ | ||
853 | dev_err(&__dd->pcidev->dev, "%s: " fmt, \ | ||
854 | ipath_get_unit_name(__dd->ipath_unit), \ | ||
855 | ##__VA_ARGS__); \ | ||
856 | else \ | ||
857 | printk(KERN_ERR IPATH_DRV_NAME ": %s: " fmt, \ | ||
858 | ipath_get_unit_name(__dd->ipath_unit), \ | ||
859 | ##__VA_ARGS__); \ | ||
860 | } while (0) | ||
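For reference, an illustrative call site for the macro above: it expands to dev_err() when a PCI device is attached and to a plain printk() otherwise, always prefixing the unit name. The message text is made up.

static inline void example_report_freeze(struct ipath_devdata *dd)
{
	ipath_dev_err(dd, "hardware freeze detected, flags 0x%x\n",
		      dd->ipath_flags);
}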
861 | |||
862 | #if _IPATH_DEBUGGING | ||
863 | |||
864 | # define __IPATH_DBG_WHICH(which,fmt,...) \ | ||
865 | do { \ | ||
866 | if(unlikely(ipath_debug&(which))) \ | ||
867 | printk(KERN_DEBUG IPATH_DRV_NAME ": %s: " fmt, \ | ||
868 | __func__,##__VA_ARGS__); \ | ||
869 | } while(0) | ||
870 | |||
871 | # define ipath_dbg(fmt,...) \ | ||
872 | __IPATH_DBG_WHICH(__IPATH_DBG,fmt,##__VA_ARGS__) | ||
873 | # define ipath_cdbg(which,fmt,...) \ | ||
874 | __IPATH_DBG_WHICH(__IPATH_##which##DBG,fmt,##__VA_ARGS__) | ||
875 | |||
876 | #else /* ! _IPATH_DEBUGGING */ | ||
877 | |||
878 | # define ipath_dbg(fmt,...) | ||
879 | # define ipath_cdbg(which,fmt,...) | ||
880 | |||
881 | #endif /* _IPATH_DEBUGGING */ | ||
882 | |||
883 | #endif /* _IPATH_KERNEL_H */ | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c new file mode 100644 index 000000000000..aa33b0e9f2f6 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_keys.c | |||
@@ -0,0 +1,236 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <asm/io.h> | ||
34 | |||
35 | #include "ipath_verbs.h" | ||
36 | |||
37 | /** | ||
38 | * ipath_alloc_lkey - allocate an lkey | ||
39 | * @rkt: lkey table in which to allocate the lkey | ||
40 | * @mr: memory region that this lkey protects | ||
41 | * | ||
42 | * Returns 1 if successful, otherwise returns 0. | ||
43 | */ | ||
44 | |||
45 | int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr) | ||
46 | { | ||
47 | unsigned long flags; | ||
48 | u32 r; | ||
49 | u32 n; | ||
50 | int ret; | ||
51 | |||
52 | spin_lock_irqsave(&rkt->lock, flags); | ||
53 | |||
54 | /* Find the next available LKEY */ | ||
55 | r = n = rkt->next; | ||
56 | for (;;) { | ||
57 | if (rkt->table[r] == NULL) | ||
58 | break; | ||
59 | r = (r + 1) & (rkt->max - 1); | ||
60 | if (r == n) { | ||
61 | spin_unlock_irqrestore(&rkt->lock, flags); | ||
62 | _VERBS_INFO("LKEY table full\n"); | ||
63 | ret = 0; | ||
64 | goto bail; | ||
65 | } | ||
66 | } | ||
67 | rkt->next = (r + 1) & (rkt->max - 1); | ||
68 | /* | ||
69 | * Make sure lkey is never zero which is reserved to indicate an | ||
70 | * unrestricted LKEY. | ||
71 | */ | ||
72 | rkt->gen++; | ||
73 | mr->lkey = (r << (32 - ib_ipath_lkey_table_size)) | | ||
74 | ((((1 << (24 - ib_ipath_lkey_table_size)) - 1) & rkt->gen) | ||
75 | << 8); | ||
76 | if (mr->lkey == 0) { | ||
77 | mr->lkey |= 1 << 8; | ||
78 | rkt->gen++; | ||
79 | } | ||
80 | rkt->table[r] = mr; | ||
81 | spin_unlock_irqrestore(&rkt->lock, flags); | ||
82 | |||
83 | ret = 1; | ||
84 | |||
85 | bail: | ||
86 | return ret; | ||
87 | } | ||
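To make the bit layout built by ipath_alloc_lkey() concrete, here is a hedged decode sketch: the table index occupies the top ib_ipath_lkey_table_size bits and the generation counter sits just above bit 8. The 12-bit table size used below is an assumption for illustration, not the configured value.

static void example_decode_lkey(u32 lkey)
{
	const u32 table_size = 12;	/* assumed, not the configured value */
	u32 index = lkey >> (32 - table_size);
	u32 gen = (lkey >> 8) & ((1 << (24 - table_size)) - 1);

	printk(KERN_DEBUG "lkey 0x%x -> table index %u, generation %u\n",
	       lkey, index, gen);
}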
88 | |||
89 | /** | ||
90 | * ipath_free_lkey - free an lkey | ||
91 | * @rkt: table from which to free the lkey | ||
92 | * @lkey: lkey id to free | ||
93 | */ | ||
94 | void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey) | ||
95 | { | ||
96 | unsigned long flags; | ||
97 | u32 r; | ||
98 | |||
99 | if (lkey == 0) | ||
100 | return; | ||
101 | r = lkey >> (32 - ib_ipath_lkey_table_size); | ||
102 | spin_lock_irqsave(&rkt->lock, flags); | ||
103 | rkt->table[r] = NULL; | ||
104 | spin_unlock_irqrestore(&rkt->lock, flags); | ||
105 | } | ||
106 | |||
107 | /** | ||
108 | * ipath_lkey_ok - check IB SGE for validity and initialize | ||
109 | * @rkt: table containing lkey to check SGE against | ||
110 | * @isge: outgoing internal SGE | ||
111 | * @sge: SGE to check | ||
112 | * @acc: access flags | ||
113 | * | ||
114 | * Return 1 if valid and successful, otherwise returns 0. | ||
115 | * | ||
116 | * Check the IB SGE for validity and initialize our internal version | ||
117 | * of it. | ||
118 | */ | ||
119 | int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge, | ||
120 | struct ib_sge *sge, int acc) | ||
121 | { | ||
122 | struct ipath_mregion *mr; | ||
123 | size_t off; | ||
124 | int ret; | ||
125 | |||
126 | /* | ||
127 | * We use LKEY == zero to mean a physical kmalloc() address. | ||
128 | * This is a bit of a hack since we rely on dma_map_single() | ||
129 | * being reversible by calling bus_to_virt(). | ||
130 | */ | ||
131 | if (sge->lkey == 0) { | ||
132 | isge->mr = NULL; | ||
133 | isge->vaddr = bus_to_virt(sge->addr); | ||
134 | isge->length = sge->length; | ||
135 | isge->sge_length = sge->length; | ||
136 | ret = 1; | ||
137 | goto bail; | ||
138 | } | ||
139 | spin_lock(&rkt->lock); | ||
140 | mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))]; | ||
141 | spin_unlock(&rkt->lock); | ||
142 | if (unlikely(mr == NULL || mr->lkey != sge->lkey)) { | ||
143 | ret = 0; | ||
144 | goto bail; | ||
145 | } | ||
146 | |||
147 | off = sge->addr - mr->user_base; | ||
148 | if (unlikely(sge->addr < mr->user_base || | ||
149 | off + sge->length > mr->length || | ||
150 | (mr->access_flags & acc) != acc)) { | ||
151 | ret = 0; | ||
152 | goto bail; | ||
153 | } | ||
154 | |||
155 | off += mr->offset; | ||
156 | isge->mr = mr; | ||
157 | isge->m = 0; | ||
158 | isge->n = 0; | ||
159 | while (off >= mr->map[isge->m]->segs[isge->n].length) { | ||
160 | off -= mr->map[isge->m]->segs[isge->n].length; | ||
161 | isge->n++; | ||
162 | if (isge->n >= IPATH_SEGSZ) { | ||
163 | isge->m++; | ||
164 | isge->n = 0; | ||
165 | } | ||
166 | } | ||
167 | isge->vaddr = mr->map[isge->m]->segs[isge->n].vaddr + off; | ||
168 | isge->length = mr->map[isge->m]->segs[isge->n].length - off; | ||
169 | isge->sge_length = sge->length; | ||
170 | |||
171 | ret = 1; | ||
172 | |||
173 | bail: | ||
174 | return ret; | ||
175 | } | ||
176 | |||
177 | /** | ||
178 | * ipath_rkey_ok - check the IB virtual address, length, and RKEY | ||
179 | * @dev: infiniband device | ||
180 | * @ss: SGE state | ||
181 | * @len: length of data | ||
182 | * @vaddr: virtual address to place data | ||
183 | * @rkey: rkey to check | ||
184 | * @acc: access flags | ||
185 | * | ||
186 | * Return 1 if successful, otherwise 0. | ||
187 | * | ||
188 | * The QP r_rq.lock should be held. | ||
189 | */ | ||
190 | int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, | ||
191 | u32 len, u64 vaddr, u32 rkey, int acc) | ||
192 | { | ||
193 | struct ipath_lkey_table *rkt = &dev->lk_table; | ||
194 | struct ipath_sge *sge = &ss->sge; | ||
195 | struct ipath_mregion *mr; | ||
196 | size_t off; | ||
197 | int ret; | ||
198 | |||
199 | spin_lock(&rkt->lock); | ||
200 | mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))]; | ||
201 | spin_unlock(&rkt->lock); | ||
202 | if (unlikely(mr == NULL || mr->lkey != rkey)) { | ||
203 | ret = 0; | ||
204 | goto bail; | ||
205 | } | ||
206 | |||
207 | off = vaddr - mr->iova; | ||
208 | if (unlikely(vaddr < mr->iova || off + len > mr->length || | ||
209 | (mr->access_flags & acc) == 0)) { | ||
210 | ret = 0; | ||
211 | goto bail; | ||
212 | } | ||
213 | |||
214 | off += mr->offset; | ||
215 | sge->mr = mr; | ||
216 | sge->m = 0; | ||
217 | sge->n = 0; | ||
218 | while (off >= mr->map[sge->m]->segs[sge->n].length) { | ||
219 | off -= mr->map[sge->m]->segs[sge->n].length; | ||
220 | sge->n++; | ||
221 | if (sge->n >= IPATH_SEGSZ) { | ||
222 | sge->m++; | ||
223 | sge->n = 0; | ||
224 | } | ||
225 | } | ||
226 | sge->vaddr = mr->map[sge->m]->segs[sge->n].vaddr + off; | ||
227 | sge->length = mr->map[sge->m]->segs[sge->n].length - off; | ||
228 | sge->sge_length = len; | ||
229 | ss->sg_list = NULL; | ||
230 | ss->num_sge = 1; | ||
231 | |||
232 | ret = 1; | ||
233 | |||
234 | bail: | ||
235 | return ret; | ||
236 | } | ||
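Both ipath_lkey_ok() and ipath_rkey_ok() end with the same loop: a byte offset into the region is reduced to a (map, segment) pair plus a residual offset. A standalone, hedged walk-through follows; the 4096-byte segment length and the two-segments-per-map count are assumptions chosen only to keep the numbers small.

static void example_segment_walk(void)
{
	unsigned off = 13000, m = 0, n = 0;
	const unsigned seglen = 4096, segsz = 2;	/* assumptions */

	/* mirrors the loop in ipath_lkey_ok()/ipath_rkey_ok() */
	while (off >= seglen) {
		off -= seglen;
		if (++n >= segsz) {
			m++;
			n = 0;
		}
	}
	/* ends with m == 1, n == 1, off == 712 for these inputs */
	printk(KERN_DEBUG "map %u, segment %u, residual offset %u\n",
	       m, n, off);
}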
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c new file mode 100644 index 000000000000..69ed1100701a --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_layer.c | |||
@@ -0,0 +1,1515 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | /* | ||
34 | * These are the routines used by layered drivers, currently just the | ||
35 | * layered ethernet driver and verbs layer. | ||
36 | */ | ||
37 | |||
38 | #include <linux/io.h> | ||
39 | #include <linux/pci.h> | ||
40 | #include <asm/byteorder.h> | ||
41 | |||
42 | #include "ipath_kernel.h" | ||
43 | #include "ips_common.h" | ||
44 | #include "ipath_layer.h" | ||
45 | |||
46 | /* Acquire before ipath_devs_lock. */ | ||
47 | static DEFINE_MUTEX(ipath_layer_mutex); | ||
48 | |||
49 | u16 ipath_layer_rcv_opcode; | ||
50 | static int (*layer_intr)(void *, u32); | ||
51 | static int (*layer_rcv)(void *, void *, struct sk_buff *); | ||
52 | static int (*layer_rcv_lid)(void *, void *); | ||
53 | static int (*verbs_piobufavail)(void *); | ||
54 | static void (*verbs_rcv)(void *, void *, void *, u32); | ||
55 | static int ipath_verbs_registered; | ||
56 | |||
57 | static void *(*layer_add_one)(int, struct ipath_devdata *); | ||
58 | static void (*layer_remove_one)(void *); | ||
59 | static void *(*verbs_add_one)(int, struct ipath_devdata *); | ||
60 | static void (*verbs_remove_one)(void *); | ||
61 | static void (*verbs_timer_cb)(void *); | ||
62 | |||
63 | int __ipath_layer_intr(struct ipath_devdata *dd, u32 arg) | ||
64 | { | ||
65 | int ret = -ENODEV; | ||
66 | |||
67 | if (dd->ipath_layer.l_arg && layer_intr) | ||
68 | ret = layer_intr(dd->ipath_layer.l_arg, arg); | ||
69 | |||
70 | return ret; | ||
71 | } | ||
72 | |||
73 | int ipath_layer_intr(struct ipath_devdata *dd, u32 arg) | ||
74 | { | ||
75 | int ret; | ||
76 | |||
77 | mutex_lock(&ipath_layer_mutex); | ||
78 | |||
79 | ret = __ipath_layer_intr(dd, arg); | ||
80 | |||
81 | mutex_unlock(&ipath_layer_mutex); | ||
82 | |||
83 | return ret; | ||
84 | } | ||
85 | |||
86 | int __ipath_layer_rcv(struct ipath_devdata *dd, void *hdr, | ||
87 | struct sk_buff *skb) | ||
88 | { | ||
89 | int ret = -ENODEV; | ||
90 | |||
91 | if (dd->ipath_layer.l_arg && layer_rcv) | ||
92 | ret = layer_rcv(dd->ipath_layer.l_arg, hdr, skb); | ||
93 | |||
94 | return ret; | ||
95 | } | ||
96 | |||
97 | int __ipath_layer_rcv_lid(struct ipath_devdata *dd, void *hdr) | ||
98 | { | ||
99 | int ret = -ENODEV; | ||
100 | |||
101 | if (dd->ipath_layer.l_arg && layer_rcv_lid) | ||
102 | ret = layer_rcv_lid(dd->ipath_layer.l_arg, hdr); | ||
103 | |||
104 | return ret; | ||
105 | } | ||
106 | |||
107 | int __ipath_verbs_piobufavail(struct ipath_devdata *dd) | ||
108 | { | ||
109 | int ret = -ENODEV; | ||
110 | |||
111 | if (dd->verbs_layer.l_arg && verbs_piobufavail) | ||
112 | ret = verbs_piobufavail(dd->verbs_layer.l_arg); | ||
113 | |||
114 | return ret; | ||
115 | } | ||
116 | |||
117 | int __ipath_verbs_rcv(struct ipath_devdata *dd, void *rc, void *ebuf, | ||
118 | u32 tlen) | ||
119 | { | ||
120 | int ret = -ENODEV; | ||
121 | |||
122 | if (dd->verbs_layer.l_arg && verbs_rcv) { | ||
123 | verbs_rcv(dd->verbs_layer.l_arg, rc, ebuf, tlen); | ||
124 | ret = 0; | ||
125 | } | ||
126 | |||
127 | return ret; | ||
128 | } | ||
129 | |||
130 | int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 newstate) | ||
131 | { | ||
132 | u32 lstate; | ||
133 | int ret; | ||
134 | |||
135 | switch (newstate) { | ||
136 | case IPATH_IB_LINKDOWN: | ||
137 | ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL << | ||
138 | INFINIPATH_IBCC_LINKINITCMD_SHIFT); | ||
139 | /* don't wait */ | ||
140 | ret = 0; | ||
141 | goto bail; | ||
142 | |||
143 | case IPATH_IB_LINKDOWN_SLEEP: | ||
144 | ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP << | ||
145 | INFINIPATH_IBCC_LINKINITCMD_SHIFT); | ||
146 | /* don't wait */ | ||
147 | ret = 0; | ||
148 | goto bail; | ||
149 | |||
150 | case IPATH_IB_LINKDOWN_DISABLE: | ||
151 | ipath_set_ib_lstate(dd, | ||
152 | INFINIPATH_IBCC_LINKINITCMD_DISABLE << | ||
153 | INFINIPATH_IBCC_LINKINITCMD_SHIFT); | ||
154 | /* don't wait */ | ||
155 | ret = 0; | ||
156 | goto bail; | ||
157 | |||
158 | case IPATH_IB_LINKINIT: | ||
159 | if (dd->ipath_flags & IPATH_LINKINIT) { | ||
160 | ret = 0; | ||
161 | goto bail; | ||
162 | } | ||
163 | ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT << | ||
164 | INFINIPATH_IBCC_LINKCMD_SHIFT); | ||
165 | lstate = IPATH_LINKINIT; | ||
166 | break; | ||
167 | |||
168 | case IPATH_IB_LINKARM: | ||
169 | if (dd->ipath_flags & IPATH_LINKARMED) { | ||
170 | ret = 0; | ||
171 | goto bail; | ||
172 | } | ||
173 | if (!(dd->ipath_flags & | ||
174 | (IPATH_LINKINIT | IPATH_LINKACTIVE))) { | ||
175 | ret = -EINVAL; | ||
176 | goto bail; | ||
177 | } | ||
178 | ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED << | ||
179 | INFINIPATH_IBCC_LINKCMD_SHIFT); | ||
180 | /* | ||
181 | * Since the port can transition to ACTIVE by receiving | ||
182 | * a non VL 15 packet, wait for either state. | ||
183 | */ | ||
184 | lstate = IPATH_LINKARMED | IPATH_LINKACTIVE; | ||
185 | break; | ||
186 | |||
187 | case IPATH_IB_LINKACTIVE: | ||
188 | if (dd->ipath_flags & IPATH_LINKACTIVE) { | ||
189 | ret = 0; | ||
190 | goto bail; | ||
191 | } | ||
192 | if (!(dd->ipath_flags & IPATH_LINKARMED)) { | ||
193 | ret = -EINVAL; | ||
194 | goto bail; | ||
195 | } | ||
196 | ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE << | ||
197 | INFINIPATH_IBCC_LINKCMD_SHIFT); | ||
198 | lstate = IPATH_LINKACTIVE; | ||
199 | break; | ||
200 | |||
201 | default: | ||
202 | ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); | ||
203 | ret = -EINVAL; | ||
204 | goto bail; | ||
205 | } | ||
206 | ret = ipath_wait_linkstate(dd, lstate, 2000); | ||
207 | |||
208 | bail: | ||
209 | return ret; | ||
210 | } | ||
211 | |||
212 | EXPORT_SYMBOL_GPL(ipath_layer_set_linkstate); | ||
213 | |||
214 | /** | ||
215 | * ipath_layer_set_mtu - set the MTU | ||
216 | * @dd: the infinipath device | ||
217 | * @arg: the new MTU | ||
218 | * | ||
219 | * we can handle "any" incoming size, the issue here is whether we | ||
220 | * need to restrict our outgoing size. For now, we don't do any | ||
221 | * sanity checking on this, and we don't deal with what happens to | ||
222 | * programs that are already running when the size changes. | ||
223 | * NOTE: changing the MTU will usually cause the IBC to go back to | ||
224 | * link initialize (IPATH_IBSTATE_INIT) state... | ||
225 | */ | ||
226 | int ipath_layer_set_mtu(struct ipath_devdata *dd, u16 arg) | ||
227 | { | ||
228 | u32 piosize; | ||
229 | int changed = 0; | ||
230 | int ret; | ||
231 | |||
232 | /* | ||
233 | * mtu is IB data payload max. It's the largest power of 2 less | ||
234 | * than piosize (or even larger, since it only really controls the | ||
235 | * largest we can receive; we can send the max of the mtu and | ||
236 | * piosize). We check that it's one of the valid IB sizes. | ||
237 | */ | ||
238 | if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 && | ||
239 | arg != 4096) { | ||
240 | ipath_dbg("Trying to set invalid mtu %u, failing\n", arg); | ||
241 | ret = -EINVAL; | ||
242 | goto bail; | ||
243 | } | ||
244 | if (dd->ipath_ibmtu == arg) { | ||
245 | ret = 0; /* same as current */ | ||
246 | goto bail; | ||
247 | } | ||
248 | |||
249 | piosize = dd->ipath_ibmaxlen; | ||
250 | dd->ipath_ibmtu = arg; | ||
251 | |||
252 | if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) { | ||
253 | /* Only if it's not the initial value (or reset to it) */ | ||
254 | if (piosize != dd->ipath_init_ibmaxlen) { | ||
255 | dd->ipath_ibmaxlen = piosize; | ||
256 | changed = 1; | ||
257 | } | ||
258 | } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) { | ||
259 | piosize = arg + IPATH_PIO_MAXIBHDR; | ||
260 | ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x " | ||
261 | "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize, | ||
262 | arg); | ||
263 | dd->ipath_ibmaxlen = piosize; | ||
264 | changed = 1; | ||
265 | } | ||
266 | |||
267 | if (changed) { | ||
268 | /* | ||
269 | * set the IBC maxpktlength to the size of our pio | ||
270 | * buffers in words | ||
271 | */ | ||
272 | u64 ibc = dd->ipath_ibcctrl; | ||
273 | ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK << | ||
274 | INFINIPATH_IBCC_MAXPKTLEN_SHIFT); | ||
275 | |||
276 | piosize = piosize - 2 * sizeof(u32); /* ignore pbc */ | ||
277 | dd->ipath_ibmaxlen = piosize; | ||
278 | piosize /= sizeof(u32); /* in words */ | ||
279 | /* | ||
280 | * for ICRC, which we only send in diag test pkt mode, and | ||
281 | * we don't need to worry about that for mtu | ||
282 | */ | ||
283 | piosize += 1; | ||
284 | |||
285 | ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT; | ||
286 | dd->ipath_ibcctrl = ibc; | ||
287 | ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, | ||
288 | dd->ipath_ibcctrl); | ||
289 | dd->ipath_f_tidtemplate(dd); | ||
290 | } | ||
291 | |||
292 | ret = 0; | ||
293 | |||
294 | bail: | ||
295 | return ret; | ||
296 | } | ||
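A hedged example of a layered-driver caller; 2048 is simply one of the valid IB MTU values accepted by the check above.

static void example_set_mtu(struct ipath_devdata *dd)
{
	int ret = ipath_layer_set_mtu(dd, 2048);

	if (ret)
		ipath_dev_err(dd, "failed to set IB MTU 2048: %d\n", ret);
}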
297 | |||
298 | EXPORT_SYMBOL_GPL(ipath_layer_set_mtu); | ||
299 | |||
300 | int ipath_set_sps_lid(struct ipath_devdata *dd, u32 arg, u8 lmc) | ||
301 | { | ||
302 | ipath_stats.sps_lid[dd->ipath_unit] = arg; | ||
303 | dd->ipath_lid = arg; | ||
304 | dd->ipath_lmc = lmc; | ||
305 | |||
306 | mutex_lock(&ipath_layer_mutex); | ||
307 | |||
308 | if (dd->ipath_layer.l_arg && layer_intr) | ||
309 | layer_intr(dd->ipath_layer.l_arg, IPATH_LAYER_INT_LID); | ||
310 | |||
311 | mutex_unlock(&ipath_layer_mutex); | ||
312 | |||
313 | return 0; | ||
314 | } | ||
315 | |||
316 | EXPORT_SYMBOL_GPL(ipath_set_sps_lid); | ||
317 | |||
318 | int ipath_layer_set_guid(struct ipath_devdata *dd, __be64 guid) | ||
319 | { | ||
320 | /* XXX - need to inform anyone who cares this just happened. */ | ||
321 | dd->ipath_guid = guid; | ||
322 | return 0; | ||
323 | } | ||
324 | |||
325 | EXPORT_SYMBOL_GPL(ipath_layer_set_guid); | ||
326 | |||
327 | __be64 ipath_layer_get_guid(struct ipath_devdata *dd) | ||
328 | { | ||
329 | return dd->ipath_guid; | ||
330 | } | ||
331 | |||
332 | EXPORT_SYMBOL_GPL(ipath_layer_get_guid); | ||
333 | |||
334 | u32 ipath_layer_get_nguid(struct ipath_devdata *dd) | ||
335 | { | ||
336 | return dd->ipath_nguid; | ||
337 | } | ||
338 | |||
339 | EXPORT_SYMBOL_GPL(ipath_layer_get_nguid); | ||
340 | |||
341 | int ipath_layer_query_device(struct ipath_devdata *dd, u32 * vendor, | ||
342 | u32 * boardrev, u32 * majrev, u32 * minrev) | ||
343 | { | ||
344 | *vendor = dd->ipath_vendorid; | ||
345 | *boardrev = dd->ipath_boardrev; | ||
346 | *majrev = dd->ipath_majrev; | ||
347 | *minrev = dd->ipath_minrev; | ||
348 | |||
349 | return 0; | ||
350 | } | ||
351 | |||
352 | EXPORT_SYMBOL_GPL(ipath_layer_query_device); | ||
353 | |||
354 | u32 ipath_layer_get_flags(struct ipath_devdata *dd) | ||
355 | { | ||
356 | return dd->ipath_flags; | ||
357 | } | ||
358 | |||
359 | EXPORT_SYMBOL_GPL(ipath_layer_get_flags); | ||
360 | |||
361 | struct device *ipath_layer_get_device(struct ipath_devdata *dd) | ||
362 | { | ||
363 | return &dd->pcidev->dev; | ||
364 | } | ||
365 | |||
366 | EXPORT_SYMBOL_GPL(ipath_layer_get_device); | ||
367 | |||
368 | u16 ipath_layer_get_deviceid(struct ipath_devdata *dd) | ||
369 | { | ||
370 | return dd->ipath_deviceid; | ||
371 | } | ||
372 | |||
373 | EXPORT_SYMBOL_GPL(ipath_layer_get_deviceid); | ||
374 | |||
375 | u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd) | ||
376 | { | ||
377 | return dd->ipath_lastibcstat; | ||
378 | } | ||
379 | |||
380 | EXPORT_SYMBOL_GPL(ipath_layer_get_lastibcstat); | ||
381 | |||
382 | u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd) | ||
383 | { | ||
384 | return dd->ipath_ibmtu; | ||
385 | } | ||
386 | |||
387 | EXPORT_SYMBOL_GPL(ipath_layer_get_ibmtu); | ||
388 | |||
389 | void ipath_layer_add(struct ipath_devdata *dd) | ||
390 | { | ||
391 | mutex_lock(&ipath_layer_mutex); | ||
392 | |||
393 | if (layer_add_one) | ||
394 | dd->ipath_layer.l_arg = | ||
395 | layer_add_one(dd->ipath_unit, dd); | ||
396 | |||
397 | if (verbs_add_one) | ||
398 | dd->verbs_layer.l_arg = | ||
399 | verbs_add_one(dd->ipath_unit, dd); | ||
400 | |||
401 | mutex_unlock(&ipath_layer_mutex); | ||
402 | } | ||
403 | |||
404 | void ipath_layer_del(struct ipath_devdata *dd) | ||
405 | { | ||
406 | mutex_lock(&ipath_layer_mutex); | ||
407 | |||
408 | if (dd->ipath_layer.l_arg && layer_remove_one) { | ||
409 | layer_remove_one(dd->ipath_layer.l_arg); | ||
410 | dd->ipath_layer.l_arg = NULL; | ||
411 | } | ||
412 | |||
413 | if (dd->verbs_layer.l_arg && verbs_remove_one) { | ||
414 | verbs_remove_one(dd->verbs_layer.l_arg); | ||
415 | dd->verbs_layer.l_arg = NULL; | ||
416 | } | ||
417 | |||
418 | mutex_unlock(&ipath_layer_mutex); | ||
419 | } | ||
420 | |||
421 | int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *), | ||
422 | void (*l_remove)(void *), | ||
423 | int (*l_intr)(void *, u32), | ||
424 | int (*l_rcv)(void *, void *, struct sk_buff *), | ||
425 | u16 l_rcv_opcode, | ||
426 | int (*l_rcv_lid)(void *, void *)) | ||
427 | { | ||
428 | struct ipath_devdata *dd, *tmp; | ||
429 | unsigned long flags; | ||
430 | |||
431 | mutex_lock(&ipath_layer_mutex); | ||
432 | |||
433 | layer_add_one = l_add; | ||
434 | layer_remove_one = l_remove; | ||
435 | layer_intr = l_intr; | ||
436 | layer_rcv = l_rcv; | ||
437 | layer_rcv_lid = l_rcv_lid; | ||
438 | ipath_layer_rcv_opcode = l_rcv_opcode; | ||
439 | |||
440 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
441 | |||
442 | list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { | ||
443 | if (!(dd->ipath_flags & IPATH_INITTED)) | ||
444 | continue; | ||
445 | |||
446 | if (dd->ipath_layer.l_arg) | ||
447 | continue; | ||
448 | |||
449 | if (!(*dd->ipath_statusp & IPATH_STATUS_SMA)) | ||
450 | *dd->ipath_statusp |= IPATH_STATUS_OIB_SMA; | ||
451 | |||
452 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
453 | dd->ipath_layer.l_arg = l_add(dd->ipath_unit, dd); | ||
454 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
455 | } | ||
456 | |||
457 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
458 | mutex_unlock(&ipath_layer_mutex); | ||
459 | |||
460 | return 0; | ||
461 | } | ||
462 | |||
463 | EXPORT_SYMBOL_GPL(ipath_layer_register); | ||
464 | |||
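A hedged sketch of how a layered protocol driver might hook into the registration call above. All callbacks are stubs and the receive opcode value is an assumption; the snippet only illustrates the expected signatures.

static void *ex_add(int unit, struct ipath_devdata *dd) { return dd; }
static void ex_remove(void *arg) { }
static int ex_intr(void *arg, u32 what) { return 0; }
static int ex_rcv(void *arg, void *hdr, struct sk_buff *skb) { return 0; }
static int ex_rcv_lid(void *arg, void *hdr) { return 0; }

static int example_register_layer(void)
{
	return ipath_layer_register(ex_add, ex_remove, ex_intr, ex_rcv,
				    0x70 /* assumed opcode */, ex_rcv_lid);
}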
465 | void ipath_layer_unregister(void) | ||
466 | { | ||
467 | struct ipath_devdata *dd, *tmp; | ||
468 | unsigned long flags; | ||
469 | |||
470 | mutex_lock(&ipath_layer_mutex); | ||
471 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
472 | |||
473 | list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { | ||
474 | if (dd->ipath_layer.l_arg && layer_remove_one) { | ||
475 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
476 | layer_remove_one(dd->ipath_layer.l_arg); | ||
477 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
478 | dd->ipath_layer.l_arg = NULL; | ||
479 | } | ||
480 | } | ||
481 | |||
482 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
483 | |||
484 | layer_add_one = NULL; | ||
485 | layer_remove_one = NULL; | ||
486 | layer_intr = NULL; | ||
487 | layer_rcv = NULL; | ||
488 | layer_rcv_lid = NULL; | ||
489 | |||
490 | mutex_unlock(&ipath_layer_mutex); | ||
491 | } | ||
492 | |||
493 | EXPORT_SYMBOL_GPL(ipath_layer_unregister); | ||
494 | |||
495 | static void __ipath_verbs_timer(unsigned long arg) | ||
496 | { | ||
497 | struct ipath_devdata *dd = (struct ipath_devdata *) arg; | ||
498 | |||
499 | /* | ||
500 | * If port 0 receive packet interrupts are not available, or | ||
501 | * can be missed, poll the receive queue | ||
502 | */ | ||
503 | if (dd->ipath_flags & IPATH_POLL_RX_INTR) | ||
504 | ipath_kreceive(dd); | ||
505 | |||
506 | /* Handle verbs layer timeouts. */ | ||
507 | if (dd->verbs_layer.l_arg && verbs_timer_cb) | ||
508 | verbs_timer_cb(dd->verbs_layer.l_arg); | ||
509 | |||
510 | mod_timer(&dd->verbs_layer.l_timer, jiffies + 1); | ||
511 | } | ||
512 | |||
513 | /** | ||
514 | * ipath_verbs_register - verbs layer registration | ||
515 | * @l_piobufavail: callback for when PIO buffers become available | ||
516 | * @l_rcv: callback for receiving a packet | ||
517 | * @l_timer_cb: timer callback | ||
518 | * @l_add: device attach callback; @l_remove: device detach callback | ||
519 | */ | ||
520 | int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *), | ||
521 | void (*l_remove)(void *arg), | ||
522 | int (*l_piobufavail) (void *arg), | ||
523 | void (*l_rcv) (void *arg, void *rhdr, | ||
524 | void *data, u32 tlen), | ||
525 | void (*l_timer_cb) (void *arg)) | ||
526 | { | ||
527 | struct ipath_devdata *dd, *tmp; | ||
528 | unsigned long flags; | ||
529 | |||
530 | mutex_lock(&ipath_layer_mutex); | ||
531 | |||
532 | verbs_add_one = l_add; | ||
533 | verbs_remove_one = l_remove; | ||
534 | verbs_piobufavail = l_piobufavail; | ||
535 | verbs_rcv = l_rcv; | ||
536 | verbs_timer_cb = l_timer_cb; | ||
537 | |||
538 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
539 | |||
540 | list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { | ||
541 | if (!(dd->ipath_flags & IPATH_INITTED)) | ||
542 | continue; | ||
543 | |||
544 | if (dd->verbs_layer.l_arg) | ||
545 | continue; | ||
546 | |||
547 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
548 | dd->verbs_layer.l_arg = l_add(dd->ipath_unit, dd); | ||
549 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
550 | } | ||
551 | |||
552 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
553 | mutex_unlock(&ipath_layer_mutex); | ||
554 | |||
555 | ipath_verbs_registered = 1; | ||
556 | |||
557 | return 0; | ||
558 | } | ||
559 | |||
560 | EXPORT_SYMBOL_GPL(ipath_verbs_register); | ||
561 | |||
562 | void ipath_verbs_unregister(void) | ||
563 | { | ||
564 | struct ipath_devdata *dd, *tmp; | ||
565 | unsigned long flags; | ||
566 | |||
567 | mutex_lock(&ipath_layer_mutex); | ||
568 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
569 | |||
570 | list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { | ||
571 | *dd->ipath_statusp &= ~IPATH_STATUS_OIB_SMA; | ||
572 | |||
573 | if (dd->verbs_layer.l_arg && verbs_remove_one) { | ||
574 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
575 | verbs_remove_one(dd->verbs_layer.l_arg); | ||
576 | spin_lock_irqsave(&ipath_devs_lock, flags); | ||
577 | dd->verbs_layer.l_arg = NULL; | ||
578 | } | ||
579 | } | ||
580 | |||
581 | spin_unlock_irqrestore(&ipath_devs_lock, flags); | ||
582 | |||
583 | verbs_add_one = NULL; | ||
584 | verbs_remove_one = NULL; | ||
585 | verbs_piobufavail = NULL; | ||
586 | verbs_rcv = NULL; | ||
587 | verbs_timer_cb = NULL; | ||
588 | |||
589 | mutex_unlock(&ipath_layer_mutex); | ||
590 | } | ||
591 | |||
592 | EXPORT_SYMBOL_GPL(ipath_verbs_unregister); | ||
593 | |||
594 | int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax) | ||
595 | { | ||
596 | int ret; | ||
597 | u32 intval = 0; | ||
598 | |||
599 | mutex_lock(&ipath_layer_mutex); | ||
600 | |||
601 | if (!dd->ipath_layer.l_arg) { | ||
602 | ret = -EINVAL; | ||
603 | goto bail; | ||
604 | } | ||
605 | |||
606 | ret = ipath_setrcvhdrsize(dd, NUM_OF_EXTRA_WORDS_IN_HEADER_QUEUE); | ||
607 | |||
608 | if (ret < 0) | ||
609 | goto bail; | ||
610 | |||
611 | *pktmax = dd->ipath_ibmaxlen; | ||
612 | |||
613 | if (*dd->ipath_statusp & IPATH_STATUS_IB_READY) | ||
614 | intval |= IPATH_LAYER_INT_IF_UP; | ||
615 | if (ipath_stats.sps_lid[dd->ipath_unit]) | ||
616 | intval |= IPATH_LAYER_INT_LID; | ||
617 | if (ipath_stats.sps_mlid[dd->ipath_unit]) | ||
618 | intval |= IPATH_LAYER_INT_BCAST; | ||
619 | /* | ||
620 | * do this on open, in case low level is already up and | ||
621 | * just layered driver was reloaded, etc. | ||
622 | */ | ||
623 | if (intval) | ||
624 | layer_intr(dd->ipath_layer.l_arg, intval); | ||
625 | |||
626 | ret = 0; | ||
627 | bail: | ||
628 | mutex_unlock(&ipath_layer_mutex); | ||
629 | |||
630 | return ret; | ||
631 | } | ||
632 | |||
633 | EXPORT_SYMBOL_GPL(ipath_layer_open); | ||
634 | |||
635 | u16 ipath_layer_get_lid(struct ipath_devdata *dd) | ||
636 | { | ||
637 | return dd->ipath_lid; | ||
638 | } | ||
639 | |||
640 | EXPORT_SYMBOL_GPL(ipath_layer_get_lid); | ||
641 | |||
642 | /** | ||
643 | * ipath_layer_get_mac - get the MAC address | ||
644 | * @dd: the infinipath device | ||
645 | * @mac: the MAC is put here | ||
646 | * | ||
647 | * This is the EUI-64 OUI octets (top 3), then | ||
648 | * skip the next 2 (which should both be zero or 0xff). | ||
649 | * The returned MAC is in network order | ||
650 | * mac points to at least 6 bytes of buffer | ||
651 | * We assume that by the time the LID is set, that the GUID is as valid | ||
652 | * as it's ever going to be, rather than adding yet another status bit. | ||
653 | */ | ||
654 | |||
655 | int ipath_layer_get_mac(struct ipath_devdata *dd, u8 * mac) | ||
656 | { | ||
657 | u8 *guid; | ||
658 | |||
659 | guid = (u8 *) &dd->ipath_guid; | ||
660 | |||
661 | mac[0] = guid[0]; | ||
662 | mac[1] = guid[1]; | ||
663 | mac[2] = guid[2]; | ||
664 | mac[3] = guid[5]; | ||
665 | mac[4] = guid[6]; | ||
666 | mac[5] = guid[7]; | ||
667 | if ((guid[3] || guid[4]) && !(guid[3] == 0xff && guid[4] == 0xff)) | ||
668 | ipath_dbg("Warning, guid bytes 3 and 4 not 0 or 0xffff: " | ||
669 | "%x %x\n", guid[3], guid[4]); | ||
670 | return 0; | ||
671 | } | ||
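A worked illustration of the mapping above, with a made-up GUID: bytes 3 and 4 (the 0x00/0xff filler) are dropped and the remaining six bytes become the MAC.

static void example_guid_to_mac(void)
{
	/* illustrative GUID 00:11:75:ff:ff:aa:bb:cc in network order */
	u8 guid[8] = { 0x00, 0x11, 0x75, 0xff, 0xff, 0xaa, 0xbb, 0xcc };
	u8 mac[6];

	mac[0] = guid[0]; mac[1] = guid[1]; mac[2] = guid[2];
	mac[3] = guid[5]; mac[4] = guid[6]; mac[5] = guid[7];
	/* mac is now 00:11:75:aa:bb:cc */
	(void)mac;
}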
672 | |||
673 | EXPORT_SYMBOL_GPL(ipath_layer_get_mac); | ||
674 | |||
675 | u16 ipath_layer_get_bcast(struct ipath_devdata *dd) | ||
676 | { | ||
677 | return dd->ipath_mlid; | ||
678 | } | ||
679 | |||
680 | EXPORT_SYMBOL_GPL(ipath_layer_get_bcast); | ||
681 | |||
682 | u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd) | ||
683 | { | ||
684 | return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey); | ||
685 | } | ||
686 | |||
687 | EXPORT_SYMBOL_GPL(ipath_layer_get_cr_errpkey); | ||
688 | |||
689 | static void update_sge(struct ipath_sge_state *ss, u32 length) | ||
690 | { | ||
691 | struct ipath_sge *sge = &ss->sge; | ||
692 | |||
693 | sge->vaddr += length; | ||
694 | sge->length -= length; | ||
695 | sge->sge_length -= length; | ||
696 | if (sge->sge_length == 0) { | ||
697 | if (--ss->num_sge) | ||
698 | *sge = *ss->sg_list++; | ||
699 | } else if (sge->length == 0 && sge->mr != NULL) { | ||
700 | if (++sge->n >= IPATH_SEGSZ) { | ||
701 | if (++sge->m >= sge->mr->mapsz) | ||
702 | return; | ||
703 | sge->n = 0; | ||
704 | } | ||
705 | sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; | ||
706 | sge->length = sge->mr->map[sge->m]->segs[sge->n].length; | ||
707 | } | ||
708 | } | ||
709 | |||
710 | #ifdef __LITTLE_ENDIAN | ||
711 | static inline u32 get_upper_bits(u32 data, u32 shift) | ||
712 | { | ||
713 | return data >> shift; | ||
714 | } | ||
715 | |||
716 | static inline u32 set_upper_bits(u32 data, u32 shift) | ||
717 | { | ||
718 | return data << shift; | ||
719 | } | ||
720 | |||
721 | static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) | ||
722 | { | ||
723 | data <<= ((sizeof(u32) - n) * BITS_PER_BYTE); | ||
724 | data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE); | ||
725 | return data; | ||
726 | } | ||
727 | #else | ||
728 | static inline u32 get_upper_bits(u32 data, u32 shift) | ||
729 | { | ||
730 | return data << shift; | ||
731 | } | ||
732 | |||
733 | static inline u32 set_upper_bits(u32 data, u32 shift) | ||
734 | { | ||
735 | return data >> shift; | ||
736 | } | ||
737 | |||
738 | static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) | ||
739 | { | ||
740 | data >>= ((sizeof(u32) - n) * BITS_PER_BYTE); | ||
741 | data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE); | ||
742 | return data; | ||
743 | } | ||
744 | #endif | ||
745 | |||
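/*
 * Illustration (assuming a little-endian build): the helpers above treat a
 * u32 as four bytes in memory order.  With source bytes 0x11 0x22 0x33 0x44,
 * i.e. data == 0x44332211 when loaded:
 *
 *   get_upper_bits(data, 8)       == 0x00443322   drop the first byte
 *   set_upper_bits(data, 8)       == 0x33221100   place the bytes at offset 1
 *   clear_upper_bytes(data, 2, 1) == 0x00221100   keep 2 bytes, at offset 1
 *
 * The big-endian variants produce the corresponding memory-order result.
 */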
746 | static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, | ||
747 | u32 length) | ||
748 | { | ||
749 | u32 extra = 0; | ||
750 | u32 data = 0; | ||
751 | u32 last; | ||
752 | |||
753 | while (1) { | ||
754 | u32 len = ss->sge.length; | ||
755 | u32 off; | ||
756 | |||
757 | BUG_ON(len == 0); | ||
758 | if (len > length) | ||
759 | len = length; | ||
760 | if (len > ss->sge.sge_length) | ||
761 | len = ss->sge.sge_length; | ||
762 | /* If the source address is not aligned, try to align it. */ | ||
763 | off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); | ||
764 | if (off) { | ||
765 | u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr & | ||
766 | ~(sizeof(u32) - 1)); | ||
767 | u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE); | ||
768 | u32 y; | ||
769 | |||
770 | y = sizeof(u32) - off; | ||
771 | if (len > y) | ||
772 | len = y; | ||
773 | if (len + extra >= sizeof(u32)) { | ||
774 | data |= set_upper_bits(v, extra * | ||
775 | BITS_PER_BYTE); | ||
776 | len = sizeof(u32) - extra; | ||
777 | if (len == length) { | ||
778 | last = data; | ||
779 | break; | ||
780 | } | ||
781 | __raw_writel(data, piobuf); | ||
782 | piobuf++; | ||
783 | extra = 0; | ||
784 | data = 0; | ||
785 | } else { | ||
786 | /* Clear unused upper bytes */ | ||
787 | data |= clear_upper_bytes(v, len, extra); | ||
788 | if (len == length) { | ||
789 | last = data; | ||
790 | break; | ||
791 | } | ||
792 | extra += len; | ||
793 | } | ||
794 | } else if (extra) { | ||
795 | /* Source address is aligned. */ | ||
796 | u32 *addr = (u32 *) ss->sge.vaddr; | ||
797 | int shift = extra * BITS_PER_BYTE; | ||
798 | int ushift = 32 - shift; | ||
799 | u32 l = len; | ||
800 | |||
801 | while (l >= sizeof(u32)) { | ||
802 | u32 v = *addr; | ||
803 | |||
804 | data |= set_upper_bits(v, shift); | ||
805 | __raw_writel(data, piobuf); | ||
806 | data = get_upper_bits(v, ushift); | ||
807 | piobuf++; | ||
808 | addr++; | ||
809 | l -= sizeof(u32); | ||
810 | } | ||
811 | /* | ||
812 | * Fewer than a dword of source bytes remain; fold them in with 'extra'. | ||
813 | */ | ||
814 | if (l) { | ||
815 | u32 v = *addr; | ||
816 | |||
817 | if (l + extra >= sizeof(u32)) { | ||
818 | data |= set_upper_bits(v, shift); | ||
819 | len -= l + extra - sizeof(u32); | ||
820 | if (len == length) { | ||
821 | last = data; | ||
822 | break; | ||
823 | } | ||
824 | __raw_writel(data, piobuf); | ||
825 | piobuf++; | ||
826 | extra = 0; | ||
827 | data = 0; | ||
828 | } else { | ||
829 | /* Clear unused upper bytes */ | ||
830 | data |= clear_upper_bytes(v, l, | ||
831 | extra); | ||
832 | if (len == length) { | ||
833 | last = data; | ||
834 | break; | ||
835 | } | ||
836 | extra += l; | ||
837 | } | ||
838 | } else if (len == length) { | ||
839 | last = data; | ||
840 | break; | ||
841 | } | ||
842 | } else if (len == length) { | ||
843 | u32 w; | ||
844 | |||
845 | /* | ||
846 | * Need to round up for the last dword in the | ||
847 | * packet. | ||
848 | */ | ||
849 | w = (len + 3) >> 2; | ||
850 | __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1); | ||
851 | piobuf += w - 1; | ||
852 | last = ((u32 *) ss->sge.vaddr)[w - 1]; | ||
853 | break; | ||
854 | } else { | ||
855 | u32 w = len >> 2; | ||
856 | |||
857 | __iowrite32_copy(piobuf, ss->sge.vaddr, w); | ||
858 | piobuf += w; | ||
859 | |||
860 | extra = len & (sizeof(u32) - 1); | ||
861 | if (extra) { | ||
862 | u32 v = ((u32 *) ss->sge.vaddr)[w]; | ||
863 | |||
864 | /* Clear unused upper bytes */ | ||
865 | data = clear_upper_bytes(v, extra, 0); | ||
866 | } | ||
867 | } | ||
868 | update_sge(ss, len); | ||
869 | length -= len; | ||
870 | } | ||
871 | /* must flush everything early, before the trigger word */ | ||
872 | ipath_flush_wc(); | ||
873 | __raw_writel(last, piobuf); | ||
874 | /* be sure trigger word is written */ | ||
875 | ipath_flush_wc(); | ||
876 | update_sge(ss, length); | ||
877 | } | ||
878 | |||
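/*
 * A minimal sketch (illustration, not part of the driver) of the
 * write-combining discipline that copy_io() above and ipath_verbs_send()
 * below both follow: copy everything except the last dword, flush the WC
 * buffers, then write the last ("trigger") dword and flush again so that
 * it reaches the chip last.
 */
#if 0	/* example only */
static void pio_copy_with_trigger_example(u32 __iomem *piobuf,
					   const u32 *src, u32 dwords)
{
	__iowrite32_copy(piobuf, src, dwords - 1);	/* all but the trigger word */
	ipath_flush_wc();				/* drain WC before the trigger */
	__raw_writel(src[dwords - 1], piobuf + dwords - 1);
	ipath_flush_wc();				/* be sure the trigger went out */
}
#endif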
879 | /** | ||
880 | * ipath_verbs_send - send a packet from the verbs layer | ||
881 | * @dd: the infinipath device | ||
882 | * @hdrwords: the number of words in the header | ||
883 | * @hdr: the packet header | ||
884 | * @len: the length of the packet in bytes | ||
885 | * @ss: the SGE to send | ||
886 | * | ||
887 | * This is like ipath_sma_send_pkt() in that we need to be able to send | ||
888 | * packets after the chip is initialized (MADs) but also like | ||
889 | * ipath_layer_send_hdr() since it's used by the verbs layer. | ||
890 | */ | ||
891 | int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, | ||
892 | u32 *hdr, u32 len, struct ipath_sge_state *ss) | ||
893 | { | ||
894 | u32 __iomem *piobuf; | ||
895 | u32 plen; | ||
896 | int ret; | ||
897 | |||
898 | /* +1 is for the qword padding of pbc */ | ||
899 | plen = hdrwords + ((len + 3) >> 2) + 1; | ||
900 | if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) { | ||
901 | ipath_dbg("packet len 0x%x too long, failing\n", plen); | ||
902 | ret = -EINVAL; | ||
903 | goto bail; | ||
904 | } | ||
905 | |||
906 | /* Get a PIO buffer to use. */ | ||
907 | piobuf = ipath_getpiobuf(dd, NULL); | ||
908 | if (unlikely(piobuf == NULL)) { | ||
909 | ret = -EBUSY; | ||
910 | goto bail; | ||
911 | } | ||
912 | |||
913 | /* | ||
914 | * Write len to control qword, no flags. | ||
915 | * We have to flush after the PBC for correctness on some CPUs, | ||
916 | * or the WC buffer can be written out of order. | ||
917 | */ | ||
918 | writeq(plen, piobuf); | ||
919 | ipath_flush_wc(); | ||
920 | piobuf += 2; | ||
921 | if (len == 0) { | ||
922 | /* | ||
923 | * If there is just the header portion, must flush before | ||
924 | * writing last word of header for correctness, and after | ||
925 | * the last header word (trigger word). | ||
926 | */ | ||
927 | __iowrite32_copy(piobuf, hdr, hdrwords - 1); | ||
928 | ipath_flush_wc(); | ||
929 | __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); | ||
930 | ipath_flush_wc(); | ||
931 | ret = 0; | ||
932 | goto bail; | ||
933 | } | ||
934 | |||
935 | __iowrite32_copy(piobuf, hdr, hdrwords); | ||
936 | piobuf += hdrwords; | ||
937 | |||
938 | /* The common case is aligned and contained in one segment. */ | ||
939 | if (likely(ss->num_sge == 1 && len <= ss->sge.length && | ||
940 | !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { | ||
941 | u32 w; | ||
942 | |||
943 | /* Need to round up for the last dword in the packet. */ | ||
944 | w = (len + 3) >> 2; | ||
945 | __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1); | ||
946 | /* must flush everything early, before the trigger word */ | ||
947 | ipath_flush_wc(); | ||
948 | __raw_writel(((u32 *) ss->sge.vaddr)[w - 1], | ||
949 | piobuf + w - 1); | ||
950 | /* be sure trigger word is written */ | ||
951 | ipath_flush_wc(); | ||
952 | update_sge(ss, len); | ||
953 | ret = 0; | ||
954 | goto bail; | ||
955 | } | ||
956 | copy_io(piobuf, ss, len); | ||
957 | ret = 0; | ||
958 | |||
959 | bail: | ||
960 | return ret; | ||
961 | } | ||
962 | |||
963 | EXPORT_SYMBOL_GPL(ipath_verbs_send); | ||
964 | |||
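/*
 * Worked example (numbers illustrative only): for hdrwords == 20 and a
 * 35-byte payload, ((35 + 3) >> 2) == 9 payload dwords, so
 * plen == 20 + 9 + 1 == 30 dwords (the +1 being the PBC qword padding),
 * i.e. plen << 2 == 120 bytes, which is checked against ipath_ibmaxlen.
 */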
965 | int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords, | ||
966 | u64 *rwords, u64 *spkts, u64 *rpkts, | ||
967 | u64 *xmit_wait) | ||
968 | { | ||
969 | int ret; | ||
970 | |||
971 | if (!(dd->ipath_flags & IPATH_INITTED)) { | ||
972 | /* no hardware, freeze, etc. */ | ||
973 | ipath_dbg("unit %u not usable\n", dd->ipath_unit); | ||
974 | ret = -EINVAL; | ||
975 | goto bail; | ||
976 | } | ||
977 | *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); | ||
978 | *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); | ||
979 | *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); | ||
980 | *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); | ||
981 | *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt); | ||
982 | |||
983 | ret = 0; | ||
984 | |||
985 | bail: | ||
986 | return ret; | ||
987 | } | ||
988 | |||
989 | EXPORT_SYMBOL_GPL(ipath_layer_snapshot_counters); | ||
990 | |||
991 | /** | ||
992 | * ipath_layer_get_counters - get various chip counters | ||
993 | * @dd: the infinipath device | ||
994 | * @cntrs: counters are placed here | ||
995 | * | ||
996 | * Return the counters needed by recv_pma_get_portcounters(). | ||
997 | */ | ||
998 | int ipath_layer_get_counters(struct ipath_devdata *dd, | ||
999 | struct ipath_layer_counters *cntrs) | ||
1000 | { | ||
1001 | int ret; | ||
1002 | |||
1003 | if (!(dd->ipath_flags & IPATH_INITTED)) { | ||
1004 | /* no hardware, freeze, etc. */ | ||
1005 | ipath_dbg("unit %u not usable\n", dd->ipath_unit); | ||
1006 | ret = -EINVAL; | ||
1007 | goto bail; | ||
1008 | } | ||
1009 | cntrs->symbol_error_counter = | ||
1010 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt); | ||
1011 | cntrs->link_error_recovery_counter = | ||
1012 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); | ||
1013 | cntrs->link_downed_counter = | ||
1014 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt); | ||
1015 | cntrs->port_rcv_errors = | ||
1016 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) + | ||
1017 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) + | ||
1018 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) + | ||
1019 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_errrcvflowctrlcnt) + | ||
1020 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) + | ||
1021 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) + | ||
1022 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) + | ||
1023 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) + | ||
1024 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) + | ||
1025 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlinkcnt) + | ||
1026 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt); | ||
1027 | cntrs->port_rcv_remphys_errors = | ||
1028 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt); | ||
1029 | cntrs->port_xmit_discards = | ||
1030 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt); | ||
1031 | cntrs->port_xmit_data = | ||
1032 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); | ||
1033 | cntrs->port_rcv_data = | ||
1034 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); | ||
1035 | cntrs->port_xmit_packets = | ||
1036 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); | ||
1037 | cntrs->port_rcv_packets = | ||
1038 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); | ||
1039 | |||
1040 | ret = 0; | ||
1041 | |||
1042 | bail: | ||
1043 | return ret; | ||
1044 | } | ||
1045 | |||
1046 | EXPORT_SYMBOL_GPL(ipath_layer_get_counters); | ||
1047 | |||
1048 | int ipath_layer_want_buffer(struct ipath_devdata *dd) | ||
1049 | { | ||
1050 | set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); | ||
1051 | ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, | ||
1052 | dd->ipath_sendctrl); | ||
1053 | |||
1054 | return 0; | ||
1055 | } | ||
1056 | |||
1057 | EXPORT_SYMBOL_GPL(ipath_layer_want_buffer); | ||
1058 | |||
1059 | int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr) | ||
1060 | { | ||
1061 | int ret = 0; | ||
1062 | u32 __iomem *piobuf; | ||
1063 | u32 plen, *uhdr; | ||
1064 | size_t count; | ||
1065 | __be16 vlsllnh; | ||
1066 | |||
1067 | if (!(dd->ipath_flags & IPATH_RCVHDRSZ_SET)) { | ||
1068 | ipath_dbg("send while not open\n"); | ||
1069 | ret = -EINVAL; | ||
1070 | } else | ||
1071 | if ((dd->ipath_flags & (IPATH_LINKUNK | IPATH_LINKDOWN)) || | ||
1072 | dd->ipath_lid == 0) { | ||
1073 | /* | ||
1074 | * The LID check is for when the SMA hasn't yet configured it. | ||
1075 | */ | ||
1076 | ret = -ENETDOWN; | ||
1077 | ipath_cdbg(VERBOSE, "send while not ready, " | ||
1078 | "mylid=%u, flags=0x%x\n", | ||
1079 | dd->ipath_lid, dd->ipath_flags); | ||
1080 | } | ||
1081 | |||
1082 | vlsllnh = *((__be16 *) hdr); | ||
1083 | if (vlsllnh != htons(IPS_LRH_BTH)) { | ||
1084 | ipath_dbg("Warning: lrh[0] wrong (%x, not %x); " | ||
1085 | "not sending\n", be16_to_cpu(vlsllnh), | ||
1086 | IPS_LRH_BTH); | ||
1087 | ret = -EINVAL; | ||
1088 | } | ||
1089 | if (ret) | ||
1090 | goto done; | ||
1091 | |||
1092 | /* Get a PIO buffer to use. */ | ||
1093 | piobuf = ipath_getpiobuf(dd, NULL); | ||
1094 | if (piobuf == NULL) { | ||
1095 | ret = -EBUSY; | ||
1096 | goto done; | ||
1097 | } | ||
1098 | |||
1099 | plen = (sizeof(*hdr) >> 2); /* actual length */ | ||
1100 | ipath_cdbg(EPKT, "0x%x+1w pio %p\n", plen, piobuf); | ||
1101 | |||
1102 | writeq(plen+1, piobuf); /* len (+1 for pad) to pbc, no flags */ | ||
1103 | ipath_flush_wc(); | ||
1104 | piobuf += 2; | ||
1105 | uhdr = (u32 *)hdr; | ||
1106 | count = plen-1; /* amount we can copy before trigger word */ | ||
1107 | __iowrite32_copy(piobuf, uhdr, count); | ||
1108 | ipath_flush_wc(); | ||
1109 | __raw_writel(uhdr[count], piobuf + count); | ||
1110 | ipath_flush_wc(); /* ensure it's sent, now */ | ||
1111 | |||
1112 | ipath_stats.sps_ether_spkts++; /* ether packet sent */ | ||
1113 | |||
1114 | done: | ||
1115 | return ret; | ||
1116 | } | ||
1117 | |||
1118 | EXPORT_SYMBOL_GPL(ipath_layer_send_hdr); | ||
1119 | |||
1120 | int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd) | ||
1121 | { | ||
1122 | set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl); | ||
1123 | |||
1124 | ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, | ||
1125 | dd->ipath_sendctrl); | ||
1126 | return 0; | ||
1127 | } | ||
1128 | |||
1129 | EXPORT_SYMBOL_GPL(ipath_layer_set_piointbufavail_int); | ||
1130 | |||
1131 | int ipath_layer_enable_timer(struct ipath_devdata *dd) | ||
1132 | { | ||
1133 | /* | ||
1134 | * The HT-400 has a design flaw where the chip's and the kernel's idea | ||
1135 | * of the tail register don't always agree, and therefore we won't | ||
1136 | * get an interrupt on the next packet received. | ||
1137 | * If the board supports per packet receive interrupts, use it. | ||
1138 | * Otherwise, the timer function periodically checks for packets | ||
1139 | * to cover this case. | ||
1140 | * Either way, the timer is needed for verbs layer related | ||
1141 | * processing. | ||
1142 | */ | ||
1143 | if (dd->ipath_flags & IPATH_GPIO_INTR) { | ||
1144 | ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, | ||
1145 | 0x2074076542310ULL); | ||
1146 | /* Enable GPIO bit 2 interrupt */ | ||
1147 | ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, | ||
1148 | (u64) (1 << 2)); | ||
1149 | } | ||
1150 | |||
1151 | init_timer(&dd->verbs_layer.l_timer); | ||
1152 | dd->verbs_layer.l_timer.function = __ipath_verbs_timer; | ||
1153 | dd->verbs_layer.l_timer.data = (unsigned long)dd; | ||
1154 | dd->verbs_layer.l_timer.expires = jiffies + 1; | ||
1155 | add_timer(&dd->verbs_layer.l_timer); | ||
1156 | |||
1157 | return 0; | ||
1158 | } | ||
1159 | |||
1160 | EXPORT_SYMBOL_GPL(ipath_layer_enable_timer); | ||
1161 | |||
1162 | int ipath_layer_disable_timer(struct ipath_devdata *dd) | ||
1163 | { | ||
1164 | /* Disable GPIO bit 2 interrupt */ | ||
1165 | if (dd->ipath_flags & IPATH_GPIO_INTR) | ||
1166 | ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0); | ||
1167 | |||
1168 | del_timer_sync(&dd->verbs_layer.l_timer); | ||
1169 | |||
1170 | return 0; | ||
1171 | } | ||
1172 | |||
1173 | EXPORT_SYMBOL_GPL(ipath_layer_disable_timer); | ||
1174 | |||
1175 | /** | ||
1176 | * ipath_layer_set_verbs_flags - set the verbs layer flags | ||
1177 | * @dd: the infinipath device | ||
1178 | * @flags: the flags to set | ||
1179 | */ | ||
1180 | int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags) | ||
1181 | { | ||
1182 | struct ipath_devdata *ss; | ||
1183 | unsigned long lflags; | ||
1184 | |||
1185 | spin_lock_irqsave(&ipath_devs_lock, lflags); | ||
1186 | |||
1187 | list_for_each_entry(ss, &ipath_dev_list, ipath_list) { | ||
1188 | if (!(ss->ipath_flags & IPATH_INITTED)) | ||
1189 | continue; | ||
1190 | if ((flags & IPATH_VERBS_KERNEL_SMA) && | ||
1191 | !(*ss->ipath_statusp & IPATH_STATUS_SMA)) | ||
1192 | *ss->ipath_statusp |= IPATH_STATUS_OIB_SMA; | ||
1193 | else | ||
1194 | *ss->ipath_statusp &= ~IPATH_STATUS_OIB_SMA; | ||
1195 | } | ||
1196 | |||
1197 | spin_unlock_irqrestore(&ipath_devs_lock, lflags); | ||
1198 | |||
1199 | return 0; | ||
1200 | } | ||
1201 | |||
1202 | EXPORT_SYMBOL_GPL(ipath_layer_set_verbs_flags); | ||
1203 | |||
1204 | /** | ||
1205 | * ipath_layer_get_npkeys - return the size of the PKEY table for port 0 | ||
1206 | * @dd: the infinipath device | ||
1207 | */ | ||
1208 | unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd) | ||
1209 | { | ||
1210 | return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys); | ||
1211 | } | ||
1212 | |||
1213 | EXPORT_SYMBOL_GPL(ipath_layer_get_npkeys); | ||
1214 | |||
1215 | /** | ||
1216 | * ipath_layer_get_pkey - return the indexed PKEY from the port 0 PKEY table | ||
1217 | * @dd: the infinipath device | ||
1218 | * @index: the PKEY index | ||
1219 | */ | ||
1220 | unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index) | ||
1221 | { | ||
1222 | unsigned ret; | ||
1223 | |||
1224 | if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys)) | ||
1225 | ret = 0; | ||
1226 | else | ||
1227 | ret = dd->ipath_pd[0]->port_pkeys[index]; | ||
1228 | |||
1229 | return ret; | ||
1230 | } | ||
1231 | |||
1232 | EXPORT_SYMBOL_GPL(ipath_layer_get_pkey); | ||
1233 | |||
1234 | /** | ||
1235 | * ipath_layer_get_pkeys - return the PKEY table for port 0 | ||
1236 | * @dd: the infinipath device | ||
1237 | * @pkeys: the pkey table is placed here | ||
1238 | */ | ||
1239 | int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 * pkeys) | ||
1240 | { | ||
1241 | struct ipath_portdata *pd = dd->ipath_pd[0]; | ||
1242 | |||
1243 | memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys)); | ||
1244 | |||
1245 | return 0; | ||
1246 | } | ||
1247 | |||
1248 | EXPORT_SYMBOL_GPL(ipath_layer_get_pkeys); | ||
1249 | |||
1250 | /** | ||
1251 | * rm_pkey - decrement the reference count for the given PKEY | ||
1252 | * @dd: the infinipath device | ||
1253 | * @key: the PKEY to remove | ||
1254 | * | ||
1255 | * Return true if this was the last reference and the hardware table entry | ||
1256 | * needs to be changed. | ||
1257 | */ | ||
1258 | static int rm_pkey(struct ipath_devdata *dd, u16 key) | ||
1259 | { | ||
1260 | int i; | ||
1261 | int ret; | ||
1262 | |||
1263 | for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { | ||
1264 | if (dd->ipath_pkeys[i] != key) | ||
1265 | continue; | ||
1266 | if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) { | ||
1267 | dd->ipath_pkeys[i] = 0; | ||
1268 | ret = 1; | ||
1269 | goto bail; | ||
1270 | } | ||
1271 | break; | ||
1272 | } | ||
1273 | |||
1274 | ret = 0; | ||
1275 | |||
1276 | bail: | ||
1277 | return ret; | ||
1278 | } | ||
1279 | |||
1280 | /** | ||
1281 | * add_pkey - add the given PKEY to the hardware table | ||
1282 | * @dd: the infinipath device | ||
1283 | * @key: the PKEY | ||
1284 | * | ||
1285 | * Return an error code if unable to add the entry, zero if no change, | ||
1286 | * or 1 if the hardware PKEY register needs to be updated. | ||
1287 | */ | ||
1288 | static int add_pkey(struct ipath_devdata *dd, u16 key) | ||
1289 | { | ||
1290 | int i; | ||
1291 | u16 lkey = key & 0x7FFF; | ||
1292 | int any = 0; | ||
1293 | int ret; | ||
1294 | |||
1295 | if (lkey == 0x7FFF) { | ||
1296 | ret = 0; | ||
1297 | goto bail; | ||
1298 | } | ||
1299 | |||
1300 | /* Look for an empty slot or a matching PKEY. */ | ||
1301 | for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { | ||
1302 | if (!dd->ipath_pkeys[i]) { | ||
1303 | any++; | ||
1304 | continue; | ||
1305 | } | ||
1306 | /* If it matches exactly, try to increment the ref count */ | ||
1307 | if (dd->ipath_pkeys[i] == key) { | ||
1308 | if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) { | ||
1309 | ret = 0; | ||
1310 | goto bail; | ||
1311 | } | ||
1312 | /* Lost the race. Look for an empty slot below. */ | ||
1313 | atomic_dec(&dd->ipath_pkeyrefs[i]); | ||
1314 | any++; | ||
1315 | } | ||
1316 | /* | ||
1317 | * It makes no sense to have both the limited and unlimited | ||
1318 | * PKEY set at the same time since the unlimited one will | ||
1319 | * disable the limited one. | ||
1320 | */ | ||
1321 | if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) { | ||
1322 | ret = -EEXIST; | ||
1323 | goto bail; | ||
1324 | } | ||
1325 | } | ||
1326 | if (!any) { | ||
1327 | ret = -EBUSY; | ||
1328 | goto bail; | ||
1329 | } | ||
1330 | for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { | ||
1331 | if (!dd->ipath_pkeys[i] && | ||
1332 | atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) { | ||
1333 | /* for ipathstats, etc. */ | ||
1334 | ipath_stats.sps_pkeys[i] = lkey; | ||
1335 | dd->ipath_pkeys[i] = key; | ||
1336 | ret = 1; | ||
1337 | goto bail; | ||
1338 | } | ||
1339 | } | ||
1340 | ret = -EBUSY; | ||
1341 | |||
1342 | bail: | ||
1343 | return ret; | ||
1344 | } | ||
1345 | |||
1346 | /** | ||
1347 | * ipath_layer_set_pkeys - set the PKEY table for port 0 | ||
1348 | * @dd: the infinipath device | ||
1349 | * @pkeys: the PKEY table | ||
1350 | */ | ||
1351 | int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 * pkeys) | ||
1352 | { | ||
1353 | struct ipath_portdata *pd; | ||
1354 | int i; | ||
1355 | int changed = 0; | ||
1356 | |||
1357 | pd = dd->ipath_pd[0]; | ||
1358 | |||
1359 | for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { | ||
1360 | u16 key = pkeys[i]; | ||
1361 | u16 okey = pd->port_pkeys[i]; | ||
1362 | |||
1363 | if (key == okey) | ||
1364 | continue; | ||
1365 | /* | ||
1366 | * The value of this PKEY table entry is changing. | ||
1367 | * Remove the old entry in the hardware's array of PKEYs. | ||
1368 | */ | ||
1369 | if (okey & 0x7FFF) | ||
1370 | changed |= rm_pkey(dd, okey); | ||
1371 | if (key & 0x7FFF) { | ||
1372 | int ret = add_pkey(dd, key); | ||
1373 | |||
1374 | if (ret < 0) | ||
1375 | key = 0; | ||
1376 | else | ||
1377 | changed |= ret; | ||
1378 | } | ||
1379 | pd->port_pkeys[i] = key; | ||
1380 | } | ||
1381 | if (changed) { | ||
1382 | u64 pkey; | ||
1383 | |||
1384 | pkey = (u64) dd->ipath_pkeys[0] | | ||
1385 | ((u64) dd->ipath_pkeys[1] << 16) | | ||
1386 | ((u64) dd->ipath_pkeys[2] << 32) | | ||
1387 | ((u64) dd->ipath_pkeys[3] << 48); | ||
1388 | ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n", | ||
1389 | (unsigned long long) pkey); | ||
1390 | ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, | ||
1391 | pkey); | ||
1392 | } | ||
1393 | return 0; | ||
1394 | } | ||
1395 | |||
1396 | EXPORT_SYMBOL_GPL(ipath_layer_set_pkeys); | ||
1397 | |||
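/*
 * Worked example (values illustrative only): with port-0 PKEYs
 * { 0xFFFF, 0x8001, 0x0000, 0x0000 }, the partition key register written
 * above is 0xFFFF | (0x8001ULL << 16) == 0x000000008001FFFF.
 */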
1398 | /** | ||
1399 | * ipath_layer_get_linkdowndefaultstate - get the default linkdown state | ||
1400 | * @dd: the infinipath device | ||
1401 | * | ||
1402 | * Returns zero if the default is POLL, 1 if the default is SLEEP. | ||
1403 | */ | ||
1404 | int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd) | ||
1405 | { | ||
1406 | return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE); | ||
1407 | } | ||
1408 | |||
1409 | EXPORT_SYMBOL_GPL(ipath_layer_get_linkdowndefaultstate); | ||
1410 | |||
1411 | /** | ||
1412 | * ipath_layer_set_linkdowndefaultstate - set the default linkdown state | ||
1413 | * @dd: the infinipath device | ||
1414 | * @sleep: the new state | ||
1415 | * | ||
1416 | * Note that this will only take effect when the link state changes. | ||
1417 | */ | ||
1418 | int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd, | ||
1419 | int sleep) | ||
1420 | { | ||
1421 | if (sleep) | ||
1422 | dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; | ||
1423 | else | ||
1424 | dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE; | ||
1425 | ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, | ||
1426 | dd->ipath_ibcctrl); | ||
1427 | return 0; | ||
1428 | } | ||
1429 | |||
1430 | EXPORT_SYMBOL_GPL(ipath_layer_set_linkdowndefaultstate); | ||
1431 | |||
1432 | int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd) | ||
1433 | { | ||
1434 | return (dd->ipath_ibcctrl >> | ||
1435 | INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & | ||
1436 | INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; | ||
1437 | } | ||
1438 | |||
1439 | EXPORT_SYMBOL_GPL(ipath_layer_get_phyerrthreshold); | ||
1440 | |||
1441 | /** | ||
1442 | * ipath_layer_set_phyerrthreshold - set the physical error threshold | ||
1443 | * @dd: the infinipath device | ||
1444 | * @n: the new threshold | ||
1445 | * | ||
1446 | * Note that this will only take effect when the link state changes. | ||
1447 | */ | ||
1448 | int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n) | ||
1449 | { | ||
1450 | unsigned v; | ||
1451 | |||
1452 | v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & | ||
1453 | INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; | ||
1454 | if (v != n) { | ||
1455 | dd->ipath_ibcctrl &= | ||
1456 | ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK << | ||
1457 | INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT); | ||
1458 | dd->ipath_ibcctrl |= | ||
1459 | (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT; | ||
1460 | ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, | ||
1461 | dd->ipath_ibcctrl); | ||
1462 | } | ||
1463 | return 0; | ||
1464 | } | ||
1465 | |||
1466 | EXPORT_SYMBOL_GPL(ipath_layer_set_phyerrthreshold); | ||
1467 | |||
1468 | int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd) | ||
1469 | { | ||
1470 | return (dd->ipath_ibcctrl >> | ||
1471 | INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & | ||
1472 | INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; | ||
1473 | } | ||
1474 | |||
1475 | EXPORT_SYMBOL_GPL(ipath_layer_get_overrunthreshold); | ||
1476 | |||
1477 | /** | ||
1478 | * ipath_layer_set_overrunthreshold - set the overrun threshold | ||
1479 | * @dd: the infinipath device | ||
1480 | * @n: the new threshold | ||
1481 | * | ||
1482 | * Note that this will only take effect when the link state changes. | ||
1483 | */ | ||
1484 | int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n) | ||
1485 | { | ||
1486 | unsigned v; | ||
1487 | |||
1488 | v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) & | ||
1489 | INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK; | ||
1490 | if (v != n) { | ||
1491 | dd->ipath_ibcctrl &= | ||
1492 | ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK << | ||
1493 | INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT); | ||
1494 | dd->ipath_ibcctrl |= | ||
1495 | (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT; | ||
1496 | ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, | ||
1497 | dd->ipath_ibcctrl); | ||
1498 | } | ||
1499 | return 0; | ||
1500 | } | ||
1501 | |||
1502 | EXPORT_SYMBOL_GPL(ipath_layer_set_overrunthreshold); | ||
1503 | |||
1504 | int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name, | ||
1505 | size_t namelen) | ||
1506 | { | ||
1507 | return dd->ipath_f_get_boardname(dd, name, namelen); | ||
1508 | } | ||
1509 | EXPORT_SYMBOL_GPL(ipath_layer_get_boardname); | ||
1510 | |||
1511 | u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd) | ||
1512 | { | ||
1513 | return dd->ipath_rcvhdrentsize; | ||
1514 | } | ||
1515 | EXPORT_SYMBOL_GPL(ipath_layer_get_rcvhdrentsize); | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h new file mode 100644 index 000000000000..6fefd15bd2da --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_layer.h | |||
@@ -0,0 +1,181 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #ifndef _IPATH_LAYER_H | ||
34 | #define _IPATH_LAYER_H | ||
35 | |||
36 | /* | ||
37 | * This header file is for symbols shared between the infinipath driver | ||
38 | * and drivers layered upon it (such as ipath). | ||
39 | */ | ||
40 | |||
41 | struct sk_buff; | ||
42 | struct ipath_sge_state; | ||
43 | struct ipath_devdata; | ||
44 | struct ether_header; | ||
45 | |||
46 | struct ipath_layer_counters { | ||
47 | u64 symbol_error_counter; | ||
48 | u64 link_error_recovery_counter; | ||
49 | u64 link_downed_counter; | ||
50 | u64 port_rcv_errors; | ||
51 | u64 port_rcv_remphys_errors; | ||
52 | u64 port_xmit_discards; | ||
53 | u64 port_xmit_data; | ||
54 | u64 port_rcv_data; | ||
55 | u64 port_xmit_packets; | ||
56 | u64 port_rcv_packets; | ||
57 | }; | ||
58 | |||
59 | /* | ||
60 | * A segment is a linear region of low physical memory. | ||
61 | * XXX Maybe we should use phys addr here and kmap()/kunmap(). | ||
62 | * Used by the verbs layer. | ||
63 | */ | ||
64 | struct ipath_seg { | ||
65 | void *vaddr; | ||
66 | size_t length; | ||
67 | }; | ||
68 | |||
69 | /* The number of ipath_segs that fit in a page. */ | ||
70 | #define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg)) | ||
71 | |||
72 | struct ipath_segarray { | ||
73 | struct ipath_seg segs[IPATH_SEGSZ]; | ||
74 | }; | ||
75 | |||
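/*
 * Sizing illustration (the exact figure is architecture dependent): on a
 * 64-bit kernel with 4 KiB pages, sizeof(struct ipath_seg) is 16 bytes
 * (an 8-byte pointer plus an 8-byte size_t), so IPATH_SEGSZ is
 * 4096 / 16 == 256 segments per ipath_segarray.
 */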
76 | struct ipath_mregion { | ||
77 | u64 user_base; /* User's address for this region */ | ||
78 | u64 iova; /* IB start address of this region */ | ||
79 | size_t length; | ||
80 | u32 lkey; | ||
81 | u32 offset; /* offset (bytes) to start of region */ | ||
82 | int access_flags; | ||
83 | u32 max_segs; /* number of ipath_segs in all the arrays */ | ||
84 | u32 mapsz; /* size of the map array */ | ||
85 | struct ipath_segarray *map[0]; /* the segments */ | ||
86 | }; | ||
87 | |||
88 | /* | ||
89 | * These keep track of the copy progress within a memory region. | ||
90 | * Used by the verbs layer. | ||
91 | */ | ||
92 | struct ipath_sge { | ||
93 | struct ipath_mregion *mr; | ||
94 | void *vaddr; /* current pointer into the segment */ | ||
95 | u32 sge_length; /* length of the SGE */ | ||
96 | u32 length; /* remaining length of the segment */ | ||
97 | u16 m; /* current index: mr->map[m] */ | ||
98 | u16 n; /* current index: mr->map[m]->segs[n] */ | ||
99 | }; | ||
100 | |||
101 | struct ipath_sge_state { | ||
102 | struct ipath_sge *sg_list; /* next SGE to be used if any */ | ||
103 | struct ipath_sge sge; /* progress state for the current SGE */ | ||
104 | u8 num_sge; | ||
105 | }; | ||
106 | |||
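/*
 * Illustration only (hypothetical helper, not part of this header): a
 * caller with one contiguous kernel buffer and no memory region to walk
 * (sge.mr == NULL) could describe it to ipath_verbs_send() like this.
 */
#if 0	/* example only */
static void example_single_sge(struct ipath_sge_state *ss, void *buf, u32 len)
{
	ss->sg_list = NULL;		/* no further SGEs */
	ss->num_sge = 1;
	ss->sge.mr = NULL;		/* plain kernel virtual memory */
	ss->sge.vaddr = buf;
	ss->sge.length = len;
	ss->sge.sge_length = len;
	ss->sge.m = 0;
	ss->sge.n = 0;
}
#endif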
107 | int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *), | ||
108 | void (*l_remove)(void *), | ||
109 | int (*l_intr)(void *, u32), | ||
110 | int (*l_rcv)(void *, void *, | ||
111 | struct sk_buff *), | ||
112 | u16 rcv_opcode, | ||
113 | int (*l_rcv_lid)(void *, void *)); | ||
114 | int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *), | ||
115 | void (*l_remove)(void *arg), | ||
116 | int (*l_piobufavail)(void *arg), | ||
117 | void (*l_rcv)(void *arg, void *rhdr, | ||
118 | void *data, u32 tlen), | ||
119 | void (*l_timer_cb)(void *arg)); | ||
120 | void ipath_layer_unregister(void); | ||
121 | void ipath_verbs_unregister(void); | ||
122 | int ipath_layer_open(struct ipath_devdata *, u32 * pktmax); | ||
123 | u16 ipath_layer_get_lid(struct ipath_devdata *dd); | ||
124 | int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *); | ||
125 | u16 ipath_layer_get_bcast(struct ipath_devdata *dd); | ||
126 | u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd); | ||
127 | int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 state); | ||
128 | int ipath_layer_set_mtu(struct ipath_devdata *, u16); | ||
129 | int ipath_set_sps_lid(struct ipath_devdata *, u32, u8); | ||
130 | int ipath_layer_send_hdr(struct ipath_devdata *dd, | ||
131 | struct ether_header *hdr); | ||
132 | int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, | ||
133 | u32 * hdr, u32 len, struct ipath_sge_state *ss); | ||
134 | int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd); | ||
135 | int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name, | ||
136 | size_t namelen); | ||
137 | int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords, | ||
138 | u64 *rwords, u64 *spkts, u64 *rpkts, | ||
139 | u64 *xmit_wait); | ||
140 | int ipath_layer_get_counters(struct ipath_devdata *dd, | ||
141 | struct ipath_layer_counters *cntrs); | ||
142 | int ipath_layer_want_buffer(struct ipath_devdata *dd); | ||
143 | int ipath_layer_set_guid(struct ipath_devdata *, __be64 guid); | ||
144 | __be64 ipath_layer_get_guid(struct ipath_devdata *); | ||
145 | u32 ipath_layer_get_nguid(struct ipath_devdata *); | ||
146 | int ipath_layer_query_device(struct ipath_devdata *, u32 * vendor, | ||
147 | u32 * boardrev, u32 * majrev, u32 * minrev); | ||
148 | u32 ipath_layer_get_flags(struct ipath_devdata *dd); | ||
149 | struct device *ipath_layer_get_device(struct ipath_devdata *dd); | ||
150 | u16 ipath_layer_get_deviceid(struct ipath_devdata *dd); | ||
151 | u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd); | ||
152 | u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd); | ||
153 | int ipath_layer_enable_timer(struct ipath_devdata *dd); | ||
154 | int ipath_layer_disable_timer(struct ipath_devdata *dd); | ||
155 | int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags); | ||
156 | unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd); | ||
157 | unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index); | ||
158 | int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 *pkeys); | ||
159 | int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 *pkeys); | ||
160 | int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd); | ||
161 | int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd, | ||
162 | int sleep); | ||
163 | int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd); | ||
164 | int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n); | ||
165 | int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd); | ||
166 | int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n); | ||
167 | u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd); | ||
168 | |||
169 | /* ipath_ether interrupt values */ | ||
170 | #define IPATH_LAYER_INT_IF_UP 0x2 | ||
171 | #define IPATH_LAYER_INT_IF_DOWN 0x4 | ||
172 | #define IPATH_LAYER_INT_LID 0x8 | ||
173 | #define IPATH_LAYER_INT_SEND_CONTINUE 0x10 | ||
174 | #define IPATH_LAYER_INT_BCAST 0x40 | ||
175 | |||
176 | /* _verbs_layer.l_flags */ | ||
177 | #define IPATH_VERBS_KERNEL_SMA 0x1 | ||
178 | |||
179 | extern unsigned ipath_debug; /* debugging bit mask */ | ||
180 | |||
181 | #endif /* _IPATH_LAYER_H */ | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c new file mode 100644 index 000000000000..f7f8391fe43f --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_mad.c | |||
@@ -0,0 +1,1352 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <rdma/ib_smi.h> | ||
34 | |||
35 | #include "ipath_kernel.h" | ||
36 | #include "ipath_verbs.h" | ||
37 | #include "ips_common.h" | ||
38 | |||
39 | #define IB_SMP_UNSUP_VERSION __constant_htons(0x0004) | ||
40 | #define IB_SMP_UNSUP_METHOD __constant_htons(0x0008) | ||
41 | #define IB_SMP_UNSUP_METH_ATTR __constant_htons(0x000C) | ||
42 | #define IB_SMP_INVALID_FIELD __constant_htons(0x001C) | ||
43 | |||
44 | static int reply(struct ib_smp *smp) | ||
45 | { | ||
46 | /* | ||
47 | * The verbs framework will handle the directed/LID route | ||
48 | * packet changes. | ||
49 | */ | ||
50 | smp->method = IB_MGMT_METHOD_GET_RESP; | ||
51 | if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) | ||
52 | smp->status |= IB_SMP_DIRECTION; | ||
53 | return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; | ||
54 | } | ||
55 | |||
56 | static int recv_subn_get_nodedescription(struct ib_smp *smp, | ||
57 | struct ib_device *ibdev) | ||
58 | { | ||
59 | if (smp->attr_mod) | ||
60 | smp->status |= IB_SMP_INVALID_FIELD; | ||
61 | |||
62 | strncpy(smp->data, ibdev->node_desc, sizeof(smp->data)); | ||
63 | |||
64 | return reply(smp); | ||
65 | } | ||
66 | |||
67 | struct nodeinfo { | ||
68 | u8 base_version; | ||
69 | u8 class_version; | ||
70 | u8 node_type; | ||
71 | u8 num_ports; | ||
72 | __be64 sys_guid; | ||
73 | __be64 node_guid; | ||
74 | __be64 port_guid; | ||
75 | __be16 partition_cap; | ||
76 | __be16 device_id; | ||
77 | __be32 revision; | ||
78 | u8 local_port_num; | ||
79 | u8 vendor_id[3]; | ||
80 | } __attribute__ ((packed)); | ||
81 | |||
82 | static int recv_subn_get_nodeinfo(struct ib_smp *smp, | ||
83 | struct ib_device *ibdev, u8 port) | ||
84 | { | ||
85 | struct nodeinfo *nip = (struct nodeinfo *)&smp->data; | ||
86 | struct ipath_devdata *dd = to_idev(ibdev)->dd; | ||
87 | u32 vendor, boardid, majrev, minrev; | ||
88 | |||
89 | if (smp->attr_mod) | ||
90 | smp->status |= IB_SMP_INVALID_FIELD; | ||
91 | |||
92 | nip->base_version = 1; | ||
93 | nip->class_version = 1; | ||
94 | nip->node_type = 1; /* channel adapter */ | ||
95 | /* | ||
96 | * XXX The num_ports value will need a layer function to get | ||
97 | * the value if we ever have more than one IB port on a chip. | ||
98 | * We will also need to get the GUID for the port. | ||
99 | */ | ||
100 | nip->num_ports = ibdev->phys_port_cnt; | ||
101 | /* This is already in network order */ | ||
102 | nip->sys_guid = to_idev(ibdev)->sys_image_guid; | ||
103 | nip->node_guid = ipath_layer_get_guid(dd); | ||
104 | nip->port_guid = nip->sys_guid; | ||
105 | nip->partition_cap = cpu_to_be16(ipath_layer_get_npkeys(dd)); | ||
106 | nip->device_id = cpu_to_be16(ipath_layer_get_deviceid(dd)); | ||
107 | ipath_layer_query_device(dd, &vendor, &boardid, &majrev, &minrev); | ||
108 | nip->revision = cpu_to_be32((majrev << 16) | minrev); | ||
109 | nip->local_port_num = port; | ||
110 | nip->vendor_id[0] = 0; | ||
111 | nip->vendor_id[1] = vendor >> 8; | ||
112 | nip->vendor_id[2] = vendor; | ||
113 | |||
114 | return reply(smp); | ||
115 | } | ||
116 | |||
117 | static int recv_subn_get_guidinfo(struct ib_smp *smp, | ||
118 | struct ib_device *ibdev) | ||
119 | { | ||
120 | u32 startgx = 8 * be32_to_cpu(smp->attr_mod); | ||
121 | __be64 *p = (__be64 *) smp->data; | ||
122 | |||
123 | /* 32 blocks of 8 64-bit GUIDs per block */ | ||
124 | |||
125 | memset(smp->data, 0, sizeof(smp->data)); | ||
126 | |||
127 | /* | ||
128 | * We only support one GUID for now. If this changes, the | ||
129 | * portinfo.guid_cap field needs to be updated too. | ||
130 | */ | ||
131 | if (startgx == 0) | ||
132 | /* The first is a copy of the read-only HW GUID. */ | ||
133 | *p = ipath_layer_get_guid(to_idev(ibdev)->dd); | ||
134 | else | ||
135 | smp->status |= IB_SMP_INVALID_FIELD; | ||
136 | |||
137 | return reply(smp); | ||
138 | } | ||
139 | |||
140 | struct port_info { | ||
141 | __be64 mkey; | ||
142 | __be64 gid_prefix; | ||
143 | __be16 lid; | ||
144 | __be16 sm_lid; | ||
145 | __be32 cap_mask; | ||
146 | __be16 diag_code; | ||
147 | __be16 mkey_lease_period; | ||
148 | u8 local_port_num; | ||
149 | u8 link_width_enabled; | ||
150 | u8 link_width_supported; | ||
151 | u8 link_width_active; | ||
152 | u8 linkspeed_portstate; /* 4 bits, 4 bits */ | ||
153 | u8 portphysstate_linkdown; /* 4 bits, 4 bits */ | ||
154 | u8 mkeyprot_resv_lmc; /* 2 bits, 3, 3 */ | ||
155 | u8 linkspeedactive_enabled; /* 4 bits, 4 bits */ | ||
156 | u8 neighbormtu_mastersmsl; /* 4 bits, 4 bits */ | ||
157 | u8 vlcap_inittype; /* 4 bits, 4 bits */ | ||
158 | u8 vl_high_limit; | ||
159 | u8 vl_arb_high_cap; | ||
160 | u8 vl_arb_low_cap; | ||
161 | u8 inittypereply_mtucap; /* 4 bits, 4 bits */ | ||
162 | u8 vlstallcnt_hoqlife; /* 3 bits, 5 bits */ | ||
163 | u8 operationalvl_pei_peo_fpi_fpo; /* 4 bits, 1, 1, 1, 1 */ | ||
164 | __be16 mkey_violations; | ||
165 | __be16 pkey_violations; | ||
166 | __be16 qkey_violations; | ||
167 | u8 guid_cap; | ||
168 | u8 clientrereg_resv_subnetto; /* 1 bit, 2 bits, 5 */ | ||
169 | u8 resv_resptimevalue; /* 3 bits, 5 bits */ | ||
170 | u8 localphyerrors_overrunerrors; /* 4 bits, 4 bits */ | ||
171 | __be16 max_credit_hint; | ||
172 | u8 resv; | ||
173 | u8 link_roundtrip_latency[3]; | ||
174 | } __attribute__ ((packed)); | ||
175 | |||
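/*
 * Illustration of the "x bits, y bits" packing used in the struct above,
 * e.g. for neighbormtu_mastersmsl (4 bits of MTU, 4 bits of SM SL):
 *
 *   pip->neighbormtu_mastersmsl = (mtu << 4) | (sm_sl & 0xF);
 *   mtu   = (pip->neighbormtu_mastersmsl >> 4) & 0xF;
 *   sm_sl =  pip->neighbormtu_mastersmsl & 0xF;
 */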
176 | static int recv_subn_get_portinfo(struct ib_smp *smp, | ||
177 | struct ib_device *ibdev, u8 port) | ||
178 | { | ||
179 | struct ipath_ibdev *dev; | ||
180 | struct port_info *pip = (struct port_info *)smp->data; | ||
181 | u16 lid; | ||
182 | u8 ibcstat; | ||
183 | u8 mtu; | ||
184 | int ret; | ||
185 | |||
186 | if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) { | ||
187 | smp->status |= IB_SMP_INVALID_FIELD; | ||
188 | ret = reply(smp); | ||
189 | goto bail; | ||
190 | } | ||
191 | |||
192 | dev = to_idev(ibdev); | ||
193 | |||
194 | /* Clear all fields. Only set the non-zero fields. */ | ||
195 | memset(smp->data, 0, sizeof(smp->data)); | ||
196 | |||
197 | /* Only return the mkey if the protection field allows it. */ | ||
198 | if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey || | ||
199 | (dev->mkeyprot_resv_lmc >> 6) == 0) | ||
200 | pip->mkey = dev->mkey; | ||
201 | pip->gid_prefix = dev->gid_prefix; | ||
202 | lid = ipath_layer_get_lid(dev->dd); | ||
203 | pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE; | ||
204 | pip->sm_lid = cpu_to_be16(dev->sm_lid); | ||
205 | pip->cap_mask = cpu_to_be32(dev->port_cap_flags); | ||
206 | /* pip->diag_code; */ | ||
207 | pip->mkey_lease_period = cpu_to_be16(dev->mkey_lease_period); | ||
208 | pip->local_port_num = port; | ||
209 | pip->link_width_enabled = dev->link_width_enabled; | ||
210 | pip->link_width_supported = 3; /* 1x or 4x */ | ||
211 | pip->link_width_active = 2; /* 4x */ | ||
212 | pip->linkspeed_portstate = 0x10; /* 2.5Gbps */ | ||
213 | ibcstat = ipath_layer_get_lastibcstat(dev->dd); | ||
214 | pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1; | ||
215 | pip->portphysstate_linkdown = | ||
216 | (ipath_cvt_physportstate[ibcstat & 0xf] << 4) | | ||
217 | (ipath_layer_get_linkdowndefaultstate(dev->dd) ? 1 : 2); | ||
218 | pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc; | ||
219 | pip->linkspeedactive_enabled = 0x11; /* 2.5Gbps, 2.5Gbps */ | ||
220 | switch (ipath_layer_get_ibmtu(dev->dd)) { | ||
221 | case 4096: | ||
222 | mtu = IB_MTU_4096; | ||
223 | break; | ||
224 | case 2048: | ||
225 | mtu = IB_MTU_2048; | ||
226 | break; | ||
227 | case 1024: | ||
228 | mtu = IB_MTU_1024; | ||
229 | break; | ||
230 | case 512: | ||
231 | mtu = IB_MTU_512; | ||
232 | break; | ||
233 | case 256: | ||
234 | mtu = IB_MTU_256; | ||
235 | break; | ||
236 | default: /* oops, something is wrong */ | ||
237 | mtu = IB_MTU_2048; | ||
238 | break; | ||
239 | } | ||
240 | pip->neighbormtu_mastersmsl = (mtu << 4) | dev->sm_sl; | ||
241 | pip->vlcap_inittype = 0x10; /* VLCap = VL0, InitType = 0 */ | ||
242 | pip->vl_high_limit = dev->vl_high_limit; | ||
243 | /* pip->vl_arb_high_cap; // only one VL */ | ||
244 | /* pip->vl_arb_low_cap; // only one VL */ | ||
245 | /* InitTypeReply = 0 */ | ||
246 | pip->inittypereply_mtucap = IB_MTU_4096; | ||
247 | /* HCAs ignore VLStallCount and HOQLife */ | ||
248 | /* pip->vlstallcnt_hoqlife; */ | ||
249 | pip->operationalvl_pei_peo_fpi_fpo = 0x10; /* OVLs = 1 */ | ||
250 | pip->mkey_violations = cpu_to_be16(dev->mkey_violations); | ||
251 | /* P_KeyViolations are counted by hardware. */ | ||
252 | pip->pkey_violations = | ||
253 | cpu_to_be16((ipath_layer_get_cr_errpkey(dev->dd) - | ||
254 | dev->n_pkey_violations) & 0xFFFF); | ||
255 | pip->qkey_violations = cpu_to_be16(dev->qkey_violations); | ||
256 | /* Only the hardware GUID is supported for now */ | ||
257 | pip->guid_cap = 1; | ||
258 | pip->clientrereg_resv_subnetto = dev->subnet_timeout; | ||
259 | /* 32.768 usec. response time (guessing) */ | ||
260 | pip->resv_resptimevalue = 3; | ||
261 | pip->localphyerrors_overrunerrors = | ||
262 | (ipath_layer_get_phyerrthreshold(dev->dd) << 4) | | ||
263 | ipath_layer_get_overrunthreshold(dev->dd); | ||
264 | /* pip->max_credit_hint; */ | ||
265 | /* pip->link_roundtrip_latency[3]; */ | ||
266 | |||
267 | ret = reply(smp); | ||
268 | |||
269 | bail: | ||
270 | return ret; | ||
271 | } | ||
272 | |||
273 | static int recv_subn_get_pkeytable(struct ib_smp *smp, | ||
274 | struct ib_device *ibdev) | ||
275 | { | ||
276 | u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff); | ||
277 | u16 *p = (u16 *) smp->data; | ||
278 | __be16 *q = (__be16 *) smp->data; | ||
279 | |||
280 | /* 64 blocks of 32 16-bit P_Key entries */ | ||
281 | |||
282 | memset(smp->data, 0, sizeof(smp->data)); | ||
283 | if (startpx == 0) { | ||
284 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
285 | unsigned i, n = ipath_layer_get_npkeys(dev->dd); | ||
286 | |||
287 | ipath_layer_get_pkeys(dev->dd, p); | ||
288 | |||
289 | for (i = 0; i < n; i++) | ||
290 | q[i] = cpu_to_be16(p[i]); | ||
291 | } else | ||
292 | smp->status |= IB_SMP_INVALID_FIELD; | ||
293 | |||
294 | return reply(smp); | ||
295 | } | ||
296 | |||
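/*
 * Worked example: the P_KeyTable attribute is addressed in blocks of 32
 * entries, so attr_mod == 0 selects entries 0..31 (the only block this
 * driver populates), while attr_mod == 1 would start at entry 32 and is
 * rejected above with IB_SMP_INVALID_FIELD.
 */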
297 | static int recv_subn_set_guidinfo(struct ib_smp *smp, | ||
298 | struct ib_device *ibdev) | ||
299 | { | ||
300 | /* The only GUID we support is the first read-only entry. */ | ||
301 | return recv_subn_get_guidinfo(smp, ibdev); | ||
302 | } | ||
303 | |||
304 | /** | ||
305 | * recv_subn_set_portinfo - set port information | ||
306 | * @smp: the incoming SM packet | ||
307 | * @ibdev: the infiniband device | ||
308 | * @port: the port on the device | ||
309 | * | ||
310 | * Set Portinfo (see ch. 14.2.5.6). | ||
311 | */ | ||
312 | static int recv_subn_set_portinfo(struct ib_smp *smp, | ||
313 | struct ib_device *ibdev, u8 port) | ||
314 | { | ||
315 | struct port_info *pip = (struct port_info *)smp->data; | ||
316 | struct ib_event event; | ||
317 | struct ipath_ibdev *dev; | ||
318 | u32 flags; | ||
319 | char clientrereg = 0; | ||
320 | u16 lid, smlid; | ||
321 | u8 lwe; | ||
322 | u8 lse; | ||
323 | u8 state; | ||
324 | u16 lstate; | ||
325 | u32 mtu; | ||
326 | int ret; | ||
327 | |||
328 | if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) | ||
329 | goto err; | ||
330 | |||
331 | dev = to_idev(ibdev); | ||
332 | event.device = ibdev; | ||
333 | event.element.port_num = port; | ||
334 | |||
335 | dev->mkey = pip->mkey; | ||
336 | dev->gid_prefix = pip->gid_prefix; | ||
337 | dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period); | ||
338 | |||
339 | lid = be16_to_cpu(pip->lid); | ||
340 | if (lid != ipath_layer_get_lid(dev->dd)) { | ||
341 | /* Must be a valid unicast LID address. */ | ||
342 | if (lid == 0 || lid >= IPS_MULTICAST_LID_BASE) | ||
343 | goto err; | ||
344 | ipath_set_sps_lid(dev->dd, lid, pip->mkeyprot_resv_lmc & 7); | ||
345 | event.event = IB_EVENT_LID_CHANGE; | ||
346 | ib_dispatch_event(&event); | ||
347 | } | ||
348 | |||
349 | smlid = be16_to_cpu(pip->sm_lid); | ||
350 | if (smlid != dev->sm_lid) { | ||
351 | /* Must be a valid unicast LID address. */ | ||
352 | if (smlid == 0 || smlid >= IPS_MULTICAST_LID_BASE) | ||
353 | goto err; | ||
354 | dev->sm_lid = smlid; | ||
355 | event.event = IB_EVENT_SM_CHANGE; | ||
356 | ib_dispatch_event(&event); | ||
357 | } | ||
358 | |||
359 | /* Only 4x supported but allow 1x or 4x to be set (see 14.2.6.6). */ | ||
360 | lwe = pip->link_width_enabled; | ||
361 | if ((lwe >= 4 && lwe <= 8) || (lwe >= 0xC && lwe <= 0xFE)) | ||
362 | goto err; | ||
363 | if (lwe == 0xFF) | ||
364 | dev->link_width_enabled = 3; /* 1x or 4x */ | ||
365 | else if (lwe) | ||
366 | dev->link_width_enabled = lwe; | ||
367 | |||
368 | /* Only 2.5 Gb/s supported. */ | ||
369 | lse = pip->linkspeedactive_enabled & 0xF; | ||
370 | if (lse >= 2 && lse <= 0xE) | ||
371 | goto err; | ||
372 | |||
373 | /* Set link down default state. */ | ||
374 | switch (pip->portphysstate_linkdown & 0xF) { | ||
375 | case 0: /* NOP */ | ||
376 | break; | ||
377 | case 1: /* SLEEP */ | ||
378 | if (ipath_layer_set_linkdowndefaultstate(dev->dd, 1)) | ||
379 | goto err; | ||
380 | break; | ||
381 | case 2: /* POLL */ | ||
382 | if (ipath_layer_set_linkdowndefaultstate(dev->dd, 0)) | ||
383 | goto err; | ||
384 | break; | ||
385 | default: | ||
386 | goto err; | ||
387 | } | ||
388 | |||
389 | dev->mkeyprot_resv_lmc = pip->mkeyprot_resv_lmc; | ||
390 | dev->vl_high_limit = pip->vl_high_limit; | ||
391 | |||
392 | switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) { | ||
393 | case IB_MTU_256: | ||
394 | mtu = 256; | ||
395 | break; | ||
396 | case IB_MTU_512: | ||
397 | mtu = 512; | ||
398 | break; | ||
399 | case IB_MTU_1024: | ||
400 | mtu = 1024; | ||
401 | break; | ||
402 | case IB_MTU_2048: | ||
403 | mtu = 2048; | ||
404 | break; | ||
405 | case IB_MTU_4096: | ||
406 | mtu = 4096; | ||
407 | break; | ||
408 | default: | ||
409 | /* XXX We have already partially updated our state! */ | ||
410 | goto err; | ||
411 | } | ||
412 | ipath_layer_set_mtu(dev->dd, mtu); | ||
413 | |||
414 | dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF; | ||
415 | |||
416 | /* We only support VL0 */ | ||
417 | if (((pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF) > 1) | ||
418 | goto err; | ||
419 | |||
420 | if (pip->mkey_violations == 0) | ||
421 | dev->mkey_violations = 0; | ||
422 | |||
423 | /* | ||
424 | * The hardware counter can't be reset, so snapshot it and subtract | ||
425 | * later. | ||
426 | */ | ||
427 | if (pip->pkey_violations == 0) | ||
428 | dev->n_pkey_violations = | ||
429 | ipath_layer_get_cr_errpkey(dev->dd); | ||
430 | |||
431 | if (pip->qkey_violations == 0) | ||
432 | dev->qkey_violations = 0; | ||
433 | |||
434 | if (ipath_layer_set_phyerrthreshold( | ||
435 | dev->dd, | ||
436 | (pip->localphyerrors_overrunerrors >> 4) & 0xF)) | ||
437 | goto err; | ||
438 | |||
439 | if (ipath_layer_set_overrunthreshold( | ||
440 | dev->dd, | ||
441 | (pip->localphyerrors_overrunerrors & 0xF))) | ||
442 | goto err; | ||
443 | |||
444 | dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; | ||
445 | |||
446 | if (pip->clientrereg_resv_subnetto & 0x80) { | ||
447 | clientrereg = 1; | ||
448 | event.event = IB_EVENT_LID_CHANGE; | ||
449 | ib_dispatch_event(&event); | ||
450 | } | ||
451 | |||
452 | /* | ||
453 | * Do the port state change now that the other link parameters | ||
454 | * have been set. | ||
455 | * Changing the port physical state only makes sense if the link | ||
456 | * is down or is being set to down. | ||
457 | */ | ||
458 | state = pip->linkspeed_portstate & 0xF; | ||
459 | flags = ipath_layer_get_flags(dev->dd); | ||
460 | lstate = (pip->portphysstate_linkdown >> 4) & 0xF; | ||
461 | if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP)) | ||
462 | goto err; | ||
463 | |||
464 | /* | ||
465 | * Only state changes of DOWN, ARM, and ACTIVE are valid | ||
466 | * and must be in the correct state to take effect (see 7.2.6). | ||
467 | */ | ||
468 | switch (state) { | ||
469 | case IB_PORT_NOP: | ||
470 | if (lstate == 0) | ||
471 | break; | ||
472 | /* FALLTHROUGH */ | ||
473 | case IB_PORT_DOWN: | ||
474 | if (lstate == 0) | ||
475 | if (ipath_layer_get_linkdowndefaultstate(dev->dd)) | ||
476 | lstate = IPATH_IB_LINKDOWN_SLEEP; | ||
477 | else | ||
478 | lstate = IPATH_IB_LINKDOWN; | ||
479 | else if (lstate == 1) | ||
480 | lstate = IPATH_IB_LINKDOWN_SLEEP; | ||
481 | else if (lstate == 2) | ||
482 | lstate = IPATH_IB_LINKDOWN; | ||
483 | else if (lstate == 3) | ||
484 | lstate = IPATH_IB_LINKDOWN_DISABLE; | ||
485 | else | ||
486 | goto err; | ||
487 | ipath_layer_set_linkstate(dev->dd, lstate); | ||
488 | if (flags & IPATH_LINKACTIVE) { | ||
489 | event.event = IB_EVENT_PORT_ERR; | ||
490 | ib_dispatch_event(&event); | ||
491 | } | ||
492 | break; | ||
493 | case IB_PORT_ARMED: | ||
494 | if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE))) | ||
495 | break; | ||
496 | ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKARM); | ||
497 | if (flags & IPATH_LINKACTIVE) { | ||
498 | event.event = IB_EVENT_PORT_ERR; | ||
499 | ib_dispatch_event(&event); | ||
500 | } | ||
501 | break; | ||
502 | case IB_PORT_ACTIVE: | ||
503 | if (!(flags & IPATH_LINKARMED)) | ||
504 | break; | ||
505 | ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE); | ||
506 | event.event = IB_EVENT_PORT_ACTIVE; | ||
507 | ib_dispatch_event(&event); | ||
508 | break; | ||
509 | default: | ||
510 | /* XXX We have already partially updated our state! */ | ||
511 | goto err; | ||
512 | } | ||
513 | |||
514 | ret = recv_subn_get_portinfo(smp, ibdev, port); | ||
515 | |||
516 | if (clientrereg) | ||
517 | pip->clientrereg_resv_subnetto |= 0x80; | ||
518 | |||
519 | goto done; | ||
520 | |||
521 | err: | ||
522 | smp->status |= IB_SMP_INVALID_FIELD; | ||
523 | ret = recv_subn_get_portinfo(smp, ibdev, port); | ||
524 | |||
525 | done: | ||
526 | return ret; | ||
527 | } | ||
528 | |||
529 | static int recv_subn_set_pkeytable(struct ib_smp *smp, | ||
530 | struct ib_device *ibdev) | ||
531 | { | ||
532 | u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff); | ||
533 | __be16 *p = (__be16 *) smp->data; | ||
534 | u16 *q = (u16 *) smp->data; | ||
535 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
536 | unsigned i, n = ipath_layer_get_npkeys(dev->dd); | ||
537 | |||
538 | for (i = 0; i < n; i++) | ||
539 | q[i] = be16_to_cpu(p[i]); | ||
540 | |||
541 | if (startpx != 0 || | ||
542 | ipath_layer_set_pkeys(dev->dd, q) != 0) | ||
543 | smp->status |= IB_SMP_INVALID_FIELD; | ||
544 | |||
545 | return recv_subn_get_pkeytable(smp, ibdev); | ||
546 | } | ||
547 | |||
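recv_subn_set_pkeytable() converts the wire-order P_Key table to host order in place before handing it to ipath_layer_set_pkeys(); the __be16 and u16 pointers alias the same smp->data buffer and each element is read before it is overwritten. A hedged userspace sketch of the same in-place conversion (ntohs standing in for be16_to_cpu):

/* Sketch only: convert a big-endian 16-bit table to host order in place. */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t table[4] = { htons(0xFFFF), htons(0x8001), 0, 0 }; /* wire order */
	uint16_t *q = table;	/* host-order view of the same buffer */
	unsigned i;

	for (i = 0; i < 4; i++)
		q[i] = ntohs(table[i]);	/* element i is read before it is written */

	printf("pkey[0]=0x%04x pkey[1]=0x%04x\n", q[0], q[1]);
	return 0;
}
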
548 | #define IB_PMA_CLASS_PORT_INFO __constant_htons(0x0001) | ||
549 | #define IB_PMA_PORT_SAMPLES_CONTROL __constant_htons(0x0010) | ||
550 | #define IB_PMA_PORT_SAMPLES_RESULT __constant_htons(0x0011) | ||
551 | #define IB_PMA_PORT_COUNTERS __constant_htons(0x0012) | ||
552 | #define IB_PMA_PORT_COUNTERS_EXT __constant_htons(0x001D) | ||
553 | #define IB_PMA_PORT_SAMPLES_RESULT_EXT __constant_htons(0x001E) | ||
554 | |||
555 | struct ib_perf { | ||
556 | u8 base_version; | ||
557 | u8 mgmt_class; | ||
558 | u8 class_version; | ||
559 | u8 method; | ||
560 | __be16 status; | ||
561 | __be16 unused; | ||
562 | __be64 tid; | ||
563 | __be16 attr_id; | ||
564 | __be16 resv; | ||
565 | __be32 attr_mod; | ||
566 | u8 reserved[40]; | ||
567 | u8 data[192]; | ||
568 | } __attribute__ ((packed)); | ||
569 | |||
570 | struct ib_pma_classportinfo { | ||
571 | u8 base_version; | ||
572 | u8 class_version; | ||
573 | __be16 cap_mask; | ||
574 | u8 reserved[3]; | ||
575 | u8 resp_time_value; /* only lower 5 bits */ | ||
576 | union ib_gid redirect_gid; | ||
577 | __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */ | ||
578 | __be16 redirect_lid; | ||
579 | __be16 redirect_pkey; | ||
580 | __be32 redirect_qp; /* only lower 24 bits */ | ||
581 | __be32 redirect_qkey; | ||
582 | union ib_gid trap_gid; | ||
583 | __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */ | ||
584 | __be16 trap_lid; | ||
585 | __be16 trap_pkey; | ||
586 | __be32 trap_hl_qp; /* 8, 24 bits respectively */ | ||
587 | __be32 trap_qkey; | ||
588 | } __attribute__ ((packed)); | ||
589 | |||
590 | struct ib_pma_portsamplescontrol { | ||
591 | u8 opcode; | ||
592 | u8 port_select; | ||
593 | u8 tick; | ||
594 | u8 counter_width; /* only lower 3 bits */ | ||
595 | __be32 counter_mask0_9; /* 2, 10 * 3, bits */ | ||
596 | __be16 counter_mask10_14; /* 1, 5 * 3, bits */ | ||
597 | u8 sample_mechanisms; | ||
598 | u8 sample_status; /* only lower 2 bits */ | ||
599 | __be64 option_mask; | ||
600 | __be64 vendor_mask; | ||
601 | __be32 sample_start; | ||
602 | __be32 sample_interval; | ||
603 | __be16 tag; | ||
604 | __be16 counter_select[15]; | ||
605 | } __attribute__ ((packed)); | ||
606 | |||
607 | struct ib_pma_portsamplesresult { | ||
608 | __be16 tag; | ||
609 | __be16 sample_status; /* only lower 2 bits */ | ||
610 | __be32 counter[15]; | ||
611 | } __attribute__ ((packed)); | ||
612 | |||
613 | struct ib_pma_portsamplesresult_ext { | ||
614 | __be16 tag; | ||
615 | __be16 sample_status; /* only lower 2 bits */ | ||
616 | __be32 extended_width; /* only upper 2 bits */ | ||
617 | __be64 counter[15]; | ||
618 | } __attribute__ ((packed)); | ||
619 | |||
620 | struct ib_pma_portcounters { | ||
621 | u8 reserved; | ||
622 | u8 port_select; | ||
623 | __be16 counter_select; | ||
624 | __be16 symbol_error_counter; | ||
625 | u8 link_error_recovery_counter; | ||
626 | u8 link_downed_counter; | ||
627 | __be16 port_rcv_errors; | ||
628 | __be16 port_rcv_remphys_errors; | ||
629 | __be16 port_rcv_switch_relay_errors; | ||
630 | __be16 port_xmit_discards; | ||
631 | u8 port_xmit_constraint_errors; | ||
632 | u8 port_rcv_constraint_errors; | ||
633 | u8 reserved1; | ||
634 | u8 lli_ebor_errors; /* 4, 4, bits */ | ||
635 | __be16 reserved2; | ||
636 | __be16 vl15_dropped; | ||
637 | __be32 port_xmit_data; | ||
638 | __be32 port_rcv_data; | ||
639 | __be32 port_xmit_packets; | ||
640 | __be32 port_rcv_packets; | ||
641 | } __attribute__ ((packed)); | ||
642 | |||
643 | #define IB_PMA_SEL_SYMBOL_ERROR __constant_htons(0x0001) | ||
644 | #define IB_PMA_SEL_LINK_ERROR_RECOVERY __constant_htons(0x0002) | ||
645 | #define IB_PMA_SEL_LINK_DOWNED __constant_htons(0x0004) | ||
646 | #define IB_PMA_SEL_PORT_RCV_ERRORS __constant_htons(0x0008) | ||
647 | #define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS __constant_htons(0x0010) | ||
648 | #define IB_PMA_SEL_PORT_XMIT_DISCARDS __constant_htons(0x0040) | ||
649 | #define IB_PMA_SEL_PORT_XMIT_DATA __constant_htons(0x1000) | ||
650 | #define IB_PMA_SEL_PORT_RCV_DATA __constant_htons(0x2000) | ||
651 | #define IB_PMA_SEL_PORT_XMIT_PACKETS __constant_htons(0x4000) | ||
652 | #define IB_PMA_SEL_PORT_RCV_PACKETS __constant_htons(0x8000) | ||
653 | |||
654 | struct ib_pma_portcounters_ext { | ||
655 | u8 reserved; | ||
656 | u8 port_select; | ||
657 | __be16 counter_select; | ||
658 | __be32 reserved1; | ||
659 | __be64 port_xmit_data; | ||
660 | __be64 port_rcv_data; | ||
661 | __be64 port_xmit_packets; | ||
662 | __be64 port_rcv_packets; | ||
663 | __be64 port_unicast_xmit_packets; | ||
664 | __be64 port_unicast_rcv_packets; | ||
665 | __be64 port_multicast_xmit_packets; | ||
666 | __be64 port_multicast_rcv_packets; | ||
667 | } __attribute__ ((packed)); | ||
668 | |||
669 | #define IB_PMA_SELX_PORT_XMIT_DATA __constant_htons(0x0001) | ||
670 | #define IB_PMA_SELX_PORT_RCV_DATA __constant_htons(0x0002) | ||
671 | #define IB_PMA_SELX_PORT_XMIT_PACKETS __constant_htons(0x0004) | ||
672 | #define IB_PMA_SELX_PORT_RCV_PACKETS __constant_htons(0x0008) | ||
673 | #define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS __constant_htons(0x0010) | ||
674 | #define IB_PMA_SELX_PORT_UNI_RCV_PACKETS __constant_htons(0x0020) | ||
675 | #define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS __constant_htons(0x0040) | ||
676 | #define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS __constant_htons(0x0080) | ||
677 | |||
678 | static int recv_pma_get_classportinfo(struct ib_perf *pmp) | ||
679 | { | ||
680 | struct ib_pma_classportinfo *p = | ||
681 | (struct ib_pma_classportinfo *)pmp->data; | ||
682 | |||
683 | memset(pmp->data, 0, sizeof(pmp->data)); | ||
684 | |||
685 | if (pmp->attr_mod != 0) | ||
686 | pmp->status |= IB_SMP_INVALID_FIELD; | ||
687 | |||
688 | /* Indicate AllPortSelect is valid (only one port anyway) */ | ||
689 | p->cap_mask = __constant_cpu_to_be16(1 << 8); | ||
690 | p->base_version = 1; | ||
691 | p->class_version = 1; | ||
692 | /* | ||
693 | * Expected response time is 4.096 usec. * 2^18 == 1.073741824 | ||
694 | * sec. | ||
695 | */ | ||
696 | p->resp_time_value = 18; | ||
697 | |||
698 | return reply((struct ib_smp *) pmp); | ||
699 | } | ||
700 | |||
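The ClassPortInfo response-time encoding used above is the usual IBA one: the advertised response time is 4.096 usec scaled by 2^resp_time_value, so the value 18 set here is about 1.07 seconds, as the comment states. A quick standalone check of that arithmetic:

/* Check: 4.096 usec * 2^18 == 1.073741824 sec. */
#include <stdio.h>

int main(void)
{
	double base_usec = 4.096;
	int resp_time_value = 18;
	double sec = base_usec * (double)(1UL << resp_time_value) / 1e6;

	printf("response time = %.9f sec\n", sec);	/* prints 1.073741824 */
	return 0;
}
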
701 | /* | ||
702 | * The PortSamplesControl.CounterMasks field is an array of 3 bit fields | ||
703 | * which specify the N'th counter's capabilities. See ch. 16.1.3.2. | ||
704 | * We support 5 counters which only count the mandatory quantities. | ||
705 | */ | ||
706 | #define COUNTER_MASK(q, n) (q << ((9 - n) * 3)) | ||
707 | #define COUNTER_MASK0_9 \ | ||
708 | __constant_cpu_to_be32(COUNTER_MASK(1, 0) | \ | ||
709 | COUNTER_MASK(1, 1) | \ | ||
710 | COUNTER_MASK(1, 2) | \ | ||
711 | COUNTER_MASK(1, 3) | \ | ||
712 | COUNTER_MASK(1, 4)) | ||
713 | |||
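COUNTER_MASK(q, n) places a 3-bit capability code for counter n into the CounterMasks0_9 word, counting down from bit 29 (the top two bits are the separate 2-bit field noted in the struct layout above). COUNTER_MASK(1, 0) is therefore 1 << 27, and the five mandatory counters OR together to 0x09248000 before the cpu_to_be32 conversion. A small standalone check, using a local copy of the macro:

/* Sketch: expand the 3-bit-per-counter packing used by COUNTER_MASK0_9. */
#include <stdio.h>

#define COUNTER_MASK(q, n) ((unsigned)(q) << ((9 - (n)) * 3))

int main(void)
{
	unsigned mask = COUNTER_MASK(1, 0) | COUNTER_MASK(1, 1) |
			COUNTER_MASK(1, 2) | COUNTER_MASK(1, 3) |
			COUNTER_MASK(1, 4);

	printf("host-order mask0_9 = 0x%08x\n", mask);	/* 0x09248000 */
	return 0;
}
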
714 | static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp, | ||
715 | struct ib_device *ibdev, u8 port) | ||
716 | { | ||
717 | struct ib_pma_portsamplescontrol *p = | ||
718 | (struct ib_pma_portsamplescontrol *)pmp->data; | ||
719 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
720 | unsigned long flags; | ||
721 | u8 port_select = p->port_select; | ||
722 | |||
723 | memset(pmp->data, 0, sizeof(pmp->data)); | ||
724 | |||
725 | p->port_select = port_select; | ||
726 | if (pmp->attr_mod != 0 || | ||
727 | (port_select != port && port_select != 0xFF)) | ||
728 | pmp->status |= IB_SMP_INVALID_FIELD; | ||
729 | /* | ||
730 | * Ticks are 10x the link transfer period which for 2.5Gbs is 4 | ||
731 | * nsec. 0 == 4 nsec., 1 == 8 nsec., ..., 255 == 1020 nsec. Sample | ||
732 | * intervals are counted in ticks. Since we use Linux timers, which | ||
733 | * count in jiffies, we can't sample for less than 1000 ticks if HZ | ||
734 | * == 1000 (4000 ticks if HZ is 250). | ||
735 | */ | ||
736 | /* XXX This is WRONG. */ | ||
737 | p->tick = 250; /* 1 usec. */ | ||
738 | p->counter_width = 4; /* 32 bit counters */ | ||
739 | p->counter_mask0_9 = COUNTER_MASK0_9; | ||
740 | spin_lock_irqsave(&dev->pending_lock, flags); | ||
741 | p->sample_status = dev->pma_sample_status; | ||
742 | p->sample_start = cpu_to_be32(dev->pma_sample_start); | ||
743 | p->sample_interval = cpu_to_be32(dev->pma_sample_interval); | ||
744 | p->tag = cpu_to_be16(dev->pma_tag); | ||
745 | p->counter_select[0] = dev->pma_counter_select[0]; | ||
746 | p->counter_select[1] = dev->pma_counter_select[1]; | ||
747 | p->counter_select[2] = dev->pma_counter_select[2]; | ||
748 | p->counter_select[3] = dev->pma_counter_select[3]; | ||
749 | p->counter_select[4] = dev->pma_counter_select[4]; | ||
750 | spin_unlock_irqrestore(&dev->pending_lock, flags); | ||
751 | |||
752 | return reply((struct ib_smp *) pmp); | ||
753 | } | ||
754 | |||
755 | static int recv_pma_set_portsamplescontrol(struct ib_perf *pmp, | ||
756 | struct ib_device *ibdev, u8 port) | ||
757 | { | ||
758 | struct ib_pma_portsamplescontrol *p = | ||
759 | (struct ib_pma_portsamplescontrol *)pmp->data; | ||
760 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
761 | unsigned long flags; | ||
762 | u32 start; | ||
763 | int ret; | ||
764 | |||
765 | if (pmp->attr_mod != 0 || | ||
766 | (p->port_select != port && p->port_select != 0xFF)) { | ||
767 | pmp->status |= IB_SMP_INVALID_FIELD; | ||
768 | ret = reply((struct ib_smp *) pmp); | ||
769 | goto bail; | ||
770 | } | ||
771 | |||
772 | start = be32_to_cpu(p->sample_start); | ||
773 | if (start != 0) { | ||
774 | spin_lock_irqsave(&dev->pending_lock, flags); | ||
775 | if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_DONE) { | ||
776 | dev->pma_sample_status = | ||
777 | IB_PMA_SAMPLE_STATUS_STARTED; | ||
778 | dev->pma_sample_start = start; | ||
779 | dev->pma_sample_interval = | ||
780 | be32_to_cpu(p->sample_interval); | ||
781 | dev->pma_tag = be16_to_cpu(p->tag); | ||
782 | if (p->counter_select[0]) | ||
783 | dev->pma_counter_select[0] = | ||
784 | p->counter_select[0]; | ||
785 | if (p->counter_select[1]) | ||
786 | dev->pma_counter_select[1] = | ||
787 | p->counter_select[1]; | ||
788 | if (p->counter_select[2]) | ||
789 | dev->pma_counter_select[2] = | ||
790 | p->counter_select[2]; | ||
791 | if (p->counter_select[3]) | ||
792 | dev->pma_counter_select[3] = | ||
793 | p->counter_select[3]; | ||
794 | if (p->counter_select[4]) | ||
795 | dev->pma_counter_select[4] = | ||
796 | p->counter_select[4]; | ||
797 | } | ||
798 | spin_unlock_irqrestore(&dev->pending_lock, flags); | ||
799 | } | ||
800 | ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port); | ||
801 | |||
802 | bail: | ||
803 | return ret; | ||
804 | } | ||
805 | |||
806 | static u64 get_counter(struct ipath_ibdev *dev, __be16 sel) | ||
807 | { | ||
808 | u64 ret; | ||
809 | |||
810 | switch (sel) { | ||
811 | case IB_PMA_PORT_XMIT_DATA: | ||
812 | ret = dev->ipath_sword; | ||
813 | break; | ||
814 | case IB_PMA_PORT_RCV_DATA: | ||
815 | ret = dev->ipath_rword; | ||
816 | break; | ||
817 | case IB_PMA_PORT_XMIT_PKTS: | ||
818 | ret = dev->ipath_spkts; | ||
819 | break; | ||
820 | case IB_PMA_PORT_RCV_PKTS: | ||
821 | ret = dev->ipath_rpkts; | ||
822 | break; | ||
823 | case IB_PMA_PORT_XMIT_WAIT: | ||
824 | ret = dev->ipath_xmit_wait; | ||
825 | break; | ||
826 | default: | ||
827 | ret = 0; | ||
828 | } | ||
829 | |||
830 | return ret; | ||
831 | } | ||
832 | |||
833 | static int recv_pma_get_portsamplesresult(struct ib_perf *pmp, | ||
834 | struct ib_device *ibdev) | ||
835 | { | ||
836 | struct ib_pma_portsamplesresult *p = | ||
837 | (struct ib_pma_portsamplesresult *)pmp->data; | ||
838 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
839 | int i; | ||
840 | |||
841 | memset(pmp->data, 0, sizeof(pmp->data)); | ||
842 | p->tag = cpu_to_be16(dev->pma_tag); | ||
843 | p->sample_status = cpu_to_be16(dev->pma_sample_status); | ||
844 | for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++) | ||
845 | p->counter[i] = cpu_to_be32( | ||
846 | get_counter(dev, dev->pma_counter_select[i])); | ||
847 | |||
848 | return reply((struct ib_smp *) pmp); | ||
849 | } | ||
850 | |||
851 | static int recv_pma_get_portsamplesresult_ext(struct ib_perf *pmp, | ||
852 | struct ib_device *ibdev) | ||
853 | { | ||
854 | struct ib_pma_portsamplesresult_ext *p = | ||
855 | (struct ib_pma_portsamplesresult_ext *)pmp->data; | ||
856 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
857 | int i; | ||
858 | |||
859 | memset(pmp->data, 0, sizeof(pmp->data)); | ||
860 | p->tag = cpu_to_be16(dev->pma_tag); | ||
861 | p->sample_status = cpu_to_be16(dev->pma_sample_status); | ||
862 | /* 64 bits */ | ||
863 | p->extended_width = __constant_cpu_to_be32(0x80000000); | ||
864 | for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++) | ||
865 | p->counter[i] = cpu_to_be64( | ||
866 | get_counter(dev, dev->pma_counter_select[i])); | ||
867 | |||
868 | return reply((struct ib_smp *) pmp); | ||
869 | } | ||
870 | |||
871 | static int recv_pma_get_portcounters(struct ib_perf *pmp, | ||
872 | struct ib_device *ibdev, u8 port) | ||
873 | { | ||
874 | struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) | ||
875 | pmp->data; | ||
876 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
877 | struct ipath_layer_counters cntrs; | ||
878 | u8 port_select = p->port_select; | ||
879 | |||
880 | ipath_layer_get_counters(dev->dd, &cntrs); | ||
881 | |||
882 | /* Adjust counters for any resets done. */ | ||
883 | cntrs.symbol_error_counter -= dev->n_symbol_error_counter; | ||
884 | cntrs.link_error_recovery_counter -= | ||
885 | dev->n_link_error_recovery_counter; | ||
886 | cntrs.link_downed_counter -= dev->n_link_downed_counter; | ||
887 | cntrs.port_rcv_errors += dev->rcv_errors; | ||
888 | cntrs.port_rcv_errors -= dev->n_port_rcv_errors; | ||
889 | cntrs.port_rcv_remphys_errors -= dev->n_port_rcv_remphys_errors; | ||
890 | cntrs.port_xmit_discards -= dev->n_port_xmit_discards; | ||
891 | cntrs.port_xmit_data -= dev->n_port_xmit_data; | ||
892 | cntrs.port_rcv_data -= dev->n_port_rcv_data; | ||
893 | cntrs.port_xmit_packets -= dev->n_port_xmit_packets; | ||
894 | cntrs.port_rcv_packets -= dev->n_port_rcv_packets; | ||
895 | |||
896 | memset(pmp->data, 0, sizeof(pmp->data)); | ||
897 | |||
898 | p->port_select = port_select; | ||
899 | if (pmp->attr_mod != 0 || | ||
900 | (port_select != port && port_select != 0xFF)) | ||
901 | pmp->status |= IB_SMP_INVALID_FIELD; | ||
902 | |||
903 | if (cntrs.symbol_error_counter > 0xFFFFUL) | ||
904 | p->symbol_error_counter = __constant_cpu_to_be16(0xFFFF); | ||
905 | else | ||
906 | p->symbol_error_counter = | ||
907 | cpu_to_be16((u16)cntrs.symbol_error_counter); | ||
908 | if (cntrs.link_error_recovery_counter > 0xFFUL) | ||
909 | p->link_error_recovery_counter = 0xFF; | ||
910 | else | ||
911 | p->link_error_recovery_counter = | ||
912 | (u8)cntrs.link_error_recovery_counter; | ||
913 | if (cntrs.link_downed_counter > 0xFFUL) | ||
914 | p->link_downed_counter = 0xFF; | ||
915 | else | ||
916 | p->link_downed_counter = (u8)cntrs.link_downed_counter; | ||
917 | if (cntrs.port_rcv_errors > 0xFFFFUL) | ||
918 | p->port_rcv_errors = __constant_cpu_to_be16(0xFFFF); | ||
919 | else | ||
920 | p->port_rcv_errors = | ||
921 | cpu_to_be16((u16) cntrs.port_rcv_errors); | ||
922 | if (cntrs.port_rcv_remphys_errors > 0xFFFFUL) | ||
923 | p->port_rcv_remphys_errors = __constant_cpu_to_be16(0xFFFF); | ||
924 | else | ||
925 | p->port_rcv_remphys_errors = | ||
926 | cpu_to_be16((u16)cntrs.port_rcv_remphys_errors); | ||
927 | if (cntrs.port_xmit_discards > 0xFFFFUL) | ||
928 | p->port_xmit_discards = __constant_cpu_to_be16(0xFFFF); | ||
929 | else | ||
930 | p->port_xmit_discards = | ||
931 | cpu_to_be16((u16)cntrs.port_xmit_discards); | ||
932 | if (cntrs.port_xmit_data > 0xFFFFFFFFUL) | ||
933 | p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF); | ||
934 | else | ||
935 | p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data); | ||
936 | if (cntrs.port_rcv_data > 0xFFFFFFFFUL) | ||
937 | p->port_rcv_data = __constant_cpu_to_be32(0xFFFFFFFF); | ||
938 | else | ||
939 | p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data); | ||
940 | if (cntrs.port_xmit_packets > 0xFFFFFFFFUL) | ||
941 | p->port_xmit_packets = __constant_cpu_to_be32(0xFFFFFFFF); | ||
942 | else | ||
943 | p->port_xmit_packets = | ||
944 | cpu_to_be32((u32)cntrs.port_xmit_packets); | ||
945 | if (cntrs.port_rcv_packets > 0xFFFFFFFFUL) | ||
946 | p->port_rcv_packets = __constant_cpu_to_be32(0xFFFFFFFF); | ||
947 | else | ||
948 | p->port_rcv_packets = | ||
949 | cpu_to_be32((u32) cntrs.port_rcv_packets); | ||
950 | |||
951 | return reply((struct ib_smp *) pmp); | ||
952 | } | ||
953 | |||
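recv_pma_get_portcounters() reports 64-bit software-maintained counts through the 8-, 16- and 32-bit PortCounters fields, so each value is saturated at the field's maximum rather than silently truncated. A generic sketch of that clamp-then-narrow pattern with a hypothetical helper:

/* Sketch: saturate a wide counter into a narrower PMA field. */
#include <stdint.h>
#include <stdio.h>

static uint32_t clamp_counter(uint64_t value, uint64_t field_max)
{
	return value > field_max ? (uint32_t)field_max : (uint32_t)value;
}

int main(void)
{
	uint64_t symbol_errors = 70000;		/* exceeds a 16-bit field */
	uint64_t xmit_data = 123456789;		/* fits in a 32-bit field */

	printf("symbol_error_counter = %u\n", clamp_counter(symbol_errors, 0xFFFF));
	printf("port_xmit_data       = %u\n", clamp_counter(xmit_data, 0xFFFFFFFF));
	return 0;
}
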
954 | static int recv_pma_get_portcounters_ext(struct ib_perf *pmp, | ||
955 | struct ib_device *ibdev, u8 port) | ||
956 | { | ||
957 | struct ib_pma_portcounters_ext *p = | ||
958 | (struct ib_pma_portcounters_ext *)pmp->data; | ||
959 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
960 | u64 swords, rwords, spkts, rpkts, xwait; | ||
961 | u8 port_select = p->port_select; | ||
962 | |||
963 | ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts, | ||
964 | &rpkts, &xwait); | ||
965 | |||
966 | /* Adjust counters for any resets done. */ | ||
967 | swords -= dev->n_port_xmit_data; | ||
968 | rwords -= dev->n_port_rcv_data; | ||
969 | spkts -= dev->n_port_xmit_packets; | ||
970 | rpkts -= dev->n_port_rcv_packets; | ||
971 | |||
972 | memset(pmp->data, 0, sizeof(pmp->data)); | ||
973 | |||
974 | p->port_select = port_select; | ||
975 | if (pmp->attr_mod != 0 || | ||
976 | (port_select != port && port_select != 0xFF)) | ||
977 | pmp->status |= IB_SMP_INVALID_FIELD; | ||
978 | |||
979 | p->port_xmit_data = cpu_to_be64(swords); | ||
980 | p->port_rcv_data = cpu_to_be64(rwords); | ||
981 | p->port_xmit_packets = cpu_to_be64(spkts); | ||
982 | p->port_rcv_packets = cpu_to_be64(rpkts); | ||
983 | p->port_unicast_xmit_packets = cpu_to_be64(dev->n_unicast_xmit); | ||
984 | p->port_unicast_rcv_packets = cpu_to_be64(dev->n_unicast_rcv); | ||
985 | p->port_multicast_xmit_packets = cpu_to_be64(dev->n_multicast_xmit); | ||
986 | p->port_multicast_rcv_packets = cpu_to_be64(dev->n_multicast_rcv); | ||
987 | |||
988 | return reply((struct ib_smp *) pmp); | ||
989 | } | ||
990 | |||
991 | static int recv_pma_set_portcounters(struct ib_perf *pmp, | ||
992 | struct ib_device *ibdev, u8 port) | ||
993 | { | ||
994 | struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) | ||
995 | pmp->data; | ||
996 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
997 | struct ipath_layer_counters cntrs; | ||
998 | |||
999 | /* | ||
1000 | * Since the HW doesn't support clearing counters, we save the | ||
1001 | * current count and subtract it from future responses. | ||
1002 | */ | ||
1003 | ipath_layer_get_counters(dev->dd, &cntrs); | ||
1004 | |||
1005 | if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR) | ||
1006 | dev->n_symbol_error_counter = cntrs.symbol_error_counter; | ||
1007 | |||
1008 | if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY) | ||
1009 | dev->n_link_error_recovery_counter = | ||
1010 | cntrs.link_error_recovery_counter; | ||
1011 | |||
1012 | if (p->counter_select & IB_PMA_SEL_LINK_DOWNED) | ||
1013 | dev->n_link_downed_counter = cntrs.link_downed_counter; | ||
1014 | |||
1015 | if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS) | ||
1016 | dev->n_port_rcv_errors = | ||
1017 | cntrs.port_rcv_errors + dev->rcv_errors; | ||
1018 | |||
1019 | if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS) | ||
1020 | dev->n_port_rcv_remphys_errors = | ||
1021 | cntrs.port_rcv_remphys_errors; | ||
1022 | |||
1023 | if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS) | ||
1024 | dev->n_port_xmit_discards = cntrs.port_xmit_discards; | ||
1025 | |||
1026 | if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA) | ||
1027 | dev->n_port_xmit_data = cntrs.port_xmit_data; | ||
1028 | |||
1029 | if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA) | ||
1030 | dev->n_port_rcv_data = cntrs.port_rcv_data; | ||
1031 | |||
1032 | if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS) | ||
1033 | dev->n_port_xmit_packets = cntrs.port_xmit_packets; | ||
1034 | |||
1035 | if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS) | ||
1036 | dev->n_port_rcv_packets = cntrs.port_rcv_packets; | ||
1037 | |||
1038 | return recv_pma_get_portcounters(pmp, ibdev, port); | ||
1039 | } | ||
1040 | |||
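As the comment in recv_pma_set_portcounters() notes, the hardware counters cannot be cleared, so a PMA "clear" is emulated by snapshotting the current value into the dev->n_* fields and subtracting that baseline in the Get path. A minimal sketch of the same snapshot-and-subtract technique, with hypothetical names:

/* Sketch: software-cleared counters -- snapshot on "clear", subtract on read. */
#include <stdint.h>
#include <stdio.h>

struct counter {
	uint64_t hw;		/* monotonically increasing hardware count */
	uint64_t baseline;	/* value captured at the last "clear" */
};

static void clear_counter(struct counter *c) { c->baseline = c->hw; }
static uint64_t read_counter(const struct counter *c) { return c->hw - c->baseline; }

int main(void)
{
	struct counter c = { .hw = 100, .baseline = 0 };

	clear_counter(&c);	/* a reader now sees 0 */
	c.hw += 7;		/* hardware keeps counting */
	printf("after clear and 7 new events: %llu\n",
	       (unsigned long long)read_counter(&c));
	return 0;
}
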
1041 | static int recv_pma_set_portcounters_ext(struct ib_perf *pmp, | ||
1042 | struct ib_device *ibdev, u8 port) | ||
1043 | { | ||
1044 | struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) | ||
1045 | pmp->data; | ||
1046 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
1047 | u64 swords, rwords, spkts, rpkts, xwait; | ||
1048 | |||
1049 | ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts, | ||
1050 | &rpkts, &xwait); | ||
1051 | |||
1052 | if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA) | ||
1053 | dev->n_port_xmit_data = swords; | ||
1054 | |||
1055 | if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA) | ||
1056 | dev->n_port_rcv_data = rwords; | ||
1057 | |||
1058 | if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS) | ||
1059 | dev->n_port_xmit_packets = spkts; | ||
1060 | |||
1061 | if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS) | ||
1062 | dev->n_port_rcv_packets = rpkts; | ||
1063 | |||
1064 | if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS) | ||
1065 | dev->n_unicast_xmit = 0; | ||
1066 | |||
1067 | if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS) | ||
1068 | dev->n_unicast_rcv = 0; | ||
1069 | |||
1070 | if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS) | ||
1071 | dev->n_multicast_xmit = 0; | ||
1072 | |||
1073 | if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS) | ||
1074 | dev->n_multicast_rcv = 0; | ||
1075 | |||
1076 | return recv_pma_get_portcounters_ext(pmp, ibdev, port); | ||
1077 | } | ||
1078 | |||
1079 | static int process_subn(struct ib_device *ibdev, int mad_flags, | ||
1080 | u8 port_num, struct ib_mad *in_mad, | ||
1081 | struct ib_mad *out_mad) | ||
1082 | { | ||
1083 | struct ib_smp *smp = (struct ib_smp *)out_mad; | ||
1084 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
1085 | int ret; | ||
1086 | |||
1087 | *out_mad = *in_mad; | ||
1088 | if (smp->class_version != 1) { | ||
1089 | smp->status |= IB_SMP_UNSUP_VERSION; | ||
1090 | ret = reply(smp); | ||
1091 | goto bail; | ||
1092 | } | ||
1093 | |||
1094 | /* Is the mkey in the process of expiring? */ | ||
1095 | if (dev->mkey_lease_timeout && jiffies >= dev->mkey_lease_timeout) { | ||
1096 | /* Clear timeout and mkey protection field. */ | ||
1097 | dev->mkey_lease_timeout = 0; | ||
1098 | dev->mkeyprot_resv_lmc &= 0x3F; | ||
1099 | } | ||
1100 | |||
1101 | /* | ||
1102 | * M_Key checking depends on | ||
1103 | * Portinfo:M_Key_protect_bits | ||
1104 | */ | ||
1105 | if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && dev->mkey != 0 && | ||
1106 | dev->mkey != smp->mkey && | ||
1107 | (smp->method == IB_MGMT_METHOD_SET || | ||
1108 | (smp->method == IB_MGMT_METHOD_GET && | ||
1109 | (dev->mkeyprot_resv_lmc >> 7) != 0))) { | ||
1110 | if (dev->mkey_violations != 0xFFFF) | ||
1111 | ++dev->mkey_violations; | ||
1112 | if (dev->mkey_lease_timeout || | ||
1113 | dev->mkey_lease_period == 0) { | ||
1114 | ret = IB_MAD_RESULT_SUCCESS | | ||
1115 | IB_MAD_RESULT_CONSUMED; | ||
1116 | goto bail; | ||
1117 | } | ||
1118 | dev->mkey_lease_timeout = jiffies + | ||
1119 | dev->mkey_lease_period * HZ; | ||
1120 | /* Future: Generate a trap notice. */ | ||
1121 | ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; | ||
1122 | goto bail; | ||
1123 | } else if (dev->mkey_lease_timeout) | ||
1124 | dev->mkey_lease_timeout = 0; | ||
1125 | |||
1126 | switch (smp->method) { | ||
1127 | case IB_MGMT_METHOD_GET: | ||
1128 | switch (smp->attr_id) { | ||
1129 | case IB_SMP_ATTR_NODE_DESC: | ||
1130 | ret = recv_subn_get_nodedescription(smp, ibdev); | ||
1131 | goto bail; | ||
1132 | case IB_SMP_ATTR_NODE_INFO: | ||
1133 | ret = recv_subn_get_nodeinfo(smp, ibdev, port_num); | ||
1134 | goto bail; | ||
1135 | case IB_SMP_ATTR_GUID_INFO: | ||
1136 | ret = recv_subn_get_guidinfo(smp, ibdev); | ||
1137 | goto bail; | ||
1138 | case IB_SMP_ATTR_PORT_INFO: | ||
1139 | ret = recv_subn_get_portinfo(smp, ibdev, port_num); | ||
1140 | goto bail; | ||
1141 | case IB_SMP_ATTR_PKEY_TABLE: | ||
1142 | ret = recv_subn_get_pkeytable(smp, ibdev); | ||
1143 | goto bail; | ||
1144 | case IB_SMP_ATTR_SM_INFO: | ||
1145 | if (dev->port_cap_flags & IB_PORT_SM_DISABLED) { | ||
1146 | ret = IB_MAD_RESULT_SUCCESS | | ||
1147 | IB_MAD_RESULT_CONSUMED; | ||
1148 | goto bail; | ||
1149 | } | ||
1150 | if (dev->port_cap_flags & IB_PORT_SM) { | ||
1151 | ret = IB_MAD_RESULT_SUCCESS; | ||
1152 | goto bail; | ||
1153 | } | ||
1154 | /* FALLTHROUGH */ | ||
1155 | default: | ||
1156 | smp->status |= IB_SMP_UNSUP_METH_ATTR; | ||
1157 | ret = reply(smp); | ||
1158 | goto bail; | ||
1159 | } | ||
1160 | |||
1161 | case IB_MGMT_METHOD_SET: | ||
1162 | switch (smp->attr_id) { | ||
1163 | case IB_SMP_ATTR_GUID_INFO: | ||
1164 | ret = recv_subn_set_guidinfo(smp, ibdev); | ||
1165 | goto bail; | ||
1166 | case IB_SMP_ATTR_PORT_INFO: | ||
1167 | ret = recv_subn_set_portinfo(smp, ibdev, port_num); | ||
1168 | goto bail; | ||
1169 | case IB_SMP_ATTR_PKEY_TABLE: | ||
1170 | ret = recv_subn_set_pkeytable(smp, ibdev); | ||
1171 | goto bail; | ||
1172 | case IB_SMP_ATTR_SM_INFO: | ||
1173 | if (dev->port_cap_flags & IB_PORT_SM_DISABLED) { | ||
1174 | ret = IB_MAD_RESULT_SUCCESS | | ||
1175 | IB_MAD_RESULT_CONSUMED; | ||
1176 | goto bail; | ||
1177 | } | ||
1178 | if (dev->port_cap_flags & IB_PORT_SM) { | ||
1179 | ret = IB_MAD_RESULT_SUCCESS; | ||
1180 | goto bail; | ||
1181 | } | ||
1182 | /* FALLTHROUGH */ | ||
1183 | default: | ||
1184 | smp->status |= IB_SMP_UNSUP_METH_ATTR; | ||
1185 | ret = reply(smp); | ||
1186 | goto bail; | ||
1187 | } | ||
1188 | |||
1189 | case IB_MGMT_METHOD_GET_RESP: | ||
1190 | /* | ||
1191 | * The ib_mad module will call us to process responses | ||
1192 | * before checking for other consumers. | ||
1193 | * Just tell the caller to process it normally. | ||
1194 | */ | ||
1195 | ret = IB_MAD_RESULT_FAILURE; | ||
1196 | goto bail; | ||
1197 | default: | ||
1198 | smp->status |= IB_SMP_UNSUP_METHOD; | ||
1199 | ret = reply(smp); | ||
1200 | } | ||
1201 | |||
1202 | bail: | ||
1203 | return ret; | ||
1204 | } | ||
1205 | |||
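The M_Key gate in process_subn() only rejects a request when the device has a non-zero M_Key that differs from the one carried in the SMP, and then only for Set methods, or for Get methods when the high M_Key protect bit in mkeyprot_resv_lmc is set. A simplified, hedged restatement of that predicate with illustrative names and values:

/* Sketch: the M_Key acceptance predicate; names and method values are illustrative. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define METHOD_GET 0x01
#define METHOD_SET 0x02

static bool mkey_violation(bool ignore_mkey, uint64_t dev_mkey, uint64_t smp_mkey,
			   int method, unsigned mkeyprot_resv_lmc)
{
	if (ignore_mkey || dev_mkey == 0 || dev_mkey == smp_mkey)
		return false;
	/* Sets always need the key; Gets only when the protect bits require it. */
	return method == METHOD_SET ||
	       (method == METHOD_GET && (mkeyprot_resv_lmc >> 7) != 0);
}

int main(void)
{
	printf("%d\n", mkey_violation(false, 0x1234, 0, METHOD_GET, 0x80)); /* 1: rejected */
	printf("%d\n", mkey_violation(false, 0x1234, 0, METHOD_GET, 0x00)); /* 0: allowed */
	return 0;
}
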
1206 | static int process_perf(struct ib_device *ibdev, u8 port_num, | ||
1207 | struct ib_mad *in_mad, | ||
1208 | struct ib_mad *out_mad) | ||
1209 | { | ||
1210 | struct ib_perf *pmp = (struct ib_perf *)out_mad; | ||
1211 | int ret; | ||
1212 | |||
1213 | *out_mad = *in_mad; | ||
1214 | if (pmp->class_version != 1) { | ||
1215 | pmp->status |= IB_SMP_UNSUP_VERSION; | ||
1216 | ret = reply((struct ib_smp *) pmp); | ||
1217 | goto bail; | ||
1218 | } | ||
1219 | |||
1220 | switch (pmp->method) { | ||
1221 | case IB_MGMT_METHOD_GET: | ||
1222 | switch (pmp->attr_id) { | ||
1223 | case IB_PMA_CLASS_PORT_INFO: | ||
1224 | ret = recv_pma_get_classportinfo(pmp); | ||
1225 | goto bail; | ||
1226 | case IB_PMA_PORT_SAMPLES_CONTROL: | ||
1227 | ret = recv_pma_get_portsamplescontrol(pmp, ibdev, | ||
1228 | port_num); | ||
1229 | goto bail; | ||
1230 | case IB_PMA_PORT_SAMPLES_RESULT: | ||
1231 | ret = recv_pma_get_portsamplesresult(pmp, ibdev); | ||
1232 | goto bail; | ||
1233 | case IB_PMA_PORT_SAMPLES_RESULT_EXT: | ||
1234 | ret = recv_pma_get_portsamplesresult_ext(pmp, | ||
1235 | ibdev); | ||
1236 | goto bail; | ||
1237 | case IB_PMA_PORT_COUNTERS: | ||
1238 | ret = recv_pma_get_portcounters(pmp, ibdev, | ||
1239 | port_num); | ||
1240 | goto bail; | ||
1241 | case IB_PMA_PORT_COUNTERS_EXT: | ||
1242 | ret = recv_pma_get_portcounters_ext(pmp, ibdev, | ||
1243 | port_num); | ||
1244 | goto bail; | ||
1245 | default: | ||
1246 | pmp->status |= IB_SMP_UNSUP_METH_ATTR; | ||
1247 | ret = reply((struct ib_smp *) pmp); | ||
1248 | goto bail; | ||
1249 | } | ||
1250 | |||
1251 | case IB_MGMT_METHOD_SET: | ||
1252 | switch (pmp->attr_id) { | ||
1253 | case IB_PMA_PORT_SAMPLES_CONTROL: | ||
1254 | ret = recv_pma_set_portsamplescontrol(pmp, ibdev, | ||
1255 | port_num); | ||
1256 | goto bail; | ||
1257 | case IB_PMA_PORT_COUNTERS: | ||
1258 | ret = recv_pma_set_portcounters(pmp, ibdev, | ||
1259 | port_num); | ||
1260 | goto bail; | ||
1261 | case IB_PMA_PORT_COUNTERS_EXT: | ||
1262 | ret = recv_pma_set_portcounters_ext(pmp, ibdev, | ||
1263 | port_num); | ||
1264 | goto bail; | ||
1265 | default: | ||
1266 | pmp->status |= IB_SMP_UNSUP_METH_ATTR; | ||
1267 | ret = reply((struct ib_smp *) pmp); | ||
1268 | goto bail; | ||
1269 | } | ||
1270 | |||
1271 | case IB_MGMT_METHOD_GET_RESP: | ||
1272 | /* | ||
1273 | * The ib_mad module will call us to process responses | ||
1274 | * before checking for other consumers. | ||
1275 | * Just tell the caller to process it normally. | ||
1276 | */ | ||
1277 | ret = IB_MAD_RESULT_FAILURE; | ||
1278 | goto bail; | ||
1279 | default: | ||
1280 | pmp->status |= IB_SMP_UNSUP_METHOD; | ||
1281 | ret = reply((struct ib_smp *) pmp); | ||
1282 | } | ||
1283 | |||
1284 | bail: | ||
1285 | return ret; | ||
1286 | } | ||
1287 | |||
1288 | /** | ||
1289 | * ipath_process_mad - process an incoming MAD packet | ||
1290 | * @ibdev: the infiniband device this packet came in on | ||
1291 | * @mad_flags: MAD flags | ||
1292 | * @port_num: the port number this packet came in on | ||
1293 | * @in_wc: the work completion entry for this packet | ||
1294 | * @in_grh: the global route header for this packet | ||
1295 | * @in_mad: the incoming MAD | ||
1296 | * @out_mad: any outgoing MAD reply | ||
1297 | * | ||
1298 | * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not | ||
1299 | * interested in processing. | ||
1300 | * | ||
1301 | * Note that the verbs framework has already done the MAD sanity checks, | ||
1302 | * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE | ||
1303 | * MADs. | ||
1304 | * | ||
1305 | * This is called by the ib_mad module. | ||
1306 | */ | ||
1307 | int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, | ||
1308 | struct ib_wc *in_wc, struct ib_grh *in_grh, | ||
1309 | struct ib_mad *in_mad, struct ib_mad *out_mad) | ||
1310 | { | ||
1311 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
1312 | int ret; | ||
1313 | |||
1314 | /* | ||
1315 | * Snapshot current HW counters to "clear" them. | ||
1316 | * This should be done when the driver is loaded except that for | ||
1317 | * some reason we get a zillion errors when bringing up the link. | ||
1318 | */ | ||
1319 | if (dev->rcv_errors == 0) { | ||
1320 | struct ipath_layer_counters cntrs; | ||
1321 | |||
1322 | ipath_layer_get_counters(to_idev(ibdev)->dd, &cntrs); | ||
1323 | dev->rcv_errors++; | ||
1324 | dev->n_symbol_error_counter = cntrs.symbol_error_counter; | ||
1325 | dev->n_link_error_recovery_counter = | ||
1326 | cntrs.link_error_recovery_counter; | ||
1327 | dev->n_link_downed_counter = cntrs.link_downed_counter; | ||
1328 | dev->n_port_rcv_errors = cntrs.port_rcv_errors + 1; | ||
1329 | dev->n_port_rcv_remphys_errors = | ||
1330 | cntrs.port_rcv_remphys_errors; | ||
1331 | dev->n_port_xmit_discards = cntrs.port_xmit_discards; | ||
1332 | dev->n_port_xmit_data = cntrs.port_xmit_data; | ||
1333 | dev->n_port_rcv_data = cntrs.port_rcv_data; | ||
1334 | dev->n_port_xmit_packets = cntrs.port_xmit_packets; | ||
1335 | dev->n_port_rcv_packets = cntrs.port_rcv_packets; | ||
1336 | } | ||
1337 | switch (in_mad->mad_hdr.mgmt_class) { | ||
1338 | case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: | ||
1339 | case IB_MGMT_CLASS_SUBN_LID_ROUTED: | ||
1340 | ret = process_subn(ibdev, mad_flags, port_num, | ||
1341 | in_mad, out_mad); | ||
1342 | goto bail; | ||
1343 | case IB_MGMT_CLASS_PERF_MGMT: | ||
1344 | ret = process_perf(ibdev, port_num, in_mad, out_mad); | ||
1345 | goto bail; | ||
1346 | default: | ||
1347 | ret = IB_MAD_RESULT_SUCCESS; | ||
1348 | } | ||
1349 | |||
1350 | bail: | ||
1351 | return ret; | ||
1352 | } | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c new file mode 100644 index 000000000000..69ffec66d45d --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_mr.c | |||
@@ -0,0 +1,383 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <rdma/ib_pack.h> | ||
34 | #include <rdma/ib_smi.h> | ||
35 | |||
36 | #include "ipath_verbs.h" | ||
37 | |||
38 | /** | ||
39 | * ipath_get_dma_mr - get a DMA memory region | ||
40 | * @pd: protection domain for this memory region | ||
41 | * @acc: access flags | ||
42 | * | ||
43 | * Returns the memory region on success, otherwise returns an errno. | ||
44 | */ | ||
45 | struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc) | ||
46 | { | ||
47 | struct ipath_mr *mr; | ||
48 | struct ib_mr *ret; | ||
49 | |||
50 | mr = kzalloc(sizeof *mr, GFP_KERNEL); | ||
51 | if (!mr) { | ||
52 | ret = ERR_PTR(-ENOMEM); | ||
53 | goto bail; | ||
54 | } | ||
55 | |||
56 | mr->mr.access_flags = acc; | ||
57 | ret = &mr->ibmr; | ||
58 | |||
59 | bail: | ||
60 | return ret; | ||
61 | } | ||
62 | |||
63 | static struct ipath_mr *alloc_mr(int count, | ||
64 | struct ipath_lkey_table *lk_table) | ||
65 | { | ||
66 | struct ipath_mr *mr; | ||
67 | int m, i = 0; | ||
68 | |||
69 | /* Allocate struct plus pointers to first level page tables. */ | ||
70 | m = (count + IPATH_SEGSZ - 1) / IPATH_SEGSZ; | ||
71 | mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); | ||
72 | if (!mr) | ||
73 | goto done; | ||
74 | |||
75 | /* Allocate first level page tables. */ | ||
76 | for (; i < m; i++) { | ||
77 | mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL); | ||
78 | if (!mr->mr.map[i]) | ||
79 | goto bail; | ||
80 | } | ||
81 | mr->mr.mapsz = m; | ||
82 | |||
83 | /* | ||
84 | * ib_reg_phys_mr() will initialize mr->ibmr except for | ||
85 | * lkey and rkey. | ||
86 | */ | ||
87 | if (!ipath_alloc_lkey(lk_table, &mr->mr)) | ||
88 | goto bail; | ||
89 | mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey; | ||
90 | |||
91 | goto done; | ||
92 | |||
93 | bail: | ||
94 | while (i) { | ||
95 | i--; | ||
96 | kfree(mr->mr.map[i]); | ||
97 | } | ||
98 | kfree(mr); | ||
99 | mr = NULL; | ||
100 | |||
101 | done: | ||
102 | return mr; | ||
103 | } | ||
104 | |||
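alloc_mr() sizes the region in two levels: the number of first-level map pages is the ceiling of count / IPATH_SEGSZ, computed with the usual (count + IPATH_SEGSZ - 1) / IPATH_SEGSZ idiom, and the struct is allocated with that many trailing map pointers. A quick standalone check of the sizing arithmetic; the segment count per map page is an assumed example value, not the driver's real IPATH_SEGSZ:

/* Sketch: first-level map sizing as in alloc_mr(); SEGSZ is illustrative. */
#include <stdio.h>

#define SEGSZ 64	/* assumed segments per map page */

int main(void)
{
	int counts[] = { 1, 64, 65, 1000 };
	unsigned i;

	for (i = 0; i < sizeof(counts) / sizeof(counts[0]); i++) {
		int m = (counts[i] + SEGSZ - 1) / SEGSZ;	/* ceiling division */
		printf("count=%4d -> %d map page(s)\n", counts[i], m);
	}
	return 0;
}
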
105 | /** | ||
106 | * ipath_reg_phys_mr - register a physical memory region | ||
107 | * @pd: protection domain for this memory region | ||
108 | * @buffer_list: pointer to the list of physical buffers to register | ||
109 | * @num_phys_buf: the number of physical buffers to register | ||
110 | * @iova_start: the starting address passed over IB which maps to this MR | ||
111 | * | ||
112 | * Returns the memory region on success, otherwise returns an errno. | ||
113 | */ | ||
114 | struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd, | ||
115 | struct ib_phys_buf *buffer_list, | ||
116 | int num_phys_buf, int acc, u64 *iova_start) | ||
117 | { | ||
118 | struct ipath_mr *mr; | ||
119 | int n, m, i; | ||
120 | struct ib_mr *ret; | ||
121 | |||
122 | mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table); | ||
123 | if (mr == NULL) { | ||
124 | ret = ERR_PTR(-ENOMEM); | ||
125 | goto bail; | ||
126 | } | ||
127 | |||
128 | mr->mr.user_base = *iova_start; | ||
129 | mr->mr.iova = *iova_start; | ||
130 | mr->mr.length = 0; | ||
131 | mr->mr.offset = 0; | ||
132 | mr->mr.access_flags = acc; | ||
133 | mr->mr.max_segs = num_phys_buf; | ||
134 | |||
135 | m = 0; | ||
136 | n = 0; | ||
137 | for (i = 0; i < num_phys_buf; i++) { | ||
138 | mr->mr.map[m]->segs[n].vaddr = | ||
139 | phys_to_virt(buffer_list[i].addr); | ||
140 | mr->mr.map[m]->segs[n].length = buffer_list[i].size; | ||
141 | mr->mr.length += buffer_list[i].size; | ||
142 | n++; | ||
143 | if (n == IPATH_SEGSZ) { | ||
144 | m++; | ||
145 | n = 0; | ||
146 | } | ||
147 | } | ||
148 | |||
149 | ret = &mr->ibmr; | ||
150 | |||
151 | bail: | ||
152 | return ret; | ||
153 | } | ||
154 | |||
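ipath_reg_phys_mr() above (and ipath_reg_user_mr() below) fills the two-level segment table the same way: n indexes within the current map page and wraps to the next page (m++) every IPATH_SEGSZ entries. A standalone sketch of that fill loop, with an assumed per-page segment count:

/* Sketch of the (m, n) two-level walk used when registering buffers. */
#include <stdio.h>

#define SEGSZ 4		/* illustrative segments per map page */

int main(void)
{
	int m = 0, n = 0, i;

	for (i = 0; i < 10; i++) {	/* ten buffers to place */
		printf("buffer %d -> map[%d].segs[%d]\n", i, m, n);
		n++;
		if (n == SEGSZ) {	/* current map page is full; move to the next */
			m++;
			n = 0;
		}
	}
	return 0;
}
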
155 | /** | ||
156 | * ipath_reg_user_mr - register a userspace memory region | ||
157 | * @pd: protection domain for this memory region | ||
158 | * @region: the user memory region | ||
159 | * @mr_access_flags: access flags for this memory region | ||
160 | * @udata: unused by the InfiniPath driver | ||
161 | * | ||
162 | * Returns the memory region on success, otherwise returns an errno. | ||
163 | */ | ||
164 | struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, | ||
165 | int mr_access_flags, struct ib_udata *udata) | ||
166 | { | ||
167 | struct ipath_mr *mr; | ||
168 | struct ib_umem_chunk *chunk; | ||
169 | int n, m, i; | ||
170 | struct ib_mr *ret; | ||
171 | |||
172 | n = 0; | ||
173 | list_for_each_entry(chunk, ®ion->chunk_list, list) | ||
174 | n += chunk->nents; | ||
175 | |||
176 | mr = alloc_mr(n, &to_idev(pd->device)->lk_table); | ||
177 | if (!mr) { | ||
178 | ret = ERR_PTR(-ENOMEM); | ||
179 | goto bail; | ||
180 | } | ||
181 | |||
182 | mr->mr.user_base = region->user_base; | ||
183 | mr->mr.iova = region->virt_base; | ||
184 | mr->mr.length = region->length; | ||
185 | mr->mr.offset = region->offset; | ||
186 | mr->mr.access_flags = mr_access_flags; | ||
187 | mr->mr.max_segs = n; | ||
188 | |||
189 | m = 0; | ||
190 | n = 0; | ||
191 | list_for_each_entry(chunk, ®ion->chunk_list, list) { | ||
192 | for (i = 0; i < chunk->nmap; i++) { | ||
193 | mr->mr.map[m]->segs[n].vaddr = | ||
194 | page_address(chunk->page_list[i].page); | ||
195 | mr->mr.map[m]->segs[n].length = region->page_size; | ||
196 | n++; | ||
197 | if (n == IPATH_SEGSZ) { | ||
198 | m++; | ||
199 | n = 0; | ||
200 | } | ||
201 | } | ||
202 | } | ||
203 | ret = &mr->ibmr; | ||
204 | |||
205 | bail: | ||
206 | return ret; | ||
207 | } | ||
208 | |||
209 | /** | ||
210 | * ipath_dereg_mr - unregister and free a memory region | ||
211 | * @ibmr: the memory region to free | ||
212 | * | ||
213 | * Returns 0 on success. | ||
214 | * | ||
215 | * Note that this is called to free MRs created by ipath_get_dma_mr() | ||
216 | * or ipath_reg_user_mr(). | ||
217 | */ | ||
218 | int ipath_dereg_mr(struct ib_mr *ibmr) | ||
219 | { | ||
220 | struct ipath_mr *mr = to_imr(ibmr); | ||
221 | int i; | ||
222 | |||
223 | ipath_free_lkey(&to_idev(ibmr->device)->lk_table, ibmr->lkey); | ||
224 | i = mr->mr.mapsz; | ||
225 | while (i) { | ||
226 | i--; | ||
227 | kfree(mr->mr.map[i]); | ||
228 | } | ||
229 | kfree(mr); | ||
230 | return 0; | ||
231 | } | ||
232 | |||
233 | /** | ||
234 | * ipath_alloc_fmr - allocate a fast memory region | ||
235 | * @pd: the protection domain for this memory region | ||
236 | * @mr_access_flags: access flags for this memory region | ||
237 | * @fmr_attr: fast memory region attributes | ||
238 | * | ||
239 | * Returns the memory region on success, otherwise returns an errno. | ||
240 | */ | ||
241 | struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags, | ||
242 | struct ib_fmr_attr *fmr_attr) | ||
243 | { | ||
244 | struct ipath_fmr *fmr; | ||
245 | int m, i = 0; | ||
246 | struct ib_fmr *ret; | ||
247 | |||
248 | /* Allocate struct plus pointers to first level page tables. */ | ||
249 | m = (fmr_attr->max_pages + IPATH_SEGSZ - 1) / IPATH_SEGSZ; | ||
250 | fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); | ||
251 | if (!fmr) | ||
252 | goto bail; | ||
253 | |||
254 | /* Allocate first level page tables. */ | ||
255 | for (; i < m; i++) { | ||
256 | fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0], | ||
257 | GFP_KERNEL); | ||
258 | if (!fmr->mr.map[i]) | ||
259 | goto bail; | ||
260 | } | ||
261 | fmr->mr.mapsz = m; | ||
262 | |||
263 | /* | ||
264 | * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & | ||
265 | * rkey. | ||
266 | */ | ||
267 | if (!ipath_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr)) | ||
268 | goto bail; | ||
269 | fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mr.lkey; | ||
270 | /* | ||
271 | * Resources are allocated but no valid mapping (RKEY can't be | ||
272 | * used). | ||
273 | */ | ||
274 | fmr->mr.user_base = 0; | ||
275 | fmr->mr.iova = 0; | ||
276 | fmr->mr.length = 0; | ||
277 | fmr->mr.offset = 0; | ||
278 | fmr->mr.access_flags = mr_access_flags; | ||
279 | fmr->mr.max_segs = fmr_attr->max_pages; | ||
280 | fmr->page_shift = fmr_attr->page_shift; | ||
281 | |||
282 | ret = &fmr->ibfmr; | ||
283 | goto done; | ||
284 | |||
285 | bail: | ||
286 | while (i) | ||
287 | kfree(fmr->mr.map[--i]); | ||
288 | kfree(fmr); | ||
289 | ret = ERR_PTR(-ENOMEM); | ||
290 | |||
291 | done: | ||
292 | return ret; | ||
293 | } | ||
294 | |||
295 | /** | ||
296 | * ipath_map_phys_fmr - set up a fast memory region | ||
297 | * @ibfmr: the fast memory region to set up | ||
298 | * @page_list: the list of pages to associate with the fast memory region | ||
299 | * @list_len: the number of pages to associate with the fast memory region | ||
300 | * @iova: the virtual address of the start of the fast memory region | ||
301 | * | ||
302 | * This may be called from interrupt context. | ||
303 | */ | ||
304 | |||
305 | int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list, | ||
306 | int list_len, u64 iova) | ||
307 | { | ||
308 | struct ipath_fmr *fmr = to_ifmr(ibfmr); | ||
309 | struct ipath_lkey_table *rkt; | ||
310 | unsigned long flags; | ||
311 | int m, n, i; | ||
312 | u32 ps; | ||
313 | int ret; | ||
314 | |||
315 | if (list_len > fmr->mr.max_segs) { | ||
316 | ret = -EINVAL; | ||
317 | goto bail; | ||
318 | } | ||
319 | rkt = &to_idev(ibfmr->device)->lk_table; | ||
320 | spin_lock_irqsave(&rkt->lock, flags); | ||
321 | fmr->mr.user_base = iova; | ||
322 | fmr->mr.iova = iova; | ||
323 | ps = 1 << fmr->page_shift; | ||
324 | fmr->mr.length = list_len * ps; | ||
325 | m = 0; | ||
326 | n = 0; | ||
327 | ps = 1 << fmr->page_shift; | ||
328 | for (i = 0; i < list_len; i++) { | ||
329 | fmr->mr.map[m]->segs[n].vaddr = phys_to_virt(page_list[i]); | ||
330 | fmr->mr.map[m]->segs[n].length = ps; | ||
331 | if (++n == IPATH_SEGSZ) { | ||
332 | m++; | ||
333 | n = 0; | ||
334 | } | ||
335 | } | ||
336 | spin_unlock_irqrestore(&rkt->lock, flags); | ||
337 | ret = 0; | ||
338 | |||
339 | bail: | ||
340 | return ret; | ||
341 | } | ||
342 | |||
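ipath_map_phys_fmr() derives the page size from the FMR's page_shift (ps = 1 << page_shift) and the mapped length as list_len pages of that size. A one-line check of that arithmetic with example values:

/* Check: page_shift 12 and 8 pages span 8 * 4096 = 32768 bytes. */
#include <stdio.h>

int main(void)
{
	unsigned page_shift = 12, list_len = 8;
	unsigned long ps = 1UL << page_shift;

	printf("page size %lu, mapped length %lu\n", ps, (unsigned long)list_len * ps);
	return 0;
}
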
343 | /** | ||
344 | * ipath_unmap_fmr - unmap fast memory regions | ||
345 | * @fmr_list: the list of fast memory regions to unmap | ||
346 | * | ||
347 | * Returns 0 on success. | ||
348 | */ | ||
349 | int ipath_unmap_fmr(struct list_head *fmr_list) | ||
350 | { | ||
351 | struct ipath_fmr *fmr; | ||
352 | struct ipath_lkey_table *rkt; | ||
353 | unsigned long flags; | ||
354 | |||
355 | list_for_each_entry(fmr, fmr_list, ibfmr.list) { | ||
356 | rkt = &to_idev(fmr->ibfmr.device)->lk_table; | ||
357 | spin_lock_irqsave(&rkt->lock, flags); | ||
358 | fmr->mr.user_base = 0; | ||
359 | fmr->mr.iova = 0; | ||
360 | fmr->mr.length = 0; | ||
361 | spin_unlock_irqrestore(&rkt->lock, flags); | ||
362 | } | ||
363 | return 0; | ||
364 | } | ||
365 | |||
366 | /** | ||
367 | * ipath_dealloc_fmr - deallocate a fast memory region | ||
368 | * @ibfmr: the fast memory region to deallocate | ||
369 | * | ||
370 | * Returns 0 on success. | ||
371 | */ | ||
372 | int ipath_dealloc_fmr(struct ib_fmr *ibfmr) | ||
373 | { | ||
374 | struct ipath_fmr *fmr = to_ifmr(ibfmr); | ||
375 | int i; | ||
376 | |||
377 | ipath_free_lkey(&to_idev(ibfmr->device)->lk_table, ibfmr->lkey); | ||
378 | i = fmr->mr.mapsz; | ||
379 | while (i) | ||
380 | kfree(fmr->mr.map[--i]); | ||
381 | kfree(fmr); | ||
382 | return 0; | ||
383 | } | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_pe800.c b/drivers/infiniband/hw/ipath/ipath_pe800.c new file mode 100644 index 000000000000..e1dc4f757062 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_pe800.c | |||
@@ -0,0 +1,1247 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | /* | ||
33 | * This file contains all of the code that is specific to the | ||
34 | * InfiniPath PE-800 chip. | ||
35 | */ | ||
36 | |||
37 | #include <linux/interrupt.h> | ||
38 | #include <linux/pci.h> | ||
39 | #include <linux/delay.h> | ||
40 | |||
41 | |||
42 | #include "ipath_kernel.h" | ||
43 | #include "ipath_registers.h" | ||
44 | |||
45 | /* | ||
46 | * This file contains all the chip-specific register information and | ||
47 | * access functions for the PathScale PE800, the PCI-Express chip. | ||
48 | * | ||
49 | * This lists the InfiniPath PE800 registers, in the actual chip layout. | ||
50 | * This structure should never be directly accessed. | ||
51 | */ | ||
52 | struct _infinipath_do_not_use_kernel_regs { | ||
53 | unsigned long long Revision; | ||
54 | unsigned long long Control; | ||
55 | unsigned long long PageAlign; | ||
56 | unsigned long long PortCnt; | ||
57 | unsigned long long DebugPortSelect; | ||
58 | unsigned long long Reserved0; | ||
59 | unsigned long long SendRegBase; | ||
60 | unsigned long long UserRegBase; | ||
61 | unsigned long long CounterRegBase; | ||
62 | unsigned long long Scratch; | ||
63 | unsigned long long Reserved1; | ||
64 | unsigned long long Reserved2; | ||
65 | unsigned long long IntBlocked; | ||
66 | unsigned long long IntMask; | ||
67 | unsigned long long IntStatus; | ||
68 | unsigned long long IntClear; | ||
69 | unsigned long long ErrorMask; | ||
70 | unsigned long long ErrorStatus; | ||
71 | unsigned long long ErrorClear; | ||
72 | unsigned long long HwErrMask; | ||
73 | unsigned long long HwErrStatus; | ||
74 | unsigned long long HwErrClear; | ||
75 | unsigned long long HwDiagCtrl; | ||
76 | unsigned long long MDIO; | ||
77 | unsigned long long IBCStatus; | ||
78 | unsigned long long IBCCtrl; | ||
79 | unsigned long long ExtStatus; | ||
80 | unsigned long long ExtCtrl; | ||
81 | unsigned long long GPIOOut; | ||
82 | unsigned long long GPIOMask; | ||
83 | unsigned long long GPIOStatus; | ||
84 | unsigned long long GPIOClear; | ||
85 | unsigned long long RcvCtrl; | ||
86 | unsigned long long RcvBTHQP; | ||
87 | unsigned long long RcvHdrSize; | ||
88 | unsigned long long RcvHdrCnt; | ||
89 | unsigned long long RcvHdrEntSize; | ||
90 | unsigned long long RcvTIDBase; | ||
91 | unsigned long long RcvTIDCnt; | ||
92 | unsigned long long RcvEgrBase; | ||
93 | unsigned long long RcvEgrCnt; | ||
94 | unsigned long long RcvBufBase; | ||
95 | unsigned long long RcvBufSize; | ||
96 | unsigned long long RxIntMemBase; | ||
97 | unsigned long long RxIntMemSize; | ||
98 | unsigned long long RcvPartitionKey; | ||
99 | unsigned long long Reserved3; | ||
100 | unsigned long long RcvPktLEDCnt; | ||
101 | unsigned long long Reserved4[8]; | ||
102 | unsigned long long SendCtrl; | ||
103 | unsigned long long SendPIOBufBase; | ||
104 | unsigned long long SendPIOSize; | ||
105 | unsigned long long SendPIOBufCnt; | ||
106 | unsigned long long SendPIOAvailAddr; | ||
107 | unsigned long long TxIntMemBase; | ||
108 | unsigned long long TxIntMemSize; | ||
109 | unsigned long long Reserved5; | ||
110 | unsigned long long PCIeRBufTestReg0; | ||
111 | unsigned long long PCIeRBufTestReg1; | ||
112 | unsigned long long Reserved51[6]; | ||
113 | unsigned long long SendBufferError; | ||
114 | unsigned long long SendBufferErrorCONT1; | ||
115 | unsigned long long Reserved6SBE[6]; | ||
116 | unsigned long long RcvHdrAddr0; | ||
117 | unsigned long long RcvHdrAddr1; | ||
118 | unsigned long long RcvHdrAddr2; | ||
119 | unsigned long long RcvHdrAddr3; | ||
120 | unsigned long long RcvHdrAddr4; | ||
121 | unsigned long long Reserved7RHA[11]; | ||
122 | unsigned long long RcvHdrTailAddr0; | ||
123 | unsigned long long RcvHdrTailAddr1; | ||
124 | unsigned long long RcvHdrTailAddr2; | ||
125 | unsigned long long RcvHdrTailAddr3; | ||
126 | unsigned long long RcvHdrTailAddr4; | ||
127 | unsigned long long Reserved8RHTA[11]; | ||
128 | unsigned long long Reserved9SW[8]; | ||
129 | unsigned long long SerdesConfig0; | ||
130 | unsigned long long SerdesConfig1; | ||
131 | unsigned long long SerdesStatus; | ||
132 | unsigned long long XGXSConfig; | ||
133 | unsigned long long IBPLLCfg; | ||
134 | unsigned long long Reserved10SW2[3]; | ||
135 | unsigned long long PCIEQ0SerdesConfig0; | ||
136 | unsigned long long PCIEQ0SerdesConfig1; | ||
137 | unsigned long long PCIEQ0SerdesStatus; | ||
138 | unsigned long long Reserved11; | ||
139 | unsigned long long PCIEQ1SerdesConfig0; | ||
140 | unsigned long long PCIEQ1SerdesConfig1; | ||
141 | unsigned long long PCIEQ1SerdesStatus; | ||
142 | unsigned long long Reserved12; | ||
143 | }; | ||
144 | |||
145 | #define IPATH_KREG_OFFSET(field) (offsetof(struct \ | ||
146 | _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) | ||
147 | #define IPATH_CREG_OFFSET(field) (offsetof( \ | ||
148 | struct infinipath_counters, field) / sizeof(u64)) | ||
149 | |||
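IPATH_KREG_OFFSET() turns a field of the do-not-use-directly register layout struct into its 64-bit register index by dividing the offsetof() byte offset by sizeof(u64); IPATH_CREG_OFFSET() does the same for the counter registers. A small sketch of the same trick on a toy register block:

/* Sketch: derive 64-bit register indices from a struct layout via offsetof. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct toy_regs {
	uint64_t Revision;	/* index 0 */
	uint64_t Control;	/* index 1 */
	uint64_t PageAlign;	/* index 2 */
};

#define TOY_KREG_OFFSET(field) (offsetof(struct toy_regs, field) / sizeof(uint64_t))

int main(void)
{
	printf("Control index = %zu\n", TOY_KREG_OFFSET(Control));	/* 1 */
	printf("PageAlign index = %zu\n", TOY_KREG_OFFSET(PageAlign));	/* 2 */
	return 0;
}
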
150 | static const struct ipath_kregs ipath_pe_kregs = { | ||
151 | .kr_control = IPATH_KREG_OFFSET(Control), | ||
152 | .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase), | ||
153 | .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect), | ||
154 | .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear), | ||
155 | .kr_errormask = IPATH_KREG_OFFSET(ErrorMask), | ||
156 | .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus), | ||
157 | .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl), | ||
158 | .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus), | ||
159 | .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear), | ||
160 | .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask), | ||
161 | .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut), | ||
162 | .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus), | ||
163 | .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl), | ||
164 | .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear), | ||
165 | .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask), | ||
166 | .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus), | ||
167 | .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl), | ||
168 | .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus), | ||
169 | .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked), | ||
170 | .kr_intclear = IPATH_KREG_OFFSET(IntClear), | ||
171 | .kr_intmask = IPATH_KREG_OFFSET(IntMask), | ||
172 | .kr_intstatus = IPATH_KREG_OFFSET(IntStatus), | ||
173 | .kr_mdio = IPATH_KREG_OFFSET(MDIO), | ||
174 | .kr_pagealign = IPATH_KREG_OFFSET(PageAlign), | ||
175 | .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey), | ||
176 | .kr_portcnt = IPATH_KREG_OFFSET(PortCnt), | ||
177 | .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP), | ||
178 | .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase), | ||
179 | .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize), | ||
180 | .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl), | ||
181 | .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase), | ||
182 | .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt), | ||
183 | .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt), | ||
184 | .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize), | ||
185 | .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize), | ||
186 | .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase), | ||
187 | .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize), | ||
188 | .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase), | ||
189 | .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt), | ||
190 | .kr_revision = IPATH_KREG_OFFSET(Revision), | ||
191 | .kr_scratch = IPATH_KREG_OFFSET(Scratch), | ||
192 | .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError), | ||
193 | .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl), | ||
194 | .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr), | ||
195 | .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase), | ||
196 | .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt), | ||
197 | .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize), | ||
198 | .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase), | ||
199 | .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase), | ||
200 | .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize), | ||
201 | .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase), | ||
202 | .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0), | ||
203 | .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1), | ||
204 | .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus), | ||
205 | .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig), | ||
206 | .kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg), | ||
207 | |||
208 | /* | ||
209 | * These should not be used directly via ipath_read_kreg64(), | ||
210 | * use them with ipath_read_kreg64_port() | ||
211 | */ | ||
212 | .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), | ||
213 | .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0), | ||
214 | |||
215 | /* This group is pe-800-specific; and used only in this file */ | ||
216 | /* The rcvpktled register controls one of the debug port signals, so | ||
217 | * a packet activity LED can be connected to it. */ | ||
218 | .kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt), | ||
219 | .kr_pcierbuftestreg0 = IPATH_KREG_OFFSET(PCIeRBufTestReg0), | ||
220 | .kr_pcierbuftestreg1 = IPATH_KREG_OFFSET(PCIeRBufTestReg1), | ||
221 | .kr_pcieq0serdesconfig0 = IPATH_KREG_OFFSET(PCIEQ0SerdesConfig0), | ||
222 | .kr_pcieq0serdesconfig1 = IPATH_KREG_OFFSET(PCIEQ0SerdesConfig1), | ||
223 | .kr_pcieq0serdesstatus = IPATH_KREG_OFFSET(PCIEQ0SerdesStatus), | ||
224 | .kr_pcieq1serdesconfig0 = IPATH_KREG_OFFSET(PCIEQ1SerdesConfig0), | ||
225 | .kr_pcieq1serdesconfig1 = IPATH_KREG_OFFSET(PCIEQ1SerdesConfig1), | ||
226 | .kr_pcieq1serdesstatus = IPATH_KREG_OFFSET(PCIEQ1SerdesStatus) | ||
227 | }; | ||
228 | |||
229 | static const struct ipath_cregs ipath_pe_cregs = { | ||
230 | .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt), | ||
231 | .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt), | ||
232 | .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt), | ||
233 | .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt), | ||
234 | .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt), | ||
235 | .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt), | ||
236 | .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt), | ||
237 | .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt), | ||
238 | .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt), | ||
239 | .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt), | ||
240 | .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt), | ||
241 | .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt), | ||
242 | .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt), | ||
243 | .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt), | ||
244 | .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt), | ||
245 | .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt), | ||
246 | .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt), | ||
247 | .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt), | ||
248 | .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt), | ||
249 | .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt), | ||
250 | .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt), | ||
251 | .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt), | ||
252 | .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt), | ||
253 | .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt), | ||
254 | .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt), | ||
255 | .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt), | ||
256 | .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt), | ||
257 | .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt), | ||
258 | .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt), | ||
259 | .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt), | ||
260 | .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt), | ||
261 | .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt), | ||
262 | .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt) | ||
263 | }; | ||
264 | |||
265 | /* kr_intstatus, kr_intclear, kr_intmask bits */ | ||
266 | #define INFINIPATH_I_RCVURG_MASK 0x1F | ||
267 | #define INFINIPATH_I_RCVAVAIL_MASK 0x1F | ||
268 | |||
269 | /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ | ||
270 | #define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK 0x000000000000003fULL | ||
271 | #define INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT 0 | ||
272 | #define INFINIPATH_HWE_PCIEPOISONEDTLP 0x0000000010000000ULL | ||
273 | #define INFINIPATH_HWE_PCIECPLTIMEOUT 0x0000000020000000ULL | ||
274 | #define INFINIPATH_HWE_PCIEBUSPARITYXTLH 0x0000000040000000ULL | ||
275 | #define INFINIPATH_HWE_PCIEBUSPARITYXADM 0x0000000080000000ULL | ||
276 | #define INFINIPATH_HWE_PCIEBUSPARITYRADM 0x0000000100000000ULL | ||
277 | #define INFINIPATH_HWE_COREPLL_FBSLIP 0x0080000000000000ULL | ||
278 | #define INFINIPATH_HWE_COREPLL_RFSLIP 0x0100000000000000ULL | ||
279 | #define INFINIPATH_HWE_PCIE1PLLFAILED 0x0400000000000000ULL | ||
280 | #define INFINIPATH_HWE_PCIE0PLLFAILED 0x0800000000000000ULL | ||
281 | #define INFINIPATH_HWE_SERDESPLLFAILED 0x1000000000000000ULL | ||
282 | |||
283 | /* kr_extstatus bits */ | ||
284 | #define INFINIPATH_EXTS_FREQSEL 0x2 | ||
285 | #define INFINIPATH_EXTS_SERDESSEL 0x4 | ||
286 | #define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 | ||
287 | #define INFINIPATH_EXTS_MEMBIST_FOUND 0x0000000000008000 | ||
288 | |||
289 | #define _IPATH_GPIO_SDA_NUM 1 | ||
290 | #define _IPATH_GPIO_SCL_NUM 0 | ||
291 | |||
292 | #define IPATH_GPIO_SDA (1ULL << \ | ||
293 | (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) | ||
294 | #define IPATH_GPIO_SCL (1ULL << \ | ||
295 | (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) | ||
296 | |||
297 | /** | ||
298 | * ipath_pe_handle_hwerrors - display hardware errors. | ||
299 | * @dd: the infinipath device | ||
300 | * @msg: the output buffer | ||
301 | * @msgl: the size of the output buffer | ||
302 | * | ||
303 | * Most hardware errors are catastrophic, but for right now, we'll | ||
304 | * print them and continue. We reuse the same message buffer as | ||
305 | * ipath_handle_errors() to avoid excessive stack usage. | ||
306 | * | ||
307 | */ | ||
308 | static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, | ||
309 | size_t msgl) | ||
310 | { | ||
311 | ipath_err_t hwerrs; | ||
312 | u32 bits, ctrl; | ||
313 | int isfatal = 0; | ||
314 | char bitsmsg[64]; | ||
315 | |||
316 | hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); | ||
317 | if (!hwerrs) { | ||
318 | /* | ||
319 | * Bail early rather than printing confusing messages. This seems | ||
320 | * to be related to clearing the CRC error, or the PLL error, | ||
321 | * during init. | ||
322 | */ | ||
323 | ipath_cdbg(VERBOSE, "Called but no hardware errors set\n"); | ||
324 | return; | ||
325 | } else if (hwerrs == ~0ULL) { | ||
326 | ipath_dev_err(dd, "Read of hardware error status failed " | ||
327 | "(all bits set); ignoring\n"); | ||
328 | return; | ||
329 | } | ||
330 | ipath_stats.sps_hwerrs++; | ||
331 | |||
332 | /* Always clear the error status register, except MEMBISTFAIL, | ||
333 | * regardless of whether we continue or stop using the chip. | ||
334 | * We want that set so we know it failed, even across driver reload. | ||
335 | * We'll still ignore it in the hwerrmask. We do this partly for | ||
336 | * diagnostics, but also for support */ | ||
337 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, | ||
338 | hwerrs&~INFINIPATH_HWE_MEMBISTFAILED); | ||
339 | |||
340 | hwerrs &= dd->ipath_hwerrmask; | ||
341 | |||
342 | /* | ||
343 | * make sure we get this much out, unless told to be quiet, | ||
344 | * or it's occurred within the last 5 seconds | ||
345 | */ | ||
346 | if ((hwerrs & ~dd->ipath_lasthwerror) || | ||
347 | (ipath_debug & __IPATH_VERBDBG)) | ||
348 | dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " | ||
349 | "(cleared)\n", (unsigned long long) hwerrs); | ||
350 | dd->ipath_lasthwerror |= hwerrs; | ||
351 | |||
352 | if (hwerrs & ~infinipath_hwe_bitsextant) | ||
353 | ipath_dev_err(dd, "hwerror interrupt with unknown errors " | ||
354 | "%llx set\n", (unsigned long long) | ||
355 | (hwerrs & ~infinipath_hwe_bitsextant)); | ||
356 | |||
357 | ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); | ||
358 | if (ctrl & INFINIPATH_C_FREEZEMODE) { | ||
359 | if (hwerrs) { | ||
360 | /* | ||
361 | * if any set that we aren't ignoring only make the | ||
362 | * complaint once, in case it's stuck or recurring, | ||
363 | * and we get here multiple times | ||
364 | */ | ||
365 | if (dd->ipath_flags & IPATH_INITTED) { | ||
366 | ipath_dev_err(dd, "Fatal Error (freeze " | ||
367 | "mode), no longer usable\n"); | ||
368 | isfatal = 1; | ||
369 | } | ||
370 | /* | ||
371 | * Mark as having had an error for driver, and also | ||
372 | * for /sys and status word mapped to user programs. | ||
373 | * This marks unit as not usable, until reset | ||
374 | */ | ||
375 | *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; | ||
376 | *dd->ipath_statusp |= IPATH_STATUS_HWERROR; | ||
377 | dd->ipath_flags &= ~IPATH_INITTED; | ||
378 | } else { | ||
379 | ipath_dbg("Clearing freezemode on ignored hardware " | ||
380 | "error\n"); | ||
381 | ctrl &= ~INFINIPATH_C_FREEZEMODE; | ||
382 | ipath_write_kreg(dd, dd->ipath_kregs->kr_control, | ||
383 | ctrl); | ||
384 | } | ||
385 | } | ||
386 | |||
387 | *msg = '\0'; | ||
388 | |||
389 | if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { | ||
390 | strlcat(msg, "[Memory BIST test failed, PE-800 unusable]", | ||
391 | msgl); | ||
392 | /* ignore from now on, so disable until driver reloaded */ | ||
393 | *dd->ipath_statusp |= IPATH_STATUS_HWERROR; | ||
394 | dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED; | ||
395 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, | ||
396 | dd->ipath_hwerrmask); | ||
397 | } | ||
398 | if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK | ||
399 | << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) { | ||
400 | bits = (u32) ((hwerrs >> | ||
401 | INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) & | ||
402 | INFINIPATH_HWE_RXEMEMPARITYERR_MASK); | ||
403 | snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ", | ||
404 | bits); | ||
405 | strlcat(msg, bitsmsg, msgl); | ||
406 | } | ||
407 | if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK | ||
408 | << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { | ||
409 | bits = (u32) ((hwerrs >> | ||
410 | INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) & | ||
411 | INFINIPATH_HWE_TXEMEMPARITYERR_MASK); | ||
412 | snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ", | ||
413 | bits); | ||
414 | strlcat(msg, bitsmsg, msgl); | ||
415 | } | ||
416 | if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK | ||
417 | << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) { | ||
418 | bits = (u32) ((hwerrs >> | ||
419 | INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) & | ||
420 | INFINIPATH_HWE_PCIEMEMPARITYERR_MASK); | ||
421 | snprintf(bitsmsg, sizeof bitsmsg, | ||
422 | "[PCIe Mem Parity Errs %x] ", bits); | ||
423 | strlcat(msg, bitsmsg, msgl); | ||
424 | } | ||
425 | if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR) | ||
426 | strlcat(msg, "[IB2IPATH Parity]", msgl); | ||
427 | if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR) | ||
428 | strlcat(msg, "[IPATH2IB Parity]", msgl); | ||
429 | |||
430 | #define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \ | ||
431 | INFINIPATH_HWE_COREPLL_RFSLIP ) | ||
432 | |||
433 | if (hwerrs & _IPATH_PLL_FAIL) { | ||
434 | snprintf(bitsmsg, sizeof bitsmsg, | ||
435 | "[PLL failed (%llx), PE-800 unusable]", | ||
436 | (unsigned long long) hwerrs & _IPATH_PLL_FAIL); | ||
437 | strlcat(msg, bitsmsg, msgl); | ||
438 | /* ignore from now on, so disable until driver reloaded */ | ||
439 | dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL); | ||
440 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, | ||
441 | dd->ipath_hwerrmask); | ||
442 | } | ||
443 | |||
444 | if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) { | ||
445 | /* | ||
446 | * If it occurs, it is left masked since the external | ||
447 | * interface is unused. | ||
448 | */ | ||
449 | dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED; | ||
450 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, | ||
451 | dd->ipath_hwerrmask); | ||
452 | } | ||
453 | |||
454 | if (hwerrs & INFINIPATH_HWE_PCIEPOISONEDTLP) | ||
455 | strlcat(msg, "[PCIe Poisoned TLP]", msgl); | ||
456 | if (hwerrs & INFINIPATH_HWE_PCIECPLTIMEOUT) | ||
457 | strlcat(msg, "[PCIe completion timeout]", msgl); | ||
458 | |||
459 | /* | ||
460 | * In practice, it's unlikely that we'll see PCIe PLL, bus parity, | ||
461 | * or memory parity failures, because most likely we won't be able | ||
462 | * to talk to the core of the chip at all. Nonetheless, we might | ||
463 | * see them if they are in parts of the PCIe core that aren't | ||
464 | * essential. | ||
465 | */ | ||
466 | if (hwerrs & INFINIPATH_HWE_PCIE1PLLFAILED) | ||
467 | strlcat(msg, "[PCIePLL1]", msgl); | ||
468 | if (hwerrs & INFINIPATH_HWE_PCIE0PLLFAILED) | ||
469 | strlcat(msg, "[PCIePLL0]", msgl); | ||
470 | if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXTLH) | ||
471 | strlcat(msg, "[PCIe XTLH core parity]", msgl); | ||
472 | if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXADM) | ||
473 | strlcat(msg, "[PCIe ADM TX core parity]", msgl); | ||
474 | if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYRADM) | ||
475 | strlcat(msg, "[PCIe ADM RX core parity]", msgl); | ||
476 | |||
477 | if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR) | ||
478 | strlcat(msg, "[Rx Dsync]", msgl); | ||
479 | if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) | ||
480 | strlcat(msg, "[SerDes PLL]", msgl); | ||
481 | |||
482 | ipath_dev_err(dd, "%s hardware error\n", msg); | ||
483 | if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) { | ||
484 | /* | ||
485 | * for /sys status file ; if no trailing } is copied, we'll | ||
486 | * know it was truncated. | ||
487 | */ | ||
488 | snprintf(dd->ipath_freezemsg, dd->ipath_freezelen, | ||
489 | "{%s}", msg); | ||
490 | } | ||
491 | } | ||
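Editorial note: the RXE/TXE/PCIe parity branches above all use the same shift-and-mask pattern to pull a multi-bit counter field out of the 64-bit hwerr status word. A minimal sketch of that pattern, with a helper name of our own choosing (not part of the driver), assuming the kernel's u32/u64 types from the surrounding file:

static inline u32 hwerr_field(u64 hwerrs, u64 mask, u32 shift)
{
	/* isolate one multi-bit error field, as the parity branches above do */
	return (u32)((hwerrs >> shift) & mask);
}

/* e.g. hwerr_field(hwerrs, INFINIPATH_HWE_PCIEMEMPARITYERR_MASK,
 *                  INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) */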
492 | |||
493 | /** | ||
494 | * ipath_pe_boardname - fill in the board name | ||
495 | * @dd: the infinipath device | ||
496 | * @name: the output buffer | ||
497 | * @namelen: the size of the output buffer | ||
498 | * | ||
499 | * info is based on the board revision register | ||
500 | */ | ||
501 | static int ipath_pe_boardname(struct ipath_devdata *dd, char *name, | ||
502 | size_t namelen) | ||
503 | { | ||
504 | char *n = NULL; | ||
505 | u8 boardrev = dd->ipath_boardrev; | ||
506 | int ret; | ||
507 | |||
508 | switch (boardrev) { | ||
509 | case 0: | ||
510 | n = "InfiniPath_Emulation"; | ||
511 | break; | ||
512 | case 1: | ||
513 | n = "InfiniPath_PE-800-Bringup"; | ||
514 | break; | ||
515 | case 2: | ||
516 | n = "InfiniPath_PE-880"; | ||
517 | break; | ||
518 | case 3: | ||
519 | n = "InfiniPath_PE-850"; | ||
520 | break; | ||
521 | case 4: | ||
522 | n = "InfiniPath_PE-860"; | ||
523 | break; | ||
524 | default: | ||
525 | ipath_dev_err(dd, | ||
526 | "Don't yet know about board with ID %u\n", | ||
527 | boardrev); | ||
528 | snprintf(name, namelen, "Unknown_InfiniPath_PE-8xx_%u", | ||
529 | boardrev); | ||
530 | break; | ||
531 | } | ||
532 | if (n) | ||
533 | snprintf(name, namelen, "%s", n); | ||
534 | |||
535 | if (dd->ipath_majrev != 4 || dd->ipath_minrev != 1) { | ||
536 | ipath_dev_err(dd, "Unsupported PE-800 revision %u.%u!\n", | ||
537 | dd->ipath_majrev, dd->ipath_minrev); | ||
538 | ret = 1; | ||
539 | } else | ||
540 | ret = 0; | ||
541 | |||
542 | return ret; | ||
543 | } | ||
544 | |||
545 | /** | ||
546 | * ipath_pe_init_hwerrors - enable hardware errors | ||
547 | * @dd: the infinipath device | ||
548 | * | ||
549 | * now that we have finished initializing everything that might reasonably | ||
550 | * cause a hardware error, and cleared those error bits as they occur, | ||
551 | * we can enable hardware errors in the mask (potentially enabling | ||
552 | * freeze mode), and enable hardware errors as errors (along with | ||
553 | * everything else) in errormask | ||
554 | */ | ||
555 | static void ipath_pe_init_hwerrors(struct ipath_devdata *dd) | ||
556 | { | ||
557 | ipath_err_t val; | ||
558 | u64 extsval; | ||
559 | |||
560 | extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); | ||
561 | |||
562 | if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) | ||
563 | ipath_dev_err(dd, "MemBIST did not complete!\n"); | ||
564 | |||
565 | val = ~0ULL; /* barring bugs, all hwerrors become interrupts */ | ||
566 | |||
567 | if (!dd->ipath_boardrev)	/* no PLL for Emulator */ | ||
568 | val &= ~INFINIPATH_HWE_SERDESPLLFAILED; | ||
569 | |||
570 | /* workaround bug 9460 in internal interface bus parity checking */ | ||
571 | val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM; | ||
572 | |||
573 | dd->ipath_hwerrmask = val; | ||
574 | } | ||
575 | |||
576 | /** | ||
577 | * ipath_pe_bringup_serdes - bring up the serdes | ||
578 | * @dd: the infinipath device | ||
579 | */ | ||
580 | static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) | ||
581 | { | ||
582 | u64 val, tmp, config1; | ||
583 | int ret = 0, change = 0; | ||
584 | |||
585 | ipath_dbg("Trying to bringup serdes\n"); | ||
586 | |||
587 | if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) & | ||
588 | INFINIPATH_HWE_SERDESPLLFAILED) { | ||
589 | ipath_dbg("At start, serdes PLL failed bit set " | ||
590 | "in hwerrstatus, clearing and continuing\n"); | ||
591 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, | ||
592 | INFINIPATH_HWE_SERDESPLLFAILED); | ||
593 | } | ||
594 | |||
595 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); | ||
596 | config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1); | ||
597 | |||
598 | ipath_cdbg(VERBOSE, "SerDes status config0=%llx config1=%llx, " | ||
599 | "xgxsconfig %llx\n", (unsigned long long) val, | ||
600 | (unsigned long long) config1, (unsigned long long) | ||
601 | ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); | ||
602 | |||
603 | /* | ||
604 | * Force reset on, also set rxdetect enable. Must do before reading | ||
605 | * serdesstatus at least for simulation, or some of the bits in | ||
606 | * serdes status will come back as undefined and cause simulation | ||
607 | * failures | ||
608 | */ | ||
609 | val |= INFINIPATH_SERDC0_RESET_PLL | INFINIPATH_SERDC0_RXDETECT_EN | ||
610 | | INFINIPATH_SERDC0_L1PWR_DN; | ||
611 | ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); | ||
612 | /* be sure chip saw it */ | ||
613 | tmp = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); | ||
614 | udelay(5); /* need pll reset set at least for a bit */ | ||
615 | /* | ||
616 | * after PLL is reset, set the per-lane Resets and TxIdle and | ||
617 | * clear the PLL reset and rxdetect (to get falling edge). | ||
618 | * Leave L1PWR bits set (permanently) | ||
619 | */ | ||
620 | val &= ~(INFINIPATH_SERDC0_RXDETECT_EN | INFINIPATH_SERDC0_RESET_PLL | ||
621 | | INFINIPATH_SERDC0_L1PWR_DN); | ||
622 | val |= INFINIPATH_SERDC0_RESET_MASK | INFINIPATH_SERDC0_TXIDLE; | ||
623 | ipath_cdbg(VERBOSE, "Clearing pll reset and setting lane resets " | ||
624 | "and txidle (%llx)\n", (unsigned long long) val); | ||
625 | ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); | ||
626 | /* be sure chip saw it */ | ||
627 | tmp = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); | ||
628 | /* need PLL reset clear for at least 11 usec before lane | ||
629 | * resets cleared; give it a few more to be sure */ | ||
630 | udelay(15); | ||
631 | val &= ~(INFINIPATH_SERDC0_RESET_MASK | INFINIPATH_SERDC0_TXIDLE); | ||
632 | |||
633 | ipath_cdbg(VERBOSE, "Clearing lane resets and txidle " | ||
634 | "(writing %llx)\n", (unsigned long long) val); | ||
635 | ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); | ||
636 | /* be sure chip saw it */ | ||
637 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); | ||
638 | |||
639 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); | ||
640 | if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) & | ||
641 | INFINIPATH_XGXS_MDIOADDR_MASK) != 3) { | ||
642 | val &= | ||
643 | ~(INFINIPATH_XGXS_MDIOADDR_MASK << | ||
644 | INFINIPATH_XGXS_MDIOADDR_SHIFT); | ||
645 | /* MDIO address 3 */ | ||
646 | val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT; | ||
647 | change = 1; | ||
648 | } | ||
649 | if (val & INFINIPATH_XGXS_RESET) { | ||
650 | val &= ~INFINIPATH_XGXS_RESET; | ||
651 | change = 1; | ||
652 | } | ||
653 | if (change) | ||
654 | ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); | ||
655 | |||
656 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); | ||
657 | |||
658 | /* clear current and de-emphasis bits */ | ||
659 | config1 &= ~0x0ffffffff00ULL; | ||
660 | /* set current to 20ma */ | ||
661 | config1 |= 0x00000000000ULL; | ||
662 | /* set de-emphasis to -5.68dB */ | ||
663 | config1 |= 0x0cccc000000ULL; | ||
664 | ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1); | ||
665 | |||
666 | ipath_cdbg(VERBOSE, "done: SerDes status config0=%llx " | ||
667 | "config1=%llx, sstatus=%llx xgxs=%llx\n", | ||
668 | (unsigned long long) val, (unsigned long long) config1, | ||
669 | (unsigned long long) | ||
670 | ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus), | ||
671 | (unsigned long long) | ||
672 | ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); | ||
673 | |||
674 | if (!ipath_waitfor_mdio_cmdready(dd)) { | ||
675 | ipath_write_kreg( | ||
676 | dd, dd->ipath_kregs->kr_mdio, | ||
677 | ipath_mdio_req(IPATH_MDIO_CMD_READ, 31, | ||
678 | IPATH_MDIO_CTRL_XGXS_REG_8, 0)); | ||
679 | if (ipath_waitfor_complete(dd, dd->ipath_kregs->kr_mdio, | ||
680 | IPATH_MDIO_DATAVALID, &val)) | ||
681 | ipath_dbg("Never got MDIO data for XGXS " | ||
682 | "status read\n"); | ||
683 | else | ||
684 | ipath_cdbg(VERBOSE, "MDIO Read reg8, " | ||
685 | "'bank' 31 %x\n", (u32) val); | ||
686 | } else | ||
687 | ipath_dbg("Never got MDIO cmdready for XGXS status read\n"); | ||
688 | |||
689 | return ret; | ||
690 | } | ||
691 | |||
692 | /** | ||
693 | * ipath_pe_quiet_serdes - set serdes to txidle | ||
694 | * @dd: the infinipath device | ||
695 | * Called when driver is being unloaded | ||
696 | */ | ||
697 | static void ipath_pe_quiet_serdes(struct ipath_devdata *dd) | ||
698 | { | ||
699 | u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); | ||
700 | |||
701 | val |= INFINIPATH_SERDC0_TXIDLE; | ||
702 | ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n", | ||
703 | (unsigned long long) val); | ||
704 | ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); | ||
705 | } | ||
706 | |||
707 | /* this is not yet needed on the PE800, so just return 0. */ | ||
708 | static int ipath_pe_intconfig(struct ipath_devdata *dd) | ||
709 | { | ||
710 | return 0; | ||
711 | } | ||
712 | |||
713 | /** | ||
714 | * ipath_setup_pe_setextled - set the state of the two external LEDs | ||
715 | * @dd: the infinipath device | ||
716 | * @lst: the L state | ||
717 | * @ltst: the LT state | ||
718 | * | ||
719 | * These LEDs indicate the physical and logical state of the IB link. | ||
720 | * For this chip (at least with recommended board pinouts), LED1 | ||
721 | * is Yellow (logical state) and LED2 is Green (physical state). | ||
722 | * | ||
723 | * Note: We try to match the Mellanox HCA LED behavior as best | ||
724 | * we can. Green indicates physical link state is OK (something is | ||
725 | * plugged in, and we can train). | ||
726 | * Amber indicates the link is logically up (ACTIVE). | ||
727 | * Mellanox further blinks the amber LED to indicate data packet | ||
728 | * activity, but we have no hardware support for that, so it would | ||
729 | * require waking up every 10-20 msecs and checking the counters | ||
730 | * on the chip, and then turning the LED off if appropriate. That's | ||
731 | * visible overhead, so not something we will do. | ||
732 | * | ||
733 | */ | ||
734 | static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst, | ||
735 | u64 ltst) | ||
736 | { | ||
737 | u64 extctl; | ||
738 | |||
739 | /* the diags use the LED to indicate diag info, so we leave | ||
740 | * the external LED alone when the diags are running */ | ||
741 | if (ipath_diag_inuse) | ||
742 | return; | ||
743 | |||
744 | extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON | | ||
745 | INFINIPATH_EXTC_LED2PRIPORT_ON); | ||
746 | |||
747 | if (ltst & INFINIPATH_IBCS_LT_STATE_LINKUP) | ||
748 | extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON; | ||
749 | if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE) | ||
750 | extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON; | ||
751 | dd->ipath_extctrl = extctl; | ||
752 | ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl); | ||
753 | } | ||
754 | |||
755 | /** | ||
756 | * ipath_setup_pe_cleanup - clean up any chip-specific stuff | ||
757 | * @dd: the infinipath device | ||
758 | * | ||
759 | * This is called during driver unload. | ||
760 | * We do the pci_disable_msi here, not in generic code, because it | ||
761 | * isn't used for the HT-400. If we do end up needing pci_enable_msi | ||
762 | * at some point in the future for HT-400, we'll move the call back | ||
763 | * into the main init_one code. | ||
764 | */ | ||
765 | static void ipath_setup_pe_cleanup(struct ipath_devdata *dd) | ||
766 | { | ||
767 | dd->ipath_msi_lo = 0; /* just in case unload fails */ | ||
768 | pci_disable_msi(dd->pcidev); | ||
769 | } | ||
770 | |||
771 | /** | ||
772 | * ipath_setup_pe_config - setup PCIe config related stuff | ||
773 | * @dd: the infinipath device | ||
774 | * @pdev: the PCI device | ||
775 | * | ||
776 | * The pci_enable_msi() call will fail on systems with MSI quirks | ||
777 | * such as those with AMD8131, even if the device of interest is not | ||
778 | * attached to that device (in the 2.6.13 - 2.6.15 kernels, at least; fixed | ||
779 | * late in 2.6.16). | ||
780 | * All that can be done is to edit the kernel source to remove the quirk | ||
781 | * check until that is fixed. | ||
782 | * We do not need to call enable_msi() for our HyperTransport chip (HT-400), | ||
783 | * even though it uses MSI, and we want to avoid the quirk warning, | ||
784 | * so we call enable_msi() only for the PE-800. If we do end up needing | ||
785 | * pci_enable_msi at some point in the future for HT-400, we'll move the | ||
786 | * call back into the main init_one code. | ||
787 | * We save the msi lo and hi values, so we can restore them after | ||
788 | * chip reset (the kernel PCI infrastructure doesn't yet handle that | ||
789 | * correctly). | ||
790 | */ | ||
791 | static int ipath_setup_pe_config(struct ipath_devdata *dd, | ||
792 | struct pci_dev *pdev) | ||
793 | { | ||
794 | int pos, ret; | ||
795 | |||
796 | dd->ipath_msi_lo = 0; /* used as a flag during reset processing */ | ||
797 | ret = pci_enable_msi(dd->pcidev); | ||
798 | if (ret) | ||
799 | ipath_dev_err(dd, "pci_enable_msi failed: %d, " | ||
800 | "interrupts may not work\n", ret); | ||
801 | /* continue even if it fails, we may still be OK... */ | ||
802 | |||
803 | if ((pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) { | ||
804 | u16 control; | ||
805 | pci_read_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO, | ||
806 | &dd->ipath_msi_lo); | ||
807 | pci_read_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI, | ||
808 | &dd->ipath_msi_hi); | ||
809 | pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, | ||
810 | &control); | ||
811 | /* now save the data (vector) info */ | ||
812 | pci_read_config_word(dd->pcidev, | ||
813 | pos + ((control & PCI_MSI_FLAGS_64BIT) | ||
814 | ? 12 : 8), | ||
815 | &dd->ipath_msi_data); | ||
816 | ipath_cdbg(VERBOSE, "Read msi data 0x%x from config offset " | ||
817 | "0x%x, control=0x%x\n", dd->ipath_msi_data, | ||
818 | pos + ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8), | ||
819 | control); | ||
820 | /* we save the cachelinesize also, although it doesn't | ||
821 | * really matter */ | ||
822 | pci_read_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, | ||
823 | &dd->ipath_pci_cacheline); | ||
824 | } else | ||
825 | ipath_dev_err(dd, "Can't find MSI capability, " | ||
826 | "can't save MSI settings for reset\n"); | ||
827 | if ((pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP))) { | ||
828 | u16 linkstat; | ||
829 | pci_read_config_word(dd->pcidev, pos + PCI_EXP_LNKSTA, | ||
830 | &linkstat); | ||
831 | linkstat >>= 4; | ||
832 | linkstat &= 0x1f; | ||
833 | if (linkstat != 8) | ||
834 | ipath_dev_err(dd, "PCIe width %u, " | ||
835 | "performance reduced\n", linkstat); | ||
836 | } | ||
837 | else | ||
838 | ipath_dev_err(dd, "Can't find PCI Express " | ||
839 | "capability!\n"); | ||
840 | return 0; | ||
841 | } | ||
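Editorial note: the link-width check above reads the PCI Express Link Status register and isolates the negotiated width field (bits 9:4) before comparing it with the expected x8. A standalone sketch of just that decode, with a helper name of our own:

static inline unsigned int pcie_negotiated_width(u16 linkstat)
{
	/* bits 9:4 of PCI_EXP_LNKSTA hold the negotiated link width */
	return (linkstat >> 4) & 0x1f;
}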
842 | |||
843 | static void ipath_init_pe_variables(void) | ||
844 | { | ||
845 | /* | ||
846 | * bits for selecting i2c direction and values, | ||
847 | * used for I2C serial flash | ||
848 | */ | ||
849 | ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; | ||
850 | ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; | ||
851 | ipath_gpio_sda = IPATH_GPIO_SDA; | ||
852 | ipath_gpio_scl = IPATH_GPIO_SCL; | ||
853 | |||
854 | /* variables for sanity checking interrupt and errors */ | ||
855 | infinipath_hwe_bitsextant = | ||
856 | (INFINIPATH_HWE_RXEMEMPARITYERR_MASK << | ||
857 | INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) | | ||
858 | (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK << | ||
859 | INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) | | ||
860 | INFINIPATH_HWE_PCIE1PLLFAILED | | ||
861 | INFINIPATH_HWE_PCIE0PLLFAILED | | ||
862 | INFINIPATH_HWE_PCIEPOISONEDTLP | | ||
863 | INFINIPATH_HWE_PCIECPLTIMEOUT | | ||
864 | INFINIPATH_HWE_PCIEBUSPARITYXTLH | | ||
865 | INFINIPATH_HWE_PCIEBUSPARITYXADM | | ||
866 | INFINIPATH_HWE_PCIEBUSPARITYRADM | | ||
867 | INFINIPATH_HWE_MEMBISTFAILED | | ||
868 | INFINIPATH_HWE_COREPLL_FBSLIP | | ||
869 | INFINIPATH_HWE_COREPLL_RFSLIP | | ||
870 | INFINIPATH_HWE_SERDESPLLFAILED | | ||
871 | INFINIPATH_HWE_IBCBUSTOSPCPARITYERR | | ||
872 | INFINIPATH_HWE_IBCBUSFRSPCPARITYERR; | ||
873 | infinipath_i_bitsextant = | ||
874 | (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) | | ||
875 | (INFINIPATH_I_RCVAVAIL_MASK << | ||
876 | INFINIPATH_I_RCVAVAIL_SHIFT) | | ||
877 | INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT | | ||
878 | INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO; | ||
879 | infinipath_e_bitsextant = | ||
880 | INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC | | ||
881 | INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN | | ||
882 | INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN | | ||
883 | INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR | | ||
884 | INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP | | ||
885 | INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION | | ||
886 | INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | | ||
887 | INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN | | ||
888 | INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK | | ||
889 | INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN | | ||
890 | INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN | | ||
891 | INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT | | ||
892 | INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | | ||
893 | INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED | | ||
894 | INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET | | ||
895 | INFINIPATH_E_HARDWARE; | ||
896 | |||
897 | infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; | ||
898 | infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; | ||
899 | } | ||
900 | |||
901 | /* Set up the MSI stuff again after a reset. I'd like to just call | ||
902 | * pci_enable_msi() and request_irq() again, but when I do that, | ||
903 | * the MSI enable bit doesn't get set in the command word, and | ||
904 | * we switch to a different interrupt vector, which is confusing, | ||
905 | * so I instead just do it all inline. Perhaps somehow we can tie this | ||
906 | * into the PCIe hotplug support at some point. | ||
907 | * Note: because I'm doing it all here, I don't call pci_disable_msi() | ||
908 | * or free_irq() at the start of ipath_setup_pe_reset(). | ||
909 | */ | ||
910 | static int ipath_reinit_msi(struct ipath_devdata *dd) | ||
911 | { | ||
912 | int pos; | ||
913 | u16 control; | ||
914 | int ret; | ||
915 | |||
916 | if (!dd->ipath_msi_lo) { | ||
917 | dev_info(&dd->pcidev->dev, "Can't restore MSI config, " | ||
918 | "initial setup failed?\n"); | ||
919 | ret = 0; | ||
920 | goto bail; | ||
921 | } | ||
922 | |||
923 | if (!(pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) { | ||
924 | ipath_dev_err(dd, "Can't find MSI capability, " | ||
925 | "can't restore MSI settings\n"); | ||
926 | ret = 0; | ||
927 | goto bail; | ||
928 | } | ||
929 | ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n", | ||
930 | dd->ipath_msi_lo, pos + PCI_MSI_ADDRESS_LO); | ||
931 | pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO, | ||
932 | dd->ipath_msi_lo); | ||
933 | ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n", | ||
934 | dd->ipath_msi_hi, pos + PCI_MSI_ADDRESS_HI); | ||
935 | pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI, | ||
936 | dd->ipath_msi_hi); | ||
937 | pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control); | ||
938 | if (!(control & PCI_MSI_FLAGS_ENABLE)) { | ||
939 | ipath_cdbg(VERBOSE, "MSI control at off %x was %x, " | ||
940 | "setting MSI enable (%x)\n", pos + PCI_MSI_FLAGS, | ||
941 | control, control | PCI_MSI_FLAGS_ENABLE); | ||
942 | control |= PCI_MSI_FLAGS_ENABLE; | ||
943 | pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, | ||
944 | control); | ||
945 | } | ||
946 | /* now rewrite the data (vector) info */ | ||
947 | pci_write_config_word(dd->pcidev, pos + | ||
948 | ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8), | ||
949 | dd->ipath_msi_data); | ||
950 | /* we restore the cachelinesize also, although it doesn't really | ||
951 | * matter */ | ||
952 | pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, | ||
953 | dd->ipath_pci_cacheline); | ||
954 | /* and now set the pci master bit again */ | ||
955 | pci_set_master(dd->pcidev); | ||
956 | ret = 1; | ||
957 | |||
958 | bail: | ||
959 | return ret; | ||
960 | } | ||
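Editorial note: both the save path in ipath_setup_pe_config() and the restore path above pick the MSI data (vector) register at capability offset 8 or 12, depending on whether the 64-bit address flag is set; that is the standard MSI capability layout. A small sketch of just that choice (helper name ours, not driver code):

static inline int msi_data_offset(u16 control)
{
	/* MSI data register: offset 12 with a 64-bit address, else 8 */
	return (control & PCI_MSI_FLAGS_64BIT) ? 12 : 8;
}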
961 | |||
962 | /* This routine sleeps, so it can only be called from user context, not | ||
963 | * from interrupt context. If we need interrupt context, we can split | ||
964 | * it into two routines. | ||
965 | */ | ||
966 | static int ipath_setup_pe_reset(struct ipath_devdata *dd) | ||
967 | { | ||
968 | u64 val; | ||
969 | int i; | ||
970 | int ret; | ||
971 | |||
972 | /* Use ERROR so it shows up in logs, etc. */ | ||
973 | ipath_dev_err(dd, "Resetting PE-800 unit %u\n", | ||
974 | dd->ipath_unit); | ||
975 | val = dd->ipath_control | INFINIPATH_C_RESET; | ||
976 | ipath_write_kreg(dd, dd->ipath_kregs->kr_control, val); | ||
977 | mb(); | ||
978 | |||
979 | for (i = 1; i <= 5; i++) { | ||
980 | int r; | ||
981 | /* allow MBIST, etc. to complete; longer on each retry. | ||
982 | * We sometimes get machine checks from bus timeout if no | ||
983 | * response, so for now, make it *really* long. | ||
984 | */ | ||
985 | msleep(1000 + (1 + i) * 2000); | ||
986 | if ((r = | ||
987 | pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, | ||
988 | dd->ipath_pcibar0))) | ||
989 | ipath_dev_err(dd, "rewrite of BAR0 failed: %d\n", | ||
990 | r); | ||
991 | if ((r = | ||
992 | pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, | ||
993 | dd->ipath_pcibar1))) | ||
994 | ipath_dev_err(dd, "rewrite of BAR1 failed: %d\n", | ||
995 | r); | ||
996 | /* now re-enable memory access */ | ||
997 | if ((r = pci_enable_device(dd->pcidev))) | ||
998 | ipath_dev_err(dd, "pci_enable_device failed after " | ||
999 | "reset: %d\n", r); | ||
1000 | val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision); | ||
1001 | if (val == dd->ipath_revision) { | ||
1002 | ipath_cdbg(VERBOSE, "Got matching revision " | ||
1003 | "register %llx on try %d\n", | ||
1004 | (unsigned long long) val, i); | ||
1005 | ret = ipath_reinit_msi(dd); | ||
1006 | goto bail; | ||
1007 | } | ||
1008 | /* Probably getting -1 back */ | ||
1009 | ipath_dbg("Didn't get expected revision register, " | ||
1010 | "got %llx, try %d\n", (unsigned long long) val, | ||
1011 | i + 1); | ||
1012 | } | ||
1013 | ret = 0; /* failed */ | ||
1014 | |||
1015 | bail: | ||
1016 | return ret; | ||
1017 | } | ||
1018 | |||
1019 | /** | ||
1020 | * ipath_pe_put_tid - write a TID in chip | ||
1021 | * @dd: the infinipath device | ||
1022 | * @tidptr: pointer to the expected TID (in chip) to update | ||
1023 | * @type: 0 for eager, 1 for expected | ||
1024 | * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing | ||
1025 | * | ||
1026 | * This exists as a separate routine to allow for special locking etc. | ||
1027 | * It's used both for the full cleanup on exit and for the normal | ||
1028 | * setup and teardown. | ||
1029 | */ | ||
1030 | static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, | ||
1031 | u32 type, unsigned long pa) | ||
1032 | { | ||
1033 | u32 __iomem *tidp32 = (u32 __iomem *)tidptr; | ||
1034 | unsigned long flags = 0; /* keep gcc quiet */ | ||
1035 | |||
1036 | if (pa != dd->ipath_tidinvalid) { | ||
1037 | if (pa & ((1U << 11) - 1)) { | ||
1038 | dev_info(&dd->pcidev->dev, "BUG: physaddr %lx " | ||
1039 | "not 4KB aligned!\n", pa); | ||
1040 | return; | ||
1041 | } | ||
1042 | pa >>= 11; | ||
1043 | /* paranoia check */ | ||
1044 | if (pa & (7<<29)) | ||
1045 | ipath_dev_err(dd, | ||
1046 | "BUG: Physical page address 0x%lx " | ||
1047 | "has bits set in 31-29\n", pa); | ||
1048 | |||
1049 | if (type == 0) | ||
1050 | pa |= dd->ipath_tidtemplate; | ||
1051 | else /* for now, always full 4KB page */ | ||
1052 | pa |= 2 << 29; | ||
1053 | } | ||
1054 | |||
1055 | /* workaround chip bug 9437 by writing each TID twice | ||
1056 | * and holding a spinlock around the writes, so they don't | ||
1057 | * intermix with other TID (eager or expected) writes | ||
1058 | * Unfortunately, this call can be done from interrupt level | ||
1059 | * for the port 0 eager TIDs, so we have to use irqsave | ||
1060 | */ | ||
1061 | spin_lock_irqsave(&dd->ipath_tid_lock, flags); | ||
1062 | ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeeddeaf); | ||
1063 | if (dd->ipath_kregbase) | ||
1064 | writel(pa, tidp32); | ||
1065 | ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xdeadbeef); | ||
1066 | mmiowb(); | ||
1067 | spin_unlock_irqrestore(&dd->ipath_tid_lock, flags); | ||
1068 | } | ||
1069 | |||
1070 | /** | ||
1071 | * ipath_pe_clear_tid - clear all TID entries for a port, expected and eager | ||
1072 | * @dd: the infinipath device | ||
1073 | * @port: the port | ||
1074 | * | ||
1075 | * clear all TID entries for a port, expected and eager. | ||
1076 | * Used from ipath_close(). On the PE-800, TIDs are only 32 bits, | ||
1077 | * not 64, but they are still on 64-bit boundaries, so tidbase | ||
1078 | * is declared as u64 * for the pointer math, even though we write 32 bits. | ||
1079 | */ | ||
1080 | static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port) | ||
1081 | { | ||
1082 | u64 __iomem *tidbase; | ||
1083 | unsigned long tidinv; | ||
1084 | int i; | ||
1085 | |||
1086 | if (!dd->ipath_kregbase) | ||
1087 | return; | ||
1088 | |||
1089 | ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port); | ||
1090 | |||
1091 | tidinv = dd->ipath_tidinvalid; | ||
1092 | tidbase = (u64 __iomem *) | ||
1093 | ((char __iomem *)(dd->ipath_kregbase) + | ||
1094 | dd->ipath_rcvtidbase + | ||
1095 | port * dd->ipath_rcvtidcnt * sizeof(*tidbase)); | ||
1096 | |||
1097 | for (i = 0; i < dd->ipath_rcvtidcnt; i++) | ||
1098 | ipath_pe_put_tid(dd, &tidbase[i], 0, tidinv); | ||
1099 | |||
1100 | tidbase = (u64 __iomem *) | ||
1101 | ((char __iomem *)(dd->ipath_kregbase) + | ||
1102 | dd->ipath_rcvegrbase + | ||
1103 | port * dd->ipath_rcvegrcnt * sizeof(*tidbase)); | ||
1104 | |||
1105 | for (i = 0; i < dd->ipath_rcvegrcnt; i++) | ||
1106 | ipath_pe_put_tid(dd, &tidbase[i], 1, tidinv); | ||
1107 | } | ||
1108 | |||
1109 | /** | ||
1110 | * ipath_pe_tidtemplate - setup constants for TID updates | ||
1111 | * @dd: the infinipath device | ||
1112 | * | ||
1113 | * We setup stuff that we use a lot, to avoid calculating each time | ||
1114 | */ | ||
1115 | static void ipath_pe_tidtemplate(struct ipath_devdata *dd) | ||
1116 | { | ||
1117 | u32 egrsize = dd->ipath_rcvegrbufsize; | ||
1118 | |||
1119 | /* For now, we always allocate 4KB buffers (at init) so we can | ||
1120 | * receive max size packets. We may want a module parameter to | ||
1121 | * specify 2KB or 4KB and/or make it per port instead of per device | ||
1122 | * for those who want to reduce memory footprint. Note that the | ||
1123 | * ipath_rcvhdrentsize size must be large enough to hold the largest | ||
1124 | * IB header (currently 96 bytes) that we expect to handle (plus of | ||
1125 | * course the 2 dwords of RHF). | ||
1126 | */ | ||
1127 | if (egrsize == 2048) | ||
1128 | dd->ipath_tidtemplate = 1U << 29; | ||
1129 | else if (egrsize == 4096) | ||
1130 | dd->ipath_tidtemplate = 2U << 29; | ||
1131 | else { | ||
1132 | egrsize = 4096; | ||
1133 | dev_info(&dd->pcidev->dev, "BUG: unsupported egrbufsize " | ||
1134 | "%u, using %u\n", dd->ipath_rcvegrbufsize, | ||
1135 | egrsize); | ||
1136 | dd->ipath_tidtemplate = 2U << 29; | ||
1137 | } | ||
1138 | dd->ipath_tidinvalid = 0; | ||
1139 | } | ||
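Editorial note: as an illustration of how the template above and ipath_pe_put_tid() combine, the physical address is shifted down by 11 bits and a buffer-size code is placed in bits 30:29 (1 for 2KB, 2 for 4KB). This hedged sketch (helper name ours, not driver code) packs a TID word that way:

static inline u32 pack_tid_word(unsigned long pa, int is_4k)
{
	u32 tid = (u32)(pa >> 11);		/* address in 2KB granules */

	tid |= (is_4k ? 2U : 1U) << 29;		/* size code, as in the template */
	return tid;
}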
1140 | |||
1141 | static int ipath_pe_early_init(struct ipath_devdata *dd) | ||
1142 | { | ||
1143 | dd->ipath_flags |= IPATH_4BYTE_TID; | ||
1144 | |||
1145 | /* | ||
1146 | * For openib, we need to be able to handle an IB header of 96 bytes | ||
1147 | * or 24 dwords. HT-400 has arbitrary sized receive buffers, so we | ||
1148 | * made them the same size as the PIO buffers. The PE-800 does not | ||
1149 | * handle arbitrary size buffers, so we need the header large enough | ||
1150 | * to handle the largest IB header, but still have room for a 2KB MTU | ||
1151 | * standard IB packet. | ||
1152 | */ | ||
1153 | dd->ipath_rcvhdrentsize = 24; | ||
1154 | dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; | ||
1155 | |||
1156 | /* For HT-400, we allocate a somewhat overly large eager buffer, | ||
1157 | * such that we can guarantee that we can receive the largest packet | ||
1158 | * that we can send out. To truly support a 4KB MTU, we need to | ||
1159 | * bump this to a larger value. We'll do this when I get around to | ||
1160 | * testing 4KB sends on the PE-800, which I have not yet done. | ||
1161 | */ | ||
1162 | dd->ipath_rcvegrbufsize = 2048; | ||
1163 | /* | ||
1164 | * the min() check here is currently a nop, but it may not always | ||
1165 | * be, depending on just how we do ipath_rcvegrbufsize | ||
1166 | */ | ||
1167 | dd->ipath_ibmaxlen = min(dd->ipath_piosize2k, | ||
1168 | dd->ipath_rcvegrbufsize + | ||
1169 | (dd->ipath_rcvhdrentsize << 2)); | ||
1170 | dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen; | ||
1171 | |||
1172 | /* | ||
1173 | * For PE-800, we can request a receive interrupt for 1 or | ||
1174 | * more packets from current offset. For now, we set this | ||
1175 | * up for a single packet, to match the HT-400 behavior. | ||
1176 | */ | ||
1177 | dd->ipath_rhdrhead_intr_off = 1ULL<<32; | ||
1178 | |||
1179 | return 0; | ||
1180 | } | ||
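Editorial note, for concreteness (our arithmetic, not driver code): with ipath_rcvhdrentsize set to 24 dwords, the header contribution to the min() above is 24 << 2 = 96 bytes, so the eager-buffer side of the comparison is 2048 + 96 = 2144 bytes; whichever of that and ipath_piosize2k is smaller becomes ipath_ibmaxlen.

enum {
	example_hdr_bytes = 24 << 2,		/* 96-byte header allowance */
	example_egr_bound = 2048 + (24 << 2)	/* 2144-byte eager-side bound */
};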
1181 | |||
1182 | int __attribute__((weak)) ipath_unordered_wc(void) | ||
1183 | { | ||
1184 | return 0; | ||
1185 | } | ||
1186 | |||
1187 | /** | ||
1188 | * ipath_init_pe_get_base_info - set chip-specific flags for user code | ||
1189 | * @dd: the infinipath device | ||
1190 | * @kbase: ipath_base_info pointer | ||
1191 | * | ||
1192 | * We set the PCIE flag because the lower bandwidth on PCIe vs | ||
1193 | * HyperTransport can affect some user packet algorithms. | ||
1194 | */ | ||
1195 | static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase) | ||
1196 | { | ||
1197 | struct ipath_base_info *kinfo = kbase; | ||
1198 | |||
1199 | if (ipath_unordered_wc()) { | ||
1200 | kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER; | ||
1201 | ipath_cdbg(PROC, "Intel processor, forcing WC order\n"); | ||
1202 | } | ||
1203 | else | ||
1204 | ipath_cdbg(PROC, "Not Intel processor, WC ordered\n"); | ||
1205 | |||
1206 | kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE; | ||
1207 | |||
1208 | return 0; | ||
1209 | } | ||
1210 | |||
1211 | /** | ||
1212 | * ipath_init_pe800_funcs - set up the chip-specific function pointers | ||
1213 | * @dd: the infinipath device | ||
1214 | * | ||
1215 | * This is global, and is called directly at init to set up the | ||
1216 | * chip-specific function pointers for later use. | ||
1217 | */ | ||
1218 | void ipath_init_pe800_funcs(struct ipath_devdata *dd) | ||
1219 | { | ||
1220 | dd->ipath_f_intrsetup = ipath_pe_intconfig; | ||
1221 | dd->ipath_f_bus = ipath_setup_pe_config; | ||
1222 | dd->ipath_f_reset = ipath_setup_pe_reset; | ||
1223 | dd->ipath_f_get_boardname = ipath_pe_boardname; | ||
1224 | dd->ipath_f_init_hwerrors = ipath_pe_init_hwerrors; | ||
1225 | dd->ipath_f_early_init = ipath_pe_early_init; | ||
1226 | dd->ipath_f_handle_hwerrors = ipath_pe_handle_hwerrors; | ||
1227 | dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes; | ||
1228 | dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes; | ||
1229 | dd->ipath_f_clear_tids = ipath_pe_clear_tids; | ||
1230 | dd->ipath_f_put_tid = ipath_pe_put_tid; | ||
1231 | dd->ipath_f_cleanup = ipath_setup_pe_cleanup; | ||
1232 | dd->ipath_f_setextled = ipath_setup_pe_setextled; | ||
1233 | dd->ipath_f_get_base_info = ipath_pe_get_base_info; | ||
1234 | |||
1235 | /* initialize chip-specific variables */ | ||
1236 | dd->ipath_f_tidtemplate = ipath_pe_tidtemplate; | ||
1237 | |||
1238 | /* | ||
1239 | * setup the register offsets, since they are different for each | ||
1240 | * chip | ||
1241 | */ | ||
1242 | dd->ipath_kregs = &ipath_pe_kregs; | ||
1243 | dd->ipath_cregs = &ipath_pe_cregs; | ||
1244 | |||
1245 | ipath_init_pe_variables(); | ||
1246 | } | ||
1247 | |||
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c new file mode 100644 index 000000000000..18890716db1e --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_qp.c | |||
@@ -0,0 +1,913 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/err.h> | ||
34 | #include <linux/vmalloc.h> | ||
35 | |||
36 | #include "ipath_verbs.h" | ||
37 | #include "ips_common.h" | ||
38 | |||
39 | #define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) | ||
40 | #define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) | ||
41 | #define mk_qpn(qpt, map, off) (((map) - (qpt)->map) * BITS_PER_PAGE + \ | ||
42 | (off)) | ||
43 | #define find_next_offset(map, off) find_next_zero_bit((map)->page, \ | ||
44 | BITS_PER_PAGE, off) | ||
45 | |||
46 | #define TRANS_INVALID 0 | ||
47 | #define TRANS_ANY2RST 1 | ||
48 | #define TRANS_RST2INIT 2 | ||
49 | #define TRANS_INIT2INIT 3 | ||
50 | #define TRANS_INIT2RTR 4 | ||
51 | #define TRANS_RTR2RTS 5 | ||
52 | #define TRANS_RTS2RTS 6 | ||
53 | #define TRANS_SQERR2RTS 7 | ||
54 | #define TRANS_ANY2ERR 8 | ||
55 | #define TRANS_RTS2SQD 9 /* XXX Wait for expected ACKs & signal event */ | ||
56 | #define TRANS_SQD2SQD 10 /* error if not drained & parameter change */ | ||
57 | #define TRANS_SQD2RTS 11 /* error if not drained */ | ||
58 | |||
59 | /* | ||
60 | * Convert the AETH credit code into the number of credits. | ||
61 | */ | ||
62 | static u32 credit_table[31] = { | ||
63 | 0, /* 0 */ | ||
64 | 1, /* 1 */ | ||
65 | 2, /* 2 */ | ||
66 | 3, /* 3 */ | ||
67 | 4, /* 4 */ | ||
68 | 6, /* 5 */ | ||
69 | 8, /* 6 */ | ||
70 | 12, /* 7 */ | ||
71 | 16, /* 8 */ | ||
72 | 24, /* 9 */ | ||
73 | 32, /* A */ | ||
74 | 48, /* B */ | ||
75 | 64, /* C */ | ||
76 | 96, /* D */ | ||
77 | 128, /* E */ | ||
78 | 192, /* F */ | ||
79 | 256, /* 10 */ | ||
80 | 384, /* 11 */ | ||
81 | 512, /* 12 */ | ||
82 | 768, /* 13 */ | ||
83 | 1024, /* 14 */ | ||
84 | 1536, /* 15 */ | ||
85 | 2048, /* 16 */ | ||
86 | 3072, /* 17 */ | ||
87 | 4096, /* 18 */ | ||
88 | 6144, /* 19 */ | ||
89 | 8192, /* 1A */ | ||
90 | 12288, /* 1B */ | ||
91 | 16384, /* 1C */ | ||
92 | 24576, /* 1D */ | ||
93 | 32768 /* 1E */ | ||
94 | }; | ||
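Editorial note: a hedged sketch of how a table like this is typically consulted (helper name ours, not driver code): the 5-bit AETH credit code indexes the table, and code 0x1F, which has no table entry, is left to the caller to special-case.

static inline u32 aeth_code_to_credits(u32 code)
{
	code &= 0x1f;					/* 5-bit credit field */
	return code < 31 ? credit_table[code] : 0;	/* 0x1f handled by caller */
}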
95 | |||
96 | static u32 alloc_qpn(struct ipath_qp_table *qpt) | ||
97 | { | ||
98 | u32 i, offset, max_scan, qpn; | ||
99 | struct qpn_map *map; | ||
100 | u32 ret; | ||
101 | |||
102 | qpn = qpt->last + 1; | ||
103 | if (qpn >= QPN_MAX) | ||
104 | qpn = 2; | ||
105 | offset = qpn & BITS_PER_PAGE_MASK; | ||
106 | map = &qpt->map[qpn / BITS_PER_PAGE]; | ||
107 | max_scan = qpt->nmaps - !offset; | ||
108 | for (i = 0;;) { | ||
109 | if (unlikely(!map->page)) { | ||
110 | unsigned long page = get_zeroed_page(GFP_KERNEL); | ||
111 | unsigned long flags; | ||
112 | |||
113 | /* | ||
114 | * Free the page if someone raced with us | ||
115 | * installing it: | ||
116 | */ | ||
117 | spin_lock_irqsave(&qpt->lock, flags); | ||
118 | if (map->page) | ||
119 | free_page(page); | ||
120 | else | ||
121 | map->page = (void *)page; | ||
122 | spin_unlock_irqrestore(&qpt->lock, flags); | ||
123 | if (unlikely(!map->page)) | ||
124 | break; | ||
125 | } | ||
126 | if (likely(atomic_read(&map->n_free))) { | ||
127 | do { | ||
128 | if (!test_and_set_bit(offset, map->page)) { | ||
129 | atomic_dec(&map->n_free); | ||
130 | qpt->last = qpn; | ||
131 | ret = qpn; | ||
132 | goto bail; | ||
133 | } | ||
134 | offset = find_next_offset(map, offset); | ||
135 | qpn = mk_qpn(qpt, map, offset); | ||
136 | /* | ||
137 | * This test differs from alloc_pidmap(). | ||
138 | * If find_next_offset() does find a zero | ||
139 | * bit, we don't need to check for QPN | ||
140 | * wrapping around past our starting QPN. | ||
141 | * We just need to be sure we don't loop | ||
142 | * forever. | ||
143 | */ | ||
144 | } while (offset < BITS_PER_PAGE && qpn < QPN_MAX); | ||
145 | } | ||
146 | /* | ||
147 | * In order to keep the number of pages allocated to a | ||
148 | * minimum, we scan all the existing pages before increasing | ||
149 | * the size of the bitmap table. | ||
150 | */ | ||
151 | if (++i > max_scan) { | ||
152 | if (qpt->nmaps == QPNMAP_ENTRIES) | ||
153 | break; | ||
154 | map = &qpt->map[qpt->nmaps++]; | ||
155 | offset = 0; | ||
156 | } else if (map < &qpt->map[qpt->nmaps]) { | ||
157 | ++map; | ||
158 | offset = 0; | ||
159 | } else { | ||
160 | map = &qpt->map[0]; | ||
161 | offset = 2; | ||
162 | } | ||
163 | qpn = mk_qpn(qpt, map, offset); | ||
164 | } | ||
165 | |||
166 | ret = 0; | ||
167 | |||
168 | bail: | ||
169 | return ret; | ||
170 | } | ||
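Editorial note: the bitmap bookkeeping above treats the QPN space as an array of page-sized bitmaps; a QPN splits into a page index and a bit offset, and mk_qpn() reverses that split. A small sketch of the forward mapping using the same macros (helper names ours, not driver code):

static inline u32 qpn_to_map_index(u32 qpn)
{
	return qpn / BITS_PER_PAGE;		/* which qpt->map[] page */
}

static inline u32 qpn_to_bit_offset(u32 qpn)
{
	return qpn & BITS_PER_PAGE_MASK;	/* bit within that page */
}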
171 | |||
172 | static void free_qpn(struct ipath_qp_table *qpt, u32 qpn) | ||
173 | { | ||
174 | struct qpn_map *map; | ||
175 | |||
176 | map = qpt->map + qpn / BITS_PER_PAGE; | ||
177 | if (map->page) | ||
178 | clear_bit(qpn & BITS_PER_PAGE_MASK, map->page); | ||
179 | atomic_inc(&map->n_free); | ||
180 | } | ||
181 | |||
182 | /** | ||
183 | * ipath_alloc_qpn - allocate a QP number | ||
184 | * @qpt: the QP table | ||
185 | * @qp: the QP | ||
186 | * @type: the QP type (IB_QPT_SMI and IB_QPT_GSI are special) | ||
187 | * | ||
188 | * Allocate the next available QPN and put the QP into the hash table. | ||
189 | * The hash table holds a reference to the QP. | ||
190 | */ | ||
191 | static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp, | ||
192 | enum ib_qp_type type) | ||
193 | { | ||
194 | unsigned long flags; | ||
195 | u32 qpn; | ||
196 | int ret; | ||
197 | |||
198 | if (type == IB_QPT_SMI) | ||
199 | qpn = 0; | ||
200 | else if (type == IB_QPT_GSI) | ||
201 | qpn = 1; | ||
202 | else { | ||
203 | /* Allocate the next available QPN */ | ||
204 | qpn = alloc_qpn(qpt); | ||
205 | if (qpn == 0) { | ||
206 | ret = -ENOMEM; | ||
207 | goto bail; | ||
208 | } | ||
209 | } | ||
210 | qp->ibqp.qp_num = qpn; | ||
211 | |||
212 | /* Add the QP to the hash table. */ | ||
213 | spin_lock_irqsave(&qpt->lock, flags); | ||
214 | |||
215 | qpn %= qpt->max; | ||
216 | qp->next = qpt->table[qpn]; | ||
217 | qpt->table[qpn] = qp; | ||
218 | atomic_inc(&qp->refcount); | ||
219 | |||
220 | spin_unlock_irqrestore(&qpt->lock, flags); | ||
221 | ret = 0; | ||
222 | |||
223 | bail: | ||
224 | return ret; | ||
225 | } | ||
226 | |||
227 | /** | ||
228 | * ipath_free_qp - remove a QP from the QP table | ||
229 | * @qpt: the QP table | ||
230 | * @qp: the QP to remove | ||
231 | * | ||
232 | * Remove the QP from the table so it can't be found asynchronously by | ||
233 | * the receive interrupt routine. | ||
234 | */ | ||
235 | static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) | ||
236 | { | ||
237 | struct ipath_qp *q, **qpp; | ||
238 | unsigned long flags; | ||
239 | int fnd = 0; | ||
240 | |||
241 | spin_lock_irqsave(&qpt->lock, flags); | ||
242 | |||
243 | /* Remove QP from the hash table. */ | ||
244 | qpp = &qpt->table[qp->ibqp.qp_num % qpt->max]; | ||
245 | for (; (q = *qpp) != NULL; qpp = &q->next) { | ||
246 | if (q == qp) { | ||
247 | *qpp = qp->next; | ||
248 | qp->next = NULL; | ||
249 | atomic_dec(&qp->refcount); | ||
250 | fnd = 1; | ||
251 | break; | ||
252 | } | ||
253 | } | ||
254 | |||
255 | spin_unlock_irqrestore(&qpt->lock, flags); | ||
256 | |||
257 | if (!fnd) | ||
258 | return; | ||
259 | |||
260 | /* If QPN is not reserved, mark QPN free in the bitmap. */ | ||
261 | if (qp->ibqp.qp_num > 1) | ||
262 | free_qpn(qpt, qp->ibqp.qp_num); | ||
263 | |||
264 | wait_event(qp->wait, !atomic_read(&qp->refcount)); | ||
265 | } | ||
266 | |||
267 | /** | ||
268 | * ipath_free_all_qps - remove all QPs from the table | ||
269 | * @qpt: the QP table to empty | ||
270 | */ | ||
271 | void ipath_free_all_qps(struct ipath_qp_table *qpt) | ||
272 | { | ||
273 | unsigned long flags; | ||
274 | struct ipath_qp *qp, *nqp; | ||
275 | u32 n; | ||
276 | |||
277 | for (n = 0; n < qpt->max; n++) { | ||
278 | spin_lock_irqsave(&qpt->lock, flags); | ||
279 | qp = qpt->table[n]; | ||
280 | qpt->table[n] = NULL; | ||
281 | spin_unlock_irqrestore(&qpt->lock, flags); | ||
282 | |||
283 | while (qp) { | ||
284 | nqp = qp->next; | ||
285 | if (qp->ibqp.qp_num > 1) | ||
286 | free_qpn(qpt, qp->ibqp.qp_num); | ||
287 | if (!atomic_dec_and_test(&qp->refcount) || | ||
288 | !ipath_destroy_qp(&qp->ibqp)) | ||
289 | _VERBS_INFO("QP memory leak!\n"); | ||
290 | qp = nqp; | ||
291 | } | ||
292 | } | ||
293 | |||
294 | for (n = 0; n < ARRAY_SIZE(qpt->map); n++) { | ||
295 | if (qpt->map[n].page) | ||
296 | free_page((unsigned long)qpt->map[n].page); | ||
297 | } | ||
298 | } | ||
299 | |||
300 | /** | ||
301 | * ipath_lookup_qpn - return the QP with the given QPN | ||
302 | * @qpt: the QP table | ||
303 | * @qpn: the QP number to look up | ||
304 | * | ||
305 | * The caller is responsible for decrementing the QP reference count | ||
306 | * when done. | ||
307 | */ | ||
308 | struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn) | ||
309 | { | ||
310 | unsigned long flags; | ||
311 | struct ipath_qp *qp; | ||
312 | |||
313 | spin_lock_irqsave(&qpt->lock, flags); | ||
314 | |||
315 | for (qp = qpt->table[qpn % qpt->max]; qp; qp = qp->next) { | ||
316 | if (qp->ibqp.qp_num == qpn) { | ||
317 | atomic_inc(&qp->refcount); | ||
318 | break; | ||
319 | } | ||
320 | } | ||
321 | |||
322 | spin_unlock_irqrestore(&qpt->lock, flags); | ||
323 | return qp; | ||
324 | } | ||
325 | |||
326 | /** | ||
327 | * ipath_reset_qp - initialize the QP state to the reset state | ||
328 | * @qp: the QP to reset | ||
329 | */ | ||
330 | static void ipath_reset_qp(struct ipath_qp *qp) | ||
331 | { | ||
332 | qp->remote_qpn = 0; | ||
333 | qp->qkey = 0; | ||
334 | qp->qp_access_flags = 0; | ||
335 | qp->s_hdrwords = 0; | ||
336 | qp->s_psn = 0; | ||
337 | qp->r_psn = 0; | ||
338 | atomic_set(&qp->msn, 0); | ||
339 | if (qp->ibqp.qp_type == IB_QPT_RC) { | ||
340 | qp->s_state = IB_OPCODE_RC_SEND_LAST; | ||
341 | qp->r_state = IB_OPCODE_RC_SEND_LAST; | ||
342 | } else { | ||
343 | qp->s_state = IB_OPCODE_UC_SEND_LAST; | ||
344 | qp->r_state = IB_OPCODE_UC_SEND_LAST; | ||
345 | } | ||
346 | qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; | ||
347 | qp->s_nak_state = 0; | ||
348 | qp->s_rnr_timeout = 0; | ||
349 | qp->s_head = 0; | ||
350 | qp->s_tail = 0; | ||
351 | qp->s_cur = 0; | ||
352 | qp->s_last = 0; | ||
353 | qp->s_ssn = 1; | ||
354 | qp->s_lsn = 0; | ||
355 | qp->r_rq.head = 0; | ||
356 | qp->r_rq.tail = 0; | ||
357 | qp->r_reuse_sge = 0; | ||
358 | } | ||
359 | |||
360 | /** | ||
361 | * ipath_error_qp - put a QP into an error state | ||
362 | * @qp: the QP to put into an error state | ||
363 | * | ||
364 | * Flushes both send and receive work queues. | ||
365 | * QP r_rq.lock and s_lock should be held. | ||
366 | */ | ||
367 | |||
368 | static void ipath_error_qp(struct ipath_qp *qp) | ||
369 | { | ||
370 | struct ipath_ibdev *dev = to_idev(qp->ibqp.device); | ||
371 | struct ib_wc wc; | ||
372 | |||
373 | _VERBS_INFO("QP%d/%d in error state\n", | ||
374 | qp->ibqp.qp_num, qp->remote_qpn); | ||
375 | |||
376 | spin_lock(&dev->pending_lock); | ||
377 | /* XXX What if it's already been removed by the timeout code? */ | ||
378 | if (qp->timerwait.next != LIST_POISON1) | ||
379 | list_del(&qp->timerwait); | ||
380 | if (qp->piowait.next != LIST_POISON1) | ||
381 | list_del(&qp->piowait); | ||
382 | spin_unlock(&dev->pending_lock); | ||
383 | |||
384 | wc.status = IB_WC_WR_FLUSH_ERR; | ||
385 | wc.vendor_err = 0; | ||
386 | wc.byte_len = 0; | ||
387 | wc.imm_data = 0; | ||
388 | wc.qp_num = qp->ibqp.qp_num; | ||
389 | wc.src_qp = 0; | ||
390 | wc.wc_flags = 0; | ||
391 | wc.pkey_index = 0; | ||
392 | wc.slid = 0; | ||
393 | wc.sl = 0; | ||
394 | wc.dlid_path_bits = 0; | ||
395 | wc.port_num = 0; | ||
396 | |||
397 | while (qp->s_last != qp->s_head) { | ||
398 | struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); | ||
399 | |||
400 | wc.wr_id = wqe->wr.wr_id; | ||
401 | wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; | ||
402 | if (++qp->s_last >= qp->s_size) | ||
403 | qp->s_last = 0; | ||
404 | ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); | ||
405 | } | ||
406 | qp->s_cur = qp->s_tail = qp->s_head; | ||
407 | qp->s_hdrwords = 0; | ||
408 | qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; | ||
409 | |||
410 | wc.opcode = IB_WC_RECV; | ||
411 | while (qp->r_rq.tail != qp->r_rq.head) { | ||
412 | wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id; | ||
413 | if (++qp->r_rq.tail >= qp->r_rq.size) | ||
414 | qp->r_rq.tail = 0; | ||
415 | ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); | ||
416 | } | ||
417 | } | ||
418 | |||
419 | /** | ||
420 | * ipath_modify_qp - modify the attributes of a queue pair | ||
421 | * @ibqp: the queue pair whose attributes we're modifying | ||
422 | * @attr: the new attributes | ||
423 | * @attr_mask: the mask of attributes to modify | ||
424 | * | ||
425 | * Returns 0 on success, otherwise returns an errno. | ||
426 | */ | ||
427 | int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, | ||
428 | int attr_mask) | ||
429 | { | ||
430 | struct ipath_qp *qp = to_iqp(ibqp); | ||
431 | enum ib_qp_state cur_state, new_state; | ||
432 | unsigned long flags; | ||
433 | int ret; | ||
434 | |||
435 | spin_lock_irqsave(&qp->r_rq.lock, flags); | ||
436 | spin_lock(&qp->s_lock); | ||
437 | |||
438 | cur_state = attr_mask & IB_QP_CUR_STATE ? | ||
439 | attr->cur_qp_state : qp->state; | ||
440 | new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; | ||
441 | |||
442 | if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, | ||
443 | attr_mask)) | ||
444 | goto inval; | ||
445 | |||
446 | switch (new_state) { | ||
447 | case IB_QPS_RESET: | ||
448 | ipath_reset_qp(qp); | ||
449 | break; | ||
450 | |||
451 | case IB_QPS_ERR: | ||
452 | ipath_error_qp(qp); | ||
453 | break; | ||
454 | |||
455 | default: | ||
456 | break; | ||
457 | |||
458 | } | ||
459 | |||
460 | if (attr_mask & IB_QP_PKEY_INDEX) { | ||
461 | struct ipath_ibdev *dev = to_idev(ibqp->device); | ||
462 | |||
463 | if (attr->pkey_index >= ipath_layer_get_npkeys(dev->dd)) | ||
464 | goto inval; | ||
465 | qp->s_pkey_index = attr->pkey_index; | ||
466 | } | ||
467 | |||
468 | if (attr_mask & IB_QP_DEST_QPN) | ||
469 | qp->remote_qpn = attr->dest_qp_num; | ||
470 | |||
471 | if (attr_mask & IB_QP_SQ_PSN) { | ||
472 | qp->s_next_psn = attr->sq_psn; | ||
473 | qp->s_last_psn = qp->s_next_psn - 1; | ||
474 | } | ||
475 | |||
476 | if (attr_mask & IB_QP_RQ_PSN) | ||
477 | qp->r_psn = attr->rq_psn; | ||
478 | |||
479 | if (attr_mask & IB_QP_ACCESS_FLAGS) | ||
480 | qp->qp_access_flags = attr->qp_access_flags; | ||
481 | |||
482 | if (attr_mask & IB_QP_AV) { | ||
483 | if (attr->ah_attr.dlid == 0 || | ||
484 | attr->ah_attr.dlid >= IPS_MULTICAST_LID_BASE) | ||
485 | goto inval; | ||
486 | qp->remote_ah_attr = attr->ah_attr; | ||
487 | } | ||
488 | |||
489 | if (attr_mask & IB_QP_PATH_MTU) | ||
490 | qp->path_mtu = attr->path_mtu; | ||
491 | |||
492 | if (attr_mask & IB_QP_RETRY_CNT) | ||
493 | qp->s_retry = qp->s_retry_cnt = attr->retry_cnt; | ||
494 | |||
495 | if (attr_mask & IB_QP_RNR_RETRY) { | ||
496 | qp->s_rnr_retry = attr->rnr_retry; | ||
497 | if (qp->s_rnr_retry > 7) | ||
498 | qp->s_rnr_retry = 7; | ||
499 | qp->s_rnr_retry_cnt = qp->s_rnr_retry; | ||
500 | } | ||
501 | |||
502 | if (attr_mask & IB_QP_MIN_RNR_TIMER) { | ||
503 | if (attr->min_rnr_timer > 31) | ||
504 | goto inval; | ||
505 | qp->s_min_rnr_timer = attr->min_rnr_timer; | ||
506 | } | ||
507 | |||
508 | if (attr_mask & IB_QP_QKEY) | ||
509 | qp->qkey = attr->qkey; | ||
510 | |||
511 | if (attr_mask & IB_QP_PKEY_INDEX) | ||
512 | qp->s_pkey_index = attr->pkey_index; | ||
513 | |||
514 | qp->state = new_state; | ||
515 | spin_unlock(&qp->s_lock); | ||
516 | spin_unlock_irqrestore(&qp->r_rq.lock, flags); | ||
517 | |||
518 | /* | ||
519 | * If QP1 changed to the RTS state, try to move the link to INIT | ||
520 | * even if it was ACTIVE so the SM will reinitialize the SMA's | ||
521 | * state. | ||
522 | */ | ||
523 | if (qp->ibqp.qp_num == 1 && new_state == IB_QPS_RTS) { | ||
524 | struct ipath_ibdev *dev = to_idev(ibqp->device); | ||
525 | |||
526 | ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN); | ||
527 | } | ||
528 | ret = 0; | ||
529 | goto bail; | ||
530 | |||
531 | inval: | ||
532 | spin_unlock(&qp->s_lock); | ||
533 | spin_unlock_irqrestore(&qp->r_rq.lock, flags); | ||
534 | ret = -EINVAL; | ||
535 | |||
536 | bail: | ||
537 | return ret; | ||
538 | } | ||
539 | |||
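ipath_modify_qp() is only reached through the core verbs layer; a minimal sketch of how a kernel consumer would drive the error transition handled above, using the core ib_modify_qp() call (the qp variable is assumed to exist):

	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
	int err;

	/* Only IB_QP_STATE is set in the mask, so ipath_modify_qp()
	 * takes the IB_QPS_ERR branch and flushes both work queues. */
	err = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (err)
		printk(KERN_ERR "modify_qp failed: %d\n", err);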
540 | int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, | ||
541 | int attr_mask, struct ib_qp_init_attr *init_attr) | ||
542 | { | ||
543 | struct ipath_qp *qp = to_iqp(ibqp); | ||
544 | |||
545 | attr->qp_state = qp->state; | ||
546 | attr->cur_qp_state = attr->qp_state; | ||
547 | attr->path_mtu = qp->path_mtu; | ||
548 | attr->path_mig_state = 0; | ||
549 | attr->qkey = qp->qkey; | ||
550 | attr->rq_psn = qp->r_psn; | ||
551 | attr->sq_psn = qp->s_next_psn; | ||
552 | attr->dest_qp_num = qp->remote_qpn; | ||
553 | attr->qp_access_flags = qp->qp_access_flags; | ||
554 | attr->cap.max_send_wr = qp->s_size - 1; | ||
555 | attr->cap.max_recv_wr = qp->r_rq.size - 1; | ||
556 | attr->cap.max_send_sge = qp->s_max_sge; | ||
557 | attr->cap.max_recv_sge = qp->r_rq.max_sge; | ||
558 | attr->cap.max_inline_data = 0; | ||
559 | attr->ah_attr = qp->remote_ah_attr; | ||
560 | memset(&attr->alt_ah_attr, 0, sizeof(attr->alt_ah_attr)); | ||
561 | attr->pkey_index = qp->s_pkey_index; | ||
562 | attr->alt_pkey_index = 0; | ||
563 | attr->en_sqd_async_notify = 0; | ||
564 | attr->sq_draining = 0; | ||
565 | attr->max_rd_atomic = 1; | ||
566 | attr->max_dest_rd_atomic = 1; | ||
567 | attr->min_rnr_timer = qp->s_min_rnr_timer; | ||
568 | attr->port_num = 1; | ||
569 | attr->timeout = 0; | ||
570 | attr->retry_cnt = qp->s_retry_cnt; | ||
571 | attr->rnr_retry = qp->s_rnr_retry; | ||
572 | attr->alt_port_num = 0; | ||
573 | attr->alt_timeout = 0; | ||
574 | |||
575 | init_attr->event_handler = qp->ibqp.event_handler; | ||
576 | init_attr->qp_context = qp->ibqp.qp_context; | ||
577 | init_attr->send_cq = qp->ibqp.send_cq; | ||
578 | init_attr->recv_cq = qp->ibqp.recv_cq; | ||
579 | init_attr->srq = qp->ibqp.srq; | ||
580 | init_attr->cap = attr->cap; | ||
581 | init_attr->sq_sig_type = | ||
582 | (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR)) | ||
583 | ? IB_SIGNAL_REQ_WR : 0; | ||
584 | init_attr->qp_type = qp->ibqp.qp_type; | ||
585 | init_attr->port_num = 1; | ||
586 | return 0; | ||
587 | } | ||
588 | |||
589 | /** | ||
590 | * ipath_compute_aeth - compute the AETH (syndrome + MSN) | ||
591 | * @qp: the queue pair to compute the AETH for | ||
592 | * | ||
593 | * Returns the AETH. | ||
594 | * | ||
595 | * The QP s_lock should be held. | ||
596 | */ | ||
597 | __be32 ipath_compute_aeth(struct ipath_qp *qp) | ||
598 | { | ||
599 | u32 aeth = atomic_read(&qp->msn) & IPS_MSN_MASK; | ||
600 | |||
601 | if (qp->s_nak_state) { | ||
602 | aeth |= qp->s_nak_state << IPS_AETH_CREDIT_SHIFT; | ||
603 | } else if (qp->ibqp.srq) { | ||
604 | /* | ||
605 | * Shared receive queues don't generate credits. | ||
606 | * Set the credit field to the invalid value. | ||
607 | */ | ||
608 | aeth |= IPS_AETH_CREDIT_INVAL << IPS_AETH_CREDIT_SHIFT; | ||
609 | } else { | ||
610 | u32 min, max, x; | ||
611 | u32 credits; | ||
612 | |||
613 | /* | ||
614 | * Compute the number of credits available (RWQEs). | ||
615 | * XXX Not holding the r_rq.lock here so there is a small | ||
616 | * chance that the pair of reads are not atomic. | ||
617 | */ | ||
618 | credits = qp->r_rq.head - qp->r_rq.tail; | ||
619 | if ((int)credits < 0) | ||
620 | credits += qp->r_rq.size; | ||
621 | /* | ||
622 | * Binary search the credit table to find the code to | ||
623 | * use. | ||
624 | */ | ||
625 | min = 0; | ||
626 | max = 31; | ||
627 | for (;;) { | ||
628 | x = (min + max) / 2; | ||
629 | if (credit_table[x] == credits) | ||
630 | break; | ||
631 | if (credit_table[x] > credits) | ||
632 | max = x; | ||
633 | else if (min == x) | ||
634 | break; | ||
635 | else | ||
636 | min = x; | ||
637 | } | ||
638 | aeth |= x << IPS_AETH_CREDIT_SHIFT; | ||
639 | } | ||
640 | return cpu_to_be32(aeth); | ||
641 | } | ||
642 | |||
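The binary search above maps the available RWQE count onto the largest credit-table index whose value does not exceed it. A standalone sketch of the same search with an illustrative table (the real credit_table contents are not shown in this hunk, so the values below are assumptions):

	/* Illustrative, monotonically non-decreasing table. */
	static const u32 demo_credit_table[32] = {
		0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192,
		256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144,
		8192, 12288, 16384, 24576, 32768, 49152
	};

	static u32 credits_to_code(u32 credits)
	{
		u32 min = 0, max = 31, x;

		for (;;) {
			x = (min + max) / 2;
			if (demo_credit_table[x] == credits)
				break;
			if (demo_credit_table[x] > credits)
				max = x;
			else if (min == x)
				break;		/* largest entry <= credits */
			else
				min = x;
		}
		return x;
	}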
643 | /** | ||
644 | * ipath_create_qp - create a queue pair for a device | ||
645 | * @ibpd: the protection domain whose device we create the queue pair for | ||
646 | * @init_attr: the attributes of the queue pair | ||
647 | * @udata: unused by InfiniPath | ||
648 | * | ||
649 | * Returns the queue pair on success, otherwise returns an errno. | ||
650 | * | ||
651 | * Called by the ib_create_qp() core verbs function. | ||
652 | */ | ||
653 | struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, | ||
654 | struct ib_qp_init_attr *init_attr, | ||
655 | struct ib_udata *udata) | ||
656 | { | ||
657 | struct ipath_qp *qp; | ||
658 | int err; | ||
659 | struct ipath_swqe *swq = NULL; | ||
660 | struct ipath_ibdev *dev; | ||
661 | size_t sz; | ||
662 | struct ib_qp *ret; | ||
663 | |||
664 | if (init_attr->cap.max_send_sge > 255 || | ||
665 | init_attr->cap.max_recv_sge > 255) { | ||
666 | ret = ERR_PTR(-ENOMEM); | ||
667 | goto bail; | ||
668 | } | ||
669 | |||
670 | switch (init_attr->qp_type) { | ||
671 | case IB_QPT_UC: | ||
672 | case IB_QPT_RC: | ||
673 | sz = sizeof(struct ipath_sge) * | ||
674 | init_attr->cap.max_send_sge + | ||
675 | sizeof(struct ipath_swqe); | ||
676 | swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); | ||
677 | if (swq == NULL) { | ||
678 | ret = ERR_PTR(-ENOMEM); | ||
679 | goto bail; | ||
680 | } | ||
681 | /* FALLTHROUGH */ | ||
682 | case IB_QPT_UD: | ||
683 | case IB_QPT_SMI: | ||
684 | case IB_QPT_GSI: | ||
685 | qp = kmalloc(sizeof(*qp), GFP_KERNEL); | ||
686 | if (!qp) { | ||
687 | ret = ERR_PTR(-ENOMEM); | ||
688 | goto bail; | ||
689 | } | ||
690 | qp->r_rq.size = init_attr->cap.max_recv_wr + 1; | ||
691 | sz = sizeof(struct ipath_sge) * | ||
692 | init_attr->cap.max_recv_sge + | ||
693 | sizeof(struct ipath_rwqe); | ||
694 | qp->r_rq.wq = vmalloc(qp->r_rq.size * sz); | ||
695 | if (!qp->r_rq.wq) { | ||
696 | kfree(qp); | ||
697 | ret = ERR_PTR(-ENOMEM); | ||
698 | goto bail; | ||
699 | } | ||
700 | |||
701 | /* | ||
702 | * ib_create_qp() will initialize qp->ibqp | ||
703 | * except for qp->ibqp.qp_num. | ||
704 | */ | ||
705 | spin_lock_init(&qp->s_lock); | ||
706 | spin_lock_init(&qp->r_rq.lock); | ||
707 | atomic_set(&qp->refcount, 0); | ||
708 | init_waitqueue_head(&qp->wait); | ||
709 | tasklet_init(&qp->s_task, | ||
710 | init_attr->qp_type == IB_QPT_RC ? | ||
711 | ipath_do_rc_send : ipath_do_uc_send, | ||
712 | (unsigned long)qp); | ||
713 | qp->piowait.next = LIST_POISON1; | ||
714 | qp->piowait.prev = LIST_POISON2; | ||
715 | qp->timerwait.next = LIST_POISON1; | ||
716 | qp->timerwait.prev = LIST_POISON2; | ||
717 | qp->state = IB_QPS_RESET; | ||
718 | qp->s_wq = swq; | ||
719 | qp->s_size = init_attr->cap.max_send_wr + 1; | ||
720 | qp->s_max_sge = init_attr->cap.max_send_sge; | ||
721 | qp->r_rq.max_sge = init_attr->cap.max_recv_sge; | ||
722 | qp->s_flags = init_attr->sq_sig_type == IB_SIGNAL_REQ_WR ? | ||
723 | 1 << IPATH_S_SIGNAL_REQ_WR : 0; | ||
724 | dev = to_idev(ibpd->device); | ||
725 | err = ipath_alloc_qpn(&dev->qp_table, qp, | ||
726 | init_attr->qp_type); | ||
727 | if (err) { | ||
728 | vfree(swq); | ||
729 | vfree(qp->r_rq.wq); | ||
730 | kfree(qp); | ||
731 | ret = ERR_PTR(err); | ||
732 | goto bail; | ||
733 | } | ||
734 | ipath_reset_qp(qp); | ||
735 | |||
736 | /* Tell the core driver that the kernel SMA is present. */ | ||
737 | if (qp->ibqp.qp_type == IB_QPT_SMI) | ||
738 | ipath_layer_set_verbs_flags(dev->dd, | ||
739 | IPATH_VERBS_KERNEL_SMA); | ||
740 | break; | ||
741 | |||
742 | default: | ||
743 | /* Don't support raw QPs */ | ||
744 | ret = ERR_PTR(-ENOSYS); | ||
745 | goto bail; | ||
746 | } | ||
747 | |||
748 | init_attr->cap.max_inline_data = 0; | ||
749 | |||
750 | ret = &qp->ibqp; | ||
751 | |||
752 | bail: | ||
753 | return ret; | ||
754 | } | ||
755 | |||
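For reference, ipath_create_qp() is invoked via the ib_create_qp() core verbs call; a minimal consumer sketch showing the capabilities it validates (pd, send_cq, and recv_cq are assumed to exist, and the numbers are illustrative):

	struct ib_qp_init_attr init_attr = {
		.send_cq = send_cq,
		.recv_cq = recv_cq,
		.cap = {
			.max_send_wr  = 64,
			.max_recv_wr  = 64,
			.max_send_sge = 4,	/* ipath rejects > 255 */
			.max_recv_sge = 4,
		},
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.qp_type     = IB_QPT_RC,
	};
	struct ib_qp *qp = ib_create_qp(pd, &init_attr);

	if (IS_ERR(qp))
		return PTR_ERR(qp);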
756 | /** | ||
757 | * ipath_destroy_qp - destroy a queue pair | ||
758 | * @ibqp: the queue pair to destroy | ||
759 | * | ||
760 | * Returns 0 on success. | ||
761 | * | ||
762 | * Note that this can be called while the QP is actively sending or | ||
763 | * receiving! | ||
764 | */ | ||
765 | int ipath_destroy_qp(struct ib_qp *ibqp) | ||
766 | { | ||
767 | struct ipath_qp *qp = to_iqp(ibqp); | ||
768 | struct ipath_ibdev *dev = to_idev(ibqp->device); | ||
769 | unsigned long flags; | ||
770 | |||
771 | /* Tell the core driver that the kernel SMA is gone. */ | ||
772 | if (qp->ibqp.qp_type == IB_QPT_SMI) | ||
773 | ipath_layer_set_verbs_flags(dev->dd, 0); | ||
774 | |||
775 | spin_lock_irqsave(&qp->r_rq.lock, flags); | ||
776 | spin_lock(&qp->s_lock); | ||
777 | qp->state = IB_QPS_ERR; | ||
778 | spin_unlock(&qp->s_lock); | ||
779 | spin_unlock_irqrestore(&qp->r_rq.lock, flags); | ||
780 | |||
781 | /* Stop the sending tasklet. */ | ||
782 | tasklet_kill(&qp->s_task); | ||
783 | |||
784 | /* Make sure the QP isn't on the timeout list. */ | ||
785 | spin_lock_irqsave(&dev->pending_lock, flags); | ||
786 | if (qp->timerwait.next != LIST_POISON1) | ||
787 | list_del(&qp->timerwait); | ||
788 | if (qp->piowait.next != LIST_POISON1) | ||
789 | list_del(&qp->piowait); | ||
790 | spin_unlock_irqrestore(&dev->pending_lock, flags); | ||
791 | |||
792 | /* | ||
793 | * Make sure that the QP is not in the QPN table so receive | ||
794 | * interrupts will discard packets for this QP. XXX Also remove QP | ||
795 | * from multicast table. | ||
796 | */ | ||
797 | if (atomic_read(&qp->refcount) != 0) | ||
798 | ipath_free_qp(&dev->qp_table, qp); | ||
799 | |||
800 | vfree(qp->s_wq); | ||
801 | vfree(qp->r_rq.wq); | ||
802 | kfree(qp); | ||
803 | return 0; | ||
804 | } | ||
805 | |||
806 | /** | ||
807 | * ipath_init_qp_table - initialize the QP table for a device | ||
808 | * @idev: the device whose QP table we're initializing | ||
809 | * @size: the size of the QP table | ||
810 | * | ||
811 | * Returns 0 on success, otherwise returns an errno. | ||
812 | */ | ||
813 | int ipath_init_qp_table(struct ipath_ibdev *idev, int size) | ||
814 | { | ||
815 | int i; | ||
816 | int ret; | ||
817 | |||
818 | idev->qp_table.last = 1; /* QPN 0 and 1 are special. */ | ||
819 | idev->qp_table.max = size; | ||
820 | idev->qp_table.nmaps = 1; | ||
821 | idev->qp_table.table = kzalloc(size * sizeof(*idev->qp_table.table), | ||
822 | GFP_KERNEL); | ||
823 | if (idev->qp_table.table == NULL) { | ||
824 | ret = -ENOMEM; | ||
825 | goto bail; | ||
826 | } | ||
827 | |||
828 | for (i = 0; i < ARRAY_SIZE(idev->qp_table.map); i++) { | ||
829 | atomic_set(&idev->qp_table.map[i].n_free, BITS_PER_PAGE); | ||
830 | idev->qp_table.map[i].page = NULL; | ||
831 | } | ||
832 | |||
833 | ret = 0; | ||
834 | |||
835 | bail: | ||
836 | return ret; | ||
837 | } | ||
838 | |||
839 | /** | ||
840 | * ipath_sqerror_qp - put a QP's send queue into an error state | ||
841 | * @qp: the QP whose send queue will be put into an error state | ||
842 | * @wc: the WC responsible for putting the QP in this state | ||
843 | * | ||
844 | * Flushes the send work queue. | ||
845 | * The QP s_lock should be held. | ||
846 | */ | ||
847 | |||
848 | void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) | ||
849 | { | ||
850 | struct ipath_ibdev *dev = to_idev(qp->ibqp.device); | ||
851 | struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); | ||
852 | |||
853 | _VERBS_INFO("Send queue error on QP%d/%d: err: %d\n", | ||
854 | qp->ibqp.qp_num, qp->remote_qpn, wc->status); | ||
855 | |||
856 | spin_lock(&dev->pending_lock); | ||
857 | /* XXX What if it's already removed by the timeout code? */ | ||
858 | if (qp->timerwait.next != LIST_POISON1) | ||
859 | list_del(&qp->timerwait); | ||
860 | if (qp->piowait.next != LIST_POISON1) | ||
861 | list_del(&qp->piowait); | ||
862 | spin_unlock(&dev->pending_lock); | ||
863 | |||
864 | ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1); | ||
865 | if (++qp->s_last >= qp->s_size) | ||
866 | qp->s_last = 0; | ||
867 | |||
868 | wc->status = IB_WC_WR_FLUSH_ERR; | ||
869 | |||
870 | while (qp->s_last != qp->s_head) { | ||
871 | wc->wr_id = wqe->wr.wr_id; | ||
872 | wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; | ||
873 | ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1); | ||
874 | if (++qp->s_last >= qp->s_size) | ||
875 | qp->s_last = 0; | ||
876 | wqe = get_swqe_ptr(qp, qp->s_last); | ||
877 | } | ||
878 | qp->s_cur = qp->s_tail = qp->s_head; | ||
879 | qp->state = IB_QPS_SQE; | ||
880 | } | ||
881 | |||
882 | /** | ||
883 | * ipath_get_credit - process the credit field of an incoming AETH | ||
884 | * @qp: the QP the AETH was received for | ||
885 | * @aeth: the Acknowledge Extended Transport Header | ||
886 | * | ||
887 | * The QP s_lock should be held. | ||
888 | */ | ||
889 | void ipath_get_credit(struct ipath_qp *qp, u32 aeth) | ||
890 | { | ||
891 | u32 credit = (aeth >> IPS_AETH_CREDIT_SHIFT) & IPS_AETH_CREDIT_MASK; | ||
892 | |||
893 | /* | ||
894 | * If the credit is invalid, we can send | ||
895 | * as many packets as we like. Otherwise, we have to | ||
896 | * honor the credit field. | ||
897 | */ | ||
898 | if (credit == IPS_AETH_CREDIT_INVAL) { | ||
899 | qp->s_lsn = (u32) -1; | ||
900 | } else if (qp->s_lsn != (u32) -1) { | ||
901 | /* Compute new LSN (i.e., MSN + credit) */ | ||
902 | credit = (aeth + credit_table[credit]) & IPS_MSN_MASK; | ||
903 | if (ipath_cmp24(credit, qp->s_lsn) > 0) | ||
904 | qp->s_lsn = credit; | ||
905 | } | ||
906 | |||
907 | /* Restart sending if it was blocked due to lack of credits. */ | ||
908 | if (qp->s_cur != qp->s_head && | ||
909 | (qp->s_lsn == (u32) -1 || | ||
910 | ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn, | ||
911 | qp->s_lsn + 1) <= 0)) | ||
912 | tasklet_hi_schedule(&qp->s_task); | ||
913 | } | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c new file mode 100644 index 000000000000..a4055ca00614 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_rc.c | |||
@@ -0,0 +1,1857 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include "ipath_verbs.h" | ||
34 | #include "ips_common.h" | ||
35 | |||
36 | /* cut down ridiculously long IB macro names */ | ||
37 | #define OP(x) IB_OPCODE_RC_##x | ||
38 | |||
39 | /** | ||
40 | * ipath_init_restart - initialize the qp->s_sge after a restart | ||
41 | * @qp: the QP whose SGE we're restarting | ||
42 | * @wqe: the work queue to initialize the QP's SGE from | ||
43 | * | ||
44 | * The QP s_lock should be held. | ||
45 | */ | ||
46 | static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) | ||
47 | { | ||
48 | struct ipath_ibdev *dev; | ||
49 | u32 len; | ||
50 | |||
51 | len = ((qp->s_psn - wqe->psn) & IPS_PSN_MASK) * | ||
52 | ib_mtu_enum_to_int(qp->path_mtu); | ||
53 | qp->s_sge.sge = wqe->sg_list[0]; | ||
54 | qp->s_sge.sg_list = wqe->sg_list + 1; | ||
55 | qp->s_sge.num_sge = wqe->wr.num_sge; | ||
56 | ipath_skip_sge(&qp->s_sge, len); | ||
57 | qp->s_len = wqe->length - len; | ||
58 | dev = to_idev(qp->ibqp.device); | ||
59 | spin_lock(&dev->pending_lock); | ||
60 | if (qp->timerwait.next == LIST_POISON1) | ||
61 | list_add_tail(&qp->timerwait, | ||
62 | &dev->pending[dev->pending_index]); | ||
63 | spin_unlock(&dev->pending_lock); | ||
64 | } | ||
65 | |||
66 | /** | ||
67 | * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read) | ||
68 | * @qp: a pointer to the QP | ||
69 | * @ohdr: a pointer to the IB header being constructed | ||
70 | * @pmtu: the path MTU | ||
71 | * | ||
72 | * Return bth0 if constructed; otherwise, return 0. | ||
73 | * Note the QP s_lock must be held. | ||
74 | */ | ||
75 | static inline u32 ipath_make_rc_ack(struct ipath_qp *qp, | ||
76 | struct ipath_other_headers *ohdr, | ||
77 | u32 pmtu) | ||
78 | { | ||
79 | struct ipath_sge_state *ss; | ||
80 | u32 hwords; | ||
81 | u32 len; | ||
82 | u32 bth0; | ||
83 | |||
84 | /* header size in 32-bit words LRH+BTH = (8+12)/4. */ | ||
85 | hwords = 5; | ||
86 | |||
87 | /* | ||
88 | * Send a response. Note that we are on the responder's | ||
89 | * side of the QP context. | ||
90 | */ | ||
91 | switch (qp->s_ack_state) { | ||
92 | case OP(RDMA_READ_REQUEST): | ||
93 | ss = &qp->s_rdma_sge; | ||
94 | len = qp->s_rdma_len; | ||
95 | if (len > pmtu) { | ||
96 | len = pmtu; | ||
97 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); | ||
98 | } | ||
99 | else | ||
100 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); | ||
101 | qp->s_rdma_len -= len; | ||
102 | bth0 = qp->s_ack_state << 24; | ||
103 | ohdr->u.aeth = ipath_compute_aeth(qp); | ||
104 | hwords++; | ||
105 | break; | ||
106 | |||
107 | case OP(RDMA_READ_RESPONSE_FIRST): | ||
108 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); | ||
109 | /* FALLTHROUGH */ | ||
110 | case OP(RDMA_READ_RESPONSE_MIDDLE): | ||
111 | ss = &qp->s_rdma_sge; | ||
112 | len = qp->s_rdma_len; | ||
113 | if (len > pmtu) | ||
114 | len = pmtu; | ||
115 | else { | ||
116 | ohdr->u.aeth = ipath_compute_aeth(qp); | ||
117 | hwords++; | ||
118 | qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); | ||
119 | } | ||
120 | qp->s_rdma_len -= len; | ||
121 | bth0 = qp->s_ack_state << 24; | ||
122 | break; | ||
123 | |||
124 | case OP(RDMA_READ_RESPONSE_LAST): | ||
125 | case OP(RDMA_READ_RESPONSE_ONLY): | ||
126 | /* | ||
127 | * We have to prevent new requests from changing | ||
128 | * the r_sge state while a ipath_verbs_send() | ||
129 | * is in progress. | ||
130 | * Changing r_state allows the receiver | ||
131 | * to continue processing new packets. | ||
132 | * We do it here now instead of above so | ||
133 | * that we are sure the packet was sent before | ||
134 | * changing the state. | ||
135 | */ | ||
136 | qp->r_state = OP(RDMA_READ_RESPONSE_LAST); | ||
137 | qp->s_ack_state = OP(ACKNOWLEDGE); | ||
138 | return 0; | ||
139 | |||
140 | case OP(COMPARE_SWAP): | ||
141 | case OP(FETCH_ADD): | ||
142 | ss = NULL; | ||
143 | len = 0; | ||
144 | qp->r_state = OP(SEND_LAST); | ||
145 | qp->s_ack_state = OP(ACKNOWLEDGE); | ||
146 | bth0 = IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24; | ||
147 | ohdr->u.at.aeth = ipath_compute_aeth(qp); | ||
148 | ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic); | ||
149 | hwords += sizeof(ohdr->u.at) / 4; | ||
150 | break; | ||
151 | |||
152 | default: | ||
153 | /* Send a regular ACK. */ | ||
154 | ss = NULL; | ||
155 | len = 0; | ||
156 | qp->s_ack_state = OP(ACKNOWLEDGE); | ||
157 | bth0 = qp->s_ack_state << 24; | ||
158 | ohdr->u.aeth = ipath_compute_aeth(qp); | ||
159 | hwords++; | ||
160 | } | ||
161 | qp->s_hdrwords = hwords; | ||
162 | qp->s_cur_sge = ss; | ||
163 | qp->s_cur_size = len; | ||
164 | |||
165 | return bth0; | ||
166 | } | ||
167 | |||
168 | /** | ||
169 | * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) | ||
170 | * @qp: a pointer to the QP | ||
171 | * @ohdr: a pointer to the IB header being constructed | ||
172 | * @pmtu: the path MTU | ||
173 | * @bth0p: pointer to the BTH opcode word | ||
174 | * @bth2p: pointer to the BTH PSN word | ||
175 | * | ||
176 | * Return 1 if constructed; otherwise, return 0. | ||
177 | * Note the QP s_lock must be held. | ||
178 | */ | ||
179 | static inline int ipath_make_rc_req(struct ipath_qp *qp, | ||
180 | struct ipath_other_headers *ohdr, | ||
181 | u32 pmtu, u32 *bth0p, u32 *bth2p) | ||
182 | { | ||
183 | struct ipath_ibdev *dev = to_idev(qp->ibqp.device); | ||
184 | struct ipath_sge_state *ss; | ||
185 | struct ipath_swqe *wqe; | ||
186 | u32 hwords; | ||
187 | u32 len; | ||
188 | u32 bth0; | ||
189 | u32 bth2; | ||
190 | char newreq; | ||
191 | |||
192 | if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || | ||
193 | qp->s_rnr_timeout) | ||
194 | goto done; | ||
195 | |||
196 | /* header size in 32-bit words LRH+BTH = (8+12)/4. */ | ||
197 | hwords = 5; | ||
198 | bth0 = 0; | ||
199 | |||
200 | /* Send a request. */ | ||
201 | wqe = get_swqe_ptr(qp, qp->s_cur); | ||
202 | switch (qp->s_state) { | ||
203 | default: | ||
204 | /* | ||
205 | * Resend an old request or start a new one. | ||
206 | * | ||
207 | * We keep track of the current SWQE so that | ||
208 | * we don't reset the "furthest progress" state | ||
209 | * if we need to back up. | ||
210 | */ | ||
211 | newreq = 0; | ||
212 | if (qp->s_cur == qp->s_tail) { | ||
213 | /* Check if send work queue is empty. */ | ||
214 | if (qp->s_tail == qp->s_head) | ||
215 | goto done; | ||
216 | qp->s_psn = wqe->psn = qp->s_next_psn; | ||
217 | newreq = 1; | ||
218 | } | ||
219 | /* | ||
220 | * Note that we have to be careful not to modify the | ||
221 | * original work request since we may need to resend | ||
222 | * it. | ||
223 | */ | ||
224 | qp->s_sge.sge = wqe->sg_list[0]; | ||
225 | qp->s_sge.sg_list = wqe->sg_list + 1; | ||
226 | qp->s_sge.num_sge = wqe->wr.num_sge; | ||
227 | qp->s_len = len = wqe->length; | ||
228 | ss = &qp->s_sge; | ||
229 | bth2 = 0; | ||
230 | switch (wqe->wr.opcode) { | ||
231 | case IB_WR_SEND: | ||
232 | case IB_WR_SEND_WITH_IMM: | ||
233 | /* If no credit, return. */ | ||
234 | if (qp->s_lsn != (u32) -1 && | ||
235 | ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) | ||
236 | goto done; | ||
237 | wqe->lpsn = wqe->psn; | ||
238 | if (len > pmtu) { | ||
239 | wqe->lpsn += (len - 1) / pmtu; | ||
240 | qp->s_state = OP(SEND_FIRST); | ||
241 | len = pmtu; | ||
242 | break; | ||
243 | } | ||
244 | if (wqe->wr.opcode == IB_WR_SEND) | ||
245 | qp->s_state = OP(SEND_ONLY); | ||
246 | else { | ||
247 | qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE); | ||
248 | /* Immediate data comes after the BTH */ | ||
249 | ohdr->u.imm_data = wqe->wr.imm_data; | ||
250 | hwords += 1; | ||
251 | } | ||
252 | if (wqe->wr.send_flags & IB_SEND_SOLICITED) | ||
253 | bth0 |= 1 << 23; | ||
254 | bth2 = 1 << 31; /* Request ACK. */ | ||
255 | if (++qp->s_cur == qp->s_size) | ||
256 | qp->s_cur = 0; | ||
257 | break; | ||
258 | |||
259 | case IB_WR_RDMA_WRITE: | ||
260 | if (newreq) | ||
261 | qp->s_lsn++; | ||
262 | /* FALLTHROUGH */ | ||
263 | case IB_WR_RDMA_WRITE_WITH_IMM: | ||
264 | /* If no credit, return. */ | ||
265 | if (qp->s_lsn != (u32) -1 && | ||
266 | ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) | ||
267 | goto done; | ||
268 | ohdr->u.rc.reth.vaddr = | ||
269 | cpu_to_be64(wqe->wr.wr.rdma.remote_addr); | ||
270 | ohdr->u.rc.reth.rkey = | ||
271 | cpu_to_be32(wqe->wr.wr.rdma.rkey); | ||
272 | ohdr->u.rc.reth.length = cpu_to_be32(len); | ||
273 | hwords += sizeof(struct ib_reth) / 4; | ||
274 | wqe->lpsn = wqe->psn; | ||
275 | if (len > pmtu) { | ||
276 | wqe->lpsn += (len - 1) / pmtu; | ||
277 | qp->s_state = OP(RDMA_WRITE_FIRST); | ||
278 | len = pmtu; | ||
279 | break; | ||
280 | } | ||
281 | if (wqe->wr.opcode == IB_WR_RDMA_WRITE) | ||
282 | qp->s_state = OP(RDMA_WRITE_ONLY); | ||
283 | else { | ||
284 | qp->s_state = | ||
285 | OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); | ||
286 | /* Immediate data comes | ||
287 | * after the RETH */ | ||
288 | ohdr->u.rc.imm_data = wqe->wr.imm_data; | ||
289 | hwords += 1; | ||
290 | if (wqe->wr.send_flags & IB_SEND_SOLICITED) | ||
291 | bth0 |= 1 << 23; | ||
292 | } | ||
293 | bth2 = 1 << 31; /* Request ACK. */ | ||
294 | if (++qp->s_cur == qp->s_size) | ||
295 | qp->s_cur = 0; | ||
296 | break; | ||
297 | |||
298 | case IB_WR_RDMA_READ: | ||
299 | ohdr->u.rc.reth.vaddr = | ||
300 | cpu_to_be64(wqe->wr.wr.rdma.remote_addr); | ||
301 | ohdr->u.rc.reth.rkey = | ||
302 | cpu_to_be32(wqe->wr.wr.rdma.rkey); | ||
303 | ohdr->u.rc.reth.length = cpu_to_be32(len); | ||
304 | qp->s_state = OP(RDMA_READ_REQUEST); | ||
305 | hwords += sizeof(ohdr->u.rc.reth) / 4; | ||
306 | if (newreq) { | ||
307 | qp->s_lsn++; | ||
308 | /* | ||
309 | * Adjust s_next_psn to count the | ||
310 | * expected number of responses. | ||
311 | */ | ||
312 | if (len > pmtu) | ||
313 | qp->s_next_psn += (len - 1) / pmtu; | ||
314 | wqe->lpsn = qp->s_next_psn++; | ||
315 | } | ||
316 | ss = NULL; | ||
317 | len = 0; | ||
318 | if (++qp->s_cur == qp->s_size) | ||
319 | qp->s_cur = 0; | ||
320 | break; | ||
321 | |||
322 | case IB_WR_ATOMIC_CMP_AND_SWP: | ||
323 | case IB_WR_ATOMIC_FETCH_AND_ADD: | ||
324 | if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) | ||
325 | qp->s_state = OP(COMPARE_SWAP); | ||
326 | else | ||
327 | qp->s_state = OP(FETCH_ADD); | ||
328 | ohdr->u.atomic_eth.vaddr = cpu_to_be64( | ||
329 | wqe->wr.wr.atomic.remote_addr); | ||
330 | ohdr->u.atomic_eth.rkey = cpu_to_be32( | ||
331 | wqe->wr.wr.atomic.rkey); | ||
332 | ohdr->u.atomic_eth.swap_data = cpu_to_be64( | ||
333 | wqe->wr.wr.atomic.swap); | ||
334 | ohdr->u.atomic_eth.compare_data = cpu_to_be64( | ||
335 | wqe->wr.wr.atomic.compare_add); | ||
336 | hwords += sizeof(struct ib_atomic_eth) / 4; | ||
337 | if (newreq) { | ||
338 | qp->s_lsn++; | ||
339 | wqe->lpsn = wqe->psn; | ||
340 | } | ||
341 | if (++qp->s_cur == qp->s_size) | ||
342 | qp->s_cur = 0; | ||
343 | ss = NULL; | ||
344 | len = 0; | ||
345 | break; | ||
346 | |||
347 | default: | ||
348 | goto done; | ||
349 | } | ||
350 | if (newreq) { | ||
351 | qp->s_tail++; | ||
352 | if (qp->s_tail >= qp->s_size) | ||
353 | qp->s_tail = 0; | ||
354 | } | ||
355 | bth2 |= qp->s_psn++ & IPS_PSN_MASK; | ||
356 | if ((int)(qp->s_psn - qp->s_next_psn) > 0) | ||
357 | qp->s_next_psn = qp->s_psn; | ||
358 | spin_lock(&dev->pending_lock); | ||
359 | if (qp->timerwait.next == LIST_POISON1) | ||
360 | list_add_tail(&qp->timerwait, | ||
361 | &dev->pending[dev->pending_index]); | ||
362 | spin_unlock(&dev->pending_lock); | ||
363 | break; | ||
364 | |||
365 | case OP(RDMA_READ_RESPONSE_FIRST): | ||
366 | /* | ||
367 | * This case can only happen if a send is restarted. See | ||
368 | * ipath_restart_rc(). | ||
369 | */ | ||
370 | ipath_init_restart(qp, wqe); | ||
371 | /* FALLTHROUGH */ | ||
372 | case OP(SEND_FIRST): | ||
373 | qp->s_state = OP(SEND_MIDDLE); | ||
374 | /* FALLTHROUGH */ | ||
375 | case OP(SEND_MIDDLE): | ||
376 | bth2 = qp->s_psn++ & IPS_PSN_MASK; | ||
377 | if ((int)(qp->s_psn - qp->s_next_psn) > 0) | ||
378 | qp->s_next_psn = qp->s_psn; | ||
379 | ss = &qp->s_sge; | ||
380 | len = qp->s_len; | ||
381 | if (len > pmtu) { | ||
382 | /* | ||
383 | * Request an ACK every 1/2 MB to avoid retransmit | ||
384 | * timeouts. | ||
385 | */ | ||
386 | if (((wqe->length - len) % (512 * 1024)) == 0) | ||
387 | bth2 |= 1 << 31; | ||
388 | len = pmtu; | ||
389 | break; | ||
390 | } | ||
391 | if (wqe->wr.opcode == IB_WR_SEND) | ||
392 | qp->s_state = OP(SEND_LAST); | ||
393 | else { | ||
394 | qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); | ||
395 | /* Immediate data comes after the BTH */ | ||
396 | ohdr->u.imm_data = wqe->wr.imm_data; | ||
397 | hwords += 1; | ||
398 | } | ||
399 | if (wqe->wr.send_flags & IB_SEND_SOLICITED) | ||
400 | bth0 |= 1 << 23; | ||
401 | bth2 |= 1 << 31; /* Request ACK. */ | ||
402 | qp->s_cur++; | ||
403 | if (qp->s_cur >= qp->s_size) | ||
404 | qp->s_cur = 0; | ||
405 | break; | ||
406 | |||
407 | case OP(RDMA_READ_RESPONSE_LAST): | ||
408 | /* | ||
409 | * This case can only happen if an RDMA write is restarted. | ||
410 | * See ipath_restart_rc(). | ||
411 | */ | ||
412 | ipath_init_restart(qp, wqe); | ||
413 | /* FALLTHROUGH */ | ||
414 | case OP(RDMA_WRITE_FIRST): | ||
415 | qp->s_state = OP(RDMA_WRITE_MIDDLE); | ||
416 | /* FALLTHROUGH */ | ||
417 | case OP(RDMA_WRITE_MIDDLE): | ||
418 | bth2 = qp->s_psn++ & IPS_PSN_MASK; | ||
419 | if ((int)(qp->s_psn - qp->s_next_psn) > 0) | ||
420 | qp->s_next_psn = qp->s_psn; | ||
421 | ss = &qp->s_sge; | ||
422 | len = qp->s_len; | ||
423 | if (len > pmtu) { | ||
424 | /* | ||
425 | * Request an ACK every 1/2 MB to avoid retransmit | ||
426 | * timeouts. | ||
427 | */ | ||
428 | if (((wqe->length - len) % (512 * 1024)) == 0) | ||
429 | bth2 |= 1 << 31; | ||
430 | len = pmtu; | ||
431 | break; | ||
432 | } | ||
433 | if (wqe->wr.opcode == IB_WR_RDMA_WRITE) | ||
434 | qp->s_state = OP(RDMA_WRITE_LAST); | ||
435 | else { | ||
436 | qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); | ||
437 | /* Immediate data comes after the BTH */ | ||
438 | ohdr->u.imm_data = wqe->wr.imm_data; | ||
439 | hwords += 1; | ||
440 | if (wqe->wr.send_flags & IB_SEND_SOLICITED) | ||
441 | bth0 |= 1 << 23; | ||
442 | } | ||
443 | bth2 |= 1 << 31; /* Request ACK. */ | ||
444 | qp->s_cur++; | ||
445 | if (qp->s_cur >= qp->s_size) | ||
446 | qp->s_cur = 0; | ||
447 | break; | ||
448 | |||
449 | case OP(RDMA_READ_RESPONSE_MIDDLE): | ||
450 | /* | ||
451 | * This case can only happen if an RDMA read is restarted. | ||
452 | * See ipath_restart_rc(). | ||
453 | */ | ||
454 | ipath_init_restart(qp, wqe); | ||
455 | len = ((qp->s_psn - wqe->psn) & IPS_PSN_MASK) * pmtu; | ||
456 | ohdr->u.rc.reth.vaddr = | ||
457 | cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len); | ||
458 | ohdr->u.rc.reth.rkey = | ||
459 | cpu_to_be32(wqe->wr.wr.rdma.rkey); | ||
460 | ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); | ||
461 | qp->s_state = OP(RDMA_READ_REQUEST); | ||
462 | hwords += sizeof(ohdr->u.rc.reth) / 4; | ||
463 | bth2 = qp->s_psn++ & IPS_PSN_MASK; | ||
464 | if ((int)(qp->s_psn - qp->s_next_psn) > 0) | ||
465 | qp->s_next_psn = qp->s_psn; | ||
466 | ss = NULL; | ||
467 | len = 0; | ||
468 | qp->s_cur++; | ||
469 | if (qp->s_cur == qp->s_size) | ||
470 | qp->s_cur = 0; | ||
471 | break; | ||
472 | |||
473 | case OP(RDMA_READ_REQUEST): | ||
474 | case OP(COMPARE_SWAP): | ||
475 | case OP(FETCH_ADD): | ||
476 | /* | ||
477 | * We shouldn't start anything new until this request is | ||
478 | * finished. The ACK will handle rescheduling us. XXX The | ||
479 | * number of outstanding ones is negotiated at connection | ||
480 | * setup time (see pg. 258,289)? XXX Also, if we support | ||
481 | * multiple outstanding requests, we need to check the WQE | ||
482 | * IB_SEND_FENCE flag and not send a new request if a RDMA | ||
483 | * read or atomic is pending. | ||
484 | */ | ||
485 | goto done; | ||
486 | } | ||
487 | qp->s_len -= len; | ||
488 | qp->s_hdrwords = hwords; | ||
489 | qp->s_cur_sge = ss; | ||
490 | qp->s_cur_size = len; | ||
491 | *bth0p = bth0 | (qp->s_state << 24); | ||
492 | *bth2p = bth2; | ||
493 | return 1; | ||
494 | |||
495 | done: | ||
496 | return 0; | ||
497 | } | ||
498 | |||
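A worked example of the PSN accounting in ipath_make_rc_req(): a multi-packet SEND or RDMA write reserves one PSN per MTU-sized packet, so the last PSN of the request is psn + (len - 1) / pmtu. The numbers below are illustrative only:

	/* 10000 bytes over a 2048-byte path MTU -> 5 packets. */
	u32 len = 10000, pmtu = 2048;
	u32 npkts    = (len - 1) / pmtu + 1;	/* 5 */
	u32 lpsn_off = (len - 1) / pmtu;	/* 4: wqe->lpsn = wqe->psn + 4 */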
499 | static inline void ipath_make_rc_grh(struct ipath_qp *qp, | ||
500 | struct ib_global_route *grh, | ||
501 | u32 nwords) | ||
502 | { | ||
503 | struct ipath_ibdev *dev = to_idev(qp->ibqp.device); | ||
504 | |||
505 | /* GRH header size in 32-bit words. */ | ||
506 | qp->s_hdrwords += 10; | ||
507 | qp->s_hdr.u.l.grh.version_tclass_flow = | ||
508 | cpu_to_be32((6 << 28) | | ||
509 | (grh->traffic_class << 20) | | ||
510 | grh->flow_label); | ||
511 | qp->s_hdr.u.l.grh.paylen = | ||
512 | cpu_to_be16(((qp->s_hdrwords - 12) + nwords + | ||
513 | SIZE_OF_CRC) << 2); | ||
514 | /* next_hdr is defined by C8-7 in ch. 8.4.1 */ | ||
515 | qp->s_hdr.u.l.grh.next_hdr = 0x1B; | ||
516 | qp->s_hdr.u.l.grh.hop_limit = grh->hop_limit; | ||
517 | /* The SGID is 32-bit aligned. */ | ||
518 | qp->s_hdr.u.l.grh.sgid.global.subnet_prefix = dev->gid_prefix; | ||
519 | qp->s_hdr.u.l.grh.sgid.global.interface_id = | ||
520 | ipath_layer_get_guid(dev->dd); | ||
521 | qp->s_hdr.u.l.grh.dgid = grh->dgid; | ||
522 | } | ||
523 | |||
524 | /** | ||
525 | * ipath_do_rc_send - perform a send on an RC QP | ||
526 | * @data: contains a pointer to the QP | ||
527 | * | ||
528 | * Process entries in the send work queue until credit or queue is | ||
529 | * exhausted. Only allow one CPU to send a packet per QP (tasklet). | ||
530 | * Otherwise, after we drop the QP s_lock, two threads could send | ||
531 | * packets out of order. | ||
532 | */ | ||
533 | void ipath_do_rc_send(unsigned long data) | ||
534 | { | ||
535 | struct ipath_qp *qp = (struct ipath_qp *)data; | ||
536 | struct ipath_ibdev *dev = to_idev(qp->ibqp.device); | ||
537 | unsigned long flags; | ||
538 | u16 lrh0; | ||
539 | u32 nwords; | ||
540 | u32 extra_bytes; | ||
541 | u32 bth0; | ||
542 | u32 bth2; | ||
543 | u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); | ||
544 | struct ipath_other_headers *ohdr; | ||
545 | |||
546 | if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags)) | ||
547 | goto bail; | ||
548 | |||
549 | if (unlikely(qp->remote_ah_attr.dlid == | ||
550 | ipath_layer_get_lid(dev->dd))) { | ||
551 | struct ib_wc wc; | ||
552 | |||
553 | /* | ||
554 | * Pass in an uninitialized ib_wc to be consistent with | ||
555 | * other places where ipath_ruc_loopback() is called. | ||
556 | */ | ||
557 | ipath_ruc_loopback(qp, &wc); | ||
558 | goto clear; | ||
559 | } | ||
560 | |||
561 | ohdr = &qp->s_hdr.u.oth; | ||
562 | if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) | ||
563 | ohdr = &qp->s_hdr.u.l.oth; | ||
564 | |||
565 | again: | ||
566 | /* Check for a constructed packet to be sent. */ | ||
567 | if (qp->s_hdrwords != 0) { | ||
568 | /* | ||
569 | * If no PIO bufs are available, return. An interrupt will | ||
570 | * call ipath_ib_piobufavail() when one is available. | ||
571 | */ | ||
572 | _VERBS_INFO("h %u %p\n", qp->s_hdrwords, &qp->s_hdr); | ||
573 | _VERBS_INFO("d %u %p %u %p %u %u %u %u\n", qp->s_cur_size, | ||
574 | qp->s_cur_sge->sg_list, | ||
575 | qp->s_cur_sge->num_sge, | ||
576 | qp->s_cur_sge->sge.vaddr, | ||
577 | qp->s_cur_sge->sge.sge_length, | ||
578 | qp->s_cur_sge->sge.length, | ||
579 | qp->s_cur_sge->sge.m, | ||
580 | qp->s_cur_sge->sge.n); | ||
581 | if (ipath_verbs_send(dev->dd, qp->s_hdrwords, | ||
582 | (u32 *) &qp->s_hdr, qp->s_cur_size, | ||
583 | qp->s_cur_sge)) { | ||
584 | ipath_no_bufs_available(qp, dev); | ||
585 | goto bail; | ||
586 | } | ||
587 | dev->n_unicast_xmit++; | ||
588 | /* Record that we sent the packet and s_hdr is empty. */ | ||
589 | qp->s_hdrwords = 0; | ||
590 | } | ||
591 | |||
592 | /* | ||
593 | * The lock is needed to synchronize between setting | ||
594 | * qp->s_ack_state, resend timer, and post_send(). | ||
595 | */ | ||
596 | spin_lock_irqsave(&qp->s_lock, flags); | ||
597 | |||
598 | /* Sending responses takes priority over sending requests. */ | ||
599 | if (qp->s_ack_state != OP(ACKNOWLEDGE) && | ||
600 | (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0) | ||
601 | bth2 = qp->s_ack_psn++ & IPS_PSN_MASK; | ||
602 | else if (!ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2)) | ||
603 | goto done; | ||
604 | |||
605 | spin_unlock_irqrestore(&qp->s_lock, flags); | ||
606 | |||
607 | /* Construct the header. */ | ||
608 | extra_bytes = (4 - qp->s_cur_size) & 3; | ||
609 | nwords = (qp->s_cur_size + extra_bytes) >> 2; | ||
610 | lrh0 = IPS_LRH_BTH; | ||
611 | if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { | ||
612 | ipath_make_rc_grh(qp, &qp->remote_ah_attr.grh, nwords); | ||
613 | lrh0 = IPS_LRH_GRH; | ||
614 | } | ||
615 | lrh0 |= qp->remote_ah_attr.sl << 4; | ||
616 | qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); | ||
617 | qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); | ||
618 | qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + | ||
619 | SIZE_OF_CRC); | ||
620 | qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); | ||
621 | bth0 |= ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); | ||
622 | bth0 |= extra_bytes << 20; | ||
623 | ohdr->bth[0] = cpu_to_be32(bth0); | ||
624 | ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); | ||
625 | ohdr->bth[2] = cpu_to_be32(bth2); | ||
626 | |||
627 | /* Check for more work to do. */ | ||
628 | goto again; | ||
629 | |||
630 | done: | ||
631 | spin_unlock_irqrestore(&qp->s_lock, flags); | ||
632 | clear: | ||
633 | clear_bit(IPATH_S_BUSY, &qp->s_flags); | ||
634 | bail: | ||
635 | return; | ||
636 | } | ||
637 | |||
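ipath_do_rc_send() runs as a per-QP tasklet: it is bound in ipath_create_qp() and kicked whenever new work or credit arrives. A minimal sketch of that lifecycle using the same kernel tasklet API (call sites simplified from the code above):

	/* At QP creation (see ipath_create_qp()): */
	tasklet_init(&qp->s_task, ipath_do_rc_send, (unsigned long) qp);

	/* Whenever there is something new to send (post_send, fresh credit): */
	tasklet_hi_schedule(&qp->s_task);

	/* At QP destruction (see ipath_destroy_qp()): */
	tasklet_kill(&qp->s_task);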
638 | static void send_rc_ack(struct ipath_qp *qp) | ||
639 | { | ||
640 | struct ipath_ibdev *dev = to_idev(qp->ibqp.device); | ||
641 | u16 lrh0; | ||
642 | u32 bth0; | ||
643 | struct ipath_other_headers *ohdr; | ||
644 | |||
645 | /* Construct the header. */ | ||
646 | ohdr = &qp->s_hdr.u.oth; | ||
647 | lrh0 = IPS_LRH_BTH; | ||
648 | /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */ | ||
649 | qp->s_hdrwords = 6; | ||
650 | if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { | ||
651 | ipath_make_rc_grh(qp, &qp->remote_ah_attr.grh, 0); | ||
652 | ohdr = &qp->s_hdr.u.l.oth; | ||
653 | lrh0 = IPS_LRH_GRH; | ||
654 | } | ||
655 | bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); | ||
656 | ohdr->u.aeth = ipath_compute_aeth(qp); | ||
657 | if (qp->s_ack_state >= OP(COMPARE_SWAP)) { | ||
658 | bth0 |= IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24; | ||
659 | ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic); | ||
660 | qp->s_hdrwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4; | ||
661 | } | ||
662 | else | ||
663 | bth0 |= OP(ACKNOWLEDGE) << 24; | ||
664 | lrh0 |= qp->remote_ah_attr.sl << 4; | ||
665 | qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); | ||
666 | qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); | ||
667 | qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + SIZE_OF_CRC); | ||
668 | qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); | ||
669 | ohdr->bth[0] = cpu_to_be32(bth0); | ||
670 | ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); | ||
671 | ohdr->bth[2] = cpu_to_be32(qp->s_ack_psn & IPS_PSN_MASK); | ||
672 | |||
673 | /* | ||
674 | * If we can send the ACK, clear the ACK state. | ||
675 | */ | ||
676 | if (ipath_verbs_send(dev->dd, qp->s_hdrwords, (u32 *) &qp->s_hdr, | ||
677 | 0, NULL) == 0) { | ||
678 | qp->s_ack_state = OP(ACKNOWLEDGE); | ||
679 | dev->n_rc_qacks++; | ||
680 | dev->n_unicast_xmit++; | ||
681 | } | ||
682 | } | ||
683 | |||
684 | /** | ||
685 | * ipath_restart_rc - back up requester to resend the last un-ACKed request | ||
686 | * @qp: the QP to restart | ||
687 | * @psn: packet sequence number for the request | ||
688 | * @wc: the work completion to fill in if the retry count is exhausted | ||
689 | * | ||
690 | * The QP s_lock should be held. | ||
691 | */ | ||
692 | void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) | ||
693 | { | ||
694 | struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); | ||
695 | struct ipath_ibdev *dev; | ||
696 | u32 n; | ||
697 | |||
698 | /* | ||
699 | * If there are no requests pending, we are done. | ||
700 | */ | ||
701 | if (ipath_cmp24(psn, qp->s_next_psn) >= 0 || | ||
702 | qp->s_last == qp->s_tail) | ||
703 | goto done; | ||
704 | |||
705 | if (qp->s_retry == 0) { | ||
706 | wc->wr_id = wqe->wr.wr_id; | ||
707 | wc->status = IB_WC_RETRY_EXC_ERR; | ||
708 | wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; | ||
709 | wc->vendor_err = 0; | ||
710 | wc->byte_len = 0; | ||
711 | wc->qp_num = qp->ibqp.qp_num; | ||
712 | wc->src_qp = qp->remote_qpn; | ||
713 | wc->pkey_index = 0; | ||
714 | wc->slid = qp->remote_ah_attr.dlid; | ||
715 | wc->sl = qp->remote_ah_attr.sl; | ||
716 | wc->dlid_path_bits = 0; | ||
717 | wc->port_num = 0; | ||
718 | ipath_sqerror_qp(qp, wc); | ||
719 | goto bail; | ||
720 | } | ||
721 | qp->s_retry--; | ||
722 | |||
723 | /* | ||
724 | * Remove the QP from the timeout queue. | ||
725 | * Note: it may already have been removed by ipath_ib_timer(). | ||
726 | */ | ||
727 | dev = to_idev(qp->ibqp.device); | ||
728 | spin_lock(&dev->pending_lock); | ||
729 | if (qp->timerwait.next != LIST_POISON1) | ||
730 | list_del(&qp->timerwait); | ||
731 | spin_unlock(&dev->pending_lock); | ||
732 | |||
733 | if (wqe->wr.opcode == IB_WR_RDMA_READ) | ||
734 | dev->n_rc_resends++; | ||
735 | else | ||
736 | dev->n_rc_resends += (int)qp->s_psn - (int)psn; | ||
737 | |||
738 | /* | ||
739 | * If we are starting the request from the beginning, let the normal | ||
740 | * send code handle initialization. | ||
741 | */ | ||
742 | qp->s_cur = qp->s_last; | ||
743 | if (ipath_cmp24(psn, wqe->psn) <= 0) { | ||
744 | qp->s_state = OP(SEND_LAST); | ||
745 | qp->s_psn = wqe->psn; | ||
746 | } else { | ||
747 | n = qp->s_cur; | ||
748 | for (;;) { | ||
749 | if (++n == qp->s_size) | ||
750 | n = 0; | ||
751 | if (n == qp->s_tail) { | ||
752 | if (ipath_cmp24(psn, qp->s_next_psn) >= 0) { | ||
753 | qp->s_cur = n; | ||
754 | wqe = get_swqe_ptr(qp, n); | ||
755 | } | ||
756 | break; | ||
757 | } | ||
758 | wqe = get_swqe_ptr(qp, n); | ||
759 | if (ipath_cmp24(psn, wqe->psn) < 0) | ||
760 | break; | ||
761 | qp->s_cur = n; | ||
762 | } | ||
763 | qp->s_psn = psn; | ||
764 | |||
765 | /* | ||
766 | * Reset the state to restart in the middle of a request. | ||
767 | * Don't change the s_sge, s_cur_sge, or s_cur_size. | ||
768 | * See ipath_do_rc_send(). | ||
769 | */ | ||
770 | switch (wqe->wr.opcode) { | ||
771 | case IB_WR_SEND: | ||
772 | case IB_WR_SEND_WITH_IMM: | ||
773 | qp->s_state = OP(RDMA_READ_RESPONSE_FIRST); | ||
774 | break; | ||
775 | |||
776 | case IB_WR_RDMA_WRITE: | ||
777 | case IB_WR_RDMA_WRITE_WITH_IMM: | ||
778 | qp->s_state = OP(RDMA_READ_RESPONSE_LAST); | ||
779 | break; | ||
780 | |||
781 | case IB_WR_RDMA_READ: | ||
782 | qp->s_state = | ||
783 | OP(RDMA_READ_RESPONSE_MIDDLE); | ||
784 | break; | ||
785 | |||
786 | default: | ||
787 | /* | ||
788 | * This case shouldn't happen since it's only | ||
789 | * one PSN per request. | ||
790 | */ | ||
791 | qp->s_state = OP(SEND_LAST); | ||
792 | } | ||
793 | } | ||
794 | |||
795 | done: | ||
796 | tasklet_hi_schedule(&qp->s_task); | ||
797 | |||
798 | bail: | ||
799 | return; | ||
800 | } | ||
801 | |||
802 | /** | ||
803 | * reset_psn - reset the QP state to send starting from PSN | ||
804 | * @qp: the QP | ||
805 | * @psn: the packet sequence number to restart at | ||
806 | * | ||
807 | * This backs up the requester's send state to resend starting at the | ||
808 | * given PSN (used by do_rc_ack() when handling an RNR NAK). | ||
809 | * Called at interrupt level with the QP s_lock held. | ||
810 | */ | ||
811 | static void reset_psn(struct ipath_qp *qp, u32 psn) | ||
812 | { | ||
813 | struct ipath_swqe *wqe; | ||
814 | u32 n; | ||
815 | |||
816 | n = qp->s_cur; | ||
817 | wqe = get_swqe_ptr(qp, n); | ||
818 | for (;;) { | ||
819 | if (++n == qp->s_size) | ||
820 | n = 0; | ||
821 | if (n == qp->s_tail) { | ||
822 | if (ipath_cmp24(psn, qp->s_next_psn) >= 0) { | ||
823 | qp->s_cur = n; | ||
824 | wqe = get_swqe_ptr(qp, n); | ||
825 | } | ||
826 | break; | ||
827 | } | ||
828 | wqe = get_swqe_ptr(qp, n); | ||
829 | if (ipath_cmp24(psn, wqe->psn) < 0) | ||
830 | break; | ||
831 | qp->s_cur = n; | ||
832 | } | ||
833 | qp->s_psn = psn; | ||
834 | |||
835 | /* | ||
836 | * Set the state to restart in the middle of a | ||
837 | * request. Don't change the s_sge, s_cur_sge, or | ||
838 | * s_cur_size. See ipath_do_rc_send(). | ||
839 | */ | ||
840 | switch (wqe->wr.opcode) { | ||
841 | case IB_WR_SEND: | ||
842 | case IB_WR_SEND_WITH_IMM: | ||
843 | qp->s_state = OP(RDMA_READ_RESPONSE_FIRST); | ||
844 | break; | ||
845 | |||
846 | case IB_WR_RDMA_WRITE: | ||
847 | case IB_WR_RDMA_WRITE_WITH_IMM: | ||
848 | qp->s_state = OP(RDMA_READ_RESPONSE_LAST); | ||
849 | break; | ||
850 | |||
851 | case IB_WR_RDMA_READ: | ||
852 | qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE); | ||
853 | break; | ||
854 | |||
855 | default: | ||
856 | /* | ||
857 | * This case shouldn't happen since it's only | ||
858 | * one PSN per request. | ||
859 | */ | ||
860 | qp->s_state = OP(SEND_LAST); | ||
861 | } | ||
862 | } | ||
863 | |||
864 | /** | ||
865 | * do_rc_ack - process an incoming RC ACK | ||
866 | * @qp: the QP the ACK came in on | ||
867 | * @psn: the packet sequence number of the ACK | ||
868 | * @opcode: the opcode of the response packet carrying the ACK | ||
869 | * | ||
870 | * This is called from ipath_rc_rcv() to process an incoming RC ACK | ||
871 | * for the given QP. | ||
872 | * Called at interrupt level with the QP s_lock held. | ||
873 | * Returns 1 if OK, 0 if current operation should be aborted (NAK). | ||
874 | */ | ||
875 | static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) | ||
876 | { | ||
877 | struct ipath_ibdev *dev = to_idev(qp->ibqp.device); | ||
878 | struct ib_wc wc; | ||
879 | struct ipath_swqe *wqe; | ||
880 | int ret = 0; | ||
881 | |||
882 | /* | ||
883 | * Remove the QP from the timeout queue (or RNR timeout queue). | ||
884 | * If ipath_ib_timer() has already removed it, | ||
885 | * it's OK since we hold the QP s_lock and ipath_restart_rc() | ||
886 | * just won't find anything to restart if we ACK everything. | ||
887 | */ | ||
888 | spin_lock(&dev->pending_lock); | ||
889 | if (qp->timerwait.next != LIST_POISON1) | ||
890 | list_del(&qp->timerwait); | ||
891 | spin_unlock(&dev->pending_lock); | ||
892 | |||
893 | /* | ||
894 | * Note that NAKs implicitly ACK outstanding SEND and RDMA write | ||
895 | * requests and implicitly NAK RDMA read and atomic requests issued | ||
896 | * before the NAK'ed request. The MSN won't include the NAK'ed | ||
897 | * request but will include any previously ACK'ed requests. | ||
898 | */ | ||
899 | wqe = get_swqe_ptr(qp, qp->s_last); | ||
900 | |||
901 | /* Nothing is pending to ACK/NAK. */ | ||
902 | if (qp->s_last == qp->s_tail) | ||
903 | goto bail; | ||
904 | |||
905 | /* | ||
906 | * The MSN might be for a later WQE than the PSN indicates so | ||
907 | * only complete WQEs that the PSN finishes. | ||
908 | */ | ||
909 | while (ipath_cmp24(psn, wqe->lpsn) >= 0) { | ||
910 | /* If we are ACKing a WQE, the MSN should be >= the SSN. */ | ||
911 | if (ipath_cmp24(aeth, wqe->ssn) < 0) | ||
912 | break; | ||
913 | /* | ||
914 | * If this request is a RDMA read or atomic, and the ACK is | ||
915 | * for a later operation, this ACK NAKs the RDMA read or | ||
916 | * atomic. In other words, only a RDMA_READ_LAST or ONLY | ||
917 | * can ACK a RDMA read and likewise for atomic ops. Note | ||
918 | * that the NAK case can only happen if relaxed ordering is | ||
919 | * used and requests are sent after an RDMA read or atomic | ||
920 | * is sent but before the response is received. | ||
921 | */ | ||
922 | if ((wqe->wr.opcode == IB_WR_RDMA_READ && | ||
923 | opcode != OP(RDMA_READ_RESPONSE_LAST)) || | ||
924 | ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || | ||
925 | wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && | ||
926 | (opcode != OP(ATOMIC_ACKNOWLEDGE) || | ||
927 | ipath_cmp24(wqe->psn, psn) != 0))) { | ||
928 | /* | ||
929 | * The last valid PSN seen is the previous | ||
930 | * request's. | ||
931 | */ | ||
932 | qp->s_last_psn = wqe->psn - 1; | ||
933 | /* Retry this request. */ | ||
934 | ipath_restart_rc(qp, wqe->psn, &wc); | ||
935 | /* | ||
936 | * No need to process the ACK/NAK since we are | ||
937 | * restarting an earlier request. | ||
938 | */ | ||
939 | goto bail; | ||
940 | } | ||
941 | /* Post a send completion queue entry if requested. */ | ||
942 | if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || | ||
943 | (wqe->wr.send_flags & IB_SEND_SIGNALED)) { | ||
944 | wc.wr_id = wqe->wr.wr_id; | ||
945 | wc.status = IB_WC_SUCCESS; | ||
946 | wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; | ||
947 | wc.vendor_err = 0; | ||
948 | wc.byte_len = wqe->length; | ||
949 | wc.qp_num = qp->ibqp.qp_num; | ||
950 | wc.src_qp = qp->remote_qpn; | ||
951 | wc.pkey_index = 0; | ||
952 | wc.slid = qp->remote_ah_attr.dlid; | ||
953 | wc.sl = qp->remote_ah_attr.sl; | ||
954 | wc.dlid_path_bits = 0; | ||
955 | wc.port_num = 0; | ||
956 | ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); | ||
957 | } | ||
958 | qp->s_retry = qp->s_retry_cnt; | ||
959 | /* | ||
960 | * If we are completing a request which is in the process of | ||
961 | * being resent, we can stop resending it since we know the | ||
962 | * responder has already seen it. | ||
963 | */ | ||
964 | if (qp->s_last == qp->s_cur) { | ||
965 | if (++qp->s_cur >= qp->s_size) | ||
966 | qp->s_cur = 0; | ||
967 | wqe = get_swqe_ptr(qp, qp->s_cur); | ||
968 | qp->s_state = OP(SEND_LAST); | ||
969 | qp->s_psn = wqe->psn; | ||
970 | } | ||
971 | if (++qp->s_last >= qp->s_size) | ||
972 | qp->s_last = 0; | ||
973 | wqe = get_swqe_ptr(qp, qp->s_last); | ||
974 | if (qp->s_last == qp->s_tail) | ||
975 | break; | ||
976 | } | ||
977 | |||
978 | switch (aeth >> 29) { | ||
979 | case 0: /* ACK */ | ||
980 | dev->n_rc_acks++; | ||
981 | /* If this is a partial ACK, reset the retransmit timer. */ | ||
982 | if (qp->s_last != qp->s_tail) { | ||
983 | spin_lock(&dev->pending_lock); | ||
984 | list_add_tail(&qp->timerwait, | ||
985 | &dev->pending[dev->pending_index]); | ||
986 | spin_unlock(&dev->pending_lock); | ||
987 | } | ||
988 | ipath_get_credit(qp, aeth); | ||
989 | qp->s_rnr_retry = qp->s_rnr_retry_cnt; | ||
990 | qp->s_retry = qp->s_retry_cnt; | ||
991 | qp->s_last_psn = psn; | ||
992 | ret = 1; | ||
993 | goto bail; | ||
994 | |||
995 | case 1: /* RNR NAK */ | ||
996 | dev->n_rnr_naks++; | ||
997 | if (qp->s_rnr_retry == 0) { | ||
998 | if (qp->s_last == qp->s_tail) | ||
999 | goto bail; | ||
1000 | |||
1001 | wc.status = IB_WC_RNR_RETRY_EXC_ERR; | ||
1002 | goto class_b; | ||
1003 | } | ||
1004 | if (qp->s_rnr_retry_cnt < 7) | ||
1005 | qp->s_rnr_retry--; | ||
1006 | if (qp->s_last == qp->s_tail) | ||
1007 | goto bail; | ||
1008 | |||
1009 | /* The last valid PSN seen is the previous request's. */ | ||
1010 | qp->s_last_psn = wqe->psn - 1; | ||
1011 | |||
1012 | dev->n_rc_resends += (int)qp->s_psn - (int)psn; | ||
1013 | |||
1014 | /* | ||
1015 | * If we are starting the request from the beginning, let | ||
1016 | * the normal send code handle initialization. | ||
1017 | */ | ||
1018 | qp->s_cur = qp->s_last; | ||
1019 | wqe = get_swqe_ptr(qp, qp->s_cur); | ||
1020 | if (ipath_cmp24(psn, wqe->psn) <= 0) { | ||
1021 | qp->s_state = OP(SEND_LAST); | ||
1022 | qp->s_psn = wqe->psn; | ||
1023 | } else | ||
1024 | reset_psn(qp, psn); | ||
1025 | |||
1026 | qp->s_rnr_timeout = | ||
1027 | ib_ipath_rnr_table[(aeth >> IPS_AETH_CREDIT_SHIFT) & | ||
1028 | IPS_AETH_CREDIT_MASK]; | ||
1029 | ipath_insert_rnr_queue(qp); | ||
1030 | goto bail; | ||
1031 | |||
1032 | case 3: /* NAK */ | ||
1033 | /* The last valid PSN seen is the previous request's. */ | ||
1034 | if (qp->s_last != qp->s_tail) | ||
1035 | qp->s_last_psn = wqe->psn - 1; | ||
1036 | switch ((aeth >> IPS_AETH_CREDIT_SHIFT) & | ||
1037 | IPS_AETH_CREDIT_MASK) { | ||
1038 | case 0: /* PSN sequence error */ | ||
1039 | dev->n_seq_naks++; | ||
1040 | /* | ||
1041 | * Back up to the responder's expected PSN. XXX | ||
1042 | * Note that we might get a NAK in the middle of an | ||
1043 | * RDMA READ response which terminates the RDMA | ||
1044 | * READ. | ||
1045 | */ | ||
1046 | if (qp->s_last == qp->s_tail) | ||
1047 | break; | ||
1048 | |||
1049 | if (ipath_cmp24(psn, wqe->psn) < 0) | ||
1050 | break; | ||
1051 | |||
1052 | /* Retry the request. */ | ||
1053 | ipath_restart_rc(qp, psn, &wc); | ||
1054 | break; | ||
1055 | |||
1056 | case 1: /* Invalid Request */ | ||
1057 | wc.status = IB_WC_REM_INV_REQ_ERR; | ||
1058 | dev->n_other_naks++; | ||
1059 | goto class_b; | ||
1060 | |||
1061 | case 2: /* Remote Access Error */ | ||
1062 | wc.status = IB_WC_REM_ACCESS_ERR; | ||
1063 | dev->n_other_naks++; | ||
1064 | goto class_b; | ||
1065 | |||
1066 | case 3: /* Remote Operation Error */ | ||
1067 | wc.status = IB_WC_REM_OP_ERR; | ||
1068 | dev->n_other_naks++; | ||
1069 | class_b: | ||
1070 | wc.wr_id = wqe->wr.wr_id; | ||
1071 | wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; | ||
1072 | wc.vendor_err = 0; | ||
1073 | wc.byte_len = 0; | ||
1074 | wc.qp_num = qp->ibqp.qp_num; | ||
1075 | wc.src_qp = qp->remote_qpn; | ||
1076 | wc.pkey_index = 0; | ||
1077 | wc.slid = qp->remote_ah_attr.dlid; | ||
1078 | wc.sl = qp->remote_ah_attr.sl; | ||
1079 | wc.dlid_path_bits = 0; | ||
1080 | wc.port_num = 0; | ||
1081 | ipath_sqerror_qp(qp, &wc); | ||
1082 | break; | ||
1083 | |||
1084 | default: | ||
1085 | /* Ignore other reserved NAK error codes */ | ||
1086 | goto reserved; | ||
1087 | } | ||
1088 | qp->s_rnr_retry = qp->s_rnr_retry_cnt; | ||
1089 | goto bail; | ||
1090 | |||
1091 | default: /* 2: reserved */ | ||
1092 | reserved: | ||
1093 | /* Ignore reserved NAK codes. */ | ||
1094 | goto bail; | ||
1095 | } | ||
1096 | |||
1097 | bail: | ||
1098 | return ret; | ||
1099 | } | ||
1100 | |||
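The switch above keys off the AETH syndrome: the top three bits of the 32-bit AETH select ACK (0), RNR NAK (1) or NAK (3), and the field extracted with IPS_AETH_CREDIT_SHIFT and IPS_AETH_CREDIT_MASK carries a credit count, an RNR timer code or a NAK reason depending on that class. A minimal sketch of the decode, assuming the IPS_AETH_CREDIT_* constants from the driver's common headers, might look like:

/* Hedged sketch, not part of the driver: split an AETH word the way
 * do_rc_ack() does above.  The low field's meaning depends on the class. */
static void sketch_decode_aeth(u32 aeth, u32 *ack_type, u32 *value)
{
	*ack_type = aeth >> 29;		/* 0 ACK, 1 RNR NAK, 3 NAK, 2 reserved */
	*value = (aeth >> IPS_AETH_CREDIT_SHIFT) & IPS_AETH_CREDIT_MASK;
	/* class 0: *value feeds ipath_get_credit() as a credit update
	 * class 1: *value indexes ib_ipath_rnr_table[] for the RNR delay
	 * class 3: *value is the NAK reason (PSN error, access error, ...) */
}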
1101 | /** | ||
1102 | * ipath_rc_rcv_resp - process an incoming RC response packet | ||
1103 | * @dev: the device this packet came in on | ||
1104 | * @ohdr: the other headers for this packet | ||
1105 | * @data: the packet data | ||
1106 | * @tlen: the packet length | ||
1107 | * @qp: the QP for this packet | ||
1108 | * @opcode: the opcode for this packet | ||
1109 | * @psn: the packet sequence number for this packet | ||
1110 | * @hdrsize: the header length | ||
1111 | * @pmtu: the path MTU | ||
1112 | * @header_in_data: true if part of the header data is in the data buffer | ||
1113 | * | ||
1114 | * This is called from ipath_rc_rcv() to process an incoming RC response | ||
1115 | * packet for the given QP. | ||
1116 | * Called at interrupt level. | ||
1117 | */ | ||
1118 | static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, | ||
1119 | struct ipath_other_headers *ohdr, | ||
1120 | void *data, u32 tlen, | ||
1121 | struct ipath_qp *qp, | ||
1122 | u32 opcode, | ||
1123 | u32 psn, u32 hdrsize, u32 pmtu, | ||
1124 | int header_in_data) | ||
1125 | { | ||
1126 | unsigned long flags; | ||
1127 | struct ib_wc wc; | ||
1128 | int diff; | ||
1129 | u32 pad; | ||
1130 | u32 aeth; | ||
1131 | |||
1132 | spin_lock_irqsave(&qp->s_lock, flags); | ||
1133 | |||
1134 | /* Ignore invalid responses. */ | ||
1135 | if (ipath_cmp24(psn, qp->s_next_psn) >= 0) | ||
1136 | goto ack_done; | ||
1137 | |||
1138 | /* Ignore duplicate responses. */ | ||
1139 | diff = ipath_cmp24(psn, qp->s_last_psn); | ||
1140 | if (unlikely(diff <= 0)) { | ||
1141 | /* Update credits for "ghost" ACKs */ | ||
1142 | if (diff == 0 && opcode == OP(ACKNOWLEDGE)) { | ||
1143 | if (!header_in_data) | ||
1144 | aeth = be32_to_cpu(ohdr->u.aeth); | ||
1145 | else { | ||
1146 | aeth = be32_to_cpu(((__be32 *) data)[0]); | ||
1147 | data += sizeof(__be32); | ||
1148 | } | ||
1149 | if ((aeth >> 29) == 0) | ||
1150 | ipath_get_credit(qp, aeth); | ||
1151 | } | ||
1152 | goto ack_done; | ||
1153 | } | ||
1154 | |||
1155 | switch (opcode) { | ||
1156 | case OP(ACKNOWLEDGE): | ||
1157 | case OP(ATOMIC_ACKNOWLEDGE): | ||
1158 | case OP(RDMA_READ_RESPONSE_FIRST): | ||
1159 | if (!header_in_data) | ||
1160 | aeth = be32_to_cpu(ohdr->u.aeth); | ||
1161 | else { | ||
1162 | aeth = be32_to_cpu(((__be32 *) data)[0]); | ||
1163 | data += sizeof(__be32); | ||
1164 | } | ||
1165 | if (opcode == OP(ATOMIC_ACKNOWLEDGE)) | ||
1166 | *(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data; | ||
1167 | if (!do_rc_ack(qp, aeth, psn, opcode) || | ||
1168 | opcode != OP(RDMA_READ_RESPONSE_FIRST)) | ||
1169 | goto ack_done; | ||
1170 | hdrsize += 4; | ||
1171 | /* | ||
1172 | * do_rc_ack() has already checked the PSN so skip | ||
1173 | * the sequence check. | ||
1174 | */ | ||
1175 | goto rdma_read; | ||
1176 | |||
1177 | case OP(RDMA_READ_RESPONSE_MIDDLE): | ||
1178 | /* no AETH, no ACK */ | ||
1179 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { | ||
1180 | dev->n_rdma_seq++; | ||
1181 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); | ||
1182 | goto ack_done; | ||
1183 | } | ||
1184 | rdma_read: | ||
1185 | if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) | ||
1186 | goto ack_done; | ||
1187 | if (unlikely(tlen != (hdrsize + pmtu + 4))) | ||
1188 | goto ack_done; | ||
1189 | if (unlikely(pmtu >= qp->s_len)) | ||
1190 | goto ack_done; | ||
1191 | /* We got a response so update the timeout. */ | ||
1192 | if (unlikely(qp->s_last == qp->s_tail || | ||
1193 | get_swqe_ptr(qp, qp->s_last)->wr.opcode != | ||
1194 | IB_WR_RDMA_READ)) | ||
1195 | goto ack_done; | ||
1196 | spin_lock(&dev->pending_lock); | ||
1197 | if (qp->s_rnr_timeout == 0 && | ||
1198 | qp->timerwait.next != LIST_POISON1) | ||
1199 | list_move_tail(&qp->timerwait, | ||
1200 | &dev->pending[dev->pending_index]); | ||
1201 | spin_unlock(&dev->pending_lock); | ||
1202 | /* | ||
1203 | * Update the RDMA receive state but do the copy w/o holding the | ||
1204 | * locks and blocking interrupts. XXX Yet another place that | ||
1205 | * affects relaxed RDMA order since we don't want s_sge modified. | ||
1206 | */ | ||
1207 | qp->s_len -= pmtu; | ||
1208 | qp->s_last_psn = psn; | ||
1209 | spin_unlock_irqrestore(&qp->s_lock, flags); | ||
1210 | ipath_copy_sge(&qp->s_sge, data, pmtu); | ||
1211 | goto bail; | ||
1212 | |||
1213 | case OP(RDMA_READ_RESPONSE_LAST): | ||
1214 | /* ACKs READ req. */ | ||
1215 | if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { | ||
1216 | dev->n_rdma_seq++; | ||
1217 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); | ||
1218 | goto ack_done; | ||
1219 | } | ||
1220 | /* FALLTHROUGH */ | ||
1221 | case OP(RDMA_READ_RESPONSE_ONLY): | ||
1222 | if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) | ||
1223 | goto ack_done; | ||
1224 | /* | ||
1225 | * Get the number of bytes the message was padded by. | ||
1226 | */ | ||
1227 | pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; | ||
1228 | /* | ||
1229 | * Check that the data size is >= 1 && <= pmtu. | ||
1230 | * Remember to account for the AETH header (4) and | ||
1231 | * ICRC (4). | ||
1232 | */ | ||
1233 | if (unlikely(tlen <= (hdrsize + pad + 8))) { | ||
1234 | /* | ||
1235 | * XXX Need to generate an error CQ | ||
1236 | * entry. | ||
1237 | */ | ||
1238 | goto ack_done; | ||
1239 | } | ||
1240 | tlen -= hdrsize + pad + 8; | ||
1241 | if (unlikely(tlen != qp->s_len)) { | ||
1242 | /* | ||
1243 | * XXX Need to generate an error CQ | ||
1244 | * entry. | ||
1245 | */ | ||
1246 | goto ack_done; | ||
1247 | } | ||
1248 | if (!header_in_data) | ||
1249 | aeth = be32_to_cpu(ohdr->u.aeth); | ||
1250 | else { | ||
1251 | aeth = be32_to_cpu(((__be32 *) data)[0]); | ||
1252 | data += sizeof(__be32); | ||
1253 | } | ||
1254 | ipath_copy_sge(&qp->s_sge, data, tlen); | ||
1255 | if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) { | ||
1256 | /* | ||
1257 | * Change the state so we continue | ||
1258 | * processing new requests. | ||
1259 | */ | ||
1260 | qp->s_state = OP(SEND_LAST); | ||
1261 | } | ||
1262 | goto ack_done; | ||
1263 | } | ||
1264 | |||
1265 | ack_done: | ||
1266 | spin_unlock_irqrestore(&qp->s_lock, flags); | ||
1267 | bail: | ||
1268 | return; | ||
1269 | } | ||
1270 | |||
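The response path above, like the rest of the file, orders PSNs with ipath_cmp24(), which compares 24-bit sequence numbers that are allowed to wrap. The sketch below is not the driver's implementation, only an illustration of the semantics it relies on: negative means the first PSN is older, zero equal, positive newer, all modulo 2^24.

/* Hedged sketch of 24-bit PSN comparison semantics (illustrative only). */
static int sketch_cmp24(u32 a, u32 b)
{
	u32 d = (a - b) & 0x00ffffff;		/* difference modulo 2^24 */

	if (d & 0x00800000)			/* a is behind b */
		return (int)(d | 0xff000000);	/* sign-extend bit 23 */
	return (int)d;
}

With this, a PSN of 0x000001 compares as three ahead of 0xfffffe even though it has wrapped, which is what the duplicate and out-of-range checks at the top of ipath_rc_rcv_resp() need.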
1271 | /** | ||
1272 | * ipath_rc_rcv_error - process an incoming duplicate or error RC packet | ||
1273 | * @dev: the device this packet came in on | ||
1274 | * @ohdr: the other headers for this packet | ||
1275 | * @data: the packet data | ||
1276 | * @qp: the QP for this packet | ||
1277 | * @opcode: the opcode for this packet | ||
1278 | * @psn: the packet sequence number for this packet | ||
1279 | * @diff: the difference between the PSN and the expected PSN | ||
1280 | * @header_in_data: true if part of the header data is in the data buffer | ||
1281 | * | ||
1282 | * This is called from ipath_rc_rcv() to process an unexpected | ||
1283 | * incoming RC packet for the given QP. | ||
1284 | * Called at interrupt level. | ||
1285 | * Return 1 if no more processing is needed; otherwise return 0 so the | ||
1286 | * caller schedules a response to be sent and releases the still-held s_lock. | ||
1287 | */ | ||
1288 | static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, | ||
1289 | struct ipath_other_headers *ohdr, | ||
1290 | void *data, | ||
1291 | struct ipath_qp *qp, | ||
1292 | u32 opcode, | ||
1293 | u32 psn, | ||
1294 | int diff, | ||
1295 | int header_in_data) | ||
1296 | { | ||
1297 | struct ib_reth *reth; | ||
1298 | |||
1299 | if (diff > 0) { | ||
1300 | /* | ||
1301 | * Packet sequence error. | ||
1302 | * A NAK will ACK earlier sends and RDMA writes. | ||
1303 | * Don't queue the NAK if a RDMA read, atomic, or | ||
1304 | * NAK is pending though. | ||
1305 | */ | ||
1306 | spin_lock(&qp->s_lock); | ||
1307 | if ((qp->s_ack_state >= OP(RDMA_READ_REQUEST) && | ||
1308 | qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) || | ||
1309 | qp->s_nak_state != 0) { | ||
1310 | spin_unlock(&qp->s_lock); | ||
1311 | goto done; | ||
1312 | } | ||
1313 | qp->s_ack_state = OP(SEND_ONLY); | ||
1314 | qp->s_nak_state = IB_NAK_PSN_ERROR; | ||
1315 | /* Use the expected PSN. */ | ||
1316 | qp->s_ack_psn = qp->r_psn; | ||
1317 | goto resched; | ||
1318 | } | ||
1319 | |||
1320 | /* | ||
1321 | * Handle a duplicate request. Don't re-execute SEND, RDMA | ||
1322 | * write or atomic op. Don't NAK errors, just silently drop | ||
1323 | * the duplicate request. Note that r_sge, r_len, and | ||
1324 | * r_rcv_len may be in use so don't modify them. | ||
1325 | * | ||
1326 | * We are supposed to ACK the earliest duplicate PSN but we | ||
1327 | * can coalesce an outstanding duplicate ACK. We have to | ||
1328 | * send the earliest so that RDMA reads can be restarted at | ||
1329 | * the requester's expected PSN. | ||
1330 | */ | ||
1331 | spin_lock(&qp->s_lock); | ||
1332 | if (qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE && | ||
1333 | ipath_cmp24(psn, qp->s_ack_psn) >= 0) { | ||
1334 | if (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST) | ||
1335 | qp->s_ack_psn = psn; | ||
1336 | spin_unlock(&qp->s_lock); | ||
1337 | goto done; | ||
1338 | } | ||
1339 | switch (opcode) { | ||
1340 | case OP(RDMA_READ_REQUEST): | ||
1341 | /* | ||
1342 | * We have to be careful to not change s_rdma_sge | ||
1343 | * while ipath_do_rc_send() is using it and not | ||
1344 | * holding the s_lock. | ||
1345 | */ | ||
1346 | if (qp->s_ack_state != OP(ACKNOWLEDGE) && | ||
1347 | qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) { | ||
1348 | spin_unlock(&qp->s_lock); | ||
1349 | dev->n_rdma_dup_busy++; | ||
1350 | goto done; | ||
1351 | } | ||
1352 | /* RETH comes after BTH */ | ||
1353 | if (!header_in_data) | ||
1354 | reth = &ohdr->u.rc.reth; | ||
1355 | else { | ||
1356 | reth = (struct ib_reth *)data; | ||
1357 | data += sizeof(*reth); | ||
1358 | } | ||
1359 | qp->s_rdma_len = be32_to_cpu(reth->length); | ||
1360 | if (qp->s_rdma_len != 0) { | ||
1361 | u32 rkey = be32_to_cpu(reth->rkey); | ||
1362 | u64 vaddr = be64_to_cpu(reth->vaddr); | ||
1363 | int ok; | ||
1364 | |||
1365 | /* | ||
1366 | * Address range must be a subset of the original | ||
1367 | * request and start on pmtu boundaries. | ||
1368 | */ | ||
1369 | ok = ipath_rkey_ok(dev, &qp->s_rdma_sge, | ||
1370 | qp->s_rdma_len, vaddr, rkey, | ||
1371 | IB_ACCESS_REMOTE_READ); | ||
1372 | if (unlikely(!ok)) | ||
1373 | goto done; | ||
1374 | } else { | ||
1375 | qp->s_rdma_sge.sg_list = NULL; | ||
1376 | qp->s_rdma_sge.num_sge = 0; | ||
1377 | qp->s_rdma_sge.sge.mr = NULL; | ||
1378 | qp->s_rdma_sge.sge.vaddr = NULL; | ||
1379 | qp->s_rdma_sge.sge.length = 0; | ||
1380 | qp->s_rdma_sge.sge.sge_length = 0; | ||
1381 | } | ||
1382 | break; | ||
1383 | |||
1384 | case OP(COMPARE_SWAP): | ||
1385 | case OP(FETCH_ADD): | ||
1386 | /* | ||
1387 | * Check for the PSN of the last atomic operation | ||
1388 | * performed and resend the result if found. | ||
1389 | */ | ||
1390 | if ((psn & IPS_PSN_MASK) != qp->r_atomic_psn) { | ||
1391 | spin_unlock(&qp->s_lock); | ||
1392 | goto done; | ||
1393 | } | ||
1394 | qp->s_ack_atomic = qp->r_atomic_data; | ||
1395 | break; | ||
1396 | } | ||
1397 | qp->s_ack_state = opcode; | ||
1398 | qp->s_nak_state = 0; | ||
1399 | qp->s_ack_psn = psn; | ||
1400 | resched: | ||
1401 | return 0; | ||
1402 | |||
1403 | done: | ||
1404 | return 1; | ||
1405 | } | ||
1406 | |||
1407 | /** | ||
1408 | * ipath_rc_rcv - process an incoming RC packet | ||
1409 | * @dev: the device this packet came in on | ||
1410 | * @hdr: the header of this packet | ||
1411 | * @has_grh: true if the header has a GRH | ||
1412 | * @data: the packet data | ||
1413 | * @tlen: the packet length | ||
1414 | * @qp: the QP for this packet | ||
1415 | * | ||
1416 | * This is called from ipath_qp_rcv() to process an incoming RC packet | ||
1417 | * for the given QP. | ||
1418 | * Called at interrupt level. | ||
1419 | */ | ||
1420 | void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | ||
1421 | int has_grh, void *data, u32 tlen, struct ipath_qp *qp) | ||
1422 | { | ||
1423 | struct ipath_other_headers *ohdr; | ||
1424 | u32 opcode; | ||
1425 | u32 hdrsize; | ||
1426 | u32 psn; | ||
1427 | u32 pad; | ||
1428 | unsigned long flags; | ||
1429 | struct ib_wc wc; | ||
1430 | u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); | ||
1431 | int diff; | ||
1432 | struct ib_reth *reth; | ||
1433 | int header_in_data; | ||
1434 | |||
1435 | /* Check for GRH */ | ||
1436 | if (!has_grh) { | ||
1437 | ohdr = &hdr->u.oth; | ||
1438 | hdrsize = 8 + 12; /* LRH + BTH */ | ||
1439 | psn = be32_to_cpu(ohdr->bth[2]); | ||
1440 | header_in_data = 0; | ||
1441 | } else { | ||
1442 | ohdr = &hdr->u.l.oth; | ||
1443 | hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */ | ||
1444 | /* | ||
1445 | * The header with GRH is 60 bytes and the core driver sets | ||
1446 | * the eager header buffer size to 56 bytes so the last 4 | ||
1447 | * bytes of the BTH header (PSN) is in the data buffer. | ||
1448 | */ | ||
1449 | header_in_data = | ||
1450 | ipath_layer_get_rcvhdrentsize(dev->dd) == 16; | ||
1451 | if (header_in_data) { | ||
1452 | psn = be32_to_cpu(((__be32 *) data)[0]); | ||
1453 | data += sizeof(__be32); | ||
1454 | } else | ||
1455 | psn = be32_to_cpu(ohdr->bth[2]); | ||
1456 | } | ||
1457 | /* | ||
1458 | * The opcode is in the low byte when it's in network order | ||
1459 | * (top byte when in host order). | ||
1460 | */ | ||
1461 | opcode = be32_to_cpu(ohdr->bth[0]) >> 24; | ||
1462 | |||
1463 | /* | ||
1464 | * Process responses (ACKs) before anything else. Note that the | ||
1465 | * packet sequence number will be for something in the send work | ||
1466 | * queue rather than the expected receive packet sequence number. | ||
1467 | * In other words, this QP is the requester. | ||
1468 | */ | ||
1469 | if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && | ||
1470 | opcode <= OP(ATOMIC_ACKNOWLEDGE)) { | ||
1471 | ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn, | ||
1472 | hdrsize, pmtu, header_in_data); | ||
1473 | goto bail; | ||
1474 | } | ||
1475 | |||
1476 | spin_lock_irqsave(&qp->r_rq.lock, flags); | ||
1477 | |||
1478 | /* Compute 24 bits worth of difference. */ | ||
1479 | diff = ipath_cmp24(psn, qp->r_psn); | ||
1480 | if (unlikely(diff)) { | ||
1481 | if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode, | ||
1482 | psn, diff, header_in_data)) | ||
1483 | goto done; | ||
1484 | goto resched; | ||
1485 | } | ||
1486 | |||
1487 | /* Check for opcode sequence errors. */ | ||
1488 | switch (qp->r_state) { | ||
1489 | case OP(SEND_FIRST): | ||
1490 | case OP(SEND_MIDDLE): | ||
1491 | if (opcode == OP(SEND_MIDDLE) || | ||
1492 | opcode == OP(SEND_LAST) || | ||
1493 | opcode == OP(SEND_LAST_WITH_IMMEDIATE)) | ||
1494 | break; | ||
1495 | nack_inv: | ||
1496 | /* | ||
1497 | * A NAK will ACK earlier sends and RDMA writes. Don't queue the | ||
1498 | * NAK if a RDMA read, atomic, or NAK is pending though. | ||
1499 | */ | ||
1500 | spin_lock(&qp->s_lock); | ||
1501 | if (qp->s_ack_state >= OP(RDMA_READ_REQUEST) && | ||
1502 | qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) { | ||
1503 | spin_unlock(&qp->s_lock); | ||
1504 | goto done; | ||
1505 | } | ||
1506 | /* XXX Flush WQEs */ | ||
1507 | qp->state = IB_QPS_ERR; | ||
1508 | qp->s_ack_state = OP(SEND_ONLY); | ||
1509 | qp->s_nak_state = IB_NAK_INVALID_REQUEST; | ||
1510 | qp->s_ack_psn = qp->r_psn; | ||
1511 | goto resched; | ||
1512 | |||
1513 | case OP(RDMA_WRITE_FIRST): | ||
1514 | case OP(RDMA_WRITE_MIDDLE): | ||
1515 | if (opcode == OP(RDMA_WRITE_MIDDLE) || | ||
1516 | opcode == OP(RDMA_WRITE_LAST) || | ||
1517 | opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) | ||
1518 | break; | ||
1519 | goto nack_inv; | ||
1520 | |||
1521 | case OP(RDMA_READ_REQUEST): | ||
1522 | case OP(COMPARE_SWAP): | ||
1523 | case OP(FETCH_ADD): | ||
1524 | /* | ||
1525 | * Drop all new requests until a response has been sent. A | ||
1526 | * new request then ACKs the RDMA response we sent. Relaxed | ||
1527 | * ordering would allow new requests to be processed but we | ||
1528 | * would need to keep a queue of rwqe's for all that are in | ||
1529 | * progress. Note that we can't RNR NAK this request since | ||
1530 | * the RDMA READ or atomic response is already queued to be | ||
1531 | * sent (unless we implement a response send queue). | ||
1532 | */ | ||
1533 | goto done; | ||
1534 | |||
1535 | default: | ||
1536 | if (opcode == OP(SEND_MIDDLE) || | ||
1537 | opcode == OP(SEND_LAST) || | ||
1538 | opcode == OP(SEND_LAST_WITH_IMMEDIATE) || | ||
1539 | opcode == OP(RDMA_WRITE_MIDDLE) || | ||
1540 | opcode == OP(RDMA_WRITE_LAST) || | ||
1541 | opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) | ||
1542 | goto nack_inv; | ||
1543 | break; | ||
1544 | } | ||
1545 | |||
1546 | wc.imm_data = 0; | ||
1547 | wc.wc_flags = 0; | ||
1548 | |||
1549 | /* OK, process the packet. */ | ||
1550 | switch (opcode) { | ||
1551 | case OP(SEND_FIRST): | ||
1552 | if (!ipath_get_rwqe(qp, 0)) { | ||
1553 | rnr_nak: | ||
1554 | /* | ||
1555 | * A RNR NAK will ACK earlier sends and RDMA writes. | ||
1556 | * Don't queue the NAK if a RDMA read or atomic | ||
1557 | * is pending though. | ||
1558 | */ | ||
1559 | spin_lock(&qp->s_lock); | ||
1560 | if (qp->s_ack_state >= | ||
1561 | OP(RDMA_READ_REQUEST) && | ||
1562 | qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) { | ||
1563 | spin_unlock(&qp->s_lock); | ||
1564 | goto done; | ||
1565 | } | ||
1566 | qp->s_ack_state = OP(SEND_ONLY); | ||
1567 | qp->s_nak_state = IB_RNR_NAK | qp->s_min_rnr_timer; | ||
1568 | qp->s_ack_psn = qp->r_psn; | ||
1569 | goto resched; | ||
1570 | } | ||
1571 | qp->r_rcv_len = 0; | ||
1572 | /* FALLTHROUGH */ | ||
1573 | case OP(SEND_MIDDLE): | ||
1574 | case OP(RDMA_WRITE_MIDDLE): | ||
1575 | send_middle: | ||
1576 | /* Check for invalid length PMTU or posted rwqe len. */ | ||
1577 | if (unlikely(tlen != (hdrsize + pmtu + 4))) | ||
1578 | goto nack_inv; | ||
1579 | qp->r_rcv_len += pmtu; | ||
1580 | if (unlikely(qp->r_rcv_len > qp->r_len)) | ||
1581 | goto nack_inv; | ||
1582 | ipath_copy_sge(&qp->r_sge, data, pmtu); | ||
1583 | break; | ||
1584 | |||
1585 | case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): | ||
1586 | /* consume RWQE */ | ||
1587 | if (!ipath_get_rwqe(qp, 1)) | ||
1588 | goto rnr_nak; | ||
1589 | goto send_last_imm; | ||
1590 | |||
1591 | case OP(SEND_ONLY): | ||
1592 | case OP(SEND_ONLY_WITH_IMMEDIATE): | ||
1593 | if (!ipath_get_rwqe(qp, 0)) | ||
1594 | goto rnr_nak; | ||
1595 | qp->r_rcv_len = 0; | ||
1596 | if (opcode == OP(SEND_ONLY)) | ||
1597 | goto send_last; | ||
1598 | /* FALLTHROUGH */ | ||
1599 | case OP(SEND_LAST_WITH_IMMEDIATE): | ||
1600 | send_last_imm: | ||
1601 | if (header_in_data) { | ||
1602 | wc.imm_data = *(__be32 *) data; | ||
1603 | data += sizeof(__be32); | ||
1604 | } else { | ||
1605 | /* Immediate data comes after BTH */ | ||
1606 | wc.imm_data = ohdr->u.imm_data; | ||
1607 | } | ||
1608 | hdrsize += 4; | ||
1609 | wc.wc_flags = IB_WC_WITH_IMM; | ||
1610 | /* FALLTHROUGH */ | ||
1611 | case OP(SEND_LAST): | ||
1612 | case OP(RDMA_WRITE_LAST): | ||
1613 | send_last: | ||
1614 | /* Get the number of bytes the message was padded by. */ | ||
1615 | pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; | ||
1616 | /* Check for invalid length. */ | ||
1617 | /* XXX LAST len should be >= 1 */ | ||
1618 | if (unlikely(tlen < (hdrsize + pad + 4))) | ||
1619 | goto nack_inv; | ||
1620 | /* Don't count the CRC. */ | ||
1621 | tlen -= (hdrsize + pad + 4); | ||
1622 | wc.byte_len = tlen + qp->r_rcv_len; | ||
1623 | if (unlikely(wc.byte_len > qp->r_len)) | ||
1624 | goto nack_inv; | ||
1625 | ipath_copy_sge(&qp->r_sge, data, tlen); | ||
1626 | atomic_inc(&qp->msn); | ||
1627 | if (opcode == OP(RDMA_WRITE_LAST) || | ||
1628 | opcode == OP(RDMA_WRITE_ONLY)) | ||
1629 | break; | ||
1630 | wc.wr_id = qp->r_wr_id; | ||
1631 | wc.status = IB_WC_SUCCESS; | ||
1632 | wc.opcode = IB_WC_RECV; | ||
1633 | wc.vendor_err = 0; | ||
1634 | wc.qp_num = qp->ibqp.qp_num; | ||
1635 | wc.src_qp = qp->remote_qpn; | ||
1636 | wc.pkey_index = 0; | ||
1637 | wc.slid = qp->remote_ah_attr.dlid; | ||
1638 | wc.sl = qp->remote_ah_attr.sl; | ||
1639 | wc.dlid_path_bits = 0; | ||
1640 | wc.port_num = 0; | ||
1641 | /* Signal completion event if the solicited bit is set. */ | ||
1642 | ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, | ||
1643 | (ohdr->bth[0] & | ||
1644 | __constant_cpu_to_be32(1 << 23)) != 0); | ||
1645 | break; | ||
1646 | |||
1647 | case OP(RDMA_WRITE_FIRST): | ||
1648 | case OP(RDMA_WRITE_ONLY): | ||
1649 | case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): | ||
1650 | /* consume RWQE */ | ||
1651 | /* RETH comes after BTH */ | ||
1652 | if (!header_in_data) | ||
1653 | reth = &ohdr->u.rc.reth; | ||
1654 | else { | ||
1655 | reth = (struct ib_reth *)data; | ||
1656 | data += sizeof(*reth); | ||
1657 | } | ||
1658 | hdrsize += sizeof(*reth); | ||
1659 | qp->r_len = be32_to_cpu(reth->length); | ||
1660 | qp->r_rcv_len = 0; | ||
1661 | if (qp->r_len != 0) { | ||
1662 | u32 rkey = be32_to_cpu(reth->rkey); | ||
1663 | u64 vaddr = be64_to_cpu(reth->vaddr); | ||
1664 | int ok; | ||
1665 | |||
1666 | /* Check rkey & NAK */ | ||
1667 | ok = ipath_rkey_ok(dev, &qp->r_sge, | ||
1668 | qp->r_len, vaddr, rkey, | ||
1669 | IB_ACCESS_REMOTE_WRITE); | ||
1670 | if (unlikely(!ok)) { | ||
1671 | nack_acc: | ||
1672 | /* | ||
1673 | * A NAK will ACK earlier sends and RDMA | ||
1674 | * writes. Don't queue the NAK if a RDMA | ||
1675 | * read, atomic, or NAK is pending though. | ||
1676 | */ | ||
1677 | spin_lock(&qp->s_lock); | ||
1678 | if (qp->s_ack_state >= | ||
1679 | OP(RDMA_READ_REQUEST) && | ||
1680 | qp->s_ack_state != | ||
1681 | IB_OPCODE_ACKNOWLEDGE) { | ||
1682 | spin_unlock(&qp->s_lock); | ||
1683 | goto done; | ||
1684 | } | ||
1685 | /* XXX Flush WQEs */ | ||
1686 | qp->state = IB_QPS_ERR; | ||
1687 | qp->s_ack_state = OP(RDMA_WRITE_ONLY); | ||
1688 | qp->s_nak_state = | ||
1689 | IB_NAK_REMOTE_ACCESS_ERROR; | ||
1690 | qp->s_ack_psn = qp->r_psn; | ||
1691 | goto resched; | ||
1692 | } | ||
1693 | } else { | ||
1694 | qp->r_sge.sg_list = NULL; | ||
1695 | qp->r_sge.sge.mr = NULL; | ||
1696 | qp->r_sge.sge.vaddr = NULL; | ||
1697 | qp->r_sge.sge.length = 0; | ||
1698 | qp->r_sge.sge.sge_length = 0; | ||
1699 | } | ||
1700 | if (unlikely(!(qp->qp_access_flags & | ||
1701 | IB_ACCESS_REMOTE_WRITE))) | ||
1702 | goto nack_acc; | ||
1703 | if (opcode == OP(RDMA_WRITE_FIRST)) | ||
1704 | goto send_middle; | ||
1705 | else if (opcode == OP(RDMA_WRITE_ONLY)) | ||
1706 | goto send_last; | ||
1707 | if (!ipath_get_rwqe(qp, 1)) | ||
1708 | goto rnr_nak; | ||
1709 | goto send_last_imm; | ||
1710 | |||
1711 | case OP(RDMA_READ_REQUEST): | ||
1712 | /* RETH comes after BTH */ | ||
1713 | if (!header_in_data) | ||
1714 | reth = &ohdr->u.rc.reth; | ||
1715 | else { | ||
1716 | reth = (struct ib_reth *)data; | ||
1717 | data += sizeof(*reth); | ||
1718 | } | ||
1719 | spin_lock(&qp->s_lock); | ||
1720 | if (qp->s_ack_state != OP(ACKNOWLEDGE) && | ||
1721 | qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) { | ||
1722 | spin_unlock(&qp->s_lock); | ||
1723 | goto done; | ||
1724 | } | ||
1725 | qp->s_rdma_len = be32_to_cpu(reth->length); | ||
1726 | if (qp->s_rdma_len != 0) { | ||
1727 | u32 rkey = be32_to_cpu(reth->rkey); | ||
1728 | u64 vaddr = be64_to_cpu(reth->vaddr); | ||
1729 | int ok; | ||
1730 | |||
1731 | /* Check rkey & NAK */ | ||
1732 | ok = ipath_rkey_ok(dev, &qp->s_rdma_sge, | ||
1733 | qp->s_rdma_len, vaddr, rkey, | ||
1734 | IB_ACCESS_REMOTE_READ); | ||
1735 | if (unlikely(!ok)) { | ||
1736 | spin_unlock(&qp->s_lock); | ||
1737 | goto nack_acc; | ||
1738 | } | ||
1739 | /* | ||
1740 | * Update the next expected PSN. We add 1 later | ||
1741 | * below, so only add the remainder here. | ||
1742 | */ | ||
1743 | if (qp->s_rdma_len > pmtu) | ||
1744 | qp->r_psn += (qp->s_rdma_len - 1) / pmtu; | ||
1745 | } else { | ||
1746 | qp->s_rdma_sge.sg_list = NULL; | ||
1747 | qp->s_rdma_sge.num_sge = 0; | ||
1748 | qp->s_rdma_sge.sge.mr = NULL; | ||
1749 | qp->s_rdma_sge.sge.vaddr = NULL; | ||
1750 | qp->s_rdma_sge.sge.length = 0; | ||
1751 | qp->s_rdma_sge.sge.sge_length = 0; | ||
1752 | } | ||
1753 | if (unlikely(!(qp->qp_access_flags & | ||
1754 | IB_ACCESS_REMOTE_READ))) | ||
1755 | goto nack_acc; | ||
1756 | /* | ||
1757 | * We need to increment the MSN here instead of when we | ||
1758 | * finish sending the result since a duplicate request would | ||
1759 | * increment it more than once. | ||
1760 | */ | ||
1761 | atomic_inc(&qp->msn); | ||
1762 | qp->s_ack_state = opcode; | ||
1763 | qp->s_nak_state = 0; | ||
1764 | qp->s_ack_psn = psn; | ||
1765 | qp->r_psn++; | ||
1766 | qp->r_state = opcode; | ||
1767 | goto rdmadone; | ||
1768 | |||
1769 | case OP(COMPARE_SWAP): | ||
1770 | case OP(FETCH_ADD): { | ||
1771 | struct ib_atomic_eth *ateth; | ||
1772 | u64 vaddr; | ||
1773 | u64 sdata; | ||
1774 | u32 rkey; | ||
1775 | |||
1776 | if (!header_in_data) | ||
1777 | ateth = &ohdr->u.atomic_eth; | ||
1778 | else { | ||
1779 | ateth = (struct ib_atomic_eth *)data; | ||
1780 | data += sizeof(*ateth); | ||
1781 | } | ||
1782 | vaddr = be64_to_cpu(ateth->vaddr); | ||
1783 | if (unlikely(vaddr & (sizeof(u64) - 1))) | ||
1784 | goto nack_inv; | ||
1785 | rkey = be32_to_cpu(ateth->rkey); | ||
1786 | /* Check rkey & NAK */ | ||
1787 | if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, | ||
1788 | sizeof(u64), vaddr, rkey, | ||
1789 | IB_ACCESS_REMOTE_ATOMIC))) | ||
1790 | goto nack_acc; | ||
1791 | if (unlikely(!(qp->qp_access_flags & | ||
1792 | IB_ACCESS_REMOTE_ATOMIC))) | ||
1793 | goto nack_acc; | ||
1794 | /* Perform atomic OP and save result. */ | ||
1795 | sdata = be64_to_cpu(ateth->swap_data); | ||
1796 | spin_lock(&dev->pending_lock); | ||
1797 | qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; | ||
1798 | if (opcode == OP(FETCH_ADD)) | ||
1799 | *(u64 *) qp->r_sge.sge.vaddr = | ||
1800 | qp->r_atomic_data + sdata; | ||
1801 | else if (qp->r_atomic_data == | ||
1802 | be64_to_cpu(ateth->compare_data)) | ||
1803 | *(u64 *) qp->r_sge.sge.vaddr = sdata; | ||
1804 | spin_unlock(&dev->pending_lock); | ||
1805 | atomic_inc(&qp->msn); | ||
1806 | qp->r_atomic_psn = psn & IPS_PSN_MASK; | ||
1807 | psn |= 1 << 31; | ||
1808 | break; | ||
1809 | } | ||
1810 | |||
1811 | default: | ||
1812 | /* Drop packet for unknown opcodes. */ | ||
1813 | goto done; | ||
1814 | } | ||
1815 | qp->r_psn++; | ||
1816 | qp->r_state = opcode; | ||
1817 | /* Send an ACK if requested or required. */ | ||
1818 | if (psn & (1 << 31)) { | ||
1819 | /* | ||
1820 | * Coalesce ACKs unless there is a RDMA READ or | ||
1821 | * ATOMIC pending. | ||
1822 | */ | ||
1823 | spin_lock(&qp->s_lock); | ||
1824 | if (qp->s_ack_state == OP(ACKNOWLEDGE) || | ||
1825 | qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST) { | ||
1826 | qp->s_ack_state = opcode; | ||
1827 | qp->s_nak_state = 0; | ||
1828 | qp->s_ack_psn = psn; | ||
1829 | qp->s_ack_atomic = qp->r_atomic_data; | ||
1830 | goto resched; | ||
1831 | } | ||
1832 | spin_unlock(&qp->s_lock); | ||
1833 | } | ||
1834 | done: | ||
1835 | spin_unlock_irqrestore(&qp->r_rq.lock, flags); | ||
1836 | goto bail; | ||
1837 | |||
1838 | resched: | ||
1839 | /* | ||
1840 | * Try to send ACK right away but not if ipath_do_rc_send() is | ||
1841 | * active. | ||
1842 | */ | ||
1843 | if (qp->s_hdrwords == 0 && | ||
1844 | (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST || | ||
1845 | qp->s_ack_state >= IB_OPCODE_COMPARE_SWAP)) | ||
1846 | send_rc_ack(qp); | ||
1847 | |||
1848 | rdmadone: | ||
1849 | spin_unlock(&qp->s_lock); | ||
1850 | spin_unlock_irqrestore(&qp->r_rq.lock, flags); | ||
1851 | |||
1852 | /* Call ipath_do_rc_send() in another thread. */ | ||
1853 | tasklet_hi_schedule(&qp->s_task); | ||
1854 | |||
1855 | bail: | ||
1856 | return; | ||
1857 | } | ||
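ipath_rc_rcv() above pulls three fields out of the first BTH dword after byte-swapping it: the opcode from the top byte, the solicited-event bit, and the pad count used to trim the payload. A hedged sketch of just that decode, not part of the driver:

/* Hedged sketch: the BTH dword 0 fields used by the RC receive path above
 * (migration and TVer bits are ignored here just as they are there). */
static void sketch_decode_bth0(__be32 bth0, u32 *opcode, int *solicited, u32 *pad)
{
	u32 w = be32_to_cpu(bth0);

	*opcode = w >> 24;			/* IBA opcode, e.g. OP(SEND_LAST) */
	*solicited = (w & (1 << 23)) != 0;	/* requester asked for a CQ event */
	*pad = (w >> 20) & 3;			/* pad bytes before the ICRC */
}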
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h new file mode 100644 index 000000000000..1e59750c5f63 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_registers.h | |||
@@ -0,0 +1,446 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #ifndef _IPATH_REGISTERS_H | ||
34 | #define _IPATH_REGISTERS_H | ||
35 | |||
36 | /* | ||
37 | * This file should only be included by kernel source, and by the diags. | ||
38 | * It defines the registers, and their contents, for the InfiniPath HT-400 chip | ||
39 | */ | ||
40 | |||
41 | /* | ||
42 | * These are the InfiniPath register and buffer bit definitions, | ||
43 | * that are visible to software, and needed only by the kernel | ||
44 | * and diag code. A few, that are visible to protocol and user | ||
45 | * code are in ipath_common.h. Some bits are specific | ||
46 | * to a given chip implementation, and have been moved to the | ||
47 | * chip-specific source file | ||
48 | */ | ||
49 | |||
50 | /* kr_revision bits */ | ||
51 | #define INFINIPATH_R_CHIPREVMINOR_MASK 0xFF | ||
52 | #define INFINIPATH_R_CHIPREVMINOR_SHIFT 0 | ||
53 | #define INFINIPATH_R_CHIPREVMAJOR_MASK 0xFF | ||
54 | #define INFINIPATH_R_CHIPREVMAJOR_SHIFT 8 | ||
55 | #define INFINIPATH_R_ARCH_MASK 0xFF | ||
56 | #define INFINIPATH_R_ARCH_SHIFT 16 | ||
57 | #define INFINIPATH_R_SOFTWARE_MASK 0xFF | ||
58 | #define INFINIPATH_R_SOFTWARE_SHIFT 24 | ||
59 | #define INFINIPATH_R_BOARDID_MASK 0xFF | ||
60 | #define INFINIPATH_R_BOARDID_SHIFT 32 | ||
61 | |||
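These mask/shift pairs are used by shifting the 64-bit register value right and then masking; a hedged helper (not part of the driver) unpacking kr_revision with the constants above:

/* Hedged sketch: unpack some kr_revision fields.  'rev' is the 64-bit
 * value read back from the revision register. */
static void sketch_decode_revision(u64 rev, u32 *major, u32 *minor, u32 *boardid)
{
	*minor = (rev >> INFINIPATH_R_CHIPREVMINOR_SHIFT) &
		INFINIPATH_R_CHIPREVMINOR_MASK;
	*major = (rev >> INFINIPATH_R_CHIPREVMAJOR_SHIFT) &
		INFINIPATH_R_CHIPREVMAJOR_MASK;
	*boardid = (rev >> INFINIPATH_R_BOARDID_SHIFT) &
		INFINIPATH_R_BOARDID_MASK;
}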
62 | /* kr_control bits */ | ||
63 | #define INFINIPATH_C_FREEZEMODE 0x00000002 | ||
64 | #define INFINIPATH_C_LINKENABLE 0x00000004 | ||
65 | #define INFINIPATH_C_RESET 0x00000001 | ||
66 | |||
67 | /* kr_sendctrl bits */ | ||
68 | #define INFINIPATH_S_DISARMPIOBUF_SHIFT 16 | ||
69 | |||
70 | #define IPATH_S_ABORT 0 | ||
71 | #define IPATH_S_PIOINTBUFAVAIL 1 | ||
72 | #define IPATH_S_PIOBUFAVAILUPD 2 | ||
73 | #define IPATH_S_PIOENABLE 3 | ||
74 | #define IPATH_S_DISARM 31 | ||
75 | |||
76 | #define INFINIPATH_S_ABORT (1U << IPATH_S_ABORT) | ||
77 | #define INFINIPATH_S_PIOINTBUFAVAIL (1U << IPATH_S_PIOINTBUFAVAIL) | ||
78 | #define INFINIPATH_S_PIOBUFAVAILUPD (1U << IPATH_S_PIOBUFAVAILUPD) | ||
79 | #define INFINIPATH_S_PIOENABLE (1U << IPATH_S_PIOENABLE) | ||
80 | #define INFINIPATH_S_DISARM (1U << IPATH_S_DISARM) | ||
81 | |||
82 | /* kr_rcvctrl bits */ | ||
83 | #define INFINIPATH_R_PORTENABLE_SHIFT 0 | ||
84 | #define INFINIPATH_R_INTRAVAIL_SHIFT 16 | ||
85 | #define INFINIPATH_R_TAILUPD 0x80000000 | ||
86 | |||
87 | /* kr_intstatus, kr_intclear, kr_intmask bits */ | ||
88 | #define INFINIPATH_I_RCVURG_SHIFT 0 | ||
89 | #define INFINIPATH_I_RCVAVAIL_SHIFT 12 | ||
90 | #define INFINIPATH_I_ERROR 0x80000000 | ||
91 | #define INFINIPATH_I_SPIOSENT 0x40000000 | ||
92 | #define INFINIPATH_I_SPIOBUFAVAIL 0x20000000 | ||
93 | #define INFINIPATH_I_GPIO 0x10000000 | ||
94 | |||
95 | /* kr_errorstatus, kr_errorclear, kr_errormask bits */ | ||
96 | #define INFINIPATH_E_RFORMATERR 0x0000000000000001ULL | ||
97 | #define INFINIPATH_E_RVCRC 0x0000000000000002ULL | ||
98 | #define INFINIPATH_E_RICRC 0x0000000000000004ULL | ||
99 | #define INFINIPATH_E_RMINPKTLEN 0x0000000000000008ULL | ||
100 | #define INFINIPATH_E_RMAXPKTLEN 0x0000000000000010ULL | ||
101 | #define INFINIPATH_E_RLONGPKTLEN 0x0000000000000020ULL | ||
102 | #define INFINIPATH_E_RSHORTPKTLEN 0x0000000000000040ULL | ||
103 | #define INFINIPATH_E_RUNEXPCHAR 0x0000000000000080ULL | ||
104 | #define INFINIPATH_E_RUNSUPVL 0x0000000000000100ULL | ||
105 | #define INFINIPATH_E_REBP 0x0000000000000200ULL | ||
106 | #define INFINIPATH_E_RIBFLOW 0x0000000000000400ULL | ||
107 | #define INFINIPATH_E_RBADVERSION 0x0000000000000800ULL | ||
108 | #define INFINIPATH_E_RRCVEGRFULL 0x0000000000001000ULL | ||
109 | #define INFINIPATH_E_RRCVHDRFULL 0x0000000000002000ULL | ||
110 | #define INFINIPATH_E_RBADTID 0x0000000000004000ULL | ||
111 | #define INFINIPATH_E_RHDRLEN 0x0000000000008000ULL | ||
112 | #define INFINIPATH_E_RHDR 0x0000000000010000ULL | ||
113 | #define INFINIPATH_E_RIBLOSTLINK 0x0000000000020000ULL | ||
114 | #define INFINIPATH_E_SMINPKTLEN 0x0000000020000000ULL | ||
115 | #define INFINIPATH_E_SMAXPKTLEN 0x0000000040000000ULL | ||
116 | #define INFINIPATH_E_SUNDERRUN 0x0000000080000000ULL | ||
117 | #define INFINIPATH_E_SPKTLEN 0x0000000100000000ULL | ||
118 | #define INFINIPATH_E_SDROPPEDSMPPKT 0x0000000200000000ULL | ||
119 | #define INFINIPATH_E_SDROPPEDDATAPKT 0x0000000400000000ULL | ||
120 | #define INFINIPATH_E_SPIOARMLAUNCH 0x0000000800000000ULL | ||
121 | #define INFINIPATH_E_SUNEXPERRPKTNUM 0x0000001000000000ULL | ||
122 | #define INFINIPATH_E_SUNSUPVL 0x0000002000000000ULL | ||
123 | #define INFINIPATH_E_IBSTATUSCHANGED 0x0001000000000000ULL | ||
124 | #define INFINIPATH_E_INVALIDADDR 0x0002000000000000ULL | ||
125 | #define INFINIPATH_E_RESET 0x0004000000000000ULL | ||
126 | #define INFINIPATH_E_HARDWARE 0x0008000000000000ULL | ||
127 | |||
128 | /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ | ||
129 | /* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo | ||
130 | * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: eagerTID, 3: expTID | ||
131 | * bit 4: flag buffer, 5: datainfo, 6: header info */ | ||
132 | #define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL | ||
133 | #define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40 | ||
134 | #define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL | ||
135 | #define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44 | ||
136 | #define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL | ||
137 | #define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL | ||
138 | #define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL | ||
139 | #define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL | ||
140 | |||
141 | /* kr_hwdiagctrl bits */ | ||
142 | #define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL | ||
143 | #define INFINIPATH_DC_FORCETXEMEMPARITYERR_SHIFT 40 | ||
144 | #define INFINIPATH_DC_FORCERXEMEMPARITYERR_MASK 0x7FULL | ||
145 | #define INFINIPATH_DC_FORCERXEMEMPARITYERR_SHIFT 44 | ||
146 | #define INFINIPATH_DC_FORCERXDSYNCMEMPARITYERR 0x0000000400000000ULL | ||
147 | #define INFINIPATH_DC_COUNTERDISABLE 0x1000000000000000ULL | ||
148 | #define INFINIPATH_DC_COUNTERWREN 0x2000000000000000ULL | ||
149 | #define INFINIPATH_DC_FORCEIBCBUSTOSPCPARITYERR 0x4000000000000000ULL | ||
150 | #define INFINIPATH_DC_FORCEIBCBUSFRSPCPARITYERR 0x8000000000000000ULL | ||
151 | |||
152 | /* kr_ibcctrl bits */ | ||
153 | #define INFINIPATH_IBCC_FLOWCTRLPERIOD_MASK 0xFFULL | ||
154 | #define INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT 0 | ||
155 | #define INFINIPATH_IBCC_FLOWCTRLWATERMARK_MASK 0xFFULL | ||
156 | #define INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT 8 | ||
157 | #define INFINIPATH_IBCC_LINKINITCMD_MASK 0x3ULL | ||
158 | #define INFINIPATH_IBCC_LINKINITCMD_DISABLE 1 | ||
159 | #define INFINIPATH_IBCC_LINKINITCMD_POLL 2 /* cycle through TS1/TS2 till OK */ | ||
160 | #define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3 /* wait for TS1, then go on */ | ||
161 | #define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16 | ||
162 | #define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL | ||
163 | #define INFINIPATH_IBCC_LINKCMD_INIT 1 /* move to 0x11 */ | ||
164 | #define INFINIPATH_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */ | ||
165 | #define INFINIPATH_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */ | ||
166 | #define INFINIPATH_IBCC_LINKCMD_SHIFT 18 | ||
167 | #define INFINIPATH_IBCC_MAXPKTLEN_MASK 0x7FFULL | ||
168 | #define INFINIPATH_IBCC_MAXPKTLEN_SHIFT 20 | ||
169 | #define INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK 0xFULL | ||
170 | #define INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT 32 | ||
171 | #define INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK 0xFULL | ||
172 | #define INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT 36 | ||
173 | #define INFINIPATH_IBCC_CREDITSCALE_MASK 0x7ULL | ||
174 | #define INFINIPATH_IBCC_CREDITSCALE_SHIFT 40 | ||
175 | #define INFINIPATH_IBCC_LOOPBACK 0x8000000000000000ULL | ||
176 | #define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL | ||
177 | |||
178 | /* kr_ibcstatus bits */ | ||
179 | #define INFINIPATH_IBCS_LINKTRAININGSTATE_MASK 0xF | ||
180 | #define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0 | ||
181 | #define INFINIPATH_IBCS_LINKSTATE_MASK 0x7 | ||
182 | #define INFINIPATH_IBCS_LINKSTATE_SHIFT 4 | ||
183 | #define INFINIPATH_IBCS_TXREADY 0x40000000 | ||
184 | #define INFINIPATH_IBCS_TXCREDITOK 0x80000000 | ||
185 | /* link training states (shift by INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) */ | ||
186 | #define INFINIPATH_IBCS_LT_STATE_DISABLED 0x00 | ||
187 | #define INFINIPATH_IBCS_LT_STATE_LINKUP 0x01 | ||
188 | #define INFINIPATH_IBCS_LT_STATE_POLLACTIVE 0x02 | ||
189 | #define INFINIPATH_IBCS_LT_STATE_POLLQUIET 0x03 | ||
190 | #define INFINIPATH_IBCS_LT_STATE_SLEEPDELAY 0x04 | ||
191 | #define INFINIPATH_IBCS_LT_STATE_SLEEPQUIET 0x05 | ||
192 | #define INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE 0x08 | ||
193 | #define INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG 0x09 | ||
194 | #define INFINIPATH_IBCS_LT_STATE_CFGWAITRMT 0x0a | ||
195 | #define INFINIPATH_IBCS_LT_STATE_CFGIDLE 0x0b | ||
196 | #define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN 0x0c | ||
197 | #define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT 0x0e | ||
198 | #define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE 0x0f | ||
199 | /* link state machine states (shift by INFINIPATH_IBCS_LINKSTATE_SHIFT) */ | ||
200 | #define INFINIPATH_IBCS_L_STATE_DOWN 0x0 | ||
201 | #define INFINIPATH_IBCS_L_STATE_INIT 0x1 | ||
202 | #define INFINIPATH_IBCS_L_STATE_ARM 0x2 | ||
203 | #define INFINIPATH_IBCS_L_STATE_ACTIVE 0x3 | ||
204 | #define INFINIPATH_IBCS_L_STATE_ACT_DEFER 0x4 | ||
205 | |||
206 | /* combination link status states that we use with some frequency */ | ||
207 | #define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \ | ||
208 | << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \ | ||
209 | (INFINIPATH_IBCS_LINKSTATE_MASK \ | ||
210 | <<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT)) | ||
211 | #define IPATH_IBSTATE_INIT ((INFINIPATH_IBCS_L_STATE_INIT \ | ||
212 | << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \ | ||
213 | (INFINIPATH_IBCS_LT_STATE_LINKUP \ | ||
214 | <<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT)) | ||
215 | #define IPATH_IBSTATE_ARM ((INFINIPATH_IBCS_L_STATE_ARM \ | ||
216 | << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \ | ||
217 | (INFINIPATH_IBCS_LT_STATE_LINKUP \ | ||
218 | <<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT)) | ||
219 | #define IPATH_IBSTATE_ACTIVE ((INFINIPATH_IBCS_L_STATE_ACTIVE \ | ||
220 | << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \ | ||
221 | (INFINIPATH_IBCS_LT_STATE_LINKUP \ | ||
222 | <<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT)) | ||
223 | |||
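The combined states are intended to be matched against kr_ibcstatus after masking; for instance, a hedged check (not taken from the driver) that the link has trained up and gone ACTIVE:

/* Hedged sketch: true when the masked kr_ibcstatus value matches the
 * ACTIVE combination state defined above. */
static int sketch_link_is_active(u64 ibcstatus)
{
	return (ibcstatus & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ACTIVE;
}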
224 | /* kr_extstatus bits */ | ||
225 | #define INFINIPATH_EXTS_SERDESPLLLOCK 0x1 | ||
226 | #define INFINIPATH_EXTS_GPIOIN_MASK 0xFFFFULL | ||
227 | #define INFINIPATH_EXTS_GPIOIN_SHIFT 48 | ||
228 | |||
229 | /* kr_extctrl bits */ | ||
230 | #define INFINIPATH_EXTC_GPIOINVERT_MASK 0xFFFFULL | ||
231 | #define INFINIPATH_EXTC_GPIOINVERT_SHIFT 32 | ||
232 | #define INFINIPATH_EXTC_GPIOOE_MASK 0xFFFFULL | ||
233 | #define INFINIPATH_EXTC_GPIOOE_SHIFT 48 | ||
234 | #define INFINIPATH_EXTC_SERDESENABLE 0x80000000ULL | ||
235 | #define INFINIPATH_EXTC_SERDESCONNECT 0x40000000ULL | ||
236 | #define INFINIPATH_EXTC_SERDESENTRUNKING 0x20000000ULL | ||
237 | #define INFINIPATH_EXTC_SERDESDISRXFIFO 0x10000000ULL | ||
238 | #define INFINIPATH_EXTC_SERDESENPLPBK1 0x08000000ULL | ||
239 | #define INFINIPATH_EXTC_SERDESENPLPBK2 0x04000000ULL | ||
240 | #define INFINIPATH_EXTC_SERDESENENCDEC 0x02000000ULL | ||
241 | #define INFINIPATH_EXTC_LED1SECPORT_ON 0x00000020ULL | ||
242 | #define INFINIPATH_EXTC_LED2SECPORT_ON 0x00000010ULL | ||
243 | #define INFINIPATH_EXTC_LED1PRIPORT_ON 0x00000008ULL | ||
244 | #define INFINIPATH_EXTC_LED2PRIPORT_ON 0x00000004ULL | ||
245 | #define INFINIPATH_EXTC_LEDGBLOK_ON 0x00000002ULL | ||
246 | #define INFINIPATH_EXTC_LEDGBLERR_OFF 0x00000001ULL | ||
247 | |||
248 | /* kr_mdio bits */ | ||
249 | #define INFINIPATH_MDIO_CLKDIV_MASK 0x7FULL | ||
250 | #define INFINIPATH_MDIO_CLKDIV_SHIFT 32 | ||
251 | #define INFINIPATH_MDIO_COMMAND_MASK 0x7ULL | ||
252 | #define INFINIPATH_MDIO_COMMAND_SHIFT 26 | ||
253 | #define INFINIPATH_MDIO_DEVADDR_MASK 0x1FULL | ||
254 | #define INFINIPATH_MDIO_DEVADDR_SHIFT 21 | ||
255 | #define INFINIPATH_MDIO_REGADDR_MASK 0x1FULL | ||
256 | #define INFINIPATH_MDIO_REGADDR_SHIFT 16 | ||
257 | #define INFINIPATH_MDIO_DATA_MASK 0xFFFFULL | ||
258 | #define INFINIPATH_MDIO_DATA_SHIFT 0 | ||
259 | #define INFINIPATH_MDIO_CMDVALID 0x0000000040000000ULL | ||
260 | #define INFINIPATH_MDIO_RDDATAVALID 0x0000000080000000ULL | ||
261 | |||
262 | /* kr_partitionkey bits */ | ||
263 | #define INFINIPATH_PKEY_SIZE 16 | ||
264 | #define INFINIPATH_PKEY_MASK 0xFFFF | ||
265 | #define INFINIPATH_PKEY_DEFAULT_PKEY 0xFFFF | ||
266 | |||
267 | /* kr_serdesconfig0 bits */ | ||
268 | #define INFINIPATH_SERDC0_RESET_MASK 0xfULL /* overall reset bits */ | ||
269 | #define INFINIPATH_SERDC0_RESET_PLL 0x10000000ULL /* pll reset */ | ||
270 | #define INFINIPATH_SERDC0_TXIDLE 0xF000ULL /* tx idle enables (per lane) */ | ||
271 | #define INFINIPATH_SERDC0_RXDETECT_EN 0xF0000ULL /* rx detect enables (per lane) */ | ||
272 | #define INFINIPATH_SERDC0_L1PWR_DN 0xF0ULL /* L1 Power down; use with RXDETECT, | ||
273 | otherwise not used on IB side */ | ||
274 | |||
275 | /* kr_xgxsconfig bits */ | ||
276 | #define INFINIPATH_XGXS_RESET 0x7ULL | ||
277 | #define INFINIPATH_XGXS_MDIOADDR_MASK 0xfULL | ||
278 | #define INFINIPATH_XGXS_MDIOADDR_SHIFT 4 | ||
279 | |||
280 | #define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ | ||
281 | |||
282 | /* TID entries (memory), HT400-only */ | ||
283 | #define INFINIPATH_RT_VALID 0x8000000000000000ULL | ||
284 | #define INFINIPATH_RT_ADDR_SHIFT 0 | ||
285 | #define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF | ||
286 | #define INFINIPATH_RT_BUFSIZE_SHIFT 48 | ||
287 | |||
288 | /* | ||
289 | * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our | ||
290 | * PIO send buffers. This is well beyond anything currently | ||
291 | * defined in the InfiniBand spec. | ||
292 | */ | ||
293 | #define IPATH_PIO_MAXIBHDR 128 | ||
294 | |||
295 | typedef u64 ipath_err_t; | ||
296 | |||
297 | /* mask of defined bits for various registers */ | ||
298 | extern u64 infinipath_i_bitsextant; | ||
299 | extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant; | ||
300 | |||
301 | /* masks that are different in various chips, or only exist in some chips */ | ||
302 | extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask; | ||
303 | |||
304 | /* | ||
305 | * register bits for selecting i2c direction and values, used for I2C serial | ||
306 | * flash | ||
307 | */ | ||
308 | extern u16 ipath_gpio_sda_num, ipath_gpio_scl_num; | ||
309 | extern u64 ipath_gpio_sda, ipath_gpio_scl; | ||
310 | |||
311 | /* | ||
312 | * These are the infinipath general register numbers (not offsets). | ||
313 | * The kernel registers are used directly; those beyond the kernel | ||
314 | * registers are calculated from one of the base registers. The use of | ||
315 | * an integer type doesn't allow type-checking as thorough as, say, | ||
316 | * an enum but allows for better hiding of chip differences. | ||
317 | */ | ||
318 | typedef const u16 ipath_kreg, /* infinipath general registers */ | ||
319 | ipath_creg, /* infinipath counter registers */ | ||
320 | ipath_sreg; /* kernel-only, infinipath send registers */ | ||
321 | |||
322 | /* | ||
323 | * These are the chip registers common to all infinipath chips, and | ||
324 | * used both by the kernel and the diagnostics or other user code. | ||
325 | * They are all implemented such that 64 bit accesses work. | ||
326 | * Some implement no more than 32 bits. Because 64 bit reads | ||
327 | * require 2 HT cmds on opteron, we access those with 32 bit | ||
328 | * reads for efficiency (they are written as 64 bits, since | ||
329 | * the extra 32 bits are nearly free on writes, and it slightly reduces | ||
330 | * complexity). The rest are all accessed as 64 bits. | ||
331 | */ | ||
332 | struct ipath_kregs { | ||
333 | /* These are the 32 bit group */ | ||
334 | ipath_kreg kr_control; | ||
335 | ipath_kreg kr_counterregbase; | ||
336 | ipath_kreg kr_intmask; | ||
337 | ipath_kreg kr_intstatus; | ||
338 | ipath_kreg kr_pagealign; | ||
339 | ipath_kreg kr_portcnt; | ||
340 | ipath_kreg kr_rcvtidbase; | ||
341 | ipath_kreg kr_rcvtidcnt; | ||
342 | ipath_kreg kr_rcvegrbase; | ||
343 | ipath_kreg kr_rcvegrcnt; | ||
344 | ipath_kreg kr_scratch; | ||
345 | ipath_kreg kr_sendctrl; | ||
346 | ipath_kreg kr_sendpiobufbase; | ||
347 | ipath_kreg kr_sendpiobufcnt; | ||
348 | ipath_kreg kr_sendpiosize; | ||
349 | ipath_kreg kr_sendregbase; | ||
350 | ipath_kreg kr_userregbase; | ||
351 | /* These are the 64 bit group */ | ||
352 | ipath_kreg kr_debugport; | ||
353 | ipath_kreg kr_debugportselect; | ||
354 | ipath_kreg kr_errorclear; | ||
355 | ipath_kreg kr_errormask; | ||
356 | ipath_kreg kr_errorstatus; | ||
357 | ipath_kreg kr_extctrl; | ||
358 | ipath_kreg kr_extstatus; | ||
359 | ipath_kreg kr_gpio_clear; | ||
360 | ipath_kreg kr_gpio_mask; | ||
361 | ipath_kreg kr_gpio_out; | ||
362 | ipath_kreg kr_gpio_status; | ||
363 | ipath_kreg kr_hwdiagctrl; | ||
364 | ipath_kreg kr_hwerrclear; | ||
365 | ipath_kreg kr_hwerrmask; | ||
366 | ipath_kreg kr_hwerrstatus; | ||
367 | ipath_kreg kr_ibcctrl; | ||
368 | ipath_kreg kr_ibcstatus; | ||
369 | ipath_kreg kr_intblocked; | ||
370 | ipath_kreg kr_intclear; | ||
371 | ipath_kreg kr_interruptconfig; | ||
372 | ipath_kreg kr_mdio; | ||
373 | ipath_kreg kr_partitionkey; | ||
374 | ipath_kreg kr_rcvbthqp; | ||
375 | ipath_kreg kr_rcvbufbase; | ||
376 | ipath_kreg kr_rcvbufsize; | ||
377 | ipath_kreg kr_rcvctrl; | ||
378 | ipath_kreg kr_rcvhdrcnt; | ||
379 | ipath_kreg kr_rcvhdrentsize; | ||
380 | ipath_kreg kr_rcvhdrsize; | ||
381 | ipath_kreg kr_rcvintmembase; | ||
382 | ipath_kreg kr_rcvintmemsize; | ||
383 | ipath_kreg kr_revision; | ||
384 | ipath_kreg kr_sendbuffererror; | ||
385 | ipath_kreg kr_sendpioavailaddr; | ||
386 | ipath_kreg kr_serdesconfig0; | ||
387 | ipath_kreg kr_serdesconfig1; | ||
388 | ipath_kreg kr_serdesstatus; | ||
389 | ipath_kreg kr_txintmembase; | ||
390 | ipath_kreg kr_txintmemsize; | ||
391 | ipath_kreg kr_xgxsconfig; | ||
392 | ipath_kreg kr_ibpllcfg; | ||
393 | /* use these two (and the following N ports) only with ipath_k*_kreg64_port(); | ||
394 | * not *kreg64() */ | ||
395 | ipath_kreg kr_rcvhdraddr; | ||
396 | ipath_kreg kr_rcvhdrtailaddr; | ||
397 | |||
398 | /* remaining registers are not present on all types of infinipath chips */ | ||
399 | ipath_kreg kr_rcvpktledcnt; | ||
400 | ipath_kreg kr_pcierbuftestreg0; | ||
401 | ipath_kreg kr_pcierbuftestreg1; | ||
402 | ipath_kreg kr_pcieq0serdesconfig0; | ||
403 | ipath_kreg kr_pcieq0serdesconfig1; | ||
404 | ipath_kreg kr_pcieq0serdesstatus; | ||
405 | ipath_kreg kr_pcieq1serdesconfig0; | ||
406 | ipath_kreg kr_pcieq1serdesconfig1; | ||
407 | ipath_kreg kr_pcieq1serdesstatus; | ||
408 | }; | ||
409 | |||
410 | struct ipath_cregs { | ||
411 | ipath_creg cr_badformatcnt; | ||
412 | ipath_creg cr_erricrccnt; | ||
413 | ipath_creg cr_errlinkcnt; | ||
414 | ipath_creg cr_errlpcrccnt; | ||
415 | ipath_creg cr_errpkey; | ||
416 | ipath_creg cr_errrcvflowctrlcnt; | ||
417 | ipath_creg cr_err_rlencnt; | ||
418 | ipath_creg cr_errslencnt; | ||
419 | ipath_creg cr_errtidfull; | ||
420 | ipath_creg cr_errtidvalid; | ||
421 | ipath_creg cr_errvcrccnt; | ||
422 | ipath_creg cr_ibstatuschange; | ||
423 | ipath_creg cr_intcnt; | ||
424 | ipath_creg cr_invalidrlencnt; | ||
425 | ipath_creg cr_invalidslencnt; | ||
426 | ipath_creg cr_lbflowstallcnt; | ||
427 | ipath_creg cr_iblinkdowncnt; | ||
428 | ipath_creg cr_iblinkerrrecovcnt; | ||
429 | ipath_creg cr_ibsymbolerrcnt; | ||
430 | ipath_creg cr_pktrcvcnt; | ||
431 | ipath_creg cr_pktrcvflowctrlcnt; | ||
432 | ipath_creg cr_pktsendcnt; | ||
433 | ipath_creg cr_pktsendflowcnt; | ||
434 | ipath_creg cr_portovflcnt; | ||
435 | ipath_creg cr_rcvebpcnt; | ||
436 | ipath_creg cr_rcvovflcnt; | ||
437 | ipath_creg cr_rxdroppktcnt; | ||
438 | ipath_creg cr_senddropped; | ||
439 | ipath_creg cr_sendstallcnt; | ||
440 | ipath_creg cr_sendunderruncnt; | ||
441 | ipath_creg cr_unsupvlcnt; | ||
442 | ipath_creg cr_wordrcvcnt; | ||
443 | ipath_creg cr_wordsendcnt; | ||
444 | }; | ||
445 | |||
446 | #endif /* _IPATH_REGISTERS_H */ | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c new file mode 100644 index 000000000000..f232e77b78ee --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c | |||
@@ -0,0 +1,552 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include "ipath_verbs.h" | ||
34 | |||
35 | /* | ||
36 | * Convert the AETH RNR timeout code into the number of milliseconds. | ||
37 | */ | ||
38 | const u32 ib_ipath_rnr_table[32] = { | ||
39 | 656, /* 0 */ | ||
40 | 1, /* 1 */ | ||
41 | 1, /* 2 */ | ||
42 | 1, /* 3 */ | ||
43 | 1, /* 4 */ | ||
44 | 1, /* 5 */ | ||
45 | 1, /* 6 */ | ||
46 | 1, /* 7 */ | ||
47 | 1, /* 8 */ | ||
48 | 1, /* 9 */ | ||
49 | 1, /* A */ | ||
50 | 1, /* B */ | ||
51 | 1, /* C */ | ||
52 | 1, /* D */ | ||
53 | 2, /* E */ | ||
54 | 2, /* F */ | ||
55 | 3, /* 10 */ | ||
56 | 4, /* 11 */ | ||
57 | 6, /* 12 */ | ||
58 | 8, /* 13 */ | ||
59 | 11, /* 14 */ | ||
60 | 16, /* 15 */ | ||
61 | 21, /* 16 */ | ||
62 | 31, /* 17 */ | ||
63 | 41, /* 18 */ | ||
64 | 62, /* 19 */ | ||
65 | 82, /* 1A */ | ||
66 | 123, /* 1B */ | ||
67 | 164, /* 1C */ | ||
68 | 246, /* 1D */ | ||
69 | 328, /* 1E */ | ||
70 | 492 /* 1F */ | ||
71 | }; | ||
72 | |||
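The table is indexed directly with the five-bit RNR timer code from the NAK's AETH, as do_rc_ack() does in ipath_rc.c; a hedged helper repeating that lookup, assuming the IPS_AETH_CREDIT_* constants from the driver headers:

/* Hedged sketch: milliseconds to back off after an RNR NAK. */
static u32 sketch_rnr_delay_ms(u32 aeth)
{
	return ib_ipath_rnr_table[(aeth >> IPS_AETH_CREDIT_SHIFT) &
				  IPS_AETH_CREDIT_MASK];
}

So a timer code of 0x10 yields a 3 ms wait, and the maximum code of 0x1F rounds up to 492 ms.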
73 | /** | ||
74 | * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device | ||
75 | * @qp: the QP | ||
76 | * | ||
77 | * XXX Use a simple list for now. We might need a priority | ||
78 | * queue if we have lots of QPs waiting for RNR timeouts | ||
79 | * but that should be rare. | ||
80 | */ | ||
81 | void ipath_insert_rnr_queue(struct ipath_qp *qp) | ||
82 | { | ||
83 | struct ipath_ibdev *dev = to_idev(qp->ibqp.device); | ||
84 | unsigned long flags; | ||
85 | |||
86 | spin_lock_irqsave(&dev->pending_lock, flags); | ||
87 | if (list_empty(&dev->rnrwait)) | ||
88 | list_add(&qp->timerwait, &dev->rnrwait); | ||
89 | else { | ||
90 | struct list_head *l = &dev->rnrwait; | ||
91 | struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp, | ||
92 | timerwait); | ||
93 | |||
94 | while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) { | ||
95 | qp->s_rnr_timeout -= nqp->s_rnr_timeout; | ||
96 | l = l->next; | ||
97 | if (l->next == &dev->rnrwait) | ||
98 | break; | ||
99 | nqp = list_entry(l->next, struct ipath_qp, | ||
100 | timerwait); | ||
101 | } | ||
102 | list_add(&qp->timerwait, l); | ||
103 | } | ||
104 | spin_unlock_irqrestore(&dev->pending_lock, flags); | ||
105 | } | ||
106 | |||
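Each QP on the rnrwait list ends up storing its timeout relative to the entries queued ahead of it: the insertion loop above subtracts each earlier entry's s_rnr_timeout from the new one until it finds its slot. As a worked example with hypothetical values, if the list already holds entries storing 3 and 5 (absolute expiries of 3 and 8 ms), a QP inserted with a 10 ms timeout walks past both and is queued storing 2.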
107 | /** | ||
108 | * ipath_get_rwqe - copy the next RWQE into the QP's RWQE | ||
109 | * @qp: the QP | ||
110 | * @wr_id_only: update wr_id only, not SGEs | ||
111 | * | ||
112 | * Return 0 if no RWQE is available, otherwise return 1. | ||
113 | * | ||
114 | * Called at interrupt level with the QP r_rq.lock held. | ||
115 | */ | ||
116 | int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) | ||
117 | { | ||
118 | struct ipath_rq *rq; | ||
119 | struct ipath_srq *srq; | ||
120 | struct ipath_rwqe *wqe; | ||
121 | int ret; | ||
122 | |||
123 | if (!qp->ibqp.srq) { | ||
124 | rq = &qp->r_rq; | ||
125 | if (unlikely(rq->tail == rq->head)) { | ||
126 | ret = 0; | ||
127 | goto bail; | ||
128 | } | ||
129 | wqe = get_rwqe_ptr(rq, rq->tail); | ||
130 | qp->r_wr_id = wqe->wr_id; | ||
131 | if (!wr_id_only) { | ||
132 | qp->r_sge.sge = wqe->sg_list[0]; | ||
133 | qp->r_sge.sg_list = wqe->sg_list + 1; | ||
134 | qp->r_sge.num_sge = wqe->num_sge; | ||
135 | qp->r_len = wqe->length; | ||
136 | } | ||
137 | if (++rq->tail >= rq->size) | ||
138 | rq->tail = 0; | ||
139 | ret = 1; | ||
140 | goto bail; | ||
141 | } | ||
142 | |||
143 | srq = to_isrq(qp->ibqp.srq); | ||
144 | rq = &srq->rq; | ||
145 | spin_lock(&rq->lock); | ||
146 | if (unlikely(rq->tail == rq->head)) { | ||
147 | spin_unlock(&rq->lock); | ||
148 | ret = 0; | ||
149 | goto bail; | ||
150 | } | ||
151 | wqe = get_rwqe_ptr(rq, rq->tail); | ||
152 | qp->r_wr_id = wqe->wr_id; | ||
153 | if (!wr_id_only) { | ||
154 | qp->r_sge.sge = wqe->sg_list[0]; | ||
155 | qp->r_sge.sg_list = wqe->sg_list + 1; | ||
156 | qp->r_sge.num_sge = wqe->num_sge; | ||
157 | qp->r_len = wqe->length; | ||
158 | } | ||
159 | if (++rq->tail >= rq->size) | ||
160 | rq->tail = 0; | ||
161 | if (srq->ibsrq.event_handler) { | ||
162 | struct ib_event ev; | ||
163 | u32 n; | ||
164 | |||
165 | if (rq->head < rq->tail) | ||
166 | n = rq->size + rq->head - rq->tail; | ||
167 | else | ||
168 | n = rq->head - rq->tail; | ||
169 | if (n < srq->limit) { | ||
170 | srq->limit = 0; | ||
171 | spin_unlock(&rq->lock); | ||
172 | ev.device = qp->ibqp.device; | ||
173 | ev.element.srq = qp->ibqp.srq; | ||
174 | ev.event = IB_EVENT_SRQ_LIMIT_REACHED; | ||
175 | srq->ibsrq.event_handler(&ev, | ||
176 | srq->ibsrq.srq_context); | ||
177 | } else | ||
178 | spin_unlock(&rq->lock); | ||
179 | } else | ||
180 | spin_unlock(&rq->lock); | ||
181 | ret = 1; | ||
182 | |||
183 | bail: | ||
184 | return ret; | ||
185 | } | ||
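After consuming an SRQ entry, the code above recomputes the ring occupancy modulo the ring size and raises a one-shot IB_EVENT_SRQ_LIMIT_REACHED when occupancy first drops below the armed limit. Below is a minimal userspace sketch of that occupancy/limit check; the struct and helper names are invented for illustration.

/*
 * Sketch of the SRQ limit-arming test: occupancy is computed modulo the
 * ring size and the limit is disarmed once the event has fired, exactly
 * once.  Illustrative names only.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct srq_ring {
	uint32_t head, tail, size;
	uint32_t limit;                       /* 0 means "not armed" */
};

/* Occupied entries, correct whether or not head has wrapped past tail. */
static uint32_t srq_count(const struct srq_ring *rq)
{
	return rq->head >= rq->tail ? rq->head - rq->tail
				    : rq->size + rq->head - rq->tail;
}

/* Returns true exactly once when occupancy first drops below the limit. */
static bool srq_check_limit(struct srq_ring *rq)
{
	if (rq->limit && srq_count(rq) < rq->limit) {
		rq->limit = 0;                /* disarm, as the driver does */
		return true;                  /* caller raises the SRQ limit event */
	}
	return false;
}

int main(void)
{
	struct srq_ring rq = { .head = 2, .tail = 0, .size = 8, .limit = 3 };

	if (++rq.tail >= rq.size)             /* consume one entry */
		rq.tail = 0;
	printf("limit event: %d\n", srq_check_limit(&rq));   /* prints 1 */
	return 0;
}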
186 | |||
187 | /** | ||
188 | * ipath_ruc_loopback - handle UC and RC loopback requests | ||
189 | * @sqp: the loopback QP | ||
190 | * @wc: the work completion entry | ||
191 | * | ||
192 | * This is called from ipath_do_uc_send() or ipath_do_rc_send() to | ||
193 | * forward a WQE addressed to the same HCA. | ||
194 | * Note that although we are single threaded due to the tasklet, we still | ||
195 | * have to protect against post_send(). We don't have to worry about | ||
196 | * receive interrupts since this is a connected protocol and all packets | ||
197 | * will pass through here. | ||
198 | */ | ||
199 | void ipath_ruc_loopback(struct ipath_qp *sqp, struct ib_wc *wc) | ||
200 | { | ||
201 | struct ipath_ibdev *dev = to_idev(sqp->ibqp.device); | ||
202 | struct ipath_qp *qp; | ||
203 | struct ipath_swqe *wqe; | ||
204 | struct ipath_sge *sge; | ||
205 | unsigned long flags; | ||
206 | u64 sdata; | ||
207 | |||
208 | qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); | ||
209 | if (!qp) { | ||
210 | dev->n_pkt_drops++; | ||
211 | return; | ||
212 | } | ||
213 | |||
214 | again: | ||
215 | spin_lock_irqsave(&sqp->s_lock, flags); | ||
216 | |||
217 | if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) { | ||
218 | spin_unlock_irqrestore(&sqp->s_lock, flags); | ||
219 | goto done; | ||
220 | } | ||
221 | |||
222 | /* Get the next send request. */ | ||
223 | if (sqp->s_last == sqp->s_head) { | ||
224 | /* Send work queue is empty. */ | ||
225 | spin_unlock_irqrestore(&sqp->s_lock, flags); | ||
226 | goto done; | ||
227 | } | ||
228 | |||
229 | /* | ||
230 | * We can rely on the entry not changing without the s_lock | ||
231 | * being held until we update s_last. | ||
232 | */ | ||
233 | wqe = get_swqe_ptr(sqp, sqp->s_last); | ||
234 | spin_unlock_irqrestore(&sqp->s_lock, flags); | ||
235 | |||
236 | wc->wc_flags = 0; | ||
237 | wc->imm_data = 0; | ||
238 | |||
239 | sqp->s_sge.sge = wqe->sg_list[0]; | ||
240 | sqp->s_sge.sg_list = wqe->sg_list + 1; | ||
241 | sqp->s_sge.num_sge = wqe->wr.num_sge; | ||
242 | sqp->s_len = wqe->length; | ||
243 | switch (wqe->wr.opcode) { | ||
244 | case IB_WR_SEND_WITH_IMM: | ||
245 | wc->wc_flags = IB_WC_WITH_IMM; | ||
246 | wc->imm_data = wqe->wr.imm_data; | ||
247 | /* FALLTHROUGH */ | ||
248 | case IB_WR_SEND: | ||
249 | spin_lock_irqsave(&qp->r_rq.lock, flags); | ||
250 | if (!ipath_get_rwqe(qp, 0)) { | ||
251 | rnr_nak: | ||
252 | spin_unlock_irqrestore(&qp->r_rq.lock, flags); | ||
253 | /* Handle RNR NAK */ | ||
254 | if (qp->ibqp.qp_type == IB_QPT_UC) | ||
255 | goto send_comp; | ||
256 | if (sqp->s_rnr_retry == 0) { | ||
257 | wc->status = IB_WC_RNR_RETRY_EXC_ERR; | ||
258 | goto err; | ||
259 | } | ||
260 | if (sqp->s_rnr_retry_cnt < 7) | ||
261 | sqp->s_rnr_retry--; | ||
262 | dev->n_rnr_naks++; | ||
263 | sqp->s_rnr_timeout = | ||
264 | ib_ipath_rnr_table[sqp->s_min_rnr_timer]; | ||
265 | ipath_insert_rnr_queue(sqp); | ||
266 | goto done; | ||
267 | } | ||
268 | spin_unlock_irqrestore(&qp->r_rq.lock, flags); | ||
269 | break; | ||
270 | |||
271 | case IB_WR_RDMA_WRITE_WITH_IMM: | ||
272 | wc->wc_flags = IB_WC_WITH_IMM; | ||
273 | wc->imm_data = wqe->wr.imm_data; | ||
274 | spin_lock_irqsave(&qp->r_rq.lock, flags); | ||
275 | if (!ipath_get_rwqe(qp, 1)) | ||
276 | goto rnr_nak; | ||
277 | spin_unlock_irqrestore(&qp->r_rq.lock, flags); | ||
278 | /* FALLTHROUGH */ | ||
279 | case IB_WR_RDMA_WRITE: | ||
280 | if (wqe->length == 0) | ||
281 | break; | ||
282 | if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length, | ||
283 | wqe->wr.wr.rdma.remote_addr, | ||
284 | wqe->wr.wr.rdma.rkey, | ||
285 | IB_ACCESS_REMOTE_WRITE))) { | ||
286 | acc_err: | ||
287 | wc->status = IB_WC_REM_ACCESS_ERR; | ||
288 | err: | ||
289 | wc->wr_id = wqe->wr.wr_id; | ||
290 | wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; | ||
291 | wc->vendor_err = 0; | ||
292 | wc->byte_len = 0; | ||
293 | wc->qp_num = sqp->ibqp.qp_num; | ||
294 | wc->src_qp = sqp->remote_qpn; | ||
295 | wc->pkey_index = 0; | ||
296 | wc->slid = sqp->remote_ah_attr.dlid; | ||
297 | wc->sl = sqp->remote_ah_attr.sl; | ||
298 | wc->dlid_path_bits = 0; | ||
299 | wc->port_num = 0; | ||
300 | ipath_sqerror_qp(sqp, wc); | ||
301 | goto done; | ||
302 | } | ||
303 | break; | ||
304 | |||
305 | case IB_WR_RDMA_READ: | ||
306 | if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length, | ||
307 | wqe->wr.wr.rdma.remote_addr, | ||
308 | wqe->wr.wr.rdma.rkey, | ||
309 | IB_ACCESS_REMOTE_READ))) | ||
310 | goto acc_err; | ||
311 | if (unlikely(!(qp->qp_access_flags & | ||
312 | IB_ACCESS_REMOTE_READ))) | ||
313 | goto acc_err; | ||
314 | qp->r_sge.sge = wqe->sg_list[0]; | ||
315 | qp->r_sge.sg_list = wqe->sg_list + 1; | ||
316 | qp->r_sge.num_sge = wqe->wr.num_sge; | ||
317 | break; | ||
318 | |||
319 | case IB_WR_ATOMIC_CMP_AND_SWP: | ||
320 | case IB_WR_ATOMIC_FETCH_AND_ADD: | ||
321 | if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64), | ||
322 | wqe->wr.wr.rdma.remote_addr, | ||
323 | wqe->wr.wr.rdma.rkey, | ||
324 | IB_ACCESS_REMOTE_ATOMIC))) | ||
325 | goto acc_err; | ||
326 | /* Perform atomic OP and save result. */ | ||
327 | sdata = wqe->wr.wr.atomic.swap; | ||
328 | spin_lock_irqsave(&dev->pending_lock, flags); | ||
329 | qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; | ||
330 | if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) | ||
331 | *(u64 *) qp->r_sge.sge.vaddr = | ||
332 | qp->r_atomic_data + sdata; | ||
333 | else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add) | ||
334 | *(u64 *) qp->r_sge.sge.vaddr = sdata; | ||
335 | spin_unlock_irqrestore(&dev->pending_lock, flags); | ||
336 | *(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data; | ||
337 | goto send_comp; | ||
338 | |||
339 | default: | ||
340 | goto done; | ||
341 | } | ||
342 | |||
343 | sge = &sqp->s_sge.sge; | ||
344 | while (sqp->s_len) { | ||
345 | u32 len = sqp->s_len; | ||
346 | |||
347 | if (len > sge->length) | ||
348 | len = sge->length; | ||
349 | BUG_ON(len == 0); | ||
350 | ipath_copy_sge(&qp->r_sge, sge->vaddr, len); | ||
351 | sge->vaddr += len; | ||
352 | sge->length -= len; | ||
353 | sge->sge_length -= len; | ||
354 | if (sge->sge_length == 0) { | ||
355 | if (--sqp->s_sge.num_sge) | ||
356 | *sge = *sqp->s_sge.sg_list++; | ||
357 | } else if (sge->length == 0 && sge->mr != NULL) { | ||
358 | if (++sge->n >= IPATH_SEGSZ) { | ||
359 | if (++sge->m >= sge->mr->mapsz) | ||
360 | break; | ||
361 | sge->n = 0; | ||
362 | } | ||
363 | sge->vaddr = | ||
364 | sge->mr->map[sge->m]->segs[sge->n].vaddr; | ||
365 | sge->length = | ||
366 | sge->mr->map[sge->m]->segs[sge->n].length; | ||
367 | } | ||
368 | sqp->s_len -= len; | ||
369 | } | ||
370 | |||
371 | if (wqe->wr.opcode == IB_WR_RDMA_WRITE || | ||
372 | wqe->wr.opcode == IB_WR_RDMA_READ) | ||
373 | goto send_comp; | ||
374 | |||
375 | if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) | ||
376 | wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; | ||
377 | else | ||
378 | wc->opcode = IB_WC_RECV; | ||
379 | wc->wr_id = qp->r_wr_id; | ||
380 | wc->status = IB_WC_SUCCESS; | ||
381 | wc->vendor_err = 0; | ||
382 | wc->byte_len = wqe->length; | ||
383 | wc->qp_num = qp->ibqp.qp_num; | ||
384 | wc->src_qp = qp->remote_qpn; | ||
385 | /* XXX do we know which pkey matched? Only needed for GSI. */ | ||
386 | wc->pkey_index = 0; | ||
387 | wc->slid = qp->remote_ah_attr.dlid; | ||
388 | wc->sl = qp->remote_ah_attr.sl; | ||
389 | wc->dlid_path_bits = 0; | ||
390 | /* Signal completion event if the solicited bit is set. */ | ||
391 | ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc, | ||
392 | wqe->wr.send_flags & IB_SEND_SOLICITED); | ||
393 | |||
394 | send_comp: | ||
395 | sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; | ||
396 | |||
397 | if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) || | ||
398 | (wqe->wr.send_flags & IB_SEND_SIGNALED)) { | ||
399 | wc->wr_id = wqe->wr.wr_id; | ||
400 | wc->status = IB_WC_SUCCESS; | ||
401 | wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; | ||
402 | wc->vendor_err = 0; | ||
403 | wc->byte_len = wqe->length; | ||
404 | wc->qp_num = sqp->ibqp.qp_num; | ||
405 | wc->src_qp = 0; | ||
406 | wc->pkey_index = 0; | ||
407 | wc->slid = 0; | ||
408 | wc->sl = 0; | ||
409 | wc->dlid_path_bits = 0; | ||
410 | wc->port_num = 0; | ||
411 | ipath_cq_enter(to_icq(sqp->ibqp.send_cq), wc, 0); | ||
412 | } | ||
413 | |||
414 | /* Update s_last now that we are finished with the SWQE */ | ||
415 | spin_lock_irqsave(&sqp->s_lock, flags); | ||
416 | if (++sqp->s_last >= sqp->s_size) | ||
417 | sqp->s_last = 0; | ||
418 | spin_unlock_irqrestore(&sqp->s_lock, flags); | ||
419 | goto again; | ||
420 | |||
421 | done: | ||
422 | if (atomic_dec_and_test(&qp->refcount)) | ||
423 | wake_up(&qp->wait); | ||
424 | } | ||
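The copy loop in ipath_ruc_loopback() walks the sender's scatter/gather list one bounded chunk at a time, advancing vaddr/length within an entry and moving on when an entry (or a memory-region segment) is exhausted. Below is a simplified, single-level userspace sketch of that pattern; the memory-region segment walk is omitted and all names are illustrative.

/*
 * Simplified scatter/gather copy walker.  The real driver additionally
 * steps through sge->mr->map[...] segments; that level is left out here.
 */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct sg_entry {
	void   *vaddr;
	size_t  length;
};

/* Copy total_len bytes out of an SG list into a flat destination buffer. */
static size_t sg_copy_out(struct sg_entry *sg, int num_sge,
			  void *dst, size_t total_len)
{
	size_t done = 0;

	while (done < total_len && num_sge > 0) {
		size_t len = total_len - done;

		if (len > sg->length)
			len = sg->length;            /* clamp to current entry */
		memcpy((char *)dst + done, sg->vaddr, len);
		sg->vaddr = (char *)sg->vaddr + len;
		sg->length -= len;
		done += len;
		if (sg->length == 0) {               /* entry exhausted, advance */
			sg++;
			num_sge--;
		}
	}
	return done;
}

int main(void)
{
	char a[] = "Hello, ", b[] = "loopback!";
	struct sg_entry sg[] = { { a, 7 }, { b, 9 } };
	char out[32] = { 0 };

	printf("copied %zu bytes: %s\n", sg_copy_out(sg, 2, out, 16), out);
	return 0;
}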
425 | |||
426 | /** | ||
427 | * ipath_no_bufs_available - tell the layer driver we need buffers | ||
428 | * @qp: the QP that caused the problem | ||
429 | * @dev: the device we ran out of buffers on | ||
430 | * | ||
431 | * Called when we run out of PIO buffers. | ||
432 | */ | ||
433 | void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) | ||
434 | { | ||
435 | unsigned long flags; | ||
436 | |||
437 | spin_lock_irqsave(&dev->pending_lock, flags); | ||
438 | if (qp->piowait.next == LIST_POISON1) | ||
439 | list_add_tail(&qp->piowait, &dev->piowait); | ||
440 | spin_unlock_irqrestore(&dev->pending_lock, flags); | ||
441 | /* | ||
442 | * Note that as soon as ipath_layer_want_buffer() is called and | ||
443 | * possibly before it returns, ipath_ib_piobufavail() | ||
444 | * could be called. If we are still in the tasklet function, | ||
445 | * tasklet_hi_schedule() will not call us until the next time | ||
446 | * tasklet_hi_schedule() is called. | ||
447 | * We clear the tasklet flag now since we are committing to return | ||
448 | * from the tasklet function. | ||
449 | */ | ||
450 | clear_bit(IPATH_S_BUSY, &qp->s_flags); | ||
451 | tasklet_unlock(&qp->s_task); | ||
452 | ipath_layer_want_buffer(dev->dd); | ||
453 | dev->n_piowait++; | ||
454 | } | ||
455 | |||
456 | /** | ||
457 | * ipath_post_rc_send - post RC and UC sends | ||
458 | * @qp: the QP to post on | ||
459 | * @wr: the work request to send | ||
460 | */ | ||
461 | int ipath_post_rc_send(struct ipath_qp *qp, struct ib_send_wr *wr) | ||
462 | { | ||
463 | struct ipath_swqe *wqe; | ||
464 | unsigned long flags; | ||
465 | u32 next; | ||
466 | int i, j; | ||
467 | int acc; | ||
468 | int ret; | ||
469 | |||
470 | /* | ||
471 | * Don't allow RDMA reads or atomic operations on UC QPs, and reject | ||
472 | * undefined operations on any QP type. | ||
473 | * Make sure buffer is large enough to hold the result for atomics. | ||
474 | */ | ||
475 | if (qp->ibqp.qp_type == IB_QPT_UC) { | ||
476 | if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) { | ||
477 | ret = -EINVAL; | ||
478 | goto bail; | ||
479 | } | ||
480 | } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) { | ||
481 | ret = -EINVAL; | ||
482 | goto bail; | ||
483 | } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && | ||
484 | (wr->num_sge == 0 || | ||
485 | wr->sg_list[0].length < sizeof(u64) || | ||
486 | wr->sg_list[0].addr & (sizeof(u64) - 1))) { | ||
487 | ret = -EINVAL; | ||
488 | goto bail; | ||
489 | } | ||
490 | /* IB spec says that num_sge == 0 is OK. */ | ||
491 | if (wr->num_sge > qp->s_max_sge) { | ||
492 | ret = -ENOMEM; | ||
493 | goto bail; | ||
494 | } | ||
495 | spin_lock_irqsave(&qp->s_lock, flags); | ||
496 | next = qp->s_head + 1; | ||
497 | if (next >= qp->s_size) | ||
498 | next = 0; | ||
499 | if (next == qp->s_last) { | ||
500 | spin_unlock_irqrestore(&qp->s_lock, flags); | ||
501 | ret = -EINVAL; | ||
502 | goto bail; | ||
503 | } | ||
504 | |||
505 | wqe = get_swqe_ptr(qp, qp->s_head); | ||
506 | wqe->wr = *wr; | ||
507 | wqe->ssn = qp->s_ssn++; | ||
508 | wqe->sg_list[0].mr = NULL; | ||
509 | wqe->sg_list[0].vaddr = NULL; | ||
510 | wqe->sg_list[0].length = 0; | ||
511 | wqe->sg_list[0].sge_length = 0; | ||
512 | wqe->length = 0; | ||
513 | acc = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0; | ||
514 | for (i = 0, j = 0; i < wr->num_sge; i++) { | ||
515 | if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) { | ||
516 | spin_unlock_irqrestore(&qp->s_lock, flags); | ||
517 | ret = -EINVAL; | ||
518 | goto bail; | ||
519 | } | ||
520 | if (wr->sg_list[i].length == 0) | ||
521 | continue; | ||
522 | if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table, | ||
523 | &wqe->sg_list[j], &wr->sg_list[i], | ||
524 | acc)) { | ||
525 | spin_unlock_irqrestore(&qp->s_lock, flags); | ||
526 | ret = -EINVAL; | ||
527 | goto bail; | ||
528 | } | ||
529 | wqe->length += wr->sg_list[i].length; | ||
530 | j++; | ||
531 | } | ||
532 | wqe->wr.num_sge = j; | ||
533 | qp->s_head = next; | ||
534 | /* | ||
535 | * Wake up the send tasklet if the QP is not waiting | ||
536 | * for an RNR timeout. | ||
537 | */ | ||
538 | next = qp->s_rnr_timeout; | ||
539 | spin_unlock_irqrestore(&qp->s_lock, flags); | ||
540 | |||
541 | if (next == 0) { | ||
542 | if (qp->ibqp.qp_type == IB_QPT_UC) | ||
543 | ipath_do_uc_send((unsigned long) qp); | ||
544 | else | ||
545 | ipath_do_rc_send((unsigned long) qp); | ||
546 | } | ||
547 | |||
548 | ret = 0; | ||
549 | |||
550 | bail: | ||
551 | return ret; | ||
552 | } | ||
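ipath_post_rc_send() treats the send queue as full when advancing s_head would land on s_last; one slot is always left unused so that head == tail can unambiguously mean "empty". The userspace sketch below shows just that producer-side claim/full test, with invented names.

/*
 * Producer-side sketch of the "one reserved slot" ring convention.
 */
#include <stdint.h>
#include <stdio.h>

struct swqe_ring {
	uint32_t head;                        /* producer index */
	uint32_t last;                        /* consumer index (s_last above) */
	uint32_t size;
};

/* Returns the slot to fill, or -1 if posting would overrun the consumer. */
static int ring_claim_slot(struct swqe_ring *sq)
{
	uint32_t next = sq->head + 1;
	uint32_t slot;

	if (next >= sq->size)
		next = 0;
	if (next == sq->last)
		return -1;                    /* full: only size-1 usable slots */
	slot = sq->head;                      /* caller fills this slot */
	sq->head = next;                      /* then publishes it */
	return (int)slot;
}

int main(void)
{
	struct swqe_ring sq = { .head = 0, .last = 0, .size = 4 };
	int s;

	while ((s = ring_claim_slot(&sq)) >= 0)
		printf("claimed slot %d\n", s);   /* prints 0, 1, 2 */
	return 0;
}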
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c new file mode 100644 index 000000000000..01c4c6c56118 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_srq.c | |||
@@ -0,0 +1,273 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/err.h> | ||
34 | #include <linux/vmalloc.h> | ||
35 | |||
36 | #include "ipath_verbs.h" | ||
37 | |||
38 | /** | ||
39 | * ipath_post_srq_receive - post a receive on a shared receive queue | ||
40 | * @ibsrq: the SRQ to post the receive on | ||
41 | * @wr: the list of work requests to post | ||
42 | * @bad_wr: the first WR to cause a problem is put here | ||
43 | * | ||
44 | * This may be called from interrupt context. | ||
45 | */ | ||
46 | int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, | ||
47 | struct ib_recv_wr **bad_wr) | ||
48 | { | ||
49 | struct ipath_srq *srq = to_isrq(ibsrq); | ||
50 | struct ipath_ibdev *dev = to_idev(ibsrq->device); | ||
51 | unsigned long flags; | ||
52 | int ret; | ||
53 | |||
54 | for (; wr; wr = wr->next) { | ||
55 | struct ipath_rwqe *wqe; | ||
56 | u32 next; | ||
57 | int i, j; | ||
58 | |||
59 | if (wr->num_sge > srq->rq.max_sge) { | ||
60 | *bad_wr = wr; | ||
61 | ret = -ENOMEM; | ||
62 | goto bail; | ||
63 | } | ||
64 | |||
65 | spin_lock_irqsave(&srq->rq.lock, flags); | ||
66 | next = srq->rq.head + 1; | ||
67 | if (next >= srq->rq.size) | ||
68 | next = 0; | ||
69 | if (next == srq->rq.tail) { | ||
70 | spin_unlock_irqrestore(&srq->rq.lock, flags); | ||
71 | *bad_wr = wr; | ||
72 | ret = -ENOMEM; | ||
73 | goto bail; | ||
74 | } | ||
75 | |||
76 | wqe = get_rwqe_ptr(&srq->rq, srq->rq.head); | ||
77 | wqe->wr_id = wr->wr_id; | ||
78 | wqe->sg_list[0].mr = NULL; | ||
79 | wqe->sg_list[0].vaddr = NULL; | ||
80 | wqe->sg_list[0].length = 0; | ||
81 | wqe->sg_list[0].sge_length = 0; | ||
82 | wqe->length = 0; | ||
83 | for (i = 0, j = 0; i < wr->num_sge; i++) { | ||
84 | /* Check LKEY */ | ||
85 | if (to_ipd(srq->ibsrq.pd)->user && | ||
86 | wr->sg_list[i].lkey == 0) { | ||
87 | spin_unlock_irqrestore(&srq->rq.lock, | ||
88 | flags); | ||
89 | *bad_wr = wr; | ||
90 | ret = -EINVAL; | ||
91 | goto bail; | ||
92 | } | ||
93 | if (wr->sg_list[i].length == 0) | ||
94 | continue; | ||
95 | if (!ipath_lkey_ok(&dev->lk_table, | ||
96 | &wqe->sg_list[j], | ||
97 | &wr->sg_list[i], | ||
98 | IB_ACCESS_LOCAL_WRITE)) { | ||
99 | spin_unlock_irqrestore(&srq->rq.lock, | ||
100 | flags); | ||
101 | *bad_wr = wr; | ||
102 | ret = -EINVAL; | ||
103 | goto bail; | ||
104 | } | ||
105 | wqe->length += wr->sg_list[i].length; | ||
106 | j++; | ||
107 | } | ||
108 | wqe->num_sge = j; | ||
109 | srq->rq.head = next; | ||
110 | spin_unlock_irqrestore(&srq->rq.lock, flags); | ||
111 | } | ||
112 | ret = 0; | ||
113 | |||
114 | bail: | ||
115 | return ret; | ||
116 | } | ||
117 | |||
118 | /** | ||
119 | * ipath_create_srq - create a shared receive queue | ||
120 | * @ibpd: the protection domain of the SRQ to create | ||
121 | * @srq_init_attr: the attributes of the SRQ | ||
122 | * @udata: not used by the InfiniPath verbs driver | ||
123 | */ | ||
124 | struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, | ||
125 | struct ib_srq_init_attr *srq_init_attr, | ||
126 | struct ib_udata *udata) | ||
127 | { | ||
128 | struct ipath_srq *srq; | ||
129 | u32 sz; | ||
130 | struct ib_srq *ret; | ||
131 | |||
132 | if (srq_init_attr->attr.max_sge < 1) { | ||
133 | ret = ERR_PTR(-EINVAL); | ||
134 | goto bail; | ||
135 | } | ||
136 | |||
137 | srq = kmalloc(sizeof(*srq), GFP_KERNEL); | ||
138 | if (!srq) { | ||
139 | ret = ERR_PTR(-ENOMEM); | ||
140 | goto bail; | ||
141 | } | ||
142 | |||
143 | /* | ||
144 | * Need to use vmalloc() if we want to support large #s of entries. | ||
145 | */ | ||
146 | srq->rq.size = srq_init_attr->attr.max_wr + 1; | ||
147 | sz = sizeof(struct ipath_sge) * srq_init_attr->attr.max_sge + | ||
148 | sizeof(struct ipath_rwqe); | ||
149 | srq->rq.wq = vmalloc(srq->rq.size * sz); | ||
150 | if (!srq->rq.wq) { | ||
151 | kfree(srq); | ||
152 | ret = ERR_PTR(-ENOMEM); | ||
153 | goto bail; | ||
154 | } | ||
155 | |||
156 | /* | ||
157 | * ib_create_srq() will initialize srq->ibsrq. | ||
158 | */ | ||
159 | spin_lock_init(&srq->rq.lock); | ||
160 | srq->rq.head = 0; | ||
161 | srq->rq.tail = 0; | ||
162 | srq->rq.max_sge = srq_init_attr->attr.max_sge; | ||
163 | srq->limit = srq_init_attr->attr.srq_limit; | ||
164 | |||
165 | ret = &srq->ibsrq; | ||
166 | |||
167 | bail: | ||
168 | return ret; | ||
169 | } | ||
170 | |||
171 | /** | ||
172 | * ipath_modify_srq - modify a shared receive queue | ||
173 | * @ibsrq: the SRQ to modify | ||
174 | * @attr: the new attributes of the SRQ | ||
175 | * @attr_mask: indicates which attributes to modify | ||
176 | */ | ||
177 | int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, | ||
178 | enum ib_srq_attr_mask attr_mask) | ||
179 | { | ||
180 | struct ipath_srq *srq = to_isrq(ibsrq); | ||
181 | unsigned long flags; | ||
182 | int ret; | ||
183 | |||
184 | if (attr_mask & IB_SRQ_LIMIT) { | ||
185 | spin_lock_irqsave(&srq->rq.lock, flags); | ||
186 | srq->limit = attr->srq_limit; | ||
187 | spin_unlock_irqrestore(&srq->rq.lock, flags); | ||
188 | } | ||
189 | if (attr_mask & IB_SRQ_MAX_WR) { | ||
190 | u32 size = attr->max_wr + 1; | ||
191 | struct ipath_rwqe *wq, *p; | ||
192 | u32 n; | ||
193 | u32 sz; | ||
194 | |||
195 | if (attr->max_sge < srq->rq.max_sge) { | ||
196 | ret = -EINVAL; | ||
197 | goto bail; | ||
198 | } | ||
199 | |||
200 | sz = sizeof(struct ipath_rwqe) + | ||
201 | attr->max_sge * sizeof(struct ipath_sge); | ||
202 | wq = vmalloc(size * sz); | ||
203 | if (!wq) { | ||
204 | ret = -ENOMEM; | ||
205 | goto bail; | ||
206 | } | ||
207 | |||
208 | spin_lock_irqsave(&srq->rq.lock, flags); | ||
209 | if (srq->rq.head < srq->rq.tail) | ||
210 | n = srq->rq.size + srq->rq.head - srq->rq.tail; | ||
211 | else | ||
212 | n = srq->rq.head - srq->rq.tail; | ||
213 | if (size <= n || size <= srq->limit) { | ||
214 | spin_unlock_irqrestore(&srq->rq.lock, flags); | ||
215 | vfree(wq); | ||
216 | ret = -EINVAL; | ||
217 | goto bail; | ||
218 | } | ||
219 | n = 0; | ||
220 | p = wq; | ||
221 | while (srq->rq.tail != srq->rq.head) { | ||
222 | struct ipath_rwqe *wqe; | ||
223 | int i; | ||
224 | |||
225 | wqe = get_rwqe_ptr(&srq->rq, srq->rq.tail); | ||
226 | p->wr_id = wqe->wr_id; | ||
227 | p->length = wqe->length; | ||
228 | p->num_sge = wqe->num_sge; | ||
229 | for (i = 0; i < wqe->num_sge; i++) | ||
230 | p->sg_list[i] = wqe->sg_list[i]; | ||
231 | n++; | ||
232 | p = (struct ipath_rwqe *)((char *) p + sz); | ||
233 | if (++srq->rq.tail >= srq->rq.size) | ||
234 | srq->rq.tail = 0; | ||
235 | } | ||
236 | vfree(srq->rq.wq); | ||
237 | srq->rq.wq = wq; | ||
238 | srq->rq.size = size; | ||
239 | srq->rq.head = n; | ||
240 | srq->rq.tail = 0; | ||
241 | srq->rq.max_sge = attr->max_sge; | ||
242 | spin_unlock_irqrestore(&srq->rq.lock, flags); | ||
243 | } | ||
244 | |||
245 | ret = 0; | ||
246 | |||
247 | bail: | ||
248 | return ret; | ||
249 | } | ||
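Resizing the SRQ above works by draining the live WQEs, in order, into the front of a freshly allocated ring and then resetting head to the number of copied entries and tail to zero. Here is a reduced userspace sketch of that drain-and-swap pattern with the WQE replaced by a plain int; names are illustrative only.

/*
 * Drain-and-swap ring resize, as in ipath_modify_srq() but with a trivial
 * element type.
 */
#include <stdio.h>
#include <stdlib.h>

struct int_ring {
	int      *wq;
	unsigned  head, tail, size;
};

/* Count of occupied entries, valid for any head/tail ordering. */
static unsigned ring_count(const struct int_ring *rq)
{
	return rq->head >= rq->tail ? rq->head - rq->tail
				    : rq->size + rq->head - rq->tail;
}

static int ring_resize(struct int_ring *rq, unsigned new_size)
{
	unsigned n = ring_count(rq);
	int *wq;

	if (new_size <= n)                    /* must still hold live entries */
		return -1;
	wq = calloc(new_size, sizeof(*wq));
	if (!wq)
		return -1;
	for (unsigned i = 0; i < n; i++) {    /* drain the old ring in order */
		wq[i] = rq->wq[rq->tail];
		if (++rq->tail >= rq->size)
			rq->tail = 0;
	}
	free(rq->wq);
	rq->wq = wq;
	rq->size = new_size;
	rq->head = n;                         /* next free slot */
	rq->tail = 0;
	return 0;
}

int main(void)
{
	struct int_ring rq = { .size = 4 };

	rq.wq = calloc(rq.size, sizeof(*rq.wq));
	rq.wq[0] = 10; rq.wq[1] = 11; rq.head = 2;
	ring_resize(&rq, 8);
	printf("size=%u head=%u first=%d\n", rq.size, rq.head, rq.wq[0]);
	free(rq.wq);
	return 0;
}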
250 | |||
251 | int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) | ||
252 | { | ||
253 | struct ipath_srq *srq = to_isrq(ibsrq); | ||
254 | |||
255 | attr->max_wr = srq->rq.size - 1; | ||
256 | attr->max_sge = srq->rq.max_sge; | ||
257 | attr->srq_limit = srq->limit; | ||
258 | return 0; | ||
259 | } | ||
260 | |||
261 | /** | ||
262 | * ipath_destroy_srq - destroy a shared receive queue | ||
263 | * @ibsrq: the SRQ to destroy | ||
264 | */ | ||
265 | int ipath_destroy_srq(struct ib_srq *ibsrq) | ||
266 | { | ||
267 | struct ipath_srq *srq = to_isrq(ibsrq); | ||
268 | |||
269 | vfree(srq->rq.wq); | ||
270 | kfree(srq); | ||
271 | |||
272 | return 0; | ||
273 | } | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c new file mode 100644 index 000000000000..fe209137ee74 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_stats.c | |||
@@ -0,0 +1,303 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/pci.h> | ||
34 | |||
35 | #include "ipath_kernel.h" | ||
36 | |||
37 | struct infinipath_stats ipath_stats; | ||
38 | |||
39 | /** | ||
40 | * ipath_snap_cntr - snapshot a chip counter | ||
41 | * @dd: the infinipath device | ||
42 | * @creg: the counter to snapshot | ||
43 | * | ||
44 | * called from add_timer and user counter read calls, to deal with | ||
45 | * counters that wrap in "human time". The words sent and received, and | ||
46 | * the packets sent and received are all that we worry about. For now, | ||
47 | * at least, we don't worry about error counters, because if they wrap | ||
48 | * that quickly, we probably don't care. We may eventually just make this | ||
49 | * handle all the counters. Word counters can wrap in about 20 seconds | ||
50 | * of full bandwidth traffic, packet counters in a few hours. | ||
51 | */ | ||
52 | |||
53 | u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg) | ||
54 | { | ||
55 | u32 val, reg64 = 0; | ||
56 | u64 val64; | ||
57 | unsigned long t0, t1; | ||
58 | u64 ret; | ||
59 | |||
60 | t0 = jiffies; | ||
61 | /* If fast increment counters are only 32 bits, snapshot them, | ||
62 | * and maintain them as 64bit values in the driver */ | ||
63 | if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) && | ||
64 | (creg == dd->ipath_cregs->cr_wordsendcnt || | ||
65 | creg == dd->ipath_cregs->cr_wordrcvcnt || | ||
66 | creg == dd->ipath_cregs->cr_pktsendcnt || | ||
67 | creg == dd->ipath_cregs->cr_pktrcvcnt)) { | ||
68 | val64 = ipath_read_creg(dd, creg); | ||
69 | val = val64 == ~0ULL ? ~0U : 0; | ||
70 | reg64 = 1; | ||
71 | } else /* val64 just to keep gcc quiet... */ | ||
72 | val64 = val = ipath_read_creg32(dd, creg); | ||
73 | /* | ||
74 | * See if a second has passed. This is just a way to detect things | ||
75 | * that are quite broken. Normally this should take just a few | ||
76 | * cycles (the check is for long enough that we don't care if we get | ||
77 | * pre-empted.) An Opteron HT I/O read timeout is 4 seconds with | ||
78 | * normal NB values. | ||
79 | */ | ||
80 | t1 = jiffies; | ||
81 | if (time_before(t0 + HZ, t1) && val == -1) { | ||
82 | ipath_dev_err(dd, "Error! Read counter 0x%x timed out\n", | ||
83 | creg); | ||
84 | ret = 0ULL; | ||
85 | goto bail; | ||
86 | } | ||
87 | if (reg64) { | ||
88 | ret = val64; | ||
89 | goto bail; | ||
90 | } | ||
91 | |||
92 | if (creg == dd->ipath_cregs->cr_wordsendcnt) { | ||
93 | if (val != dd->ipath_lastsword) { | ||
94 | dd->ipath_sword += val - dd->ipath_lastsword; | ||
95 | dd->ipath_lastsword = val; | ||
96 | } | ||
97 | val64 = dd->ipath_sword; | ||
98 | } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) { | ||
99 | if (val != dd->ipath_lastrword) { | ||
100 | dd->ipath_rword += val - dd->ipath_lastrword; | ||
101 | dd->ipath_lastrword = val; | ||
102 | } | ||
103 | val64 = dd->ipath_rword; | ||
104 | } else if (creg == dd->ipath_cregs->cr_pktsendcnt) { | ||
105 | if (val != dd->ipath_lastspkts) { | ||
106 | dd->ipath_spkts += val - dd->ipath_lastspkts; | ||
107 | dd->ipath_lastspkts = val; | ||
108 | } | ||
109 | val64 = dd->ipath_spkts; | ||
110 | } else if (creg == dd->ipath_cregs->cr_pktrcvcnt) { | ||
111 | if (val != dd->ipath_lastrpkts) { | ||
112 | dd->ipath_rpkts += val - dd->ipath_lastrpkts; | ||
113 | dd->ipath_lastrpkts = val; | ||
114 | } | ||
115 | val64 = dd->ipath_rpkts; | ||
116 | } else | ||
117 | val64 = (u64) val; | ||
118 | |||
119 | ret = val64; | ||
120 | |||
121 | bail: | ||
122 | return ret; | ||
123 | } | ||
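The accumulation above turns wrapping 32-bit hardware counters into monotonically increasing 64-bit software values: the unsigned difference between the new snapshot and the previous one is correct even across a wrap, and that delta is added to the running total. A small, self-contained sketch of the technique follows; the names are invented.

/*
 * Wrap-safe accumulation of a 32-bit "hardware" counter into a 64-bit
 * software total, as done for the word/packet counters above.
 */
#include <stdint.h>
#include <stdio.h>

struct soft_counter {
	uint64_t total;                       /* 64-bit value kept in software */
	uint32_t last;                        /* last raw 32-bit snapshot */
};

static uint64_t soft_counter_snap(struct soft_counter *c, uint32_t raw)
{
	if (raw != c->last) {
		c->total += (uint32_t)(raw - c->last);   /* wrap-safe delta */
		c->last = raw;
	}
	return c->total;
}

int main(void)
{
	struct soft_counter c = { .total = 0, .last = 0xfffffff0u };

	/* Hardware counter wraps from 0xfffffff0 to 0x10: delta is 0x20. */
	printf("total=%llu\n",
	       (unsigned long long)soft_counter_snap(&c, 0x10));
	return 0;
}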
124 | |||
125 | /** | ||
126 | * ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports | ||
127 | * @dd: the infinipath device | ||
128 | * | ||
129 | * Print the delta of egrfull/hdrqfull errors for kernel ports no more than | ||
130 | * every 5 seconds. User processes are printed at close, but the kernel doesn't | ||
131 | * close, so... This is a separate routine so that it may be called from other | ||
132 | * places someday, and so the function name printed by _IPATH_INFO is meaningful | ||
133 | */ | ||
134 | static void ipath_qcheck(struct ipath_devdata *dd) | ||
135 | { | ||
136 | static u64 last_tot_hdrqfull; | ||
137 | size_t blen = 0; | ||
138 | char buf[128]; | ||
139 | |||
140 | *buf = 0; | ||
141 | if (dd->ipath_pd[0]->port_hdrqfull != dd->ipath_p0_hdrqfull) { | ||
142 | blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u", | ||
143 | dd->ipath_pd[0]->port_hdrqfull - | ||
144 | dd->ipath_p0_hdrqfull); | ||
145 | dd->ipath_p0_hdrqfull = dd->ipath_pd[0]->port_hdrqfull; | ||
146 | } | ||
147 | if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) { | ||
148 | blen += snprintf(buf + blen, sizeof buf - blen, | ||
149 | "%srcvegrfull %llu", | ||
150 | blen ? ", " : "", | ||
151 | (unsigned long long) | ||
152 | (ipath_stats.sps_etidfull - | ||
153 | dd->ipath_last_tidfull)); | ||
154 | dd->ipath_last_tidfull = ipath_stats.sps_etidfull; | ||
155 | } | ||
156 | |||
157 | /* | ||
158 | * this is actually the number of hdrq full interrupts, not actual | ||
159 | * events, but at the moment that's mostly what I'm interested in. | ||
160 | * Actual count, etc. is in the counters, if needed. For production | ||
161 | * users this won't ordinarily be printed. | ||
162 | */ | ||
163 | |||
164 | if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) && | ||
165 | ipath_stats.sps_hdrqfull != last_tot_hdrqfull) { | ||
166 | blen += snprintf(buf + blen, sizeof buf - blen, | ||
167 | "%shdrqfull %llu (all ports)", | ||
168 | blen ? ", " : "", | ||
169 | (unsigned long long) | ||
170 | (ipath_stats.sps_hdrqfull - | ||
171 | last_tot_hdrqfull)); | ||
172 | last_tot_hdrqfull = ipath_stats.sps_hdrqfull; | ||
173 | } | ||
174 | if (blen) | ||
175 | ipath_dbg("%s\n", buf); | ||
176 | |||
177 | if (dd->ipath_port0head != (u32) | ||
178 | le64_to_cpu(*dd->ipath_hdrqtailptr)) { | ||
179 | if (dd->ipath_lastport0rcv_cnt == | ||
180 | ipath_stats.sps_port0pkts) { | ||
181 | ipath_cdbg(PKT, "missing rcv interrupts? " | ||
182 | "port0 hd=%llx tl=%x; port0pkts %llx\n", | ||
183 | (unsigned long long) | ||
184 | le64_to_cpu(*dd->ipath_hdrqtailptr), | ||
185 | dd->ipath_port0head, | ||
186 | (unsigned long long) | ||
187 | ipath_stats.sps_port0pkts); | ||
188 | ipath_kreceive(dd); | ||
189 | } | ||
190 | dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts; | ||
191 | } | ||
192 | } | ||
193 | |||
194 | /** | ||
195 | * ipath_get_faststats - get word counters from chip before they overflow | ||
196 | * @opaque: a pointer to the ipath_devdata for the infinipath device | ||
197 | * | ||
198 | * called from add_timer | ||
199 | */ | ||
200 | void ipath_get_faststats(unsigned long opaque) | ||
201 | { | ||
202 | struct ipath_devdata *dd = (struct ipath_devdata *) opaque; | ||
203 | u32 val; | ||
204 | static unsigned cnt; | ||
205 | |||
206 | /* | ||
207 | * don't access the chip while running diags, or memory diags can | ||
208 | * fail | ||
209 | */ | ||
210 | if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT) || | ||
211 | ipath_diag_inuse) | ||
212 | /* but re-arm the timer for the diags case; won't hurt otherwise */ | ||
213 | goto done; | ||
214 | |||
215 | if (dd->ipath_flags & IPATH_32BITCOUNTERS) { | ||
216 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); | ||
217 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); | ||
218 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); | ||
219 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); | ||
220 | } | ||
221 | |||
222 | ipath_qcheck(dd); | ||
223 | |||
224 | /* | ||
225 | * deal with repeat error suppression. Doesn't really matter if | ||
226 | * last error was almost a full interval ago, or just a few usecs | ||
227 | * ago; still won't get more than 2 per interval. We may want | ||
228 | * longer intervals for this eventually, could do with mod, counter | ||
229 | * or separate timer. Also see code in ipath_handle_errors() and | ||
230 | * ipath_handle_hwerrors(). | ||
231 | */ | ||
232 | |||
233 | if (dd->ipath_lasterror) | ||
234 | dd->ipath_lasterror = 0; | ||
235 | if (dd->ipath_lasthwerror) | ||
236 | dd->ipath_lasthwerror = 0; | ||
237 | if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) | ||
238 | && time_after(jiffies, dd->ipath_unmasktime)) { | ||
239 | char ebuf[256]; | ||
240 | ipath_decode_err(ebuf, sizeof ebuf, | ||
241 | (dd->ipath_maskederrs & ~dd-> | ||
242 | ipath_ignorederrs)); | ||
243 | if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & | ||
244 | ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) | ||
245 | ipath_dev_err(dd, "Re-enabling masked errors " | ||
246 | "(%s)\n", ebuf); | ||
247 | else { | ||
248 | /* | ||
249 | * rcvegrfull and rcvhdrqfull are "normal", for some | ||
250 | * types of processes (mostly benchmarks) that send | ||
251 | * huge numbers of messages, while not processing | ||
252 | * them. So only complain about these at debug | ||
253 | * level. | ||
254 | */ | ||
255 | ipath_dbg("Disabling frequent queue full errors " | ||
256 | "(%s)\n", ebuf); | ||
257 | } | ||
258 | dd->ipath_maskederrs = dd->ipath_ignorederrs; | ||
259 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, | ||
260 | ~dd->ipath_maskederrs); | ||
261 | } | ||
262 | |||
263 | /* limit qfull messages to ~one per minute per port */ | ||
264 | if ((++cnt & 0x10)) { | ||
265 | for (val = dd->ipath_cfgports - 1; ((int)val) >= 0; | ||
266 | val--) { | ||
267 | if (dd->ipath_lastegrheads[val] != -1) | ||
268 | dd->ipath_lastegrheads[val] = -1; | ||
269 | if (dd->ipath_lastrcvhdrqtails[val] != -1) | ||
270 | dd->ipath_lastrcvhdrqtails[val] = -1; | ||
271 | } | ||
272 | } | ||
273 | |||
274 | if (dd->ipath_nosma_bufs) { | ||
275 | dd->ipath_nosma_secs += 5; | ||
276 | if (dd->ipath_nosma_secs >= 30) { | ||
277 | ipath_cdbg(SMA, "No SMA bufs avail %u seconds; " | ||
278 | "cancelling pending sends\n", | ||
279 | dd->ipath_nosma_secs); | ||
280 | /* | ||
281 | * issue an abort as well, in case we have a packet | ||
282 | * stuck in launch fifo. This could corrupt an | ||
283 | * outgoing user packet in the worst case, | ||
284 | * but this is pretty catastrophic anyway. | ||
285 | */ | ||
286 | ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, | ||
287 | INFINIPATH_S_ABORT); | ||
288 | ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf, | ||
289 | dd->ipath_piobcnt2k + | ||
290 | dd->ipath_piobcnt4k - | ||
291 | dd->ipath_lastport_piobuf); | ||
292 | /* start again, if necessary */ | ||
293 | dd->ipath_nosma_secs = 0; | ||
294 | } else | ||
295 | ipath_cdbg(SMA, "No SMA bufs avail %u tries, " | ||
296 | "after %u seconds\n", | ||
297 | dd->ipath_nosma_bufs, | ||
298 | dd->ipath_nosma_secs); | ||
299 | } | ||
300 | |||
301 | done: | ||
302 | mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5); | ||
303 | } | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c new file mode 100644 index 000000000000..32acd8048b49 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c | |||
@@ -0,0 +1,778 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/ctype.h> | ||
34 | #include <linux/pci.h> | ||
35 | |||
36 | #include "ipath_kernel.h" | ||
37 | #include "ips_common.h" | ||
38 | #include "ipath_layer.h" | ||
39 | |||
40 | /** | ||
41 | * ipath_parse_ushort - parse an unsigned short value in an arbitrary base | ||
42 | * @str: the string containing the number | ||
43 | * @valp: where to put the result | ||
44 | * | ||
45 | * returns the number of bytes consumed, or a negative value on error | ||
46 | */ | ||
47 | int ipath_parse_ushort(const char *str, unsigned short *valp) | ||
48 | { | ||
49 | unsigned long val; | ||
50 | char *end; | ||
51 | int ret; | ||
52 | |||
53 | if (!isdigit(str[0])) { | ||
54 | ret = -EINVAL; | ||
55 | goto bail; | ||
56 | } | ||
57 | |||
58 | val = simple_strtoul(str, &end, 0); | ||
59 | |||
60 | if (val > 0xffff) { | ||
61 | ret = -EINVAL; | ||
62 | goto bail; | ||
63 | } | ||
64 | |||
65 | *valp = val; | ||
66 | |||
67 | ret = end + 1 - str; | ||
68 | if (ret == 0) | ||
69 | ret = -EINVAL; | ||
70 | |||
71 | bail: | ||
72 | return ret; | ||
73 | } | ||
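For reference, much the same parse can be expressed in userspace with strtoul(); base 0 accepts decimal, octal, and hex just like simple_strtoul() above. This is only a hedged analogue, not the kernel helper: in particular its return-value convention differs slightly from the driver's `end + 1 - str`.

/*
 * Userspace analogue of ipath_parse_ushort(): parse with base 0 and
 * range-check before narrowing to 16 bits.  Illustrative only.
 */
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int parse_ushort(const char *str, unsigned short *valp)
{
	char *end;
	unsigned long val;

	if (!isdigit((unsigned char)str[0]))
		return -EINVAL;
	errno = 0;
	val = strtoul(str, &end, 0);          /* base 0: 0x.., 0.., decimal */
	if (errno || end == str || val > 0xffff)
		return -EINVAL;
	*valp = (unsigned short)val;
	return (int)(end - str);              /* bytes consumed */
}

int main(void)
{
	unsigned short v;
	int n = parse_ushort("0x3ff rest", &v);

	printf("consumed=%d value=%u\n", n, v);   /* consumed=5 value=1023 */
	return 0;
}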
74 | |||
75 | static ssize_t show_version(struct device_driver *dev, char *buf) | ||
76 | { | ||
77 | /* The string printed here is already newline-terminated. */ | ||
78 | return scnprintf(buf, PAGE_SIZE, "%s", ipath_core_version); | ||
79 | } | ||
80 | |||
81 | static ssize_t show_num_units(struct device_driver *dev, char *buf) | ||
82 | { | ||
83 | return scnprintf(buf, PAGE_SIZE, "%d\n", | ||
84 | ipath_count_units(NULL, NULL, NULL)); | ||
85 | } | ||
86 | |||
87 | #define DRIVER_STAT(name, attr) \ | ||
88 | static ssize_t show_stat_##name(struct device_driver *dev, \ | ||
89 | char *buf) \ | ||
90 | { \ | ||
91 | return scnprintf( \ | ||
92 | buf, PAGE_SIZE, "%llu\n", \ | ||
93 | (unsigned long long) ipath_stats.sps_ ##attr); \ | ||
94 | } \ | ||
95 | static DRIVER_ATTR(name, S_IRUGO, show_stat_##name, NULL) | ||
96 | |||
97 | DRIVER_STAT(intrs, ints); | ||
98 | DRIVER_STAT(err_intrs, errints); | ||
99 | DRIVER_STAT(errs, errs); | ||
100 | DRIVER_STAT(pkt_errs, pkterrs); | ||
101 | DRIVER_STAT(crc_errs, crcerrs); | ||
102 | DRIVER_STAT(hw_errs, hwerrs); | ||
103 | DRIVER_STAT(ib_link, iblink); | ||
104 | DRIVER_STAT(port0_pkts, port0pkts); | ||
105 | DRIVER_STAT(ether_spkts, ether_spkts); | ||
106 | DRIVER_STAT(ether_rpkts, ether_rpkts); | ||
107 | DRIVER_STAT(sma_spkts, sma_spkts); | ||
108 | DRIVER_STAT(sma_rpkts, sma_rpkts); | ||
109 | DRIVER_STAT(hdrq_full, hdrqfull); | ||
110 | DRIVER_STAT(etid_full, etidfull); | ||
111 | DRIVER_STAT(no_piobufs, nopiobufs); | ||
112 | DRIVER_STAT(ports, ports); | ||
113 | DRIVER_STAT(pkey0, pkeys[0]); | ||
114 | DRIVER_STAT(pkey1, pkeys[1]); | ||
115 | DRIVER_STAT(pkey2, pkeys[2]); | ||
116 | DRIVER_STAT(pkey3, pkeys[3]); | ||
117 | /* XXX fix the following when dynamic table of devices used */ | ||
118 | DRIVER_STAT(lid0, lid[0]); | ||
119 | DRIVER_STAT(lid1, lid[1]); | ||
120 | DRIVER_STAT(lid2, lid[2]); | ||
121 | DRIVER_STAT(lid3, lid[3]); | ||
122 | |||
123 | DRIVER_STAT(nports, nports); | ||
124 | DRIVER_STAT(null_intr, nullintr); | ||
125 | DRIVER_STAT(max_pkts_call, maxpkts_call); | ||
126 | DRIVER_STAT(avg_pkts_call, avgpkts_call); | ||
127 | DRIVER_STAT(page_locks, pagelocks); | ||
128 | DRIVER_STAT(page_unlocks, pageunlocks); | ||
129 | DRIVER_STAT(krdrops, krdrops); | ||
130 | /* XXX fix the following when dynamic table of devices used */ | ||
131 | DRIVER_STAT(mlid0, mlid[0]); | ||
132 | DRIVER_STAT(mlid1, mlid[1]); | ||
133 | DRIVER_STAT(mlid2, mlid[2]); | ||
134 | DRIVER_STAT(mlid3, mlid[3]); | ||
135 | |||
136 | static struct attribute *driver_stat_attributes[] = { | ||
137 | &driver_attr_intrs.attr, | ||
138 | &driver_attr_err_intrs.attr, | ||
139 | &driver_attr_errs.attr, | ||
140 | &driver_attr_pkt_errs.attr, | ||
141 | &driver_attr_crc_errs.attr, | ||
142 | &driver_attr_hw_errs.attr, | ||
143 | &driver_attr_ib_link.attr, | ||
144 | &driver_attr_port0_pkts.attr, | ||
145 | &driver_attr_ether_spkts.attr, | ||
146 | &driver_attr_ether_rpkts.attr, | ||
147 | &driver_attr_sma_spkts.attr, | ||
148 | &driver_attr_sma_rpkts.attr, | ||
149 | &driver_attr_hdrq_full.attr, | ||
150 | &driver_attr_etid_full.attr, | ||
151 | &driver_attr_no_piobufs.attr, | ||
152 | &driver_attr_ports.attr, | ||
153 | &driver_attr_pkey0.attr, | ||
154 | &driver_attr_pkey1.attr, | ||
155 | &driver_attr_pkey2.attr, | ||
156 | &driver_attr_pkey3.attr, | ||
157 | &driver_attr_lid0.attr, | ||
158 | &driver_attr_lid1.attr, | ||
159 | &driver_attr_lid2.attr, | ||
160 | &driver_attr_lid3.attr, | ||
161 | &driver_attr_nports.attr, | ||
162 | &driver_attr_null_intr.attr, | ||
163 | &driver_attr_max_pkts_call.attr, | ||
164 | &driver_attr_avg_pkts_call.attr, | ||
165 | &driver_attr_page_locks.attr, | ||
166 | &driver_attr_page_unlocks.attr, | ||
167 | &driver_attr_krdrops.attr, | ||
168 | &driver_attr_mlid0.attr, | ||
169 | &driver_attr_mlid1.attr, | ||
170 | &driver_attr_mlid2.attr, | ||
171 | &driver_attr_mlid3.attr, | ||
172 | NULL | ||
173 | }; | ||
174 | |||
175 | static struct attribute_group driver_stat_attr_group = { | ||
176 | .name = "stats", | ||
177 | .attrs = driver_stat_attributes | ||
178 | }; | ||
179 | |||
180 | static ssize_t show_status(struct device *dev, | ||
181 | struct device_attribute *attr, | ||
182 | char *buf) | ||
183 | { | ||
184 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
185 | ssize_t ret; | ||
186 | |||
187 | if (!dd->ipath_statusp) { | ||
188 | ret = -EINVAL; | ||
189 | goto bail; | ||
190 | } | ||
191 | |||
192 | ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n", | ||
193 | (unsigned long long) *(dd->ipath_statusp)); | ||
194 | |||
195 | bail: | ||
196 | return ret; | ||
197 | } | ||
198 | |||
199 | static const char *ipath_status_str[] = { | ||
200 | "Initted", | ||
201 | "Disabled", | ||
202 | "Admin_Disabled", | ||
203 | "OIB_SMA", | ||
204 | "SMA", | ||
205 | "Present", | ||
206 | "IB_link_up", | ||
207 | "IB_configured", | ||
208 | "NoIBcable", | ||
209 | "Fatal_Hardware_Error", | ||
210 | NULL, | ||
211 | }; | ||
212 | |||
213 | static ssize_t show_status_str(struct device *dev, | ||
214 | struct device_attribute *attr, | ||
215 | char *buf) | ||
216 | { | ||
217 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
218 | int i, any; | ||
219 | u64 s; | ||
220 | ssize_t ret; | ||
221 | |||
222 | if (!dd->ipath_statusp) { | ||
223 | ret = -EINVAL; | ||
224 | goto bail; | ||
225 | } | ||
226 | |||
227 | s = *(dd->ipath_statusp); | ||
228 | *buf = '\0'; | ||
229 | for (any = i = 0; s && ipath_status_str[i]; i++) { | ||
230 | if (s & 1) { | ||
231 | if (any && strlcat(buf, " ", PAGE_SIZE) >= | ||
232 | PAGE_SIZE) | ||
233 | /* overflow */ | ||
234 | break; | ||
235 | if (strlcat(buf, ipath_status_str[i], | ||
236 | PAGE_SIZE) >= PAGE_SIZE) | ||
237 | break; | ||
238 | any = 1; | ||
239 | } | ||
240 | s >>= 1; | ||
241 | } | ||
242 | if (any) | ||
243 | strlcat(buf, "\n", PAGE_SIZE); | ||
244 | |||
245 | ret = strlen(buf); | ||
246 | |||
247 | bail: | ||
248 | return ret; | ||
249 | } | ||
250 | |||
251 | static ssize_t show_boardversion(struct device *dev, | ||
252 | struct device_attribute *attr, | ||
253 | char *buf) | ||
254 | { | ||
255 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
256 | /* The string printed here is already newline-terminated. */ | ||
257 | return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion); | ||
258 | } | ||
259 | |||
260 | static ssize_t show_lid(struct device *dev, | ||
261 | struct device_attribute *attr, | ||
262 | char *buf) | ||
263 | { | ||
264 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
265 | |||
266 | return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_lid); | ||
267 | } | ||
268 | |||
269 | static ssize_t store_lid(struct device *dev, | ||
270 | struct device_attribute *attr, | ||
271 | const char *buf, | ||
272 | size_t count) | ||
273 | { | ||
274 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
275 | u16 lid; | ||
276 | int ret; | ||
277 | |||
278 | ret = ipath_parse_ushort(buf, &lid); | ||
279 | if (ret < 0) | ||
280 | goto invalid; | ||
281 | |||
282 | if (lid == 0 || lid >= 0xc000) { | ||
283 | ret = -EINVAL; | ||
284 | goto invalid; | ||
285 | } | ||
286 | |||
287 | ipath_set_sps_lid(dd, lid, 0); | ||
288 | |||
289 | goto bail; | ||
290 | invalid: | ||
291 | ipath_dev_err(dd, "attempt to set invalid LID\n"); | ||
292 | bail: | ||
293 | return ret; | ||
294 | } | ||
295 | |||
296 | static ssize_t show_mlid(struct device *dev, | ||
297 | struct device_attribute *attr, | ||
298 | char *buf) | ||
299 | { | ||
300 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
301 | |||
302 | return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_mlid); | ||
303 | } | ||
304 | |||
305 | static ssize_t store_mlid(struct device *dev, | ||
306 | struct device_attribute *attr, | ||
307 | const char *buf, | ||
308 | size_t count) | ||
309 | { | ||
310 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
311 | int unit; | ||
312 | u16 mlid; | ||
313 | int ret; | ||
314 | |||
315 | ret = ipath_parse_ushort(buf, &mlid); | ||
316 | if (ret < 0) | ||
317 | goto invalid; | ||
318 | |||
319 | unit = dd->ipath_unit; | ||
320 | |||
321 | dd->ipath_mlid = mlid; | ||
322 | ipath_stats.sps_mlid[unit] = mlid; | ||
323 | ipath_layer_intr(dd, IPATH_LAYER_INT_BCAST); | ||
324 | |||
325 | goto bail; | ||
326 | invalid: | ||
327 | ipath_dev_err(dd, "attempt to set invalid MLID\n"); | ||
328 | bail: | ||
329 | return ret; | ||
330 | } | ||
331 | |||
332 | static ssize_t show_guid(struct device *dev, | ||
333 | struct device_attribute *attr, | ||
334 | char *buf) | ||
335 | { | ||
336 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
337 | u8 *guid; | ||
338 | |||
339 | guid = (u8 *) & (dd->ipath_guid); | ||
340 | |||
341 | return scnprintf(buf, PAGE_SIZE, | ||
342 | "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n", | ||
343 | guid[0], guid[1], guid[2], guid[3], | ||
344 | guid[4], guid[5], guid[6], guid[7]); | ||
345 | } | ||
346 | |||
347 | static ssize_t store_guid(struct device *dev, | ||
348 | struct device_attribute *attr, | ||
349 | const char *buf, | ||
350 | size_t count) | ||
351 | { | ||
352 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
353 | ssize_t ret; | ||
354 | unsigned short guid[8]; | ||
355 | __be64 nguid; | ||
356 | u8 *ng; | ||
357 | int i; | ||
358 | |||
359 | if (sscanf(buf, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx", | ||
360 | &guid[0], &guid[1], &guid[2], &guid[3], | ||
361 | &guid[4], &guid[5], &guid[6], &guid[7]) != 8) | ||
362 | goto invalid; | ||
363 | |||
364 | ng = (u8 *) &nguid; | ||
365 | |||
366 | for (i = 0; i < 8; i++) { | ||
367 | if (guid[i] > 0xff) | ||
368 | goto invalid; | ||
369 | ng[i] = guid[i]; | ||
370 | } | ||
371 | |||
372 | dd->ipath_guid = nguid; | ||
373 | dd->ipath_nguid = 1; | ||
374 | |||
375 | ret = strlen(buf); | ||
376 | goto bail; | ||
377 | |||
378 | invalid: | ||
379 | ipath_dev_err(dd, "attempt to set invalid GUID\n"); | ||
380 | ret = -EINVAL; | ||
381 | |||
382 | bail: | ||
383 | return ret; | ||
384 | } | ||
385 | |||
386 | static ssize_t show_nguid(struct device *dev, | ||
387 | struct device_attribute *attr, | ||
388 | char *buf) | ||
389 | { | ||
390 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
391 | |||
392 | return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid); | ||
393 | } | ||
394 | |||
395 | static ssize_t show_serial(struct device *dev, | ||
396 | struct device_attribute *attr, | ||
397 | char *buf) | ||
398 | { | ||
399 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
400 | |||
401 | buf[sizeof dd->ipath_serial] = '\0'; | ||
402 | memcpy(buf, dd->ipath_serial, sizeof dd->ipath_serial); | ||
403 | strcat(buf, "\n"); | ||
404 | return strlen(buf); | ||
405 | } | ||
406 | |||
407 | static ssize_t show_unit(struct device *dev, | ||
408 | struct device_attribute *attr, | ||
409 | char *buf) | ||
410 | { | ||
411 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
412 | |||
413 | return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit); | ||
414 | } | ||
415 | |||
416 | #define DEVICE_COUNTER(name, attr) \ | ||
417 | static ssize_t show_counter_##name(struct device *dev, \ | ||
418 | struct device_attribute *attr, \ | ||
419 | char *buf) \ | ||
420 | { \ | ||
421 | struct ipath_devdata *dd = dev_get_drvdata(dev); \ | ||
422 | return scnprintf(\ | ||
423 | buf, PAGE_SIZE, "%llu\n", (unsigned long long) \ | ||
424 | ipath_snap_cntr( \ | ||
425 | dd, offsetof(struct infinipath_counters, \ | ||
426 | attr) / sizeof(u64))); \ | ||
427 | } \ | ||
428 | static DEVICE_ATTR(name, S_IRUGO, show_counter_##name, NULL); | ||
429 | |||
430 | DEVICE_COUNTER(ib_link_downeds, IBLinkDownedCnt); | ||
431 | DEVICE_COUNTER(ib_link_err_recoveries, IBLinkErrRecoveryCnt); | ||
432 | DEVICE_COUNTER(ib_status_changes, IBStatusChangeCnt); | ||
433 | DEVICE_COUNTER(ib_symbol_errs, IBSymbolErrCnt); | ||
434 | DEVICE_COUNTER(lb_flow_stalls, LBFlowStallCnt); | ||
435 | DEVICE_COUNTER(lb_ints, LBIntCnt); | ||
436 | DEVICE_COUNTER(rx_bad_formats, RxBadFormatCnt); | ||
437 | DEVICE_COUNTER(rx_buf_ovfls, RxBufOvflCnt); | ||
438 | DEVICE_COUNTER(rx_data_pkts, RxDataPktCnt); | ||
439 | DEVICE_COUNTER(rx_dropped_pkts, RxDroppedPktCnt); | ||
440 | DEVICE_COUNTER(rx_dwords, RxDwordCnt); | ||
441 | DEVICE_COUNTER(rx_ebps, RxEBPCnt); | ||
442 | DEVICE_COUNTER(rx_flow_ctrl_errs, RxFlowCtrlErrCnt); | ||
443 | DEVICE_COUNTER(rx_flow_pkts, RxFlowPktCnt); | ||
444 | DEVICE_COUNTER(rx_icrc_errs, RxICRCErrCnt); | ||
445 | DEVICE_COUNTER(rx_len_errs, RxLenErrCnt); | ||
446 | DEVICE_COUNTER(rx_link_problems, RxLinkProblemCnt); | ||
447 | DEVICE_COUNTER(rx_lpcrc_errs, RxLPCRCErrCnt); | ||
448 | DEVICE_COUNTER(rx_max_min_len_errs, RxMaxMinLenErrCnt); | ||
449 | DEVICE_COUNTER(rx_p0_hdr_egr_ovfls, RxP0HdrEgrOvflCnt); | ||
450 | DEVICE_COUNTER(rx_p1_hdr_egr_ovfls, RxP1HdrEgrOvflCnt); | ||
451 | DEVICE_COUNTER(rx_p2_hdr_egr_ovfls, RxP2HdrEgrOvflCnt); | ||
452 | DEVICE_COUNTER(rx_p3_hdr_egr_ovfls, RxP3HdrEgrOvflCnt); | ||
453 | DEVICE_COUNTER(rx_p4_hdr_egr_ovfls, RxP4HdrEgrOvflCnt); | ||
454 | DEVICE_COUNTER(rx_p5_hdr_egr_ovfls, RxP5HdrEgrOvflCnt); | ||
455 | DEVICE_COUNTER(rx_p6_hdr_egr_ovfls, RxP6HdrEgrOvflCnt); | ||
456 | DEVICE_COUNTER(rx_p7_hdr_egr_ovfls, RxP7HdrEgrOvflCnt); | ||
457 | DEVICE_COUNTER(rx_p8_hdr_egr_ovfls, RxP8HdrEgrOvflCnt); | ||
458 | DEVICE_COUNTER(rx_pkey_mismatches, RxPKeyMismatchCnt); | ||
459 | DEVICE_COUNTER(rx_tid_full_errs, RxTIDFullErrCnt); | ||
460 | DEVICE_COUNTER(rx_tid_valid_errs, RxTIDValidErrCnt); | ||
461 | DEVICE_COUNTER(rx_vcrc_errs, RxVCRCErrCnt); | ||
462 | DEVICE_COUNTER(tx_data_pkts, TxDataPktCnt); | ||
463 | DEVICE_COUNTER(tx_dropped_pkts, TxDroppedPktCnt); | ||
464 | DEVICE_COUNTER(tx_dwords, TxDwordCnt); | ||
465 | DEVICE_COUNTER(tx_flow_pkts, TxFlowPktCnt); | ||
466 | DEVICE_COUNTER(tx_flow_stalls, TxFlowStallCnt); | ||
467 | DEVICE_COUNTER(tx_len_errs, TxLenErrCnt); | ||
468 | DEVICE_COUNTER(tx_max_min_len_errs, TxMaxMinLenErrCnt); | ||
469 | DEVICE_COUNTER(tx_underruns, TxUnderrunCnt); | ||
470 | DEVICE_COUNTER(tx_unsup_vl_errs, TxUnsupVLErrCnt); | ||
471 | |||
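The DEVICE_COUNTER() macro above converts a field name in struct infinipath_counters into a chip counter index by dividing the field's byte offset by sizeof(u64). The userspace sketch below demonstrates the same offsetof() trick against an invented stand-in struct and a stub read_counter(); none of these names come from the driver.

/*
 * offsetof()-derived counter index, mirroring the DEVICE_COUNTER() macro.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct hw_counters {                      /* stand-in for the real counters struct */
	uint64_t LBIntCnt;
	uint64_t TxDataPktCnt;
	uint64_t RxDataPktCnt;
};

static uint64_t read_counter(unsigned index)
{
	/* pretend register file: index selects one 64-bit counter */
	static const uint64_t regs[] = { 7, 1234, 5678 };
	return regs[index];
}

#define SNAP_COUNTER(field) \
	read_counter(offsetof(struct hw_counters, field) / sizeof(uint64_t))

int main(void)
{
	printf("tx pkts = %llu\n",
	       (unsigned long long)SNAP_COUNTER(TxDataPktCnt));   /* 1234 */
	return 0;
}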
472 | static struct attribute *dev_counter_attributes[] = { | ||
473 | &dev_attr_ib_link_downeds.attr, | ||
474 | &dev_attr_ib_link_err_recoveries.attr, | ||
475 | &dev_attr_ib_status_changes.attr, | ||
476 | &dev_attr_ib_symbol_errs.attr, | ||
477 | &dev_attr_lb_flow_stalls.attr, | ||
478 | &dev_attr_lb_ints.attr, | ||
479 | &dev_attr_rx_bad_formats.attr, | ||
480 | &dev_attr_rx_buf_ovfls.attr, | ||
481 | &dev_attr_rx_data_pkts.attr, | ||
482 | &dev_attr_rx_dropped_pkts.attr, | ||
483 | &dev_attr_rx_dwords.attr, | ||
484 | &dev_attr_rx_ebps.attr, | ||
485 | &dev_attr_rx_flow_ctrl_errs.attr, | ||
486 | &dev_attr_rx_flow_pkts.attr, | ||
487 | &dev_attr_rx_icrc_errs.attr, | ||
488 | &dev_attr_rx_len_errs.attr, | ||
489 | &dev_attr_rx_link_problems.attr, | ||
490 | &dev_attr_rx_lpcrc_errs.attr, | ||
491 | &dev_attr_rx_max_min_len_errs.attr, | ||
492 | &dev_attr_rx_p0_hdr_egr_ovfls.attr, | ||
493 | &dev_attr_rx_p1_hdr_egr_ovfls.attr, | ||
494 | &dev_attr_rx_p2_hdr_egr_ovfls.attr, | ||
495 | &dev_attr_rx_p3_hdr_egr_ovfls.attr, | ||
496 | &dev_attr_rx_p4_hdr_egr_ovfls.attr, | ||
497 | &dev_attr_rx_p5_hdr_egr_ovfls.attr, | ||
498 | &dev_attr_rx_p6_hdr_egr_ovfls.attr, | ||
499 | &dev_attr_rx_p7_hdr_egr_ovfls.attr, | ||
500 | &dev_attr_rx_p8_hdr_egr_ovfls.attr, | ||
501 | &dev_attr_rx_pkey_mismatches.attr, | ||
502 | &dev_attr_rx_tid_full_errs.attr, | ||
503 | &dev_attr_rx_tid_valid_errs.attr, | ||
504 | &dev_attr_rx_vcrc_errs.attr, | ||
505 | &dev_attr_tx_data_pkts.attr, | ||
506 | &dev_attr_tx_dropped_pkts.attr, | ||
507 | &dev_attr_tx_dwords.attr, | ||
508 | &dev_attr_tx_flow_pkts.attr, | ||
509 | &dev_attr_tx_flow_stalls.attr, | ||
510 | &dev_attr_tx_len_errs.attr, | ||
511 | &dev_attr_tx_max_min_len_errs.attr, | ||
512 | &dev_attr_tx_underruns.attr, | ||
513 | &dev_attr_tx_unsup_vl_errs.attr, | ||
514 | NULL | ||
515 | }; | ||
516 | |||
517 | static struct attribute_group dev_counter_attr_group = { | ||
518 | .name = "counters", | ||
519 | .attrs = dev_counter_attributes | ||
520 | }; | ||
521 | |||
522 | static ssize_t store_reset(struct device *dev, | ||
523 | struct device_attribute *attr, | ||
524 | const char *buf, | ||
525 | size_t count) | ||
526 | { | ||
527 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
528 | int ret; | ||
529 | |||
530 | if (count < 5 || memcmp(buf, "reset", 5)) { | ||
531 | ret = -EINVAL; | ||
532 | goto bail; | ||
533 | } | ||
534 | |||
535 | if (dd->ipath_flags & IPATH_DISABLED) { | ||
536 | /* | ||
537 | * post-reset init would re-enable interrupts, etc. | ||
538 | * so don't allow reset on disabled devices. Not a | ||
539 | * perfect error code, but about the best choice. | ||
540 | */ | ||
541 | dev_info(dev,"Unit %d is disabled, can't reset\n", | ||
542 | dd->ipath_unit); | ||
543 | ret = -EINVAL; | ||
544 | } | ||
545 | ret = ipath_reset_device(dd->ipath_unit); | ||
546 | bail: | ||
547 | return ret<0 ? ret : count; | ||
548 | } | ||
549 | |||
550 | static ssize_t store_link_state(struct device *dev, | ||
551 | struct device_attribute *attr, | ||
552 | const char *buf, | ||
553 | size_t count) | ||
554 | { | ||
555 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
556 | int ret, r; | ||
557 | u16 state; | ||
558 | |||
559 | ret = ipath_parse_ushort(buf, &state); | ||
560 | if (ret < 0) | ||
561 | goto invalid; | ||
562 | |||
563 | r = ipath_layer_set_linkstate(dd, state); | ||
564 | if (r < 0) { | ||
565 | ret = r; | ||
566 | goto bail; | ||
567 | } | ||
568 | |||
569 | goto bail; | ||
570 | invalid: | ||
571 | ipath_dev_err(dd, "attempt to set invalid link state\n"); | ||
572 | bail: | ||
573 | return ret; | ||
574 | } | ||
575 | |||
576 | static ssize_t show_mtu(struct device *dev, | ||
577 | struct device_attribute *attr, | ||
578 | char *buf) | ||
579 | { | ||
580 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
581 | return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_ibmtu); | ||
582 | } | ||
583 | |||
584 | static ssize_t store_mtu(struct device *dev, | ||
585 | struct device_attribute *attr, | ||
586 | const char *buf, | ||
587 | size_t count) | ||
588 | { | ||
589 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
590 | ssize_t ret; | ||
591 | u16 mtu = 0; | ||
592 | int r; | ||
593 | |||
594 | ret = ipath_parse_ushort(buf, &mtu); | ||
595 | if (ret < 0) | ||
596 | goto invalid; | ||
597 | |||
598 | r = ipath_layer_set_mtu(dd, mtu); | ||
599 | if (r < 0) | ||
600 | ret = r; | ||
601 | |||
602 | goto bail; | ||
603 | invalid: | ||
604 | ipath_dev_err(dd, "attempt to set invalid MTU\n"); | ||
605 | bail: | ||
606 | return ret; | ||
607 | } | ||
608 | |||
609 | static ssize_t show_enabled(struct device *dev, | ||
610 | struct device_attribute *attr, | ||
611 | char *buf) | ||
612 | { | ||
613 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
614 | return scnprintf(buf, PAGE_SIZE, "%u\n", | ||
615 | (dd->ipath_flags & IPATH_DISABLED) ? 0 : 1); | ||
616 | } | ||
617 | |||
618 | static ssize_t store_enabled(struct device *dev, | ||
619 | struct device_attribute *attr, | ||
620 | const char *buf, | ||
621 | size_t count) | ||
622 | { | ||
623 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
624 | ssize_t ret; | ||
625 | u16 enable = 0; | ||
626 | |||
627 | ret = ipath_parse_ushort(buf, &enable); | ||
628 | if (ret < 0) { | ||
629 | 		ipath_dev_err(dd, "attempt to use non-numeric value on enable\n"); | ||
630 | goto bail; | ||
631 | } | ||
632 | |||
633 | if (enable) { | ||
634 | if (!(dd->ipath_flags & IPATH_DISABLED)) | ||
635 | goto bail; | ||
636 | |||
637 | dev_info(dev, "Enabling unit %d\n", dd->ipath_unit); | ||
638 | /* same as post-reset */ | ||
639 | ret = ipath_init_chip(dd, 1); | ||
640 | if (ret) | ||
641 | ipath_dev_err(dd, "Failed to enable unit %d\n", | ||
642 | dd->ipath_unit); | ||
643 | else { | ||
644 | dd->ipath_flags &= ~IPATH_DISABLED; | ||
645 | *dd->ipath_statusp &= ~IPATH_STATUS_ADMIN_DISABLED; | ||
646 | } | ||
647 | } | ||
648 | else if (!(dd->ipath_flags & IPATH_DISABLED)) { | ||
649 | dev_info(dev, "Disabling unit %d\n", dd->ipath_unit); | ||
650 | ipath_shutdown_device(dd); | ||
651 | dd->ipath_flags |= IPATH_DISABLED; | ||
652 | *dd->ipath_statusp |= IPATH_STATUS_ADMIN_DISABLED; | ||
653 | } | ||
654 | |||
655 | bail: | ||
656 | return ret; | ||
657 | } | ||
658 | |||
659 | static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); | ||
660 | static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); | ||
661 | |||
662 | static struct attribute *driver_attributes[] = { | ||
663 | &driver_attr_num_units.attr, | ||
664 | &driver_attr_version.attr, | ||
665 | NULL | ||
666 | }; | ||
667 | |||
668 | static struct attribute_group driver_attr_group = { | ||
669 | .attrs = driver_attributes | ||
670 | }; | ||
671 | |||
672 | static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid); | ||
673 | static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid); | ||
674 | static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state); | ||
675 | static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid); | ||
676 | static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu); | ||
677 | static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled); | ||
678 | static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL); | ||
679 | static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset); | ||
680 | static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); | ||
681 | static DEVICE_ATTR(status, S_IRUGO, show_status, NULL); | ||
682 | static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL); | ||
683 | static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); | ||
684 | static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); | ||
685 | |||
686 | static struct attribute *dev_attributes[] = { | ||
687 | &dev_attr_guid.attr, | ||
688 | &dev_attr_lid.attr, | ||
689 | &dev_attr_link_state.attr, | ||
690 | &dev_attr_mlid.attr, | ||
691 | &dev_attr_mtu.attr, | ||
692 | &dev_attr_nguid.attr, | ||
693 | &dev_attr_serial.attr, | ||
694 | &dev_attr_status.attr, | ||
695 | &dev_attr_status_str.attr, | ||
696 | &dev_attr_boardversion.attr, | ||
697 | &dev_attr_unit.attr, | ||
698 | &dev_attr_enabled.attr, | ||
699 | NULL | ||
700 | }; | ||
701 | |||
702 | static struct attribute_group dev_attr_group = { | ||
703 | .attrs = dev_attributes | ||
704 | }; | ||
705 | |||
706 | /** | ||
707 | * ipath_expose_reset - create a device reset file | ||
708 | * @dev: the device structure | ||
709 | * | ||
710 | * Only expose a file that lets us reset the device after someone | ||
711 | * enters diag mode. A device reset is quite likely to crash the | ||
712 | * machine entirely, so we don't normally want to make it | ||
713 | * available. | ||
714 | */ | ||
715 | int ipath_expose_reset(struct device *dev) | ||
716 | { | ||
717 | return device_create_file(dev, &dev_attr_reset); | ||
718 | } | ||
719 | |||
720 | int ipath_driver_create_group(struct device_driver *drv) | ||
721 | { | ||
722 | int ret; | ||
723 | |||
724 | ret = sysfs_create_group(&drv->kobj, &driver_attr_group); | ||
725 | if (ret) | ||
726 | goto bail; | ||
727 | |||
728 | ret = sysfs_create_group(&drv->kobj, &driver_stat_attr_group); | ||
729 | if (ret) | ||
730 | sysfs_remove_group(&drv->kobj, &driver_attr_group); | ||
731 | |||
732 | bail: | ||
733 | return ret; | ||
734 | } | ||
735 | |||
736 | void ipath_driver_remove_group(struct device_driver *drv) | ||
737 | { | ||
738 | sysfs_remove_group(&drv->kobj, &driver_stat_attr_group); | ||
739 | sysfs_remove_group(&drv->kobj, &driver_attr_group); | ||
740 | } | ||
741 | |||
742 | int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd) | ||
743 | { | ||
744 | int ret; | ||
745 | char unit[5]; | ||
746 | |||
747 | ret = sysfs_create_group(&dev->kobj, &dev_attr_group); | ||
748 | if (ret) | ||
749 | goto bail; | ||
750 | |||
751 | ret = sysfs_create_group(&dev->kobj, &dev_counter_attr_group); | ||
752 | if (ret) | ||
753 | goto bail_attrs; | ||
754 | |||
755 | snprintf(unit, sizeof(unit), "%02d", dd->ipath_unit); | ||
756 | ret = sysfs_create_link(&dev->driver->kobj, &dev->kobj, unit); | ||
757 | if (ret == 0) | ||
758 | goto bail; | ||
759 | |||
760 | sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); | ||
761 | bail_attrs: | ||
762 | sysfs_remove_group(&dev->kobj, &dev_attr_group); | ||
763 | bail: | ||
764 | return ret; | ||
765 | } | ||
766 | |||
767 | void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd) | ||
768 | { | ||
769 | char unit[5]; | ||
770 | |||
771 | snprintf(unit, sizeof(unit), "%02d", dd->ipath_unit); | ||
772 | sysfs_remove_link(&dev->driver->kobj, unit); | ||
773 | |||
774 | sysfs_remove_group(&dev->kobj, &dev_counter_attr_group); | ||
775 | sysfs_remove_group(&dev->kobj, &dev_attr_group); | ||
776 | |||
777 | device_remove_file(dev, &dev_attr_reset); | ||
778 | } | ||
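
The groups above expose per-unit control files ("enabled", "mtu", the deliberately hidden "reset", ...) plus a "counters" directory on the device's sysfs kobject. As a rough sketch of how these files are driven from userspace, the snippet below reads one counter and re-enables a unit; the sysfs path is an assumption (it depends on where the PCI core places the device), so only the file names come from the driver code above.

```c
/*
 * Hypothetical userspace sketch: read one counter created by
 * dev_counter_attr_group and re-enable a unit through the "enabled"
 * attribute.  The sysfs path is an assumed example; only the file
 * names ("counters/rx_data_pkts", "enabled", "reset") come from the
 * driver code above.
 */
#include <stdio.h>

#define IPATH_SYSFS "/sys/bus/pci/devices/0000:02:00.0"	/* assumed */

static long read_counter(const char *name)
{
	char path[256];
	FILE *f;
	long val = -1;

	snprintf(path, sizeof(path), IPATH_SYSFS "/counters/%s", name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (fscanf(f, "%ld", &val) != 1)
		val = -1;
	fclose(f);
	return val;
}

int main(void)
{
	FILE *f;

	printf("rx_data_pkts: %ld\n", read_counter("rx_data_pkts"));

	/* Writing "1"/"0" exercises store_enabled(); writing "reset" to the
	 * "reset" file (once exposed) would go through store_reset(). */
	f = fopen(IPATH_SYSFS "/enabled", "w");
	if (f) {
		fputs("1", f);
		fclose(f);
	}
	return 0;
}
```
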
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c new file mode 100644 index 000000000000..0d6dbc0a541e --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_uc.c | |||
@@ -0,0 +1,645 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include "ipath_verbs.h" | ||
34 | #include "ips_common.h" | ||
35 | |||
36 | /* cut down ridiculously long IB macro names */ | ||
37 | #define OP(x) IB_OPCODE_UC_##x | ||
38 | |||
39 | static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe, | ||
40 | struct ib_wc *wc) | ||
41 | { | ||
42 | if (++qp->s_last == qp->s_size) | ||
43 | qp->s_last = 0; | ||
44 | if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || | ||
45 | (wqe->wr.send_flags & IB_SEND_SIGNALED)) { | ||
46 | wc->wr_id = wqe->wr.wr_id; | ||
47 | wc->status = IB_WC_SUCCESS; | ||
48 | wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; | ||
49 | wc->vendor_err = 0; | ||
50 | wc->byte_len = wqe->length; | ||
51 | wc->qp_num = qp->ibqp.qp_num; | ||
52 | wc->src_qp = qp->remote_qpn; | ||
53 | wc->pkey_index = 0; | ||
54 | wc->slid = qp->remote_ah_attr.dlid; | ||
55 | wc->sl = qp->remote_ah_attr.sl; | ||
56 | wc->dlid_path_bits = 0; | ||
57 | wc->port_num = 0; | ||
58 | ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0); | ||
59 | } | ||
60 | wqe = get_swqe_ptr(qp, qp->s_last); | ||
61 | } | ||
62 | |||
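
complete_last_send() only queues a completion when the QP was not created with the "signal requested WRs only" flag, or when the WR itself set IB_SEND_SIGNALED; ipath_post_ud_send() later applies the same test. A standalone restatement of that predicate, with plain int flags standing in for the driver's bit tests:

```c
#include <stdio.h>

/*
 * A completion is queued unless the QP was created to signal only
 * requested WRs and this WR did not ask for a signal.  The two ints
 * stand in for the driver's IPATH_S_SIGNAL_REQ_WR bit test and the
 * IB_SEND_SIGNALED flag check.
 */
static int send_wants_completion(int qp_signals_req_wr_only, int wr_signaled)
{
	return !qp_signals_req_wr_only || wr_signaled;
}

int main(void)
{
	printf("%d %d %d\n",
	       send_wants_completion(0, 0),	/* QP signals everything -> 1 */
	       send_wants_completion(1, 0),	/* selective, unsignaled WR -> 0 */
	       send_wants_completion(1, 1));	/* selective, signaled WR -> 1 */
	return 0;
}
```
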
63 | /** | ||
64 | * ipath_do_uc_send - do a send on a UC queue | ||
65 | * @data: contains a pointer to the QP to send on | ||
66 | * | ||
67 | * Process entries in the send work queue until the queue is exhausted. | ||
68 | * Only allow one CPU to send a packet per QP (tasklet). | ||
69 | * Otherwise, after we drop the QP lock, two threads could send | ||
70 | * packets out of order. | ||
71 | 	 * This is similar to ipath_do_rc_send() except we don't have | ||
72 | * timeouts or resends. | ||
73 | */ | ||
74 | void ipath_do_uc_send(unsigned long data) | ||
75 | { | ||
76 | struct ipath_qp *qp = (struct ipath_qp *)data; | ||
77 | struct ipath_ibdev *dev = to_idev(qp->ibqp.device); | ||
78 | struct ipath_swqe *wqe; | ||
79 | unsigned long flags; | ||
80 | u16 lrh0; | ||
81 | u32 hwords; | ||
82 | u32 nwords; | ||
83 | u32 extra_bytes; | ||
84 | u32 bth0; | ||
85 | u32 bth2; | ||
86 | u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); | ||
87 | u32 len; | ||
88 | struct ipath_other_headers *ohdr; | ||
89 | struct ib_wc wc; | ||
90 | |||
91 | if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags)) | ||
92 | goto bail; | ||
93 | |||
94 | if (unlikely(qp->remote_ah_attr.dlid == | ||
95 | ipath_layer_get_lid(dev->dd))) { | ||
96 | /* Pass in an uninitialized ib_wc to save stack space. */ | ||
97 | ipath_ruc_loopback(qp, &wc); | ||
98 | clear_bit(IPATH_S_BUSY, &qp->s_flags); | ||
99 | goto bail; | ||
100 | } | ||
101 | |||
102 | ohdr = &qp->s_hdr.u.oth; | ||
103 | if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) | ||
104 | ohdr = &qp->s_hdr.u.l.oth; | ||
105 | |||
106 | again: | ||
107 | /* Check for a constructed packet to be sent. */ | ||
108 | if (qp->s_hdrwords != 0) { | ||
109 | /* | ||
110 | * If no PIO bufs are available, return. | ||
111 | * An interrupt will call ipath_ib_piobufavail() | ||
112 | * when one is available. | ||
113 | */ | ||
114 | if (ipath_verbs_send(dev->dd, qp->s_hdrwords, | ||
115 | (u32 *) &qp->s_hdr, | ||
116 | qp->s_cur_size, | ||
117 | qp->s_cur_sge)) { | ||
118 | ipath_no_bufs_available(qp, dev); | ||
119 | goto bail; | ||
120 | } | ||
121 | dev->n_unicast_xmit++; | ||
122 | /* Record that we sent the packet and s_hdr is empty. */ | ||
123 | qp->s_hdrwords = 0; | ||
124 | } | ||
125 | |||
126 | lrh0 = IPS_LRH_BTH; | ||
127 | 	/* Header size in 32-bit words: LRH+BTH = (8+12)/4 = 5. */ | ||
128 | hwords = 5; | ||
129 | |||
130 | /* | ||
131 | * The lock is needed to synchronize between | ||
132 | * setting qp->s_ack_state and post_send(). | ||
133 | */ | ||
134 | spin_lock_irqsave(&qp->s_lock, flags); | ||
135 | |||
136 | if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) | ||
137 | goto done; | ||
138 | |||
139 | bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); | ||
140 | |||
141 | /* Send a request. */ | ||
142 | wqe = get_swqe_ptr(qp, qp->s_last); | ||
143 | switch (qp->s_state) { | ||
144 | default: | ||
145 | /* | ||
146 | * Signal the completion of the last send (if there is | ||
147 | * one). | ||
148 | */ | ||
149 | if (qp->s_last != qp->s_tail) | ||
150 | complete_last_send(qp, wqe, &wc); | ||
151 | |||
152 | /* Check if send work queue is empty. */ | ||
153 | if (qp->s_tail == qp->s_head) | ||
154 | goto done; | ||
155 | /* | ||
156 | * Start a new request. | ||
157 | */ | ||
158 | qp->s_psn = wqe->psn = qp->s_next_psn; | ||
159 | qp->s_sge.sge = wqe->sg_list[0]; | ||
160 | qp->s_sge.sg_list = wqe->sg_list + 1; | ||
161 | qp->s_sge.num_sge = wqe->wr.num_sge; | ||
162 | qp->s_len = len = wqe->length; | ||
163 | switch (wqe->wr.opcode) { | ||
164 | case IB_WR_SEND: | ||
165 | case IB_WR_SEND_WITH_IMM: | ||
166 | if (len > pmtu) { | ||
167 | qp->s_state = OP(SEND_FIRST); | ||
168 | len = pmtu; | ||
169 | break; | ||
170 | } | ||
171 | if (wqe->wr.opcode == IB_WR_SEND) | ||
172 | qp->s_state = OP(SEND_ONLY); | ||
173 | else { | ||
174 | qp->s_state = | ||
175 | OP(SEND_ONLY_WITH_IMMEDIATE); | ||
176 | /* Immediate data comes after the BTH */ | ||
177 | ohdr->u.imm_data = wqe->wr.imm_data; | ||
178 | hwords += 1; | ||
179 | } | ||
180 | if (wqe->wr.send_flags & IB_SEND_SOLICITED) | ||
181 | bth0 |= 1 << 23; | ||
182 | break; | ||
183 | |||
184 | case IB_WR_RDMA_WRITE: | ||
185 | case IB_WR_RDMA_WRITE_WITH_IMM: | ||
186 | ohdr->u.rc.reth.vaddr = | ||
187 | cpu_to_be64(wqe->wr.wr.rdma.remote_addr); | ||
188 | ohdr->u.rc.reth.rkey = | ||
189 | cpu_to_be32(wqe->wr.wr.rdma.rkey); | ||
190 | ohdr->u.rc.reth.length = cpu_to_be32(len); | ||
191 | hwords += sizeof(struct ib_reth) / 4; | ||
192 | if (len > pmtu) { | ||
193 | qp->s_state = OP(RDMA_WRITE_FIRST); | ||
194 | len = pmtu; | ||
195 | break; | ||
196 | } | ||
197 | if (wqe->wr.opcode == IB_WR_RDMA_WRITE) | ||
198 | qp->s_state = OP(RDMA_WRITE_ONLY); | ||
199 | else { | ||
200 | qp->s_state = | ||
201 | OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); | ||
202 | /* Immediate data comes after the RETH */ | ||
203 | ohdr->u.rc.imm_data = wqe->wr.imm_data; | ||
204 | hwords += 1; | ||
205 | if (wqe->wr.send_flags & IB_SEND_SOLICITED) | ||
206 | bth0 |= 1 << 23; | ||
207 | } | ||
208 | break; | ||
209 | |||
210 | default: | ||
211 | goto done; | ||
212 | } | ||
213 | if (++qp->s_tail >= qp->s_size) | ||
214 | qp->s_tail = 0; | ||
215 | break; | ||
216 | |||
217 | case OP(SEND_FIRST): | ||
218 | qp->s_state = OP(SEND_MIDDLE); | ||
219 | /* FALLTHROUGH */ | ||
220 | case OP(SEND_MIDDLE): | ||
221 | len = qp->s_len; | ||
222 | if (len > pmtu) { | ||
223 | len = pmtu; | ||
224 | break; | ||
225 | } | ||
226 | if (wqe->wr.opcode == IB_WR_SEND) | ||
227 | qp->s_state = OP(SEND_LAST); | ||
228 | else { | ||
229 | qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); | ||
230 | /* Immediate data comes after the BTH */ | ||
231 | ohdr->u.imm_data = wqe->wr.imm_data; | ||
232 | hwords += 1; | ||
233 | } | ||
234 | if (wqe->wr.send_flags & IB_SEND_SOLICITED) | ||
235 | bth0 |= 1 << 23; | ||
236 | break; | ||
237 | |||
238 | case OP(RDMA_WRITE_FIRST): | ||
239 | qp->s_state = OP(RDMA_WRITE_MIDDLE); | ||
240 | /* FALLTHROUGH */ | ||
241 | case OP(RDMA_WRITE_MIDDLE): | ||
242 | len = qp->s_len; | ||
243 | if (len > pmtu) { | ||
244 | len = pmtu; | ||
245 | break; | ||
246 | } | ||
247 | if (wqe->wr.opcode == IB_WR_RDMA_WRITE) | ||
248 | qp->s_state = OP(RDMA_WRITE_LAST); | ||
249 | else { | ||
250 | qp->s_state = | ||
251 | OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); | ||
252 | /* Immediate data comes after the BTH */ | ||
253 | ohdr->u.imm_data = wqe->wr.imm_data; | ||
254 | hwords += 1; | ||
255 | if (wqe->wr.send_flags & IB_SEND_SOLICITED) | ||
256 | bth0 |= 1 << 23; | ||
257 | } | ||
258 | break; | ||
259 | } | ||
260 | bth2 = qp->s_next_psn++ & IPS_PSN_MASK; | ||
261 | qp->s_len -= len; | ||
262 | bth0 |= qp->s_state << 24; | ||
263 | |||
264 | spin_unlock_irqrestore(&qp->s_lock, flags); | ||
265 | |||
266 | /* Construct the header. */ | ||
267 | extra_bytes = (4 - len) & 3; | ||
268 | nwords = (len + extra_bytes) >> 2; | ||
269 | if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { | ||
270 | /* Header size in 32-bit words. */ | ||
271 | hwords += 10; | ||
272 | lrh0 = IPS_LRH_GRH; | ||
273 | qp->s_hdr.u.l.grh.version_tclass_flow = | ||
274 | cpu_to_be32((6 << 28) | | ||
275 | (qp->remote_ah_attr.grh.traffic_class | ||
276 | << 20) | | ||
277 | qp->remote_ah_attr.grh.flow_label); | ||
278 | qp->s_hdr.u.l.grh.paylen = | ||
279 | cpu_to_be16(((hwords - 12) + nwords + | ||
280 | SIZE_OF_CRC) << 2); | ||
281 | /* next_hdr is defined by C8-7 in ch. 8.4.1 */ | ||
282 | qp->s_hdr.u.l.grh.next_hdr = 0x1B; | ||
283 | qp->s_hdr.u.l.grh.hop_limit = | ||
284 | qp->remote_ah_attr.grh.hop_limit; | ||
285 | /* The SGID is 32-bit aligned. */ | ||
286 | qp->s_hdr.u.l.grh.sgid.global.subnet_prefix = | ||
287 | dev->gid_prefix; | ||
288 | qp->s_hdr.u.l.grh.sgid.global.interface_id = | ||
289 | ipath_layer_get_guid(dev->dd); | ||
290 | qp->s_hdr.u.l.grh.dgid = qp->remote_ah_attr.grh.dgid; | ||
291 | } | ||
292 | qp->s_hdrwords = hwords; | ||
293 | qp->s_cur_sge = &qp->s_sge; | ||
294 | qp->s_cur_size = len; | ||
295 | lrh0 |= qp->remote_ah_attr.sl << 4; | ||
296 | qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); | ||
297 | /* DEST LID */ | ||
298 | qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); | ||
299 | qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC); | ||
300 | qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); | ||
301 | bth0 |= extra_bytes << 20; | ||
302 | ohdr->bth[0] = cpu_to_be32(bth0); | ||
303 | ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); | ||
304 | ohdr->bth[2] = cpu_to_be32(bth2); | ||
305 | |||
306 | /* Check for more work to do. */ | ||
307 | goto again; | ||
308 | |||
309 | done: | ||
310 | spin_unlock_irqrestore(&qp->s_lock, flags); | ||
311 | clear_bit(IPATH_S_BUSY, &qp->s_flags); | ||
312 | |||
313 | bail: | ||
314 | return; | ||
315 | } | ||
316 | |||
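
The header bookkeeping in ipath_do_uc_send() is all done in 32-bit words: 5 words for LRH+BTH, 10 more for a GRH, one more for immediate data, with the payload rounded up to a word boundary and the pad count stored in BTH bits 20-21. The sketch below redoes that arithmetic outside the driver; SIZE_OF_CRC is the driver's one-word ICRC constant, everything else here is illustrative.

```c
/*
 * Standalone sketch of the length math used when ipath_do_uc_send()
 * builds a packet: header words, payload rounded up to a 32-bit
 * boundary, and the pad count that ends up in BTH[0] bits 20-21.
 */
#include <stdio.h>

#define SIZE_OF_CRC 1	/* ICRC, in 32-bit words */

struct pkt_len {
	unsigned hwords;	/* header length in 32-bit words */
	unsigned nwords;	/* payload length in 32-bit words */
	unsigned extra_bytes;	/* pad bytes encoded in BTH[0] bits 20-21 */
	unsigned total_words;	/* what goes into the LRH packet length */
};

static struct pkt_len uc_pkt_len(unsigned payload_bytes, int has_grh, int has_imm)
{
	struct pkt_len p;

	p.hwords = 5;			/* LRH (8) + BTH (12) = 20 bytes */
	if (has_grh)
		p.hwords += 10;		/* GRH is 40 bytes */
	if (has_imm)
		p.hwords += 1;		/* immediate data follows the BTH */
	p.extra_bytes = (4 - payload_bytes) & 3;
	p.nwords = (payload_bytes + p.extra_bytes) >> 2;
	p.total_words = p.hwords + p.nwords + SIZE_OF_CRC;
	return p;
}

int main(void)
{
	struct pkt_len p = uc_pkt_len(37, 0, 1);

	printf("hwords=%u nwords=%u pad=%u total=%u\n",
	       p.hwords, p.nwords, p.extra_bytes, p.total_words);
	return 0;
}
```
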
317 | /** | ||
318 | * ipath_uc_rcv - handle an incoming UC packet | ||
319 | * @dev: the device the packet came in on | ||
320 | * @hdr: the header of the packet | ||
321 | * @has_grh: true if the packet has a GRH | ||
322 | * @data: the packet data | ||
323 | * @tlen: the length of the packet | ||
324 | * @qp: the QP for this packet. | ||
325 | * | ||
326 | * This is called from ipath_qp_rcv() to process an incoming UC packet | ||
327 | * for the given QP. | ||
328 | * Called at interrupt level. | ||
329 | */ | ||
330 | void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | ||
331 | int has_grh, void *data, u32 tlen, struct ipath_qp *qp) | ||
332 | { | ||
333 | struct ipath_other_headers *ohdr; | ||
334 | int opcode; | ||
335 | u32 hdrsize; | ||
336 | u32 psn; | ||
337 | u32 pad; | ||
338 | unsigned long flags; | ||
339 | struct ib_wc wc; | ||
340 | u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); | ||
341 | struct ib_reth *reth; | ||
342 | int header_in_data; | ||
343 | |||
344 | /* Check for GRH */ | ||
345 | if (!has_grh) { | ||
346 | ohdr = &hdr->u.oth; | ||
347 | hdrsize = 8 + 12; /* LRH + BTH */ | ||
348 | psn = be32_to_cpu(ohdr->bth[2]); | ||
349 | header_in_data = 0; | ||
350 | } else { | ||
351 | ohdr = &hdr->u.l.oth; | ||
352 | hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */ | ||
353 | /* | ||
354 | * The header with GRH is 60 bytes and the | ||
355 | * core driver sets the eager header buffer | ||
356 | 	 * size to 56 bytes, so the last 4 bytes of | ||
357 | 	 * the BTH header (the PSN) are in the data buffer. | ||
358 | */ | ||
359 | header_in_data = | ||
360 | ipath_layer_get_rcvhdrentsize(dev->dd) == 16; | ||
361 | if (header_in_data) { | ||
362 | psn = be32_to_cpu(((__be32 *) data)[0]); | ||
363 | data += sizeof(__be32); | ||
364 | } else | ||
365 | psn = be32_to_cpu(ohdr->bth[2]); | ||
366 | } | ||
367 | /* | ||
368 | 	 * The opcode is in the low byte when it's in network order | ||
369 | * (top byte when in host order). | ||
370 | */ | ||
371 | opcode = be32_to_cpu(ohdr->bth[0]) >> 24; | ||
372 | |||
373 | wc.imm_data = 0; | ||
374 | wc.wc_flags = 0; | ||
375 | |||
376 | spin_lock_irqsave(&qp->r_rq.lock, flags); | ||
377 | |||
378 | 	/* Compare the PSN versus the expected PSN. */ | ||
379 | if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) { | ||
380 | /* | ||
381 | * Handle a sequence error. | ||
382 | * Silently drop any current message. | ||
383 | */ | ||
384 | qp->r_psn = psn; | ||
385 | inv: | ||
386 | qp->r_state = OP(SEND_LAST); | ||
387 | switch (opcode) { | ||
388 | case OP(SEND_FIRST): | ||
389 | case OP(SEND_ONLY): | ||
390 | case OP(SEND_ONLY_WITH_IMMEDIATE): | ||
391 | goto send_first; | ||
392 | |||
393 | case OP(RDMA_WRITE_FIRST): | ||
394 | case OP(RDMA_WRITE_ONLY): | ||
395 | case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): | ||
396 | goto rdma_first; | ||
397 | |||
398 | default: | ||
399 | dev->n_pkt_drops++; | ||
400 | goto done; | ||
401 | } | ||
402 | } | ||
403 | |||
404 | /* Check for opcode sequence errors. */ | ||
405 | switch (qp->r_state) { | ||
406 | case OP(SEND_FIRST): | ||
407 | case OP(SEND_MIDDLE): | ||
408 | if (opcode == OP(SEND_MIDDLE) || | ||
409 | opcode == OP(SEND_LAST) || | ||
410 | opcode == OP(SEND_LAST_WITH_IMMEDIATE)) | ||
411 | break; | ||
412 | goto inv; | ||
413 | |||
414 | case OP(RDMA_WRITE_FIRST): | ||
415 | case OP(RDMA_WRITE_MIDDLE): | ||
416 | if (opcode == OP(RDMA_WRITE_MIDDLE) || | ||
417 | opcode == OP(RDMA_WRITE_LAST) || | ||
418 | opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) | ||
419 | break; | ||
420 | goto inv; | ||
421 | |||
422 | default: | ||
423 | if (opcode == OP(SEND_FIRST) || | ||
424 | opcode == OP(SEND_ONLY) || | ||
425 | opcode == OP(SEND_ONLY_WITH_IMMEDIATE) || | ||
426 | opcode == OP(RDMA_WRITE_FIRST) || | ||
427 | opcode == OP(RDMA_WRITE_ONLY) || | ||
428 | opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) | ||
429 | break; | ||
430 | goto inv; | ||
431 | } | ||
432 | |||
433 | /* OK, process the packet. */ | ||
434 | switch (opcode) { | ||
435 | case OP(SEND_FIRST): | ||
436 | case OP(SEND_ONLY): | ||
437 | case OP(SEND_ONLY_WITH_IMMEDIATE): | ||
438 | send_first: | ||
439 | if (qp->r_reuse_sge) { | ||
440 | qp->r_reuse_sge = 0; | ||
441 | qp->r_sge = qp->s_rdma_sge; | ||
442 | } else if (!ipath_get_rwqe(qp, 0)) { | ||
443 | dev->n_pkt_drops++; | ||
444 | goto done; | ||
445 | } | ||
446 | /* Save the WQE so we can reuse it in case of an error. */ | ||
447 | qp->s_rdma_sge = qp->r_sge; | ||
448 | qp->r_rcv_len = 0; | ||
449 | if (opcode == OP(SEND_ONLY)) | ||
450 | goto send_last; | ||
451 | else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE)) | ||
452 | goto send_last_imm; | ||
453 | /* FALLTHROUGH */ | ||
454 | case OP(SEND_MIDDLE): | ||
455 | 		/* Check for an invalid length (PMTU mismatch) or posted rwqe overrun. */ | ||
456 | if (unlikely(tlen != (hdrsize + pmtu + 4))) { | ||
457 | qp->r_reuse_sge = 1; | ||
458 | dev->n_pkt_drops++; | ||
459 | goto done; | ||
460 | } | ||
461 | qp->r_rcv_len += pmtu; | ||
462 | if (unlikely(qp->r_rcv_len > qp->r_len)) { | ||
463 | qp->r_reuse_sge = 1; | ||
464 | dev->n_pkt_drops++; | ||
465 | goto done; | ||
466 | } | ||
467 | ipath_copy_sge(&qp->r_sge, data, pmtu); | ||
468 | break; | ||
469 | |||
470 | case OP(SEND_LAST_WITH_IMMEDIATE): | ||
471 | send_last_imm: | ||
472 | if (header_in_data) { | ||
473 | wc.imm_data = *(__be32 *) data; | ||
474 | data += sizeof(__be32); | ||
475 | } else { | ||
476 | /* Immediate data comes after BTH */ | ||
477 | wc.imm_data = ohdr->u.imm_data; | ||
478 | } | ||
479 | hdrsize += 4; | ||
480 | wc.wc_flags = IB_WC_WITH_IMM; | ||
481 | /* FALLTHROUGH */ | ||
482 | case OP(SEND_LAST): | ||
483 | send_last: | ||
484 | /* Get the number of bytes the message was padded by. */ | ||
485 | pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; | ||
486 | /* Check for invalid length. */ | ||
487 | /* XXX LAST len should be >= 1 */ | ||
488 | if (unlikely(tlen < (hdrsize + pad + 4))) { | ||
489 | qp->r_reuse_sge = 1; | ||
490 | dev->n_pkt_drops++; | ||
491 | goto done; | ||
492 | } | ||
493 | /* Don't count the CRC. */ | ||
494 | tlen -= (hdrsize + pad + 4); | ||
495 | wc.byte_len = tlen + qp->r_rcv_len; | ||
496 | if (unlikely(wc.byte_len > qp->r_len)) { | ||
497 | qp->r_reuse_sge = 1; | ||
498 | dev->n_pkt_drops++; | ||
499 | goto done; | ||
500 | } | ||
501 | /* XXX Need to free SGEs */ | ||
502 | last_imm: | ||
503 | ipath_copy_sge(&qp->r_sge, data, tlen); | ||
504 | wc.wr_id = qp->r_wr_id; | ||
505 | wc.status = IB_WC_SUCCESS; | ||
506 | wc.opcode = IB_WC_RECV; | ||
507 | wc.vendor_err = 0; | ||
508 | wc.qp_num = qp->ibqp.qp_num; | ||
509 | wc.src_qp = qp->remote_qpn; | ||
510 | wc.pkey_index = 0; | ||
511 | wc.slid = qp->remote_ah_attr.dlid; | ||
512 | wc.sl = qp->remote_ah_attr.sl; | ||
513 | wc.dlid_path_bits = 0; | ||
514 | wc.port_num = 0; | ||
515 | /* Signal completion event if the solicited bit is set. */ | ||
516 | ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, | ||
517 | (ohdr->bth[0] & | ||
518 | __constant_cpu_to_be32(1 << 23)) != 0); | ||
519 | break; | ||
520 | |||
521 | case OP(RDMA_WRITE_FIRST): | ||
522 | case OP(RDMA_WRITE_ONLY): | ||
523 | case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */ | ||
524 | rdma_first: | ||
525 | /* RETH comes after BTH */ | ||
526 | if (!header_in_data) | ||
527 | reth = &ohdr->u.rc.reth; | ||
528 | else { | ||
529 | reth = (struct ib_reth *)data; | ||
530 | data += sizeof(*reth); | ||
531 | } | ||
532 | hdrsize += sizeof(*reth); | ||
533 | qp->r_len = be32_to_cpu(reth->length); | ||
534 | qp->r_rcv_len = 0; | ||
535 | if (qp->r_len != 0) { | ||
536 | u32 rkey = be32_to_cpu(reth->rkey); | ||
537 | u64 vaddr = be64_to_cpu(reth->vaddr); | ||
538 | |||
539 | /* Check rkey */ | ||
540 | if (unlikely(!ipath_rkey_ok( | ||
541 | dev, &qp->r_sge, qp->r_len, | ||
542 | vaddr, rkey, | ||
543 | IB_ACCESS_REMOTE_WRITE))) { | ||
544 | dev->n_pkt_drops++; | ||
545 | goto done; | ||
546 | } | ||
547 | } else { | ||
548 | qp->r_sge.sg_list = NULL; | ||
549 | qp->r_sge.sge.mr = NULL; | ||
550 | qp->r_sge.sge.vaddr = NULL; | ||
551 | qp->r_sge.sge.length = 0; | ||
552 | qp->r_sge.sge.sge_length = 0; | ||
553 | } | ||
554 | if (unlikely(!(qp->qp_access_flags & | ||
555 | IB_ACCESS_REMOTE_WRITE))) { | ||
556 | dev->n_pkt_drops++; | ||
557 | goto done; | ||
558 | } | ||
559 | if (opcode == OP(RDMA_WRITE_ONLY)) | ||
560 | goto rdma_last; | ||
561 | else if (opcode == | ||
562 | OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) | ||
563 | goto rdma_last_imm; | ||
564 | /* FALLTHROUGH */ | ||
565 | case OP(RDMA_WRITE_MIDDLE): | ||
566 | 		/* Check for an invalid length (PMTU mismatch) or posted rwqe overrun. */ | ||
567 | if (unlikely(tlen != (hdrsize + pmtu + 4))) { | ||
568 | dev->n_pkt_drops++; | ||
569 | goto done; | ||
570 | } | ||
571 | qp->r_rcv_len += pmtu; | ||
572 | if (unlikely(qp->r_rcv_len > qp->r_len)) { | ||
573 | dev->n_pkt_drops++; | ||
574 | goto done; | ||
575 | } | ||
576 | ipath_copy_sge(&qp->r_sge, data, pmtu); | ||
577 | break; | ||
578 | |||
579 | case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): | ||
580 | rdma_last_imm: | ||
581 | /* Get the number of bytes the message was padded by. */ | ||
582 | pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; | ||
583 | /* Check for invalid length. */ | ||
584 | /* XXX LAST len should be >= 1 */ | ||
585 | if (unlikely(tlen < (hdrsize + pad + 4))) { | ||
586 | dev->n_pkt_drops++; | ||
587 | goto done; | ||
588 | } | ||
589 | /* Don't count the CRC. */ | ||
590 | tlen -= (hdrsize + pad + 4); | ||
591 | if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) { | ||
592 | dev->n_pkt_drops++; | ||
593 | goto done; | ||
594 | } | ||
595 | if (qp->r_reuse_sge) { | ||
596 | qp->r_reuse_sge = 0; | ||
597 | } else if (!ipath_get_rwqe(qp, 1)) { | ||
598 | dev->n_pkt_drops++; | ||
599 | goto done; | ||
600 | } | ||
601 | if (header_in_data) { | ||
602 | wc.imm_data = *(__be32 *) data; | ||
603 | data += sizeof(__be32); | ||
604 | } else { | ||
605 | /* Immediate data comes after BTH */ | ||
606 | wc.imm_data = ohdr->u.imm_data; | ||
607 | } | ||
608 | hdrsize += 4; | ||
609 | wc.wc_flags = IB_WC_WITH_IMM; | ||
610 | wc.byte_len = 0; | ||
611 | goto last_imm; | ||
612 | |||
613 | case OP(RDMA_WRITE_LAST): | ||
614 | rdma_last: | ||
615 | /* Get the number of bytes the message was padded by. */ | ||
616 | pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; | ||
617 | /* Check for invalid length. */ | ||
618 | /* XXX LAST len should be >= 1 */ | ||
619 | if (unlikely(tlen < (hdrsize + pad + 4))) { | ||
620 | dev->n_pkt_drops++; | ||
621 | goto done; | ||
622 | } | ||
623 | /* Don't count the CRC. */ | ||
624 | tlen -= (hdrsize + pad + 4); | ||
625 | if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) { | ||
626 | dev->n_pkt_drops++; | ||
627 | goto done; | ||
628 | } | ||
629 | ipath_copy_sge(&qp->r_sge, data, tlen); | ||
630 | break; | ||
631 | |||
632 | default: | ||
633 | /* Drop packet for unknown opcodes. */ | ||
634 | spin_unlock_irqrestore(&qp->r_rq.lock, flags); | ||
635 | dev->n_pkt_drops++; | ||
636 | goto bail; | ||
637 | } | ||
638 | qp->r_psn++; | ||
639 | qp->r_state = opcode; | ||
640 | done: | ||
641 | spin_unlock_irqrestore(&qp->r_rq.lock, flags); | ||
642 | |||
643 | bail: | ||
644 | return; | ||
645 | } | ||
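
On the receive side the driver repeatedly picks apart BTH word 0 after byte-swapping it: the opcode lives in the top byte, the pad count in bits 20-21, and bit 23 is the solicited-event flag used when queuing the completion. A small standalone parser for those fields (the opcode value in main() is only an example):

```c
#include <stdio.h>
#include <stdint.h>

struct bth0_fields {
	unsigned opcode;	/* top byte */
	unsigned pad;		/* bits 20-21 */
	int solicited;		/* bit 23 */
};

static struct bth0_fields parse_bth0(uint32_t bth0)	/* host byte order */
{
	struct bth0_fields f;

	f.opcode = bth0 >> 24;
	f.pad = (bth0 >> 20) & 3;
	f.solicited = (bth0 & (1u << 23)) != 0;
	return f;
}

int main(void)
{
	/* Example word: opcode 0x24, 2 pad bytes, solicited event requested. */
	struct bth0_fields f = parse_bth0((0x24u << 24) | (2u << 20) | (1u << 23));

	printf("opcode=0x%x pad=%u solicited=%d\n", f.opcode, f.pad, f.solicited);
	return 0;
}
```
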
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c new file mode 100644 index 000000000000..01cfb30ee160 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_ud.c | |||
@@ -0,0 +1,621 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <rdma/ib_smi.h> | ||
34 | |||
35 | #include "ipath_verbs.h" | ||
36 | #include "ips_common.h" | ||
37 | |||
38 | /** | ||
39 | * ipath_ud_loopback - handle send on loopback QPs | ||
40 | * @sqp: the QP | ||
41 | * @ss: the SGE state | ||
42 | * @length: the length of the data to send | ||
43 | * @wr: the work request | ||
44 | * @wc: the work completion entry | ||
45 | * | ||
46 | * This is called from ipath_post_ud_send() to forward a WQE addressed | ||
47 | * to the same HCA. | ||
48 | */ | ||
49 | static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_sge_state *ss, | ||
50 | u32 length, struct ib_send_wr *wr, struct ib_wc *wc) | ||
51 | { | ||
52 | struct ipath_ibdev *dev = to_idev(sqp->ibqp.device); | ||
53 | struct ipath_qp *qp; | ||
54 | struct ib_ah_attr *ah_attr; | ||
55 | unsigned long flags; | ||
56 | struct ipath_rq *rq; | ||
57 | struct ipath_srq *srq; | ||
58 | struct ipath_sge_state rsge; | ||
59 | struct ipath_sge *sge; | ||
60 | struct ipath_rwqe *wqe; | ||
61 | |||
62 | qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn); | ||
63 | if (!qp) | ||
64 | return; | ||
65 | |||
66 | /* | ||
67 | * Check that the qkey matches (except for QP0, see 9.6.1.4.1). | ||
68 | * Qkeys with the high order bit set mean use the | ||
69 | * qkey from the QP context instead of the WR (see 10.2.5). | ||
70 | */ | ||
71 | if (unlikely(qp->ibqp.qp_num && | ||
72 | ((int) wr->wr.ud.remote_qkey < 0 | ||
73 | ? qp->qkey : wr->wr.ud.remote_qkey) != qp->qkey)) { | ||
74 | /* XXX OK to lose a count once in a while. */ | ||
75 | dev->qkey_violations++; | ||
76 | dev->n_pkt_drops++; | ||
77 | goto done; | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | 	 * A GRH is expected to precede the data even if not | ||
82 | * present on the wire. | ||
83 | */ | ||
84 | wc->byte_len = length + sizeof(struct ib_grh); | ||
85 | |||
86 | if (wr->opcode == IB_WR_SEND_WITH_IMM) { | ||
87 | wc->wc_flags = IB_WC_WITH_IMM; | ||
88 | wc->imm_data = wr->imm_data; | ||
89 | } else { | ||
90 | wc->wc_flags = 0; | ||
91 | wc->imm_data = 0; | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * Get the next work request entry to find where to put the data. | ||
96 | * Note that it is safe to drop the lock after changing rq->tail | ||
97 | * since ipath_post_receive() won't fill the empty slot. | ||
98 | */ | ||
99 | if (qp->ibqp.srq) { | ||
100 | srq = to_isrq(qp->ibqp.srq); | ||
101 | rq = &srq->rq; | ||
102 | } else { | ||
103 | srq = NULL; | ||
104 | rq = &qp->r_rq; | ||
105 | } | ||
106 | spin_lock_irqsave(&rq->lock, flags); | ||
107 | if (rq->tail == rq->head) { | ||
108 | spin_unlock_irqrestore(&rq->lock, flags); | ||
109 | dev->n_pkt_drops++; | ||
110 | goto done; | ||
111 | } | ||
112 | /* Silently drop packets which are too big. */ | ||
113 | wqe = get_rwqe_ptr(rq, rq->tail); | ||
114 | if (wc->byte_len > wqe->length) { | ||
115 | spin_unlock_irqrestore(&rq->lock, flags); | ||
116 | dev->n_pkt_drops++; | ||
117 | goto done; | ||
118 | } | ||
119 | wc->wr_id = wqe->wr_id; | ||
120 | rsge.sge = wqe->sg_list[0]; | ||
121 | rsge.sg_list = wqe->sg_list + 1; | ||
122 | rsge.num_sge = wqe->num_sge; | ||
123 | if (++rq->tail >= rq->size) | ||
124 | rq->tail = 0; | ||
125 | if (srq && srq->ibsrq.event_handler) { | ||
126 | u32 n; | ||
127 | |||
128 | if (rq->head < rq->tail) | ||
129 | n = rq->size + rq->head - rq->tail; | ||
130 | else | ||
131 | n = rq->head - rq->tail; | ||
132 | if (n < srq->limit) { | ||
133 | struct ib_event ev; | ||
134 | |||
135 | srq->limit = 0; | ||
136 | spin_unlock_irqrestore(&rq->lock, flags); | ||
137 | ev.device = qp->ibqp.device; | ||
138 | ev.element.srq = qp->ibqp.srq; | ||
139 | ev.event = IB_EVENT_SRQ_LIMIT_REACHED; | ||
140 | srq->ibsrq.event_handler(&ev, | ||
141 | srq->ibsrq.srq_context); | ||
142 | } else | ||
143 | spin_unlock_irqrestore(&rq->lock, flags); | ||
144 | } else | ||
145 | spin_unlock_irqrestore(&rq->lock, flags); | ||
146 | ah_attr = &to_iah(wr->wr.ud.ah)->attr; | ||
147 | if (ah_attr->ah_flags & IB_AH_GRH) { | ||
148 | ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh)); | ||
149 | wc->wc_flags |= IB_WC_GRH; | ||
150 | } else | ||
151 | ipath_skip_sge(&rsge, sizeof(struct ib_grh)); | ||
152 | sge = &ss->sge; | ||
153 | while (length) { | ||
154 | u32 len = sge->length; | ||
155 | |||
156 | if (len > length) | ||
157 | len = length; | ||
158 | BUG_ON(len == 0); | ||
159 | ipath_copy_sge(&rsge, sge->vaddr, len); | ||
160 | sge->vaddr += len; | ||
161 | sge->length -= len; | ||
162 | sge->sge_length -= len; | ||
163 | if (sge->sge_length == 0) { | ||
164 | if (--ss->num_sge) | ||
165 | *sge = *ss->sg_list++; | ||
166 | } else if (sge->length == 0 && sge->mr != NULL) { | ||
167 | if (++sge->n >= IPATH_SEGSZ) { | ||
168 | if (++sge->m >= sge->mr->mapsz) | ||
169 | break; | ||
170 | sge->n = 0; | ||
171 | } | ||
172 | sge->vaddr = | ||
173 | sge->mr->map[sge->m]->segs[sge->n].vaddr; | ||
174 | sge->length = | ||
175 | sge->mr->map[sge->m]->segs[sge->n].length; | ||
176 | } | ||
177 | length -= len; | ||
178 | } | ||
179 | wc->status = IB_WC_SUCCESS; | ||
180 | wc->opcode = IB_WC_RECV; | ||
181 | wc->vendor_err = 0; | ||
182 | wc->qp_num = qp->ibqp.qp_num; | ||
183 | wc->src_qp = sqp->ibqp.qp_num; | ||
184 | /* XXX do we know which pkey matched? Only needed for GSI. */ | ||
185 | wc->pkey_index = 0; | ||
186 | wc->slid = ipath_layer_get_lid(dev->dd) | | ||
187 | (ah_attr->src_path_bits & | ||
188 | ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1)); | ||
189 | wc->sl = ah_attr->sl; | ||
190 | wc->dlid_path_bits = | ||
191 | ah_attr->dlid & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); | ||
192 | /* Signal completion event if the solicited bit is set. */ | ||
193 | ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc, | ||
194 | wr->send_flags & IB_SEND_SOLICITED); | ||
195 | |||
196 | done: | ||
197 | if (atomic_dec_and_test(&qp->refcount)) | ||
198 | wake_up(&qp->wait); | ||
199 | } | ||
200 | |||
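
The qkey test at the top of ipath_ud_loopback() implements the rule cited in its comment: a work-request qkey with the high-order bit set means "use the qkey from the QP context" (IB spec 10.2.5). A minimal sketch of that selection:

```c
#include <stdio.h>
#include <stdint.h>

static uint32_t effective_qkey(uint32_t wr_qkey, uint32_t qp_qkey)
{
	/* High-order bit set (i.e. negative as an int) selects the QP's qkey. */
	return (int32_t)wr_qkey < 0 ? qp_qkey : wr_qkey;
}

int main(void)
{
	printf("%#x\n", effective_qkey(0x80000000u, 0x1234u));	/* -> 0x1234 */
	printf("%#x\n", effective_qkey(0x5678u, 0x1234u));	/* -> 0x5678 */
	return 0;
}
```
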
201 | /** | ||
202 | * ipath_post_ud_send - post a UD send on QP | ||
203 | * @qp: the QP | ||
204 | * @wr: the work request | ||
205 | * | ||
206 | * Note that we actually send the data as it is posted instead of putting | ||
207 | * the request into a ring buffer. If we wanted to use a ring buffer, | ||
208 | * we would need to save a reference to the destination address in the SWQE. | ||
209 | */ | ||
210 | int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) | ||
211 | { | ||
212 | struct ipath_ibdev *dev = to_idev(qp->ibqp.device); | ||
213 | struct ipath_other_headers *ohdr; | ||
214 | struct ib_ah_attr *ah_attr; | ||
215 | struct ipath_sge_state ss; | ||
216 | struct ipath_sge *sg_list; | ||
217 | struct ib_wc wc; | ||
218 | u32 hwords; | ||
219 | u32 nwords; | ||
220 | u32 len; | ||
221 | u32 extra_bytes; | ||
222 | u32 bth0; | ||
223 | u16 lrh0; | ||
224 | u16 lid; | ||
225 | int i; | ||
226 | int ret; | ||
227 | |||
228 | if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { | ||
229 | ret = 0; | ||
230 | goto bail; | ||
231 | } | ||
232 | |||
233 | /* IB spec says that num_sge == 0 is OK. */ | ||
234 | if (wr->num_sge > qp->s_max_sge) { | ||
235 | ret = -EINVAL; | ||
236 | goto bail; | ||
237 | } | ||
238 | |||
239 | if (wr->num_sge > 1) { | ||
240 | sg_list = kmalloc((qp->s_max_sge - 1) * sizeof(*sg_list), | ||
241 | GFP_ATOMIC); | ||
242 | if (!sg_list) { | ||
243 | ret = -ENOMEM; | ||
244 | goto bail; | ||
245 | } | ||
246 | } else | ||
247 | sg_list = NULL; | ||
248 | |||
249 | /* Check the buffer to send. */ | ||
250 | ss.sg_list = sg_list; | ||
251 | ss.sge.mr = NULL; | ||
252 | ss.sge.vaddr = NULL; | ||
253 | ss.sge.length = 0; | ||
254 | ss.sge.sge_length = 0; | ||
255 | ss.num_sge = 0; | ||
256 | len = 0; | ||
257 | for (i = 0; i < wr->num_sge; i++) { | ||
258 | /* Check LKEY */ | ||
259 | if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) { | ||
260 | ret = -EINVAL; | ||
261 | goto bail; | ||
262 | } | ||
263 | |||
264 | if (wr->sg_list[i].length == 0) | ||
265 | continue; | ||
266 | if (!ipath_lkey_ok(&dev->lk_table, ss.num_sge ? | ||
267 | sg_list + ss.num_sge - 1 : &ss.sge, | ||
268 | &wr->sg_list[i], 0)) { | ||
269 | ret = -EINVAL; | ||
270 | goto bail; | ||
271 | } | ||
272 | len += wr->sg_list[i].length; | ||
273 | ss.num_sge++; | ||
274 | } | ||
275 | extra_bytes = (4 - len) & 3; | ||
276 | nwords = (len + extra_bytes) >> 2; | ||
277 | |||
278 | /* Construct the header. */ | ||
279 | ah_attr = &to_iah(wr->wr.ud.ah)->attr; | ||
280 | if (ah_attr->dlid == 0) { | ||
281 | ret = -EINVAL; | ||
282 | goto bail; | ||
283 | } | ||
284 | if (ah_attr->dlid >= IPS_MULTICAST_LID_BASE) { | ||
285 | if (ah_attr->dlid != IPS_PERMISSIVE_LID) | ||
286 | dev->n_multicast_xmit++; | ||
287 | else | ||
288 | dev->n_unicast_xmit++; | ||
289 | } else { | ||
290 | dev->n_unicast_xmit++; | ||
291 | lid = ah_attr->dlid & | ||
292 | ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); | ||
293 | if (unlikely(lid == ipath_layer_get_lid(dev->dd))) { | ||
294 | /* | ||
295 | * Pass in an uninitialized ib_wc to save stack | ||
296 | * space. | ||
297 | */ | ||
298 | ipath_ud_loopback(qp, &ss, len, wr, &wc); | ||
299 | goto done; | ||
300 | } | ||
301 | } | ||
302 | if (ah_attr->ah_flags & IB_AH_GRH) { | ||
303 | /* Header size in 32-bit words. */ | ||
304 | hwords = 17; | ||
305 | lrh0 = IPS_LRH_GRH; | ||
306 | ohdr = &qp->s_hdr.u.l.oth; | ||
307 | qp->s_hdr.u.l.grh.version_tclass_flow = | ||
308 | cpu_to_be32((6 << 28) | | ||
309 | (ah_attr->grh.traffic_class << 20) | | ||
310 | ah_attr->grh.flow_label); | ||
311 | qp->s_hdr.u.l.grh.paylen = | ||
312 | cpu_to_be16(((wr->opcode == | ||
313 | IB_WR_SEND_WITH_IMM ? 6 : 5) + | ||
314 | nwords + SIZE_OF_CRC) << 2); | ||
315 | /* next_hdr is defined by C8-7 in ch. 8.4.1 */ | ||
316 | qp->s_hdr.u.l.grh.next_hdr = 0x1B; | ||
317 | qp->s_hdr.u.l.grh.hop_limit = ah_attr->grh.hop_limit; | ||
318 | /* The SGID is 32-bit aligned. */ | ||
319 | qp->s_hdr.u.l.grh.sgid.global.subnet_prefix = | ||
320 | dev->gid_prefix; | ||
321 | qp->s_hdr.u.l.grh.sgid.global.interface_id = | ||
322 | ipath_layer_get_guid(dev->dd); | ||
323 | qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid; | ||
324 | /* | ||
325 | * Don't worry about sending to locally attached multicast | ||
326 | 	 * QPs; what happens in that case is unspecified by the spec. | ||
327 | */ | ||
328 | } else { | ||
329 | /* Header size in 32-bit words. */ | ||
330 | hwords = 7; | ||
331 | lrh0 = IPS_LRH_BTH; | ||
332 | ohdr = &qp->s_hdr.u.oth; | ||
333 | } | ||
334 | if (wr->opcode == IB_WR_SEND_WITH_IMM) { | ||
335 | ohdr->u.ud.imm_data = wr->imm_data; | ||
336 | wc.imm_data = wr->imm_data; | ||
337 | hwords += 1; | ||
338 | bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; | ||
339 | } else if (wr->opcode == IB_WR_SEND) { | ||
340 | wc.imm_data = 0; | ||
341 | bth0 = IB_OPCODE_UD_SEND_ONLY << 24; | ||
342 | } else { | ||
343 | ret = -EINVAL; | ||
344 | goto bail; | ||
345 | } | ||
346 | lrh0 |= ah_attr->sl << 4; | ||
347 | if (qp->ibqp.qp_type == IB_QPT_SMI) | ||
348 | lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */ | ||
349 | qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); | ||
350 | qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ | ||
351 | qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC); | ||
352 | lid = ipath_layer_get_lid(dev->dd); | ||
353 | if (lid) { | ||
354 | lid |= ah_attr->src_path_bits & | ||
355 | ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); | ||
356 | qp->s_hdr.lrh[3] = cpu_to_be16(lid); | ||
357 | } else | ||
358 | qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE; | ||
359 | if (wr->send_flags & IB_SEND_SOLICITED) | ||
360 | bth0 |= 1 << 23; | ||
361 | bth0 |= extra_bytes << 20; | ||
362 | bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPS_DEFAULT_P_KEY : | ||
363 | ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); | ||
364 | ohdr->bth[0] = cpu_to_be32(bth0); | ||
365 | /* | ||
366 | * Use the multicast QP if the destination LID is a multicast LID. | ||
367 | */ | ||
368 | ohdr->bth[1] = ah_attr->dlid >= IPS_MULTICAST_LID_BASE && | ||
369 | ah_attr->dlid != IPS_PERMISSIVE_LID ? | ||
370 | __constant_cpu_to_be32(IPS_MULTICAST_QPN) : | ||
371 | cpu_to_be32(wr->wr.ud.remote_qpn); | ||
372 | /* XXX Could lose a PSN count but not worth locking */ | ||
373 | ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPS_PSN_MASK); | ||
374 | /* | ||
375 | * Qkeys with the high order bit set mean use the | ||
376 | * qkey from the QP context instead of the WR (see 10.2.5). | ||
377 | */ | ||
378 | ohdr->u.ud.deth[0] = cpu_to_be32((int)wr->wr.ud.remote_qkey < 0 ? | ||
379 | qp->qkey : wr->wr.ud.remote_qkey); | ||
380 | ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); | ||
381 | if (ipath_verbs_send(dev->dd, hwords, (u32 *) &qp->s_hdr, | ||
382 | len, &ss)) | ||
383 | dev->n_no_piobuf++; | ||
384 | |||
385 | done: | ||
386 | /* Queue the completion status entry. */ | ||
387 | if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || | ||
388 | (wr->send_flags & IB_SEND_SIGNALED)) { | ||
389 | wc.wr_id = wr->wr_id; | ||
390 | wc.status = IB_WC_SUCCESS; | ||
391 | wc.vendor_err = 0; | ||
392 | wc.opcode = IB_WC_SEND; | ||
393 | wc.byte_len = len; | ||
394 | wc.qp_num = qp->ibqp.qp_num; | ||
395 | wc.src_qp = 0; | ||
396 | wc.wc_flags = 0; | ||
397 | /* XXX initialize other fields? */ | ||
398 | ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); | ||
399 | } | ||
400 | kfree(sg_list); | ||
401 | |||
402 | ret = 0; | ||
403 | |||
404 | bail: | ||
405 | return ret; | ||
406 | } | ||
407 | |||
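
Both ipath_ud_loopback() and ipath_post_ud_send() build the source LID by folding the address handle's path bits into the low LMC bits of the port LID (the LMC is kept in the low bits of dev->mkeyprot_resv_lmc). A sketch of that masking, assuming the base LID has its LMC bits clear as the driver does:

```c
#include <stdio.h>
#include <stdint.h>

static uint16_t slid_with_path_bits(uint16_t base_lid, uint8_t src_path_bits,
				    uint8_t lmc)
{
	/* The driver assumes the base LID already has its low LMC bits clear. */
	return base_lid | (src_path_bits & ((1u << (lmc & 7)) - 1));
}

int main(void)
{
	/* LID 0x40 with LMC 2: the path bits pick one of four source LIDs. */
	printf("0x%x\n", (unsigned)slid_with_path_bits(0x40, 3, 2));	/* 0x43 */
	return 0;
}
```
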
408 | /** | ||
409 | * ipath_ud_rcv - receive an incoming UD packet | ||
410 | * @dev: the device the packet came in on | ||
411 | * @hdr: the packet header | ||
412 | * @has_grh: true if the packet has a GRH | ||
413 | * @data: the packet data | ||
414 | * @tlen: the packet length | ||
415 | * @qp: the QP the packet came on | ||
416 | * | ||
417 | * This is called from ipath_qp_rcv() to process an incoming UD packet | ||
418 | * for the given QP. | ||
419 | * Called at interrupt level. | ||
420 | */ | ||
421 | void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | ||
422 | int has_grh, void *data, u32 tlen, struct ipath_qp *qp) | ||
423 | { | ||
424 | struct ipath_other_headers *ohdr; | ||
425 | int opcode; | ||
426 | u32 hdrsize; | ||
427 | u32 pad; | ||
428 | unsigned long flags; | ||
429 | struct ib_wc wc; | ||
430 | u32 qkey; | ||
431 | u32 src_qp; | ||
432 | struct ipath_rq *rq; | ||
433 | struct ipath_srq *srq; | ||
434 | struct ipath_rwqe *wqe; | ||
435 | u16 dlid; | ||
436 | int header_in_data; | ||
437 | |||
438 | /* Check for GRH */ | ||
439 | if (!has_grh) { | ||
440 | ohdr = &hdr->u.oth; | ||
441 | hdrsize = 8 + 12 + 8; /* LRH + BTH + DETH */ | ||
442 | qkey = be32_to_cpu(ohdr->u.ud.deth[0]); | ||
443 | src_qp = be32_to_cpu(ohdr->u.ud.deth[1]); | ||
444 | header_in_data = 0; | ||
445 | } else { | ||
446 | ohdr = &hdr->u.l.oth; | ||
447 | hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */ | ||
448 | /* | ||
449 | * The header with GRH is 68 bytes and the core driver sets | ||
450 | 		 * the eager header buffer size to 56 bytes, so the last 12 | ||
451 | 		 * bytes of the IB header are in the data buffer. | ||
452 | */ | ||
453 | header_in_data = | ||
454 | ipath_layer_get_rcvhdrentsize(dev->dd) == 16; | ||
455 | if (header_in_data) { | ||
456 | qkey = be32_to_cpu(((__be32 *) data)[1]); | ||
457 | src_qp = be32_to_cpu(((__be32 *) data)[2]); | ||
458 | data += 12; | ||
459 | } else { | ||
460 | qkey = be32_to_cpu(ohdr->u.ud.deth[0]); | ||
461 | src_qp = be32_to_cpu(ohdr->u.ud.deth[1]); | ||
462 | } | ||
463 | } | ||
464 | src_qp &= IPS_QPN_MASK; | ||
465 | |||
466 | /* | ||
467 | * Check that the permissive LID is only used on QP0 | ||
468 | * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1). | ||
469 | */ | ||
470 | if (qp->ibqp.qp_num) { | ||
471 | if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE || | ||
472 | hdr->lrh[3] == IB_LID_PERMISSIVE)) { | ||
473 | dev->n_pkt_drops++; | ||
474 | goto bail; | ||
475 | } | ||
476 | if (unlikely(qkey != qp->qkey)) { | ||
477 | /* XXX OK to lose a count once in a while. */ | ||
478 | dev->qkey_violations++; | ||
479 | dev->n_pkt_drops++; | ||
480 | goto bail; | ||
481 | } | ||
482 | } else if (hdr->lrh[1] == IB_LID_PERMISSIVE || | ||
483 | hdr->lrh[3] == IB_LID_PERMISSIVE) { | ||
484 | struct ib_smp *smp = (struct ib_smp *) data; | ||
485 | |||
486 | if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { | ||
487 | dev->n_pkt_drops++; | ||
488 | goto bail; | ||
489 | } | ||
490 | } | ||
491 | |||
492 | /* Get the number of bytes the message was padded by. */ | ||
493 | pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; | ||
494 | if (unlikely(tlen < (hdrsize + pad + 4))) { | ||
495 | /* Drop incomplete packets. */ | ||
496 | dev->n_pkt_drops++; | ||
497 | goto bail; | ||
498 | } | ||
499 | tlen -= hdrsize + pad + 4; | ||
500 | |||
501 | /* Drop invalid MAD packets (see 13.5.3.1). */ | ||
502 | if (unlikely((qp->ibqp.qp_num == 0 && | ||
503 | (tlen != 256 || | ||
504 | (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)) || | ||
505 | (qp->ibqp.qp_num == 1 && | ||
506 | (tlen != 256 || | ||
507 | (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))) { | ||
508 | dev->n_pkt_drops++; | ||
509 | goto bail; | ||
510 | } | ||
511 | |||
512 | /* | ||
513 | 	 * A GRH is expected to precede the data even if not | ||
514 | * present on the wire. | ||
515 | */ | ||
516 | wc.byte_len = tlen + sizeof(struct ib_grh); | ||
517 | |||
518 | /* | ||
519 | 	 * The opcode is in the low byte when it's in network order | ||
520 | * (top byte when in host order). | ||
521 | */ | ||
522 | opcode = be32_to_cpu(ohdr->bth[0]) >> 24; | ||
523 | if (qp->ibqp.qp_num > 1 && | ||
524 | opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { | ||
525 | if (header_in_data) { | ||
526 | wc.imm_data = *(__be32 *) data; | ||
527 | data += sizeof(__be32); | ||
528 | } else | ||
529 | wc.imm_data = ohdr->u.ud.imm_data; | ||
530 | wc.wc_flags = IB_WC_WITH_IMM; | ||
531 | hdrsize += sizeof(u32); | ||
532 | } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { | ||
533 | wc.imm_data = 0; | ||
534 | wc.wc_flags = 0; | ||
535 | } else { | ||
536 | dev->n_pkt_drops++; | ||
537 | goto bail; | ||
538 | } | ||
539 | |||
540 | /* | ||
541 | * Get the next work request entry to find where to put the data. | ||
542 | * Note that it is safe to drop the lock after changing rq->tail | ||
543 | * since ipath_post_receive() won't fill the empty slot. | ||
544 | */ | ||
545 | if (qp->ibqp.srq) { | ||
546 | srq = to_isrq(qp->ibqp.srq); | ||
547 | rq = &srq->rq; | ||
548 | } else { | ||
549 | srq = NULL; | ||
550 | rq = &qp->r_rq; | ||
551 | } | ||
552 | spin_lock_irqsave(&rq->lock, flags); | ||
553 | if (rq->tail == rq->head) { | ||
554 | spin_unlock_irqrestore(&rq->lock, flags); | ||
555 | dev->n_pkt_drops++; | ||
556 | goto bail; | ||
557 | } | ||
558 | /* Silently drop packets which are too big. */ | ||
559 | wqe = get_rwqe_ptr(rq, rq->tail); | ||
560 | if (wc.byte_len > wqe->length) { | ||
561 | spin_unlock_irqrestore(&rq->lock, flags); | ||
562 | dev->n_pkt_drops++; | ||
563 | goto bail; | ||
564 | } | ||
565 | wc.wr_id = wqe->wr_id; | ||
566 | qp->r_sge.sge = wqe->sg_list[0]; | ||
567 | qp->r_sge.sg_list = wqe->sg_list + 1; | ||
568 | qp->r_sge.num_sge = wqe->num_sge; | ||
569 | if (++rq->tail >= rq->size) | ||
570 | rq->tail = 0; | ||
571 | if (srq && srq->ibsrq.event_handler) { | ||
572 | u32 n; | ||
573 | |||
574 | if (rq->head < rq->tail) | ||
575 | n = rq->size + rq->head - rq->tail; | ||
576 | else | ||
577 | n = rq->head - rq->tail; | ||
578 | if (n < srq->limit) { | ||
579 | struct ib_event ev; | ||
580 | |||
581 | srq->limit = 0; | ||
582 | spin_unlock_irqrestore(&rq->lock, flags); | ||
583 | ev.device = qp->ibqp.device; | ||
584 | ev.element.srq = qp->ibqp.srq; | ||
585 | ev.event = IB_EVENT_SRQ_LIMIT_REACHED; | ||
586 | srq->ibsrq.event_handler(&ev, | ||
587 | srq->ibsrq.srq_context); | ||
588 | } else | ||
589 | spin_unlock_irqrestore(&rq->lock, flags); | ||
590 | } else | ||
591 | spin_unlock_irqrestore(&rq->lock, flags); | ||
592 | if (has_grh) { | ||
593 | ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh, | ||
594 | sizeof(struct ib_grh)); | ||
595 | wc.wc_flags |= IB_WC_GRH; | ||
596 | } else | ||
597 | ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); | ||
598 | ipath_copy_sge(&qp->r_sge, data, | ||
599 | wc.byte_len - sizeof(struct ib_grh)); | ||
600 | wc.status = IB_WC_SUCCESS; | ||
601 | wc.opcode = IB_WC_RECV; | ||
602 | wc.vendor_err = 0; | ||
603 | wc.qp_num = qp->ibqp.qp_num; | ||
604 | wc.src_qp = src_qp; | ||
605 | /* XXX do we know which pkey matched? Only needed for GSI. */ | ||
606 | wc.pkey_index = 0; | ||
607 | wc.slid = be16_to_cpu(hdr->lrh[3]); | ||
608 | wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF; | ||
609 | dlid = be16_to_cpu(hdr->lrh[1]); | ||
610 | /* | ||
611 | * Save the LMC lower bits if the destination LID is a unicast LID. | ||
612 | */ | ||
613 | wc.dlid_path_bits = dlid >= IPS_MULTICAST_LID_BASE ? 0 : | ||
614 | dlid & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); | ||
615 | /* Signal completion event if the solicited bit is set. */ | ||
616 | ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, | ||
617 | (ohdr->bth[0] & | ||
618 | __constant_cpu_to_be32(1 << 23)) != 0); | ||
619 | |||
620 | bail:; | ||
621 | } | ||
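
The header_in_data handling in ipath_ud_rcv() comes down to byte accounting: with a GRH the IB header is LRH + GRH + BTH + DETH = 8 + 40 + 12 + 8 = 68 bytes, while the eager header buffer only holds 56, so the last 12 bytes (the DETH, and any immediate data) arrive at the front of the data buffer. The sketch below just restates that arithmetic:

```c
#include <stdio.h>

#define LRH_BYTES	8
#define GRH_BYTES	40
#define BTH_BYTES	12
#define DETH_BYTES	8

int main(void)
{
	unsigned hdr = LRH_BYTES + GRH_BYTES + BTH_BYTES + DETH_BYTES;	/* 68 */
	unsigned eager_buf = 56;	/* per the driver comment above */
	unsigned in_data = hdr > eager_buf ? hdr - eager_buf : 0;

	/* 12 bytes: the DETH (qkey and source QP) land at the start of data. */
	printf("header bytes spilling into the data buffer: %u\n", in_data);
	return 0;
}
```
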
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c new file mode 100644 index 000000000000..2bb08afc86d0 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c | |||
@@ -0,0 +1,207 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/mm.h> | ||
34 | #include <linux/device.h> | ||
35 | |||
36 | #include "ipath_kernel.h" | ||
37 | |||
38 | static void __ipath_release_user_pages(struct page **p, size_t num_pages, | ||
39 | int dirty) | ||
40 | { | ||
41 | size_t i; | ||
42 | |||
43 | for (i = 0; i < num_pages; i++) { | ||
44 | ipath_cdbg(MM, "%lu/%lu put_page %p\n", (unsigned long) i, | ||
45 | (unsigned long) num_pages, p[i]); | ||
46 | if (dirty) | ||
47 | set_page_dirty_lock(p[i]); | ||
48 | put_page(p[i]); | ||
49 | } | ||
50 | } | ||
51 | |||
52 | /* call with current->mm->mmap_sem held */ | ||
53 | static int __get_user_pages(unsigned long start_page, size_t num_pages, | ||
54 | struct page **p, struct vm_area_struct **vma) | ||
55 | { | ||
56 | unsigned long lock_limit; | ||
57 | size_t got; | ||
58 | int ret; | ||
59 | |||
60 | #if 0 | ||
61 | /* | ||
62 | * XXX - causes MPI programs to fail, haven't had time to check | ||
63 | * yet | ||
64 | */ | ||
65 | if (!capable(CAP_IPC_LOCK)) { | ||
66 | ret = -EPERM; | ||
67 | goto bail; | ||
68 | } | ||
69 | #endif | ||
70 | |||
71 | lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> | ||
72 | PAGE_SHIFT; | ||
73 | |||
74 | if (num_pages > lock_limit) { | ||
75 | ret = -ENOMEM; | ||
76 | goto bail; | ||
77 | } | ||
78 | |||
79 | ipath_cdbg(VERBOSE, "pin %lx pages from vaddr %lx\n", | ||
80 | (unsigned long) num_pages, start_page); | ||
81 | |||
82 | for (got = 0; got < num_pages; got += ret) { | ||
83 | ret = get_user_pages(current, current->mm, | ||
84 | start_page + got * PAGE_SIZE, | ||
85 | num_pages - got, 1, 1, | ||
86 | p + got, vma); | ||
87 | if (ret < 0) | ||
88 | goto bail_release; | ||
89 | } | ||
90 | |||
91 | current->mm->locked_vm += num_pages; | ||
92 | |||
93 | ret = 0; | ||
94 | goto bail; | ||
95 | |||
96 | bail_release: | ||
97 | __ipath_release_user_pages(p, got, 0); | ||
98 | bail: | ||
99 | return ret; | ||
100 | } | ||
101 | |||
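
__get_user_pages() refuses to pin more pages than RLIMIT_MEMLOCK allows, converting the byte limit to pages with PAGE_SHIFT. The userspace analogue below makes the same comparison via getrlimit(); it is only illustrative, since the kernel code reads the limit straight from current->signal.

```c
#include <stdio.h>
#include <sys/resource.h>
#include <unistd.h>

static int can_pin_pages(size_t num_pages)
{
	struct rlimit rl;
	unsigned long lock_limit;

	if (getrlimit(RLIMIT_MEMLOCK, &rl) != 0)
		return 0;
	/* Same comparison as the kernel code: byte limit converted to pages. */
	lock_limit = rl.rlim_cur / (unsigned long)sysconf(_SC_PAGESIZE);
	return num_pages <= lock_limit;
}

int main(void)
{
	printf("pin 16 pages: %s\n",
	       can_pin_pages(16) ? "ok" : "would exceed RLIMIT_MEMLOCK");
	return 0;
}
```
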
102 | /** | ||
103 | * ipath_get_user_pages - lock user pages into memory | ||
104 | * @start_page: the start page | ||
105 | * @num_pages: the number of pages | ||
106 | * @p: the output page structures | ||
107 | * | ||
108 | * This function takes a given start page (page aligned user virtual | ||
109 | * address) and pins it and the following specified number of pages. For | ||
110 | * now, num_pages is always 1, but that will probably change at some point | ||
111 | * (because the caller is doing expected sends on a single virtually contiguous | ||
112 | * buffer, so we can do all pages at once). | ||
113 | */ | ||
114 | int ipath_get_user_pages(unsigned long start_page, size_t num_pages, | ||
115 | struct page **p) | ||
116 | { | ||
117 | int ret; | ||
118 | |||
119 | down_write(¤t->mm->mmap_sem); | ||
120 | |||
121 | ret = __get_user_pages(start_page, num_pages, p, NULL); | ||
122 | |||
123 | up_write(¤t->mm->mmap_sem); | ||
124 | |||
125 | return ret; | ||
126 | } | ||
127 | |||
128 | /** | ||
129 | * ipath_get_user_pages_nocopy - lock a single page for I/O and mark shared | ||
130 | * @page: the page to lock | ||
131 | * @p: the output page structure | ||
132 | * | ||
133 | * This is similar to ipath_get_user_pages, but it's always one page, and we | ||
134 | * mark the page as locked for I/O, and shared. This is used for the user | ||
135 | * process page that contains the destination address for the rcvhdrq tail | ||
136 | * update, so we need to have the vma. If we don't do this, the page can be | ||
137 | * taken away from us on fork, even if the child never touches it, and then | ||
138 | * the user process never sees the tail register updates. | ||
139 | */ | ||
140 | int ipath_get_user_pages_nocopy(unsigned long page, struct page **p) | ||
141 | { | ||
142 | struct vm_area_struct *vma; | ||
143 | int ret; | ||
144 | |||
145 | down_write(¤t->mm->mmap_sem); | ||
146 | |||
147 | ret = __get_user_pages(page, 1, p, &vma); | ||
148 | |||
149 | up_write(¤t->mm->mmap_sem); | ||
150 | |||
151 | return ret; | ||
152 | } | ||
153 | |||
154 | void ipath_release_user_pages(struct page **p, size_t num_pages) | ||
155 | { | ||
156 | down_write(¤t->mm->mmap_sem); | ||
157 | |||
158 | __ipath_release_user_pages(p, num_pages, 1); | ||
159 | |||
160 | current->mm->locked_vm -= num_pages; | ||
161 | |||
162 | up_write(¤t->mm->mmap_sem); | ||
163 | } | ||
164 | |||
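/*
 * Deferred locked_vm accounting for pages released at close time: the
 * adjustment is done from a work item that holds its own reference on
 * the mm and takes mmap_sem there.
 */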
165 | struct ipath_user_pages_work { | ||
166 | struct work_struct work; | ||
167 | struct mm_struct *mm; | ||
168 | unsigned long num_pages; | ||
169 | }; | ||
170 | |||
171 | static void user_pages_account(void *ptr) | ||
172 | { | ||
173 | struct ipath_user_pages_work *work = ptr; | ||
174 | |||
175 | down_write(&work->mm->mmap_sem); | ||
176 | work->mm->locked_vm -= work->num_pages; | ||
177 | up_write(&work->mm->mmap_sem); | ||
178 | mmput(work->mm); | ||
179 | kfree(work); | ||
180 | } | ||
181 | |||
182 | void ipath_release_user_pages_on_close(struct page **p, size_t num_pages) | ||
183 | { | ||
184 | struct ipath_user_pages_work *work; | ||
185 | struct mm_struct *mm; | ||
186 | |||
187 | __ipath_release_user_pages(p, num_pages, 1); | ||
188 | |||
189 | mm = get_task_mm(current); | ||
190 | if (!mm) | ||
191 | goto bail; | ||
192 | |||
193 | work = kmalloc(sizeof(*work), GFP_KERNEL); | ||
194 | if (!work) | ||
195 | goto bail_mm; | ||
196 | |||
197 | INIT_WORK(&work->work, user_pages_account, work); | ||
198 | work->mm = mm; | ||
199 | work->num_pages = num_pages; | ||
200 | schedule_work(&work->work); | ||
201 | goto bail; | ||
202 | |||
203 | bail_mm: | ||
204 | mmput(mm); | ||
205 | bail: | ||
206 | return; | ||
207 | } | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c new file mode 100644 index 000000000000..8d2558a01f35 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c | |||
@@ -0,0 +1,1222 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <rdma/ib_mad.h> | ||
34 | #include <rdma/ib_user_verbs.h> | ||
35 | #include <linux/utsname.h> | ||
36 | |||
37 | #include "ipath_kernel.h" | ||
38 | #include "ipath_verbs.h" | ||
39 | #include "ips_common.h" | ||
40 | |||
41 | /* Not static, because we don't want the compiler removing it */ | ||
42 | const char ipath_verbs_version[] = "ipath_verbs " IPATH_IDSTR; | ||
43 | |||
44 | static unsigned int ib_ipath_qp_table_size = 251; | ||
45 | module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO); | ||
46 | MODULE_PARM_DESC(qp_table_size, "QP table size"); | ||
47 | |||
48 | unsigned int ib_ipath_lkey_table_size = 12; | ||
49 | module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint, | ||
50 | S_IRUGO); | ||
51 | MODULE_PARM_DESC(lkey_table_size, | ||
52 | "LKEY table size in bits (2^n, 1 <= n <= 23)"); | ||
53 | |||
54 | unsigned int ib_ipath_debug; /* debug mask */ | ||
55 | module_param_named(debug, ib_ipath_debug, uint, S_IWUSR | S_IRUGO); | ||
56 | MODULE_PARM_DESC(debug, "Verbs debug mask"); | ||
57 | |||
58 | MODULE_LICENSE("GPL"); | ||
59 | MODULE_AUTHOR("PathScale <support@pathscale.com>"); | ||
60 | MODULE_DESCRIPTION("PathScale InfiniPath driver"); | ||
61 | |||
62 | const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { | ||
63 | [IB_QPS_RESET] = 0, | ||
64 | [IB_QPS_INIT] = IPATH_POST_RECV_OK, | ||
65 | [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, | ||
66 | [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | | ||
67 | IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, | ||
68 | [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | | ||
69 | IPATH_POST_SEND_OK, | ||
70 | [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, | ||
71 | [IB_QPS_ERR] = 0, | ||
72 | }; | ||
73 | |||
74 | /* | ||
75 | * Translate ib_wr_opcode into ib_wc_opcode. | ||
76 | */ | ||
77 | const enum ib_wc_opcode ib_ipath_wc_opcode[] = { | ||
78 | [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, | ||
79 | [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, | ||
80 | [IB_WR_SEND] = IB_WC_SEND, | ||
81 | [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, | ||
82 | [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, | ||
83 | [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, | ||
84 | [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD | ||
85 | }; | ||
86 | |||
87 | /* | ||
88 | * System image GUID. | ||
89 | */ | ||
90 | static __be64 sys_image_guid; | ||
91 | |||
92 | /** | ||
93 | * ipath_copy_sge - copy data to SGE memory | ||
94 | * @ss: the SGE state | ||
95 | * @data: the data to copy | ||
96 | * @length: the length of the data | ||
97 | */ | ||
98 | void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length) | ||
99 | { | ||
100 | struct ipath_sge *sge = &ss->sge; | ||
101 | |||
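/*
 * Within each SGE, 'length' is the space left in the current memory
 * region segment while 'sge_length' is the space left in the SGE as a
 * whole: when a segment runs out the next map[]/segs[] entry is loaded,
 * and when the SGE runs out the next sg_list entry is loaded.
 */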
102 | while (length) { | ||
103 | u32 len = sge->length; | ||
104 | |||
105 | BUG_ON(len == 0); | ||
106 | if (len > length) | ||
107 | len = length; | ||
108 | memcpy(sge->vaddr, data, len); | ||
109 | sge->vaddr += len; | ||
110 | sge->length -= len; | ||
111 | sge->sge_length -= len; | ||
112 | if (sge->sge_length == 0) { | ||
113 | if (--ss->num_sge) | ||
114 | *sge = *ss->sg_list++; | ||
115 | } else if (sge->length == 0 && sge->mr != NULL) { | ||
116 | if (++sge->n >= IPATH_SEGSZ) { | ||
117 | if (++sge->m >= sge->mr->mapsz) | ||
118 | break; | ||
119 | sge->n = 0; | ||
120 | } | ||
121 | sge->vaddr = | ||
122 | sge->mr->map[sge->m]->segs[sge->n].vaddr; | ||
123 | sge->length = | ||
124 | sge->mr->map[sge->m]->segs[sge->n].length; | ||
125 | } | ||
126 | data += len; | ||
127 | length -= len; | ||
128 | } | ||
129 | } | ||
130 | |||
131 | /** | ||
132 | * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func | ||
133 | * @ss: the SGE state | ||
134 | * @length: the number of bytes to skip | ||
135 | */ | ||
136 | void ipath_skip_sge(struct ipath_sge_state *ss, u32 length) | ||
137 | { | ||
138 | struct ipath_sge *sge = &ss->sge; | ||
139 | |||
140 | while (length > sge->sge_length) { | ||
141 | length -= sge->sge_length; | ||
142 | ss->sge = *ss->sg_list++; | ||
143 | } | ||
144 | while (length) { | ||
145 | u32 len = sge->length; | ||
146 | |||
147 | BUG_ON(len == 0); | ||
148 | if (len > length) | ||
149 | len = length; | ||
150 | sge->vaddr += len; | ||
151 | sge->length -= len; | ||
152 | sge->sge_length -= len; | ||
153 | if (sge->sge_length == 0) { | ||
154 | if (--ss->num_sge) | ||
155 | *sge = *ss->sg_list++; | ||
156 | } else if (sge->length == 0 && sge->mr != NULL) { | ||
157 | if (++sge->n >= IPATH_SEGSZ) { | ||
158 | if (++sge->m >= sge->mr->mapsz) | ||
159 | break; | ||
160 | sge->n = 0; | ||
161 | } | ||
162 | sge->vaddr = | ||
163 | sge->mr->map[sge->m]->segs[sge->n].vaddr; | ||
164 | sge->length = | ||
165 | sge->mr->map[sge->m]->segs[sge->n].length; | ||
166 | } | ||
167 | length -= len; | ||
168 | } | ||
169 | } | ||
170 | |||
171 | /** | ||
172 | * ipath_post_send - post a send on a QP | ||
173 | * @ibqp: the QP to post the send on | ||
174 | * @wr: the list of work requests to post | ||
175 | * @bad_wr: the first bad WR is put here | ||
176 | * | ||
177 | * This may be called from interrupt context. | ||
178 | */ | ||
179 | static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | ||
180 | struct ib_send_wr **bad_wr) | ||
181 | { | ||
182 | struct ipath_qp *qp = to_iqp(ibqp); | ||
183 | int err = 0; | ||
184 | |||
185 | /* Check that state is OK to post send. */ | ||
186 | if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)) { | ||
187 | *bad_wr = wr; | ||
188 | err = -EINVAL; | ||
189 | goto bail; | ||
190 | } | ||
191 | |||
192 | for (; wr; wr = wr->next) { | ||
193 | switch (qp->ibqp.qp_type) { | ||
194 | case IB_QPT_UC: | ||
195 | case IB_QPT_RC: | ||
196 | err = ipath_post_rc_send(qp, wr); | ||
197 | break; | ||
198 | |||
199 | case IB_QPT_SMI: | ||
200 | case IB_QPT_GSI: | ||
201 | case IB_QPT_UD: | ||
202 | err = ipath_post_ud_send(qp, wr); | ||
203 | break; | ||
204 | |||
205 | default: | ||
206 | err = -EINVAL; | ||
207 | } | ||
208 | if (err) { | ||
209 | *bad_wr = wr; | ||
210 | break; | ||
211 | } | ||
212 | } | ||
213 | |||
214 | bail: | ||
215 | return err; | ||
216 | } | ||
217 | |||
218 | /** | ||
219 | * ipath_post_receive - post a receive on a QP | ||
220 | * @ibqp: the QP to post the receive on | ||
221 | * @wr: the WR to post | ||
222 | * @bad_wr: the first bad WR is put here | ||
223 | * | ||
224 | * This may be called from interrupt context. | ||
225 | */ | ||
226 | static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, | ||
227 | struct ib_recv_wr **bad_wr) | ||
228 | { | ||
229 | struct ipath_qp *qp = to_iqp(ibqp); | ||
230 | unsigned long flags; | ||
231 | int ret; | ||
232 | |||
233 | /* Check that state is OK to post receive. */ | ||
234 | if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK)) { | ||
235 | *bad_wr = wr; | ||
236 | ret = -EINVAL; | ||
237 | goto bail; | ||
238 | } | ||
239 | |||
240 | for (; wr; wr = wr->next) { | ||
241 | struct ipath_rwqe *wqe; | ||
242 | u32 next; | ||
243 | int i, j; | ||
244 | |||
245 | if (wr->num_sge > qp->r_rq.max_sge) { | ||
246 | *bad_wr = wr; | ||
247 | ret = -ENOMEM; | ||
248 | goto bail; | ||
249 | } | ||
250 | |||
251 | spin_lock_irqsave(&qp->r_rq.lock, flags); | ||
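/*
 * The receive queue is a circular buffer: head == tail means empty, so
 * one slot is always left unused and the queue is full when advancing
 * head would catch up with tail.
 */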
252 | next = qp->r_rq.head + 1; | ||
253 | if (next >= qp->r_rq.size) | ||
254 | next = 0; | ||
255 | if (next == qp->r_rq.tail) { | ||
256 | spin_unlock_irqrestore(&qp->r_rq.lock, flags); | ||
257 | *bad_wr = wr; | ||
258 | ret = -ENOMEM; | ||
259 | goto bail; | ||
260 | } | ||
261 | |||
262 | wqe = get_rwqe_ptr(&qp->r_rq, qp->r_rq.head); | ||
263 | wqe->wr_id = wr->wr_id; | ||
264 | wqe->sg_list[0].mr = NULL; | ||
265 | wqe->sg_list[0].vaddr = NULL; | ||
266 | wqe->sg_list[0].length = 0; | ||
267 | wqe->sg_list[0].sge_length = 0; | ||
268 | wqe->length = 0; | ||
269 | for (i = 0, j = 0; i < wr->num_sge; i++) { | ||
270 | /* Check LKEY */ | ||
271 | if (to_ipd(qp->ibqp.pd)->user && | ||
272 | wr->sg_list[i].lkey == 0) { | ||
273 | spin_unlock_irqrestore(&qp->r_rq.lock, | ||
274 | flags); | ||
275 | *bad_wr = wr; | ||
276 | ret = -EINVAL; | ||
277 | goto bail; | ||
278 | } | ||
279 | if (wr->sg_list[i].length == 0) | ||
280 | continue; | ||
281 | if (!ipath_lkey_ok( | ||
282 | &to_idev(qp->ibqp.device)->lk_table, | ||
283 | &wqe->sg_list[j], &wr->sg_list[i], | ||
284 | IB_ACCESS_LOCAL_WRITE)) { | ||
285 | spin_unlock_irqrestore(&qp->r_rq.lock, | ||
286 | flags); | ||
287 | *bad_wr = wr; | ||
288 | ret = -EINVAL; | ||
289 | goto bail; | ||
290 | } | ||
291 | wqe->length += wr->sg_list[i].length; | ||
292 | j++; | ||
293 | } | ||
294 | wqe->num_sge = j; | ||
295 | qp->r_rq.head = next; | ||
296 | spin_unlock_irqrestore(&qp->r_rq.lock, flags); | ||
297 | } | ||
298 | ret = 0; | ||
299 | |||
300 | bail: | ||
301 | return ret; | ||
302 | } | ||
303 | |||
304 | /** | ||
305 | * ipath_qp_rcv - process an incoming packet on a QP | ||
306 | * @dev: the device the packet came on | ||
307 | * @hdr: the packet header | ||
308 | * @has_grh: true if the packet has a GRH | ||
309 | * @data: the packet data | ||
310 | * @tlen: the packet length | ||
311 | * @qp: the QP the packet came on | ||
312 | * | ||
313 | * This is called from ipath_ib_rcv() to process an incoming packet | ||
314 | * for the given QP. | ||
315 | * Called at interrupt level. | ||
316 | */ | ||
317 | static void ipath_qp_rcv(struct ipath_ibdev *dev, | ||
318 | struct ipath_ib_header *hdr, int has_grh, | ||
319 | void *data, u32 tlen, struct ipath_qp *qp) | ||
320 | { | ||
321 | /* Check for valid receive state. */ | ||
322 | if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { | ||
323 | dev->n_pkt_drops++; | ||
324 | return; | ||
325 | } | ||
326 | |||
327 | switch (qp->ibqp.qp_type) { | ||
328 | case IB_QPT_SMI: | ||
329 | case IB_QPT_GSI: | ||
330 | case IB_QPT_UD: | ||
331 | ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp); | ||
332 | break; | ||
333 | |||
334 | case IB_QPT_RC: | ||
335 | ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp); | ||
336 | break; | ||
337 | |||
338 | case IB_QPT_UC: | ||
339 | ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp); | ||
340 | break; | ||
341 | |||
342 | default: | ||
343 | break; | ||
344 | } | ||
345 | } | ||
346 | |||
347 | /** | ||
348 | * ipath_ib_rcv - process an incoming packet | ||
349 | * @arg: the device pointer | ||
350 | * @rhdr: the header of the packet | ||
351 | * @data: the packet data | ||
352 | * @tlen: the packet length | ||
353 | * | ||
354 | * This is called from ipath_kreceive() to process an incoming packet at | ||
355 | * interrupt level. Tlen is the length of the header + data + CRC in bytes. | ||
356 | */ | ||
357 | static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen) | ||
358 | { | ||
359 | struct ipath_ibdev *dev = (struct ipath_ibdev *) arg; | ||
360 | struct ipath_ib_header *hdr = rhdr; | ||
361 | struct ipath_other_headers *ohdr; | ||
362 | struct ipath_qp *qp; | ||
363 | u32 qp_num; | ||
364 | int lnh; | ||
365 | u8 opcode; | ||
366 | u16 lid; | ||
367 | |||
368 | if (unlikely(dev == NULL)) | ||
369 | goto bail; | ||
370 | |||
371 | if (unlikely(tlen < 24)) { /* LRH+BTH+CRC */ | ||
372 | dev->rcv_errors++; | ||
373 | goto bail; | ||
374 | } | ||
375 | |||
376 | /* Check for a valid destination LID (see ch. 7.11.1). */ | ||
377 | lid = be16_to_cpu(hdr->lrh[1]); | ||
378 | if (lid < IPS_MULTICAST_LID_BASE) { | ||
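/*
 * Mask off the low LMC bits so any unicast DLID within the port's
 * LMC range compares equal to the base LID.
 */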
379 | lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); | ||
380 | if (unlikely(lid != ipath_layer_get_lid(dev->dd))) { | ||
381 | dev->rcv_errors++; | ||
382 | goto bail; | ||
383 | } | ||
384 | } | ||
385 | |||
386 | /* Check for GRH */ | ||
387 | lnh = be16_to_cpu(hdr->lrh[0]) & 3; | ||
388 | if (lnh == IPS_LRH_BTH) | ||
389 | ohdr = &hdr->u.oth; | ||
390 | else if (lnh == IPS_LRH_GRH) | ||
391 | ohdr = &hdr->u.l.oth; | ||
392 | else { | ||
393 | dev->rcv_errors++; | ||
394 | goto bail; | ||
395 | } | ||
396 | |||
397 | opcode = be32_to_cpu(ohdr->bth[0]) >> 24; | ||
398 | dev->opstats[opcode].n_bytes += tlen; | ||
399 | dev->opstats[opcode].n_packets++; | ||
400 | |||
401 | /* Get the destination QP number. */ | ||
402 | qp_num = be32_to_cpu(ohdr->bth[1]) & IPS_QPN_MASK; | ||
403 | if (qp_num == IPS_MULTICAST_QPN) { | ||
404 | struct ipath_mcast *mcast; | ||
405 | struct ipath_mcast_qp *p; | ||
406 | |||
407 | mcast = ipath_mcast_find(&hdr->u.l.grh.dgid); | ||
408 | if (mcast == NULL) { | ||
409 | dev->n_pkt_drops++; | ||
410 | goto bail; | ||
411 | } | ||
412 | dev->n_multicast_rcv++; | ||
413 | list_for_each_entry_rcu(p, &mcast->qp_list, list) | ||
414 | ipath_qp_rcv(dev, hdr, lnh == IPS_LRH_GRH, data, | ||
415 | tlen, p->qp); | ||
416 | /* | ||
417 | * Notify ipath_multicast_detach() if it is waiting for us | ||
418 | * to finish. | ||
419 | */ | ||
420 | if (atomic_dec_return(&mcast->refcount) <= 1) | ||
421 | wake_up(&mcast->wait); | ||
422 | } else { | ||
423 | qp = ipath_lookup_qpn(&dev->qp_table, qp_num); | ||
424 | if (qp) { | ||
425 | dev->n_unicast_rcv++; | ||
426 | ipath_qp_rcv(dev, hdr, lnh == IPS_LRH_GRH, data, | ||
427 | tlen, qp); | ||
428 | /* | ||
429 | * Notify ipath_destroy_qp() if it is waiting | ||
430 | * for us to finish. | ||
431 | */ | ||
432 | if (atomic_dec_and_test(&qp->refcount)) | ||
433 | wake_up(&qp->wait); | ||
434 | } else | ||
435 | dev->n_pkt_drops++; | ||
436 | } | ||
437 | |||
438 | bail:; | ||
439 | } | ||
440 | |||
441 | /** | ||
442 | * ipath_ib_timer - verbs timer | ||
443 | * @arg: the device pointer | ||
444 | * | ||
445 | * This is called from ipath_do_rcv_timer() at interrupt level to check for | ||
446 | * QPs which need retransmits and to collect performance numbers. | ||
447 | */ | ||
448 | static void ipath_ib_timer(void *arg) | ||
449 | { | ||
450 | struct ipath_ibdev *dev = (struct ipath_ibdev *) arg; | ||
451 | struct ipath_qp *resend = NULL; | ||
452 | struct ipath_qp *rnr = NULL; | ||
453 | struct list_head *last; | ||
454 | struct ipath_qp *qp; | ||
455 | unsigned long flags; | ||
456 | |||
457 | if (dev == NULL) | ||
458 | return; | ||
459 | |||
460 | spin_lock_irqsave(&dev->pending_lock, flags); | ||
461 | /* Start filling the next pending queue. */ | ||
462 | if (++dev->pending_index >= ARRAY_SIZE(dev->pending)) | ||
463 | dev->pending_index = 0; | ||
464 | /* Save any requests still in the new queue, they have timed out. */ | ||
465 | last = &dev->pending[dev->pending_index]; | ||
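/*
 * Timed-out QPs are unlinked from the pending list and chained together
 * through the (now unused) timerwait.prev pointer so they can be
 * restarted below, after pending_lock has been dropped.
 */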
466 | while (!list_empty(last)) { | ||
467 | qp = list_entry(last->next, struct ipath_qp, timerwait); | ||
468 | if (last->next == LIST_POISON1 || | ||
469 | last->next != &qp->timerwait || | ||
470 | qp->timerwait.prev != last) { | ||
471 | INIT_LIST_HEAD(last); | ||
472 | } else { | ||
473 | list_del(&qp->timerwait); | ||
474 | qp->timerwait.prev = (struct list_head *) resend; | ||
475 | resend = qp; | ||
476 | atomic_inc(&qp->refcount); | ||
477 | } | ||
478 | } | ||
479 | last = &dev->rnrwait; | ||
480 | if (!list_empty(last)) { | ||
481 | qp = list_entry(last->next, struct ipath_qp, timerwait); | ||
482 | if (--qp->s_rnr_timeout == 0) { | ||
483 | do { | ||
484 | if (last->next == LIST_POISON1 || | ||
485 | last->next != &qp->timerwait || | ||
486 | qp->timerwait.prev != last) { | ||
487 | INIT_LIST_HEAD(last); | ||
488 | break; | ||
489 | } | ||
490 | list_del(&qp->timerwait); | ||
491 | qp->timerwait.prev = | ||
492 | (struct list_head *) rnr; | ||
493 | rnr = qp; | ||
494 | if (list_empty(last)) | ||
495 | break; | ||
496 | qp = list_entry(last->next, struct ipath_qp, | ||
497 | timerwait); | ||
498 | } while (qp->s_rnr_timeout == 0); | ||
499 | } | ||
500 | } | ||
501 | /* | ||
502 | * We should only be in the started state if pma_sample_start != 0 | ||
503 | */ | ||
504 | if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED && | ||
505 | --dev->pma_sample_start == 0) { | ||
506 | dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING; | ||
507 | ipath_layer_snapshot_counters(dev->dd, &dev->ipath_sword, | ||
508 | &dev->ipath_rword, | ||
509 | &dev->ipath_spkts, | ||
510 | &dev->ipath_rpkts, | ||
511 | &dev->ipath_xmit_wait); | ||
512 | } | ||
513 | if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) { | ||
514 | if (dev->pma_sample_interval == 0) { | ||
515 | u64 ta, tb, tc, td, te; | ||
516 | |||
517 | dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE; | ||
518 | ipath_layer_snapshot_counters(dev->dd, &ta, &tb, | ||
519 | &tc, &td, &te); | ||
520 | |||
521 | dev->ipath_sword = ta - dev->ipath_sword; | ||
522 | dev->ipath_rword = tb - dev->ipath_rword; | ||
523 | dev->ipath_spkts = tc - dev->ipath_spkts; | ||
524 | dev->ipath_rpkts = td - dev->ipath_rpkts; | ||
525 | dev->ipath_xmit_wait = te - dev->ipath_xmit_wait; | ||
526 | } else { | ||
527 | dev->pma_sample_interval--; | ||
528 | } | ||
529 | } | ||
530 | spin_unlock_irqrestore(&dev->pending_lock, flags); | ||
531 | |||
532 | /* XXX What if timer fires again while this is running? */ | ||
533 | for (qp = resend; qp != NULL; | ||
534 | qp = (struct ipath_qp *) qp->timerwait.prev) { | ||
535 | struct ib_wc wc; | ||
536 | |||
537 | spin_lock_irqsave(&qp->s_lock, flags); | ||
538 | if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) { | ||
539 | dev->n_timeouts++; | ||
540 | ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); | ||
541 | } | ||
542 | spin_unlock_irqrestore(&qp->s_lock, flags); | ||
543 | |||
544 | /* Notify ipath_destroy_qp() if it is waiting. */ | ||
545 | if (atomic_dec_and_test(&qp->refcount)) | ||
546 | wake_up(&qp->wait); | ||
547 | } | ||
548 | for (qp = rnr; qp != NULL; | ||
549 | qp = (struct ipath_qp *) qp->timerwait.prev) | ||
550 | tasklet_hi_schedule(&qp->s_task); | ||
551 | } | ||
552 | |||
553 | /** | ||
554 | * ipath_ib_piobufavail - callback when a PIO buffer is available | ||
555 | * @arg: the device pointer | ||
556 | * | ||
557 | * This is called from ipath_intr() at interrupt level when a PIO buffer is | ||
558 | * available after ipath_verbs_send() returned an error that no buffers were | ||
559 | * available. Return 0 if we consumed all the PIO buffers and we still have | ||
560 | * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and | ||
561 | * return one). | ||
562 | */ | ||
563 | static int ipath_ib_piobufavail(void *arg) | ||
564 | { | ||
565 | struct ipath_ibdev *dev = (struct ipath_ibdev *) arg; | ||
566 | struct ipath_qp *qp; | ||
567 | unsigned long flags; | ||
568 | |||
569 | if (dev == NULL) | ||
570 | goto bail; | ||
571 | |||
572 | spin_lock_irqsave(&dev->pending_lock, flags); | ||
573 | while (!list_empty(&dev->piowait)) { | ||
574 | qp = list_entry(dev->piowait.next, struct ipath_qp, | ||
575 | piowait); | ||
576 | list_del(&qp->piowait); | ||
577 | tasklet_hi_schedule(&qp->s_task); | ||
578 | } | ||
579 | spin_unlock_irqrestore(&dev->pending_lock, flags); | ||
580 | |||
581 | bail: | ||
582 | return 1; | ||
583 | } | ||
584 | |||
585 | static int ipath_query_device(struct ib_device *ibdev, | ||
586 | struct ib_device_attr *props) | ||
587 | { | ||
588 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
589 | u32 vendor, boardrev, majrev, minrev; | ||
590 | |||
591 | memset(props, 0, sizeof(*props)); | ||
592 | |||
593 | props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | | ||
594 | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | | ||
595 | IB_DEVICE_SYS_IMAGE_GUID; | ||
596 | ipath_layer_query_device(dev->dd, &vendor, &boardrev, | ||
597 | &majrev, &minrev); | ||
598 | props->vendor_id = vendor; | ||
599 | props->vendor_part_id = boardrev; | ||
600 | props->hw_ver = boardrev << 16 | majrev << 8 | minrev; | ||
601 | |||
602 | props->sys_image_guid = dev->sys_image_guid; | ||
603 | |||
604 | props->max_mr_size = ~0ull; | ||
605 | props->max_qp = 0xffff; | ||
606 | props->max_qp_wr = 0xffff; | ||
607 | props->max_sge = 255; | ||
608 | props->max_cq = 0xffff; | ||
609 | props->max_cqe = 0xffff; | ||
610 | props->max_mr = 0xffff; | ||
611 | props->max_pd = 0xffff; | ||
612 | props->max_qp_rd_atom = 1; | ||
613 | props->max_qp_init_rd_atom = 1; | ||
614 | /* props->max_res_rd_atom */ | ||
615 | props->max_srq = 0xffff; | ||
616 | props->max_srq_wr = 0xffff; | ||
617 | props->max_srq_sge = 255; | ||
618 | /* props->local_ca_ack_delay */ | ||
619 | props->atomic_cap = IB_ATOMIC_HCA; | ||
620 | props->max_pkeys = ipath_layer_get_npkeys(dev->dd); | ||
621 | props->max_mcast_grp = 0xffff; | ||
622 | props->max_mcast_qp_attach = 0xffff; | ||
623 | props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * | ||
624 | props->max_mcast_grp; | ||
625 | |||
626 | return 0; | ||
627 | } | ||
628 | |||
629 | const u8 ipath_cvt_physportstate[16] = { | ||
630 | [INFINIPATH_IBCS_LT_STATE_DISABLED] = 3, | ||
631 | [INFINIPATH_IBCS_LT_STATE_LINKUP] = 5, | ||
632 | [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = 2, | ||
633 | [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = 2, | ||
634 | [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = 1, | ||
635 | [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = 1, | ||
636 | [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] = 4, | ||
637 | [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] = 4, | ||
638 | [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] = 4, | ||
639 | [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = 4, | ||
640 | [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] = 6, | ||
641 | [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] = 6, | ||
642 | [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6, | ||
643 | }; | ||
644 | |||
645 | static int ipath_query_port(struct ib_device *ibdev, | ||
646 | u8 port, struct ib_port_attr *props) | ||
647 | { | ||
648 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
649 | enum ib_mtu mtu; | ||
650 | u16 lid = ipath_layer_get_lid(dev->dd); | ||
651 | u64 ibcstat; | ||
652 | |||
653 | memset(props, 0, sizeof(*props)); | ||
654 | props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE); | ||
655 | props->lmc = dev->mkeyprot_resv_lmc & 7; | ||
656 | props->sm_lid = dev->sm_lid; | ||
657 | props->sm_sl = dev->sm_sl; | ||
658 | ibcstat = ipath_layer_get_lastibcstat(dev->dd); | ||
659 | props->state = ((ibcstat >> 4) & 0x3) + 1; | ||
660 | /* See phys_state_show() */ | ||
661 | props->phys_state = ipath_cvt_physportstate[ | ||
662 | ipath_layer_get_lastibcstat(dev->dd) & 0xf]; | ||
663 | props->port_cap_flags = dev->port_cap_flags; | ||
664 | props->gid_tbl_len = 1; | ||
665 | props->max_msg_sz = 4096; | ||
666 | props->pkey_tbl_len = ipath_layer_get_npkeys(dev->dd); | ||
667 | props->bad_pkey_cntr = ipath_layer_get_cr_errpkey(dev->dd) - | ||
668 | dev->n_pkey_violations; | ||
669 | props->qkey_viol_cntr = dev->qkey_violations; | ||
670 | props->active_width = IB_WIDTH_4X; | ||
671 | /* See rate_show() */ | ||
672 | props->active_speed = 1; /* 2.5 Gb/s signalling (SDR). */ | ||
673 | props->max_vl_num = 1; /* VLCap = VL0 */ | ||
674 | props->init_type_reply = 0; | ||
675 | |||
676 | props->max_mtu = IB_MTU_4096; | ||
677 | switch (ipath_layer_get_ibmtu(dev->dd)) { | ||
678 | case 4096: | ||
679 | mtu = IB_MTU_4096; | ||
680 | break; | ||
681 | case 2048: | ||
682 | mtu = IB_MTU_2048; | ||
683 | break; | ||
684 | case 1024: | ||
685 | mtu = IB_MTU_1024; | ||
686 | break; | ||
687 | case 512: | ||
688 | mtu = IB_MTU_512; | ||
689 | break; | ||
690 | case 256: | ||
691 | mtu = IB_MTU_256; | ||
692 | break; | ||
693 | default: | ||
694 | mtu = IB_MTU_2048; | ||
695 | } | ||
696 | props->active_mtu = mtu; | ||
697 | props->subnet_timeout = dev->subnet_timeout; | ||
698 | |||
699 | return 0; | ||
700 | } | ||
701 | |||
702 | static int ipath_modify_device(struct ib_device *device, | ||
703 | int device_modify_mask, | ||
704 | struct ib_device_modify *device_modify) | ||
705 | { | ||
706 | int ret; | ||
707 | |||
708 | if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | | ||
709 | IB_DEVICE_MODIFY_NODE_DESC)) { | ||
710 | ret = -EOPNOTSUPP; | ||
711 | goto bail; | ||
712 | } | ||
713 | |||
714 | if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) | ||
715 | memcpy(device->node_desc, device_modify->node_desc, 64); | ||
716 | |||
717 | if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) | ||
718 | to_idev(device)->sys_image_guid = | ||
719 | cpu_to_be64(device_modify->sys_image_guid); | ||
720 | |||
721 | ret = 0; | ||
722 | |||
723 | bail: | ||
724 | return ret; | ||
725 | } | ||
726 | |||
727 | static int ipath_modify_port(struct ib_device *ibdev, | ||
728 | u8 port, int port_modify_mask, | ||
729 | struct ib_port_modify *props) | ||
730 | { | ||
731 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
732 | |||
733 | dev->port_cap_flags |= props->set_port_cap_mask; | ||
734 | dev->port_cap_flags &= ~props->clr_port_cap_mask; | ||
735 | if (port_modify_mask & IB_PORT_SHUTDOWN) | ||
736 | ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN); | ||
737 | if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) | ||
738 | dev->qkey_violations = 0; | ||
739 | return 0; | ||
740 | } | ||
741 | |||
742 | static int ipath_query_gid(struct ib_device *ibdev, u8 port, | ||
743 | int index, union ib_gid *gid) | ||
744 | { | ||
745 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
746 | int ret; | ||
747 | |||
748 | if (index >= 1) { | ||
749 | ret = -EINVAL; | ||
750 | goto bail; | ||
751 | } | ||
752 | gid->global.subnet_prefix = dev->gid_prefix; | ||
753 | gid->global.interface_id = ipath_layer_get_guid(dev->dd); | ||
754 | |||
755 | ret = 0; | ||
756 | |||
757 | bail: | ||
758 | return ret; | ||
759 | } | ||
760 | |||
761 | static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev, | ||
762 | struct ib_ucontext *context, | ||
763 | struct ib_udata *udata) | ||
764 | { | ||
765 | struct ipath_pd *pd; | ||
766 | struct ib_pd *ret; | ||
767 | |||
768 | pd = kmalloc(sizeof *pd, GFP_KERNEL); | ||
769 | if (!pd) { | ||
770 | ret = ERR_PTR(-ENOMEM); | ||
771 | goto bail; | ||
772 | } | ||
773 | |||
774 | /* ib_alloc_pd() will initialize pd->ibpd. */ | ||
775 | pd->user = udata != NULL; | ||
776 | |||
777 | ret = &pd->ibpd; | ||
778 | |||
779 | bail: | ||
780 | return ret; | ||
781 | } | ||
782 | |||
783 | static int ipath_dealloc_pd(struct ib_pd *ibpd) | ||
784 | { | ||
785 | struct ipath_pd *pd = to_ipd(ibpd); | ||
786 | |||
787 | kfree(pd); | ||
788 | |||
789 | return 0; | ||
790 | } | ||
791 | |||
792 | /** | ||
793 | * ipath_create_ah - create an address handle | ||
794 | * @pd: the protection domain | ||
795 | * @ah_attr: the attributes of the AH | ||
796 | * | ||
797 | * This may be called from interrupt context. | ||
798 | */ | ||
799 | static struct ib_ah *ipath_create_ah(struct ib_pd *pd, | ||
800 | struct ib_ah_attr *ah_attr) | ||
801 | { | ||
802 | struct ipath_ah *ah; | ||
803 | struct ib_ah *ret; | ||
804 | |||
805 | /* A multicast address requires a GRH (see ch. 8.4.1). */ | ||
806 | if (ah_attr->dlid >= IPS_MULTICAST_LID_BASE && | ||
807 | ah_attr->dlid != IPS_PERMISSIVE_LID && | ||
808 | !(ah_attr->ah_flags & IB_AH_GRH)) { | ||
809 | ret = ERR_PTR(-EINVAL); | ||
810 | goto bail; | ||
811 | } | ||
812 | |||
813 | ah = kmalloc(sizeof *ah, GFP_ATOMIC); | ||
814 | if (!ah) { | ||
815 | ret = ERR_PTR(-ENOMEM); | ||
816 | goto bail; | ||
817 | } | ||
818 | |||
819 | /* ib_create_ah() will initialize ah->ibah. */ | ||
820 | ah->attr = *ah_attr; | ||
821 | |||
822 | ret = &ah->ibah; | ||
823 | |||
824 | bail: | ||
825 | return ret; | ||
826 | } | ||
827 | |||
828 | /** | ||
829 | * ipath_destroy_ah - destroy an address handle | ||
830 | * @ibah: the AH to destroy | ||
831 | * | ||
832 | * This may be called from interrupt context. | ||
833 | */ | ||
834 | static int ipath_destroy_ah(struct ib_ah *ibah) | ||
835 | { | ||
836 | struct ipath_ah *ah = to_iah(ibah); | ||
837 | |||
838 | kfree(ah); | ||
839 | |||
840 | return 0; | ||
841 | } | ||
842 | |||
843 | static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) | ||
844 | { | ||
845 | struct ipath_ah *ah = to_iah(ibah); | ||
846 | |||
847 | *ah_attr = ah->attr; | ||
848 | |||
849 | return 0; | ||
850 | } | ||
851 | |||
852 | static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index, | ||
853 | u16 *pkey) | ||
854 | { | ||
855 | struct ipath_ibdev *dev = to_idev(ibdev); | ||
856 | int ret; | ||
857 | |||
858 | if (index >= ipath_layer_get_npkeys(dev->dd)) { | ||
859 | ret = -EINVAL; | ||
860 | goto bail; | ||
861 | } | ||
862 | |||
863 | *pkey = ipath_layer_get_pkey(dev->dd, index); | ||
864 | ret = 0; | ||
865 | |||
866 | bail: | ||
867 | return ret; | ||
868 | } | ||
869 | |||
870 | |||
871 | /** | ||
872 | * ipath_alloc_ucontext - allocate a ucontext | ||
873 | * @ibdev: the infiniband device | ||
874 | * @udata: not used by the InfiniPath driver | ||
875 | */ | ||
876 | |||
877 | static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev, | ||
878 | struct ib_udata *udata) | ||
879 | { | ||
880 | struct ipath_ucontext *context; | ||
881 | struct ib_ucontext *ret; | ||
882 | |||
883 | context = kmalloc(sizeof *context, GFP_KERNEL); | ||
884 | if (!context) { | ||
885 | ret = ERR_PTR(-ENOMEM); | ||
886 | goto bail; | ||
887 | } | ||
888 | |||
889 | ret = &context->ibucontext; | ||
890 | |||
891 | bail: | ||
892 | return ret; | ||
893 | } | ||
894 | |||
895 | static int ipath_dealloc_ucontext(struct ib_ucontext *context) | ||
896 | { | ||
897 | kfree(to_iucontext(context)); | ||
898 | return 0; | ||
899 | } | ||
900 | |||
901 | static int ipath_verbs_register_sysfs(struct ib_device *dev); | ||
902 | |||
903 | /** | ||
904 | * ipath_register_ib_device - register our device with the infiniband core | ||
905 | * @unit: the device number to register | ||
906 | * @dd: the device data structure | ||
907 | * Return the allocated ipath_ibdev pointer or NULL on error. | ||
908 | */ | ||
909 | static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) | ||
910 | { | ||
911 | struct ipath_ibdev *idev; | ||
912 | struct ib_device *dev; | ||
913 | int ret; | ||
914 | |||
915 | idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev); | ||
916 | if (idev == NULL) | ||
917 | goto bail; | ||
918 | |||
919 | dev = &idev->ibdev; | ||
920 | |||
921 | /* Only need to initialize non-zero fields. */ | ||
922 | spin_lock_init(&idev->qp_table.lock); | ||
923 | spin_lock_init(&idev->lk_table.lock); | ||
924 | idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE); | ||
925 | /* Set the prefix to the default value (see ch. 4.1.1) */ | ||
926 | idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL); | ||
927 | |||
928 | ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size); | ||
929 | if (ret) | ||
930 | goto err_qp; | ||
931 | |||
932 | /* | ||
933 | * The top ib_ipath_lkey_table_size bits are used to index the | ||
934 | * table. The lower 8 bits can be owned by the user (copied from | ||
935 | * the LKEY). The remaining bits act as a generation number or tag. | ||
936 | */ | ||
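/*
 * For example, with the default lkey_table_size of 12 and a 32-bit key:
 * bits 31..20 index the table, bits 19..8 act as the generation tag,
 * and bits 7..0 belong to the user.
 */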
937 | idev->lk_table.max = 1 << ib_ipath_lkey_table_size; | ||
938 | idev->lk_table.table = kzalloc(idev->lk_table.max * | ||
939 | sizeof(*idev->lk_table.table), | ||
940 | GFP_KERNEL); | ||
941 | if (idev->lk_table.table == NULL) { | ||
942 | ret = -ENOMEM; | ||
943 | goto err_lk; | ||
944 | } | ||
945 | spin_lock_init(&idev->pending_lock); | ||
946 | INIT_LIST_HEAD(&idev->pending[0]); | ||
947 | INIT_LIST_HEAD(&idev->pending[1]); | ||
948 | INIT_LIST_HEAD(&idev->pending[2]); | ||
949 | INIT_LIST_HEAD(&idev->piowait); | ||
950 | INIT_LIST_HEAD(&idev->rnrwait); | ||
951 | idev->pending_index = 0; | ||
952 | idev->port_cap_flags = | ||
953 | IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP; | ||
954 | idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; | ||
955 | idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; | ||
956 | idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; | ||
957 | idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; | ||
958 | idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; | ||
959 | idev->link_width_enabled = 3; /* 1x or 4x */ | ||
960 | |||
961 | /* | ||
962 | * The system image GUID is supposed to be the same for all | ||
963 | * IB HCAs in a single system but since there can be other | ||
964 | * device types in the system, we can't be sure this is unique. | ||
965 | */ | ||
966 | if (!sys_image_guid) | ||
967 | sys_image_guid = ipath_layer_get_guid(dd); | ||
968 | idev->sys_image_guid = sys_image_guid; | ||
969 | idev->ib_unit = unit; | ||
970 | idev->dd = dd; | ||
971 | |||
972 | strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX); | ||
973 | dev->node_guid = ipath_layer_get_guid(dd); | ||
974 | dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION; | ||
975 | dev->uverbs_cmd_mask = | ||
976 | (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | | ||
977 | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | | ||
978 | (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | | ||
979 | (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | | ||
980 | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | | ||
981 | (1ull << IB_USER_VERBS_CMD_CREATE_AH) | | ||
982 | (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | | ||
983 | (1ull << IB_USER_VERBS_CMD_QUERY_AH) | | ||
984 | (1ull << IB_USER_VERBS_CMD_REG_MR) | | ||
985 | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | | ||
986 | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | | ||
987 | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | | ||
988 | (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | | ||
989 | (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | | ||
990 | (1ull << IB_USER_VERBS_CMD_POLL_CQ) | | ||
991 | (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | | ||
992 | (1ull << IB_USER_VERBS_CMD_CREATE_QP) | | ||
993 | (1ull << IB_USER_VERBS_CMD_QUERY_QP) | | ||
994 | (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | | ||
995 | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | | ||
996 | (1ull << IB_USER_VERBS_CMD_POST_SEND) | | ||
997 | (1ull << IB_USER_VERBS_CMD_POST_RECV) | | ||
998 | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | | ||
999 | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | | ||
1000 | (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | | ||
1001 | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | | ||
1002 | (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | | ||
1003 | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | | ||
1004 | (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); | ||
1005 | dev->node_type = IB_NODE_CA; | ||
1006 | dev->phys_port_cnt = 1; | ||
1007 | dev->dma_device = ipath_layer_get_device(dd); | ||
1008 | dev->class_dev.dev = dev->dma_device; | ||
1009 | dev->query_device = ipath_query_device; | ||
1010 | dev->modify_device = ipath_modify_device; | ||
1011 | dev->query_port = ipath_query_port; | ||
1012 | dev->modify_port = ipath_modify_port; | ||
1013 | dev->query_pkey = ipath_query_pkey; | ||
1014 | dev->query_gid = ipath_query_gid; | ||
1015 | dev->alloc_ucontext = ipath_alloc_ucontext; | ||
1016 | dev->dealloc_ucontext = ipath_dealloc_ucontext; | ||
1017 | dev->alloc_pd = ipath_alloc_pd; | ||
1018 | dev->dealloc_pd = ipath_dealloc_pd; | ||
1019 | dev->create_ah = ipath_create_ah; | ||
1020 | dev->destroy_ah = ipath_destroy_ah; | ||
1021 | dev->query_ah = ipath_query_ah; | ||
1022 | dev->create_srq = ipath_create_srq; | ||
1023 | dev->modify_srq = ipath_modify_srq; | ||
1024 | dev->query_srq = ipath_query_srq; | ||
1025 | dev->destroy_srq = ipath_destroy_srq; | ||
1026 | dev->create_qp = ipath_create_qp; | ||
1027 | dev->modify_qp = ipath_modify_qp; | ||
1028 | dev->query_qp = ipath_query_qp; | ||
1029 | dev->destroy_qp = ipath_destroy_qp; | ||
1030 | dev->post_send = ipath_post_send; | ||
1031 | dev->post_recv = ipath_post_receive; | ||
1032 | dev->post_srq_recv = ipath_post_srq_receive; | ||
1033 | dev->create_cq = ipath_create_cq; | ||
1034 | dev->destroy_cq = ipath_destroy_cq; | ||
1035 | dev->resize_cq = ipath_resize_cq; | ||
1036 | dev->poll_cq = ipath_poll_cq; | ||
1037 | dev->req_notify_cq = ipath_req_notify_cq; | ||
1038 | dev->get_dma_mr = ipath_get_dma_mr; | ||
1039 | dev->reg_phys_mr = ipath_reg_phys_mr; | ||
1040 | dev->reg_user_mr = ipath_reg_user_mr; | ||
1041 | dev->dereg_mr = ipath_dereg_mr; | ||
1042 | dev->alloc_fmr = ipath_alloc_fmr; | ||
1043 | dev->map_phys_fmr = ipath_map_phys_fmr; | ||
1044 | dev->unmap_fmr = ipath_unmap_fmr; | ||
1045 | dev->dealloc_fmr = ipath_dealloc_fmr; | ||
1046 | dev->attach_mcast = ipath_multicast_attach; | ||
1047 | dev->detach_mcast = ipath_multicast_detach; | ||
1048 | dev->process_mad = ipath_process_mad; | ||
1049 | |||
1050 | snprintf(dev->node_desc, sizeof(dev->node_desc), | ||
1051 | IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename); | ||
1052 | |||
1053 | ret = ib_register_device(dev); | ||
1054 | if (ret) | ||
1055 | goto err_reg; | ||
1056 | |||
1057 | if (ipath_verbs_register_sysfs(dev)) | ||
1058 | goto err_class; | ||
1059 | |||
1060 | ipath_layer_enable_timer(dd); | ||
1061 | |||
1062 | goto bail; | ||
1063 | |||
1064 | err_class: | ||
1065 | ib_unregister_device(dev); | ||
1066 | err_reg: | ||
1067 | kfree(idev->lk_table.table); | ||
1068 | err_lk: | ||
1069 | kfree(idev->qp_table.table); | ||
1070 | err_qp: | ||
1071 | ib_dealloc_device(dev); | ||
1072 | _VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n", | ||
1073 | unit, -ret); | ||
1074 | idev = NULL; | ||
1075 | |||
1076 | bail: | ||
1077 | return idev; | ||
1078 | } | ||
1079 | |||
1080 | static void ipath_unregister_ib_device(void *arg) | ||
1081 | { | ||
1082 | struct ipath_ibdev *dev = (struct ipath_ibdev *) arg; | ||
1083 | struct ib_device *ibdev = &dev->ibdev; | ||
1084 | |||
1085 | ipath_layer_disable_timer(dev->dd); | ||
1086 | |||
1087 | ib_unregister_device(ibdev); | ||
1088 | |||
1089 | if (!list_empty(&dev->pending[0]) || | ||
1090 | !list_empty(&dev->pending[1]) || | ||
1091 | !list_empty(&dev->pending[2])) | ||
1092 | _VERBS_ERROR("ipath%d pending list not empty!\n", | ||
1093 | dev->ib_unit); | ||
1094 | if (!list_empty(&dev->piowait)) | ||
1095 | _VERBS_ERROR("ipath%d piowait list not empty!\n", | ||
1096 | dev->ib_unit); | ||
1097 | if (!list_empty(&dev->rnrwait)) | ||
1098 | _VERBS_ERROR("ipath%d rnrwait list not empty!\n", | ||
1099 | dev->ib_unit); | ||
1100 | if (!ipath_mcast_tree_empty()) | ||
1101 | _VERBS_ERROR("ipath%d multicast table memory leak!\n", | ||
1102 | dev->ib_unit); | ||
1103 | /* | ||
1104 | * Note that ipath_unregister_ib_device() can be called before all | ||
1105 | * the QPs are destroyed! | ||
1106 | */ | ||
1107 | ipath_free_all_qps(&dev->qp_table); | ||
1108 | kfree(dev->qp_table.table); | ||
1109 | kfree(dev->lk_table.table); | ||
1110 | ib_dealloc_device(ibdev); | ||
1111 | } | ||
1112 | |||
1113 | static int __init ipath_verbs_init(void) | ||
1114 | { | ||
1115 | return ipath_verbs_register(ipath_register_ib_device, | ||
1116 | ipath_unregister_ib_device, | ||
1117 | ipath_ib_piobufavail, ipath_ib_rcv, | ||
1118 | ipath_ib_timer); | ||
1119 | } | ||
1120 | |||
1121 | static void __exit ipath_verbs_cleanup(void) | ||
1122 | { | ||
1123 | ipath_verbs_unregister(); | ||
1124 | } | ||
1125 | |||
1126 | static ssize_t show_rev(struct class_device *cdev, char *buf) | ||
1127 | { | ||
1128 | struct ipath_ibdev *dev = | ||
1129 | container_of(cdev, struct ipath_ibdev, ibdev.class_dev); | ||
1130 | u32 vendor, boardrev, majrev, minrev; | ||
1131 | |||
1132 | ipath_layer_query_device(dev->dd, &vendor, &boardrev, | ||
1133 | &majrev, &minrev); | ||
1134 | return sprintf(buf, "%d.%d\n", majrev, minrev); | ||
1135 | } | ||
1136 | |||
1137 | static ssize_t show_hca(struct class_device *cdev, char *buf) | ||
1138 | { | ||
1139 | struct ipath_ibdev *dev = | ||
1140 | container_of(cdev, struct ipath_ibdev, ibdev.class_dev); | ||
1141 | int ret; | ||
1142 | |||
1143 | ret = ipath_layer_get_boardname(dev->dd, buf, 128); | ||
1144 | if (ret < 0) | ||
1145 | goto bail; | ||
1146 | strcat(buf, "\n"); | ||
1147 | ret = strlen(buf); | ||
1148 | |||
1149 | bail: | ||
1150 | return ret; | ||
1151 | } | ||
1152 | |||
1153 | static ssize_t show_stats(struct class_device *cdev, char *buf) | ||
1154 | { | ||
1155 | struct ipath_ibdev *dev = | ||
1156 | container_of(cdev, struct ipath_ibdev, ibdev.class_dev); | ||
1157 | int i; | ||
1158 | int len; | ||
1159 | |||
1160 | len = sprintf(buf, | ||
1161 | "RC resends %d\n" | ||
1162 | "RC QACKs %d\n" | ||
1163 | "RC ACKs %d\n" | ||
1164 | "RC SEQ NAKs %d\n" | ||
1165 | "RC RDMA seq %d\n" | ||
1166 | "RC RNR NAKs %d\n" | ||
1167 | "RC OTH NAKs %d\n" | ||
1168 | "RC timeouts %d\n" | ||
1169 | "RC RDMA dup %d\n" | ||
1170 | "piobuf wait %d\n" | ||
1171 | "no piobuf %d\n" | ||
1172 | "PKT drops %d\n" | ||
1173 | "WQE errs %d\n", | ||
1174 | dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, | ||
1175 | dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, | ||
1176 | dev->n_other_naks, dev->n_timeouts, | ||
1177 | dev->n_rdma_dup_busy, dev->n_piowait, | ||
1178 | dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs); | ||
1179 | for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { | ||
1180 | const struct ipath_opcode_stats *si = &dev->opstats[i]; | ||
1181 | |||
1182 | if (!si->n_packets && !si->n_bytes) | ||
1183 | continue; | ||
1184 | len += sprintf(buf + len, "%02x %llu/%llu\n", i, | ||
1185 | (unsigned long long) si->n_packets, | ||
1186 | (unsigned long long) si->n_bytes); | ||
1187 | } | ||
1188 | return len; | ||
1189 | } | ||
1190 | |||
1191 | static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); | ||
1192 | static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); | ||
1193 | static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL); | ||
1194 | static CLASS_DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL); | ||
1195 | |||
1196 | static struct class_device_attribute *ipath_class_attributes[] = { | ||
1197 | &class_device_attr_hw_rev, | ||
1198 | &class_device_attr_hca_type, | ||
1199 | &class_device_attr_board_id, | ||
1200 | &class_device_attr_stats | ||
1201 | }; | ||
1202 | |||
1203 | static int ipath_verbs_register_sysfs(struct ib_device *dev) | ||
1204 | { | ||
1205 | int i; | ||
1206 | int ret; | ||
1207 | |||
1208 | for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) | ||
1209 | if (class_device_create_file(&dev->class_dev, | ||
1210 | ipath_class_attributes[i])) { | ||
1211 | ret = 1; | ||
1212 | goto bail; | ||
1213 | } | ||
1214 | |||
1215 | ret = 0; | ||
1216 | |||
1217 | bail: | ||
1218 | return ret; | ||
1219 | } | ||
1220 | |||
1221 | module_init(ipath_verbs_init); | ||
1222 | module_exit(ipath_verbs_cleanup); | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h new file mode 100644 index 000000000000..fcafbc7c9e71 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h | |||
@@ -0,0 +1,692 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #ifndef IPATH_VERBS_H | ||
34 | #define IPATH_VERBS_H | ||
35 | |||
36 | #include <linux/types.h> | ||
37 | #include <linux/spinlock.h> | ||
38 | #include <linux/kernel.h> | ||
39 | #include <linux/interrupt.h> | ||
40 | #include <rdma/ib_pack.h> | ||
41 | |||
42 | #include "ipath_layer.h" | ||
43 | #include "verbs_debug.h" | ||
44 | |||
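/*
 * QPN_MAX is the size of the 24-bit QP number space; QPNMAP_ENTRIES is
 * the number of PAGE_SIZE chunks needed for a bitmap with one bit per
 * possible QPN.
 */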
45 | #define QPN_MAX (1 << 24) | ||
46 | #define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) | ||
47 | |||
48 | /* | ||
49 | * Increment this value if any changes that break userspace ABI | ||
50 | * compatibility are made. | ||
51 | */ | ||
52 | #define IPATH_UVERBS_ABI_VERSION 1 | ||
53 | |||
54 | /* | ||
55 | * Define an ib_cq_notify value that is not valid so we know when CQ | ||
56 | * notifications are armed. | ||
57 | */ | ||
58 | #define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1) | ||
59 | |||
60 | #define IB_RNR_NAK 0x20 | ||
61 | #define IB_NAK_PSN_ERROR 0x60 | ||
62 | #define IB_NAK_INVALID_REQUEST 0x61 | ||
63 | #define IB_NAK_REMOTE_ACCESS_ERROR 0x62 | ||
64 | #define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 | ||
65 | #define IB_NAK_INVALID_RD_REQUEST 0x64 | ||
66 | |||
67 | #define IPATH_POST_SEND_OK 0x01 | ||
68 | #define IPATH_POST_RECV_OK 0x02 | ||
69 | #define IPATH_PROCESS_RECV_OK 0x04 | ||
70 | #define IPATH_PROCESS_SEND_OK 0x08 | ||
71 | |||
72 | /* IB Performance Manager status values */ | ||
73 | #define IB_PMA_SAMPLE_STATUS_DONE 0x00 | ||
74 | #define IB_PMA_SAMPLE_STATUS_STARTED 0x01 | ||
75 | #define IB_PMA_SAMPLE_STATUS_RUNNING 0x02 | ||
76 | |||
77 | /* Mandatory IB performance counter select values. */ | ||
78 | #define IB_PMA_PORT_XMIT_DATA __constant_htons(0x0001) | ||
79 | #define IB_PMA_PORT_RCV_DATA __constant_htons(0x0002) | ||
80 | #define IB_PMA_PORT_XMIT_PKTS __constant_htons(0x0003) | ||
81 | #define IB_PMA_PORT_RCV_PKTS __constant_htons(0x0004) | ||
82 | #define IB_PMA_PORT_XMIT_WAIT __constant_htons(0x0005) | ||
83 | |||
84 | struct ib_reth { | ||
85 | __be64 vaddr; | ||
86 | __be32 rkey; | ||
87 | __be32 length; | ||
88 | } __attribute__ ((packed)); | ||
89 | |||
90 | struct ib_atomic_eth { | ||
91 | __be64 vaddr; | ||
92 | __be32 rkey; | ||
93 | __be64 swap_data; | ||
94 | __be64 compare_data; | ||
95 | } __attribute__ ((packed)); | ||
96 | |||
97 | struct ipath_other_headers { | ||
98 | __be32 bth[3]; | ||
99 | union { | ||
100 | struct { | ||
101 | __be32 deth[2]; | ||
102 | __be32 imm_data; | ||
103 | } ud; | ||
104 | struct { | ||
105 | struct ib_reth reth; | ||
106 | __be32 imm_data; | ||
107 | } rc; | ||
108 | struct { | ||
109 | __be32 aeth; | ||
110 | __be64 atomic_ack_eth; | ||
111 | } at; | ||
112 | __be32 imm_data; | ||
113 | __be32 aeth; | ||
114 | struct ib_atomic_eth atomic_eth; | ||
115 | } u; | ||
116 | } __attribute__ ((packed)); | ||
117 | |||
118 | /* | ||
119 | * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes | ||
120 | * long (72 w/ imm_data). Only the first 56 bytes of the IB header | ||
121 | * will be in the eager header buffer. The remaining 12 or 16 bytes | ||
122 | * are in the data buffer. | ||
123 | */ | ||
124 | struct ipath_ib_header { | ||
125 | __be16 lrh[4]; | ||
126 | union { | ||
127 | struct { | ||
128 | struct ib_grh grh; | ||
129 | struct ipath_other_headers oth; | ||
130 | } l; | ||
131 | struct ipath_other_headers oth; | ||
132 | } u; | ||
133 | } __attribute__ ((packed)); | ||
134 | |||
135 | /* | ||
136 | * There is one struct ipath_mcast for each multicast GID. | ||
137 | * All attached QPs are then stored as a list of | ||
138 | * struct ipath_mcast_qp. | ||
139 | */ | ||
140 | struct ipath_mcast_qp { | ||
141 | struct list_head list; | ||
142 | struct ipath_qp *qp; | ||
143 | }; | ||
144 | |||
145 | struct ipath_mcast { | ||
146 | struct rb_node rb_node; | ||
147 | union ib_gid mgid; | ||
148 | struct list_head qp_list; | ||
149 | wait_queue_head_t wait; | ||
150 | atomic_t refcount; | ||
151 | }; | ||
152 | |||
153 | /* Memory region */ | ||
154 | struct ipath_mr { | ||
155 | struct ib_mr ibmr; | ||
156 | struct ipath_mregion mr; /* must be last */ | ||
157 | }; | ||
158 | |||
159 | /* Fast memory region */ | ||
160 | struct ipath_fmr { | ||
161 | struct ib_fmr ibfmr; | ||
162 | u8 page_shift; | ||
163 | struct ipath_mregion mr; /* must be last */ | ||
164 | }; | ||
165 | |||
166 | /* Protection domain */ | ||
167 | struct ipath_pd { | ||
168 | struct ib_pd ibpd; | ||
169 | int user; /* non-zero if created from user space */ | ||
170 | }; | ||
171 | |||
172 | /* Address Handle */ | ||
173 | struct ipath_ah { | ||
174 | struct ib_ah ibah; | ||
175 | struct ib_ah_attr attr; | ||
176 | }; | ||
177 | |||
178 | /* | ||
179 | * Quick description of our CQ/QP locking scheme: | ||
180 | * | ||
181 | * We have one global lock that protects dev->cq/qp_table. Each | ||
182 | * struct ipath_cq/qp also has its own lock. An individual qp lock | ||
183 | * may be taken inside of an individual cq lock. Both cqs attached to | ||
184 | * a qp may be locked, with the send cq locked first. No other | ||
185 | * nesting should be done. | ||
186 | * | ||
187 | * Each struct ipath_cq/qp also has an atomic_t ref count. The | ||
188 | * pointer from the cq/qp_table to the struct counts as one reference. | ||
189 | * This reference also is good for access through the consumer API, so | ||
190 | * modifying the CQ/QP etc doesn't need to take another reference. | ||
191 | * Access because of a completion being polled does need a reference. | ||
192 | * | ||
193 | * Finally, each struct ipath_cq/qp has a wait_queue_head_t for the | ||
194 | * destroy function to sleep on. | ||
195 | * | ||
196 | * This means that access from the consumer API requires nothing but | ||
197 | * taking the struct's lock. | ||
198 | * | ||
199 | * Access because of a completion event should go as follows: | ||
200 | * - lock cq/qp_table and look up struct | ||
201 | * - increment ref count in struct | ||
202 | * - drop cq/qp_table lock | ||
203 | * - lock struct, do your thing, and unlock struct | ||
204 | * - decrement ref count; if zero, wake up waiters | ||
205 | * | ||
206 | * To destroy a CQ/QP, we can do the following: | ||
207 | * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock | ||
208 | * - decrement ref count | ||
209 | * - wait_event until ref count is zero | ||
210 | * | ||
211 | * It is the consumer's responsibility to make sure that no QP | ||
212 | * operations (WQE posting or state modification) are pending when the | ||
213 | * QP is destroyed. Also, the consumer must make sure that calls to | ||
214 | * qp_modify are serialized. | ||
215 | * | ||
216 | * Possible optimizations (wait for profile data to see if/where we | ||
217 | * have locks bouncing between CPUs): | ||
218 | * - split cq/qp table lock into n separate (cache-aligned) locks, | ||
219 | * indexed (say) by the page in the table | ||
220 | */ | ||
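/*
 * A minimal sketch of the completion-event pattern described above
 * (lookup() stands in for the actual table lookup):
 *
 *	spin_lock(&dev->qp_table.lock);
 *	qp = lookup(&dev->qp_table, qpn);
 *	if (qp)
 *		atomic_inc(&qp->refcount);
 *	spin_unlock(&dev->qp_table.lock);
 *	... lock the struct, do the work, unlock the struct ...
 *	if (atomic_dec_and_test(&qp->refcount))
 *		wake_up(&qp->wait);
 */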
221 | |||
222 | struct ipath_cq { | ||
223 | struct ib_cq ibcq; | ||
224 | struct tasklet_struct comptask; | ||
225 | spinlock_t lock; | ||
226 | u8 notify; | ||
227 | u8 triggered; | ||
228 | u32 head; /* new records added to the head */ | ||
229 | u32 tail; /* poll_cq() reads from here. */ | ||
230 | struct ib_wc *queue; /* this is actually ibcq.cqe + 1 */ | ||
231 | }; | ||
232 | |||
233 | /* | ||
234 | * Send work request queue entry. | ||
235 | * The size of the sg_list is determined when the QP is created and stored | ||
236 | * in qp->s_max_sge. | ||
237 | */ | ||
238 | struct ipath_swqe { | ||
239 | struct ib_send_wr wr; /* don't use wr.sg_list */ | ||
240 | u32 psn; /* first packet sequence number */ | ||
241 | u32 lpsn; /* last packet sequence number */ | ||
242 | u32 ssn; /* send sequence number */ | ||
243 | u32 length; /* total length of data in sg_list */ | ||
244 | struct ipath_sge sg_list[0]; | ||
245 | }; | ||
246 | |||
247 | /* | ||
248 | * Receive work request queue entry. | ||
249 | * The size of the sg_list is determined when the QP is created and stored | ||
250 | * in qp->r_max_sge. | ||
251 | */ | ||
252 | struct ipath_rwqe { | ||
253 | u64 wr_id; | ||
254 | u32 length; /* total length of data in sg_list */ | ||
255 | u8 num_sge; | ||
256 | struct ipath_sge sg_list[0]; | ||
257 | }; | ||
258 | |||
259 | struct ipath_rq { | ||
260 | spinlock_t lock; | ||
261 | u32 head; /* new work requests posted to the head */ | ||
262 | u32 tail; /* receive processing pulls work requests from here */ | ||
263 | u32 size; /* size of RWQE array */ | ||
264 | u8 max_sge; | ||
265 | struct ipath_rwqe *wq; /* RWQE array */ | ||
266 | }; | ||
267 | |||
268 | struct ipath_srq { | ||
269 | struct ib_srq ibsrq; | ||
270 | struct ipath_rq rq; | ||
271 | /* send signal when number of RWQEs < limit */ | ||
272 | u32 limit; | ||
273 | }; | ||
274 | |||
275 | /* | ||
276 | * Variables prefixed with s_ are for the requester (sender). | ||
277 | * Variables prefixed with r_ are for the responder (receiver). | ||
278 | * Variables prefixed with ack_ are for responder replies. | ||
279 | * | ||
280 | * Common variables are protected by both r_rq.lock and s_lock in that order | ||
281 | * which only happens in modify_qp() or changing the QP 'state'. | ||
282 | */ | ||
283 | struct ipath_qp { | ||
284 | struct ib_qp ibqp; | ||
285 | struct ipath_qp *next; /* link list for QPN hash table */ | ||
286 | struct list_head piowait; /* link for wait PIO buf */ | ||
287 | struct list_head timerwait; /* link for waiting for timeouts */ | ||
288 | struct ib_ah_attr remote_ah_attr; | ||
289 | struct ipath_ib_header s_hdr; /* next packet header to send */ | ||
290 | atomic_t refcount; | ||
291 | wait_queue_head_t wait; | ||
292 | struct tasklet_struct s_task; | ||
293 | struct ipath_sge_state *s_cur_sge; | ||
294 | struct ipath_sge_state s_sge; /* current send request data */ | ||
295 | /* current RDMA read send data */ | ||
296 | struct ipath_sge_state s_rdma_sge; | ||
297 | struct ipath_sge_state r_sge; /* current receive data */ | ||
298 | spinlock_t s_lock; | ||
299 | unsigned long s_flags; | ||
300 | u32 s_hdrwords; /* size of s_hdr in 32 bit words */ | ||
301 | u32 s_cur_size; /* size of send packet in bytes */ | ||
302 | u32 s_len; /* total length of s_sge */ | ||
303 | u32 s_rdma_len; /* total length of s_rdma_sge */ | ||
304 | u32 s_next_psn; /* PSN for next request */ | ||
305 | u32 s_last_psn; /* last response PSN processed */ | ||
306 | u32 s_psn; /* current packet sequence number */ | ||
307 | u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ | ||
308 | u32 s_ack_psn; /* PSN for next ACK or RDMA_READ */ | ||
309 | u64 s_ack_atomic; /* data for atomic ACK */ | ||
310 | u64 r_wr_id; /* ID for current receive WQE */ | ||
311 | u64 r_atomic_data; /* data for last atomic op */ | ||
312 | u32 r_atomic_psn; /* PSN of last atomic op */ | ||
313 | u32 r_len; /* total length of r_sge */ | ||
314 | u32 r_rcv_len; /* receive data len processed */ | ||
315 | u32 r_psn; /* expected rcv packet sequence number */ | ||
316 | u8 state; /* QP state */ | ||
317 | u8 s_state; /* opcode of last packet sent */ | ||
318 | u8 s_ack_state; /* opcode of packet to ACK */ | ||
319 | u8 s_nak_state; /* non-zero if NAK is pending */ | ||
320 | u8 r_state; /* opcode of last packet received */ | ||
321 | u8 r_reuse_sge; /* for UC receive errors */ | ||
322 | u8 r_sge_inx; /* current index into sg_list */ | ||
323 | u8 s_max_sge; /* size of s_wq->sg_list */ | ||
324 | u8 qp_access_flags; | ||
325 | u8 s_retry_cnt; /* number of times to retry */ | ||
326 | u8 s_rnr_retry_cnt; | ||
327 | u8 s_min_rnr_timer; | ||
328 | u8 s_retry; /* requester retry counter */ | ||
329 | u8 s_rnr_retry; /* requester RNR retry counter */ | ||
330 | u8 s_pkey_index; /* PKEY index to use */ | ||
331 | enum ib_mtu path_mtu; | ||
332 | atomic_t msn; /* message sequence number */ | ||
333 | u32 remote_qpn; | ||
334 | u32 qkey; /* QKEY for this QP (for UD or RD) */ | ||
335 | u32 s_size; /* send work queue size */ | ||
336 | u32 s_head; /* new entries added here */ | ||
337 | u32 s_tail; /* next entry to process */ | ||
338 | u32 s_cur; /* current work queue entry */ | ||
339 | u32 s_last; /* last un-ACK'ed entry */ | ||
340 | u32 s_ssn; /* SSN of tail entry */ | ||
341 | u32 s_lsn; /* limit sequence number (credit) */ | ||
342 | struct ipath_swqe *s_wq; /* send work queue */ | ||
343 | struct ipath_rq r_rq; /* receive work queue */ | ||
344 | }; | ||
345 | |||
346 | /* | ||
347 | * Bit definitions for s_flags. | ||
348 | */ | ||
349 | #define IPATH_S_BUSY 0 | ||
350 | #define IPATH_S_SIGNAL_REQ_WR 1 | ||
351 | |||
352 | /* | ||
353 | * Since struct ipath_swqe is not a fixed size, we can't simply index into | ||
354 | * struct ipath_qp.s_wq. This function does the array index computation. | ||
355 | */ | ||
356 | static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp, | ||
357 | unsigned n) | ||
358 | { | ||
359 | return (struct ipath_swqe *)((char *)qp->s_wq + | ||
360 | (sizeof(struct ipath_swqe) + | ||
361 | qp->s_max_sge * | ||
362 | sizeof(struct ipath_sge)) * n); | ||
363 | } | ||
364 | |||
365 | /* | ||
366 | * Since struct ipath_rwqe is not a fixed size, we can't simply index into | ||
367 | * struct ipath_rq.wq. This function does the array index computation. | ||
368 | */ | ||
369 | static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq, | ||
370 | unsigned n) | ||
371 | { | ||
372 | return (struct ipath_rwqe *) | ||
373 | ((char *) rq->wq + | ||
374 | (sizeof(struct ipath_rwqe) + | ||
375 | rq->max_sge * sizeof(struct ipath_sge)) * n); | ||
376 | } | ||
377 | |||
378 | /* | ||
379 | * QPN-map pages start out as NULL, they get allocated upon | ||
380 | * first use and are never deallocated. This way, | ||
381 | * large bitmaps are not allocated unless large numbers of QPs are used. | ||
382 | */ | ||
383 | struct qpn_map { | ||
384 | atomic_t n_free; | ||
385 | void *page; | ||
386 | }; | ||
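A minimal sketch of the allocate-on-first-use behaviour described above. The helper name, the error handling, and the bits-per-page figure are assumptions for illustration, not the driver's actual allocator.

/* Hypothetical sketch: allocate a QPN bitmap page the first time it is
 * needed; once allocated it is kept for the lifetime of the table. */
static int example_get_map_page(struct qpn_map *map)
{
	unsigned long page = get_zeroed_page(GFP_KERNEL);

	if (!page)
		return -ENOMEM;
	/* A real implementation must re-check map->page under the QPN
	 * table lock so a racing allocator's page is not leaked. */
	map->page = (void *) page;
	atomic_set(&map->n_free, PAGE_SIZE * 8);	/* one bit per QPN */
	return 0;
}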
387 | |||
388 | struct ipath_qp_table { | ||
389 | spinlock_t lock; | ||
390 | u32 last; /* last QP number allocated */ | ||
391 | u32 max; /* size of the hash table */ | ||
392 | u32 nmaps; /* size of the map table */ | ||
393 | struct ipath_qp **table; | ||
394 | /* bit map of free numbers */ | ||
395 | struct qpn_map map[QPNMAP_ENTRIES]; | ||
396 | }; | ||
397 | |||
398 | struct ipath_lkey_table { | ||
399 | spinlock_t lock; | ||
400 | u32 next; /* next unused index (speeds search) */ | ||
401 | u32 gen; /* generation count */ | ||
402 | u32 max; /* size of the table */ | ||
403 | struct ipath_mregion **table; | ||
404 | }; | ||
405 | |||
406 | struct ipath_opcode_stats { | ||
407 | u64 n_packets; /* number of packets */ | ||
408 | u64 n_bytes; /* total number of bytes */ | ||
409 | }; | ||
410 | |||
411 | struct ipath_ibdev { | ||
412 | struct ib_device ibdev; | ||
413 | struct list_head dev_list; | ||
414 | struct ipath_devdata *dd; | ||
415 | int ib_unit; /* This is the device number */ | ||
416 | u16 sm_lid; /* in host order */ | ||
417 | u8 sm_sl; | ||
418 | u8 mkeyprot_resv_lmc; | ||
419 | /* non-zero when timer is set */ | ||
420 | unsigned long mkey_lease_timeout; | ||
421 | |||
422 | /* The following fields are really per port. */ | ||
423 | struct ipath_qp_table qp_table; | ||
424 | struct ipath_lkey_table lk_table; | ||
425 | struct list_head pending[3]; /* FIFO of QPs waiting for ACKs */ | ||
426 | struct list_head piowait; /* list for wait PIO buf */ | ||
427 | /* list of QPs waiting for RNR timer */ | ||
428 | struct list_head rnrwait; | ||
429 | spinlock_t pending_lock; | ||
430 | __be64 sys_image_guid; /* in network order */ | ||
431 | __be64 gid_prefix; /* in network order */ | ||
432 | __be64 mkey; | ||
433 | u64 ipath_sword; /* total dwords sent (sample result) */ | ||
434 | u64 ipath_rword; /* total dwords received (sample result) */ | ||
435 | u64 ipath_spkts; /* total packets sent (sample result) */ | ||
436 | u64 ipath_rpkts; /* total packets received (sample result) */ | ||
437 | /* # of ticks no data sent (sample result) */ | ||
438 | u64 ipath_xmit_wait; | ||
439 | u64 rcv_errors; /* # of packets with SW detected rcv errs */ | ||
440 | u64 n_unicast_xmit; /* total unicast packets sent */ | ||
441 | u64 n_unicast_rcv; /* total unicast packets received */ | ||
442 | u64 n_multicast_xmit; /* total multicast packets sent */ | ||
443 | u64 n_multicast_rcv; /* total multicast packets received */ | ||
444 | u64 n_symbol_error_counter; /* starting count for PMA */ | ||
445 | u64 n_link_error_recovery_counter; /* starting count for PMA */ | ||
446 | u64 n_link_downed_counter; /* starting count for PMA */ | ||
447 | u64 n_port_rcv_errors; /* starting count for PMA */ | ||
448 | u64 n_port_rcv_remphys_errors; /* starting count for PMA */ | ||
449 | u64 n_port_xmit_discards; /* starting count for PMA */ | ||
450 | u64 n_port_xmit_data; /* starting count for PMA */ | ||
451 | u64 n_port_rcv_data; /* starting count for PMA */ | ||
452 | u64 n_port_xmit_packets; /* starting count for PMA */ | ||
453 | u64 n_port_rcv_packets; /* starting count for PMA */ | ||
454 | u32 n_pkey_violations; /* starting count for PMA */ | ||
455 | u32 n_rc_resends; | ||
456 | u32 n_rc_acks; | ||
457 | u32 n_rc_qacks; | ||
458 | u32 n_seq_naks; | ||
459 | u32 n_rdma_seq; | ||
460 | u32 n_rnr_naks; | ||
461 | u32 n_other_naks; | ||
462 | u32 n_timeouts; | ||
463 | u32 n_pkt_drops; | ||
464 | u32 n_wqe_errs; | ||
465 | u32 n_rdma_dup_busy; | ||
466 | u32 n_piowait; | ||
467 | u32 n_no_piobuf; | ||
468 | u32 port_cap_flags; | ||
469 | u32 pma_sample_start; | ||
470 | u32 pma_sample_interval; | ||
471 | __be16 pma_counter_select[5]; | ||
472 | u16 pma_tag; | ||
473 | u16 qkey_violations; | ||
474 | u16 mkey_violations; | ||
475 | u16 mkey_lease_period; | ||
476 | u16 pending_index; /* which pending queue is active */ | ||
477 | u8 pma_sample_status; | ||
478 | u8 subnet_timeout; | ||
479 | u8 link_width_enabled; | ||
480 | u8 vl_high_limit; | ||
481 | struct ipath_opcode_stats opstats[128]; | ||
482 | }; | ||
483 | |||
484 | struct ipath_ucontext { | ||
485 | struct ib_ucontext ibucontext; | ||
486 | }; | ||
487 | |||
488 | static inline struct ipath_mr *to_imr(struct ib_mr *ibmr) | ||
489 | { | ||
490 | return container_of(ibmr, struct ipath_mr, ibmr); | ||
491 | } | ||
492 | |||
493 | static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr) | ||
494 | { | ||
495 | return container_of(ibfmr, struct ipath_fmr, ibfmr); | ||
496 | } | ||
497 | |||
498 | static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd) | ||
499 | { | ||
500 | return container_of(ibpd, struct ipath_pd, ibpd); | ||
501 | } | ||
502 | |||
503 | static inline struct ipath_ah *to_iah(struct ib_ah *ibah) | ||
504 | { | ||
505 | return container_of(ibah, struct ipath_ah, ibah); | ||
506 | } | ||
507 | |||
508 | static inline struct ipath_cq *to_icq(struct ib_cq *ibcq) | ||
509 | { | ||
510 | return container_of(ibcq, struct ipath_cq, ibcq); | ||
511 | } | ||
512 | |||
513 | static inline struct ipath_srq *to_isrq(struct ib_srq *ibsrq) | ||
514 | { | ||
515 | return container_of(ibsrq, struct ipath_srq, ibsrq); | ||
516 | } | ||
517 | |||
518 | static inline struct ipath_qp *to_iqp(struct ib_qp *ibqp) | ||
519 | { | ||
520 | return container_of(ibqp, struct ipath_qp, ibqp); | ||
521 | } | ||
522 | |||
523 | static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev) | ||
524 | { | ||
525 | return container_of(ibdev, struct ipath_ibdev, ibdev); | ||
526 | } | ||
527 | |||
528 | int ipath_process_mad(struct ib_device *ibdev, | ||
529 | int mad_flags, | ||
530 | u8 port_num, | ||
531 | struct ib_wc *in_wc, | ||
532 | struct ib_grh *in_grh, | ||
533 | struct ib_mad *in_mad, struct ib_mad *out_mad); | ||
534 | |||
535 | static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext | ||
536 | *ibucontext) | ||
537 | { | ||
538 | return container_of(ibucontext, struct ipath_ucontext, ibucontext); | ||
539 | } | ||
540 | |||
541 | /* | ||
542 | * Compare the lower 24 bits of the two values. | ||
543 | * Returns an integer less than, equal to, or greater than zero. | ||
544 | */ | ||
545 | static inline int ipath_cmp24(u32 a, u32 b) | ||
546 | { | ||
547 | return (((int) a) - ((int) b)) << 8; | ||
548 | } | ||
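Because the 24-bit difference ends up in the top bits of a signed int, the comparison wraps correctly modulo 2^24, which is what packet-sequence-number comparisons need. A few illustrative values (not taken from the driver):

/* Illustrative only: how the shifted subtraction behaves at the wrap. */
ipath_cmp24(5, 5);		  /* == 0				   */
ipath_cmp24(2, 3);		  /* <  0: 2 precedes 3			   */
ipath_cmp24(0x000001, 0xffffff);  /* >  0: 1 follows 0xffffff across wrap */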
549 | |||
550 | struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid); | ||
551 | |||
552 | int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); | ||
553 | |||
554 | int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); | ||
555 | |||
556 | int ipath_mcast_tree_empty(void); | ||
557 | |||
558 | __be32 ipath_compute_aeth(struct ipath_qp *qp); | ||
559 | |||
560 | struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn); | ||
561 | |||
562 | struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, | ||
563 | struct ib_qp_init_attr *init_attr, | ||
564 | struct ib_udata *udata); | ||
565 | |||
566 | int ipath_destroy_qp(struct ib_qp *ibqp); | ||
567 | |||
568 | int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, | ||
569 | int attr_mask); | ||
570 | |||
571 | int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, | ||
572 | int attr_mask, struct ib_qp_init_attr *init_attr); | ||
573 | |||
574 | void ipath_free_all_qps(struct ipath_qp_table *qpt); | ||
575 | |||
576 | int ipath_init_qp_table(struct ipath_ibdev *idev, int size); | ||
577 | |||
578 | void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc); | ||
579 | |||
580 | void ipath_get_credit(struct ipath_qp *qp, u32 aeth); | ||
581 | |||
582 | void ipath_do_rc_send(unsigned long data); | ||
583 | |||
584 | void ipath_do_uc_send(unsigned long data); | ||
585 | |||
586 | void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); | ||
587 | |||
588 | int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, | ||
589 | u32 len, u64 vaddr, u32 rkey, int acc); | ||
590 | |||
591 | int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge, | ||
592 | struct ib_sge *sge, int acc); | ||
593 | |||
594 | void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length); | ||
595 | |||
596 | void ipath_skip_sge(struct ipath_sge_state *ss, u32 length); | ||
597 | |||
598 | int ipath_post_rc_send(struct ipath_qp *qp, struct ib_send_wr *wr); | ||
599 | |||
600 | void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | ||
601 | int has_grh, void *data, u32 tlen, struct ipath_qp *qp); | ||
602 | |||
603 | void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | ||
604 | int has_grh, void *data, u32 tlen, struct ipath_qp *qp); | ||
605 | |||
606 | void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc); | ||
607 | |||
608 | int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr); | ||
609 | |||
610 | void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, | ||
611 | int has_grh, void *data, u32 tlen, struct ipath_qp *qp); | ||
612 | |||
613 | int ipath_alloc_lkey(struct ipath_lkey_table *rkt, | ||
614 | struct ipath_mregion *mr); | ||
615 | |||
616 | void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey); | ||
617 | |||
618 | int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge, | ||
619 | struct ib_sge *sge, int acc); | ||
620 | |||
621 | int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, | ||
622 | u32 len, u64 vaddr, u32 rkey, int acc); | ||
623 | |||
624 | int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, | ||
625 | struct ib_recv_wr **bad_wr); | ||
626 | |||
627 | struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, | ||
628 | struct ib_srq_init_attr *srq_init_attr, | ||
629 | struct ib_udata *udata); | ||
630 | |||
631 | int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, | ||
632 | enum ib_srq_attr_mask attr_mask); | ||
633 | |||
634 | int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); | ||
635 | |||
636 | int ipath_destroy_srq(struct ib_srq *ibsrq); | ||
637 | |||
638 | void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); | ||
639 | |||
640 | int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); | ||
641 | |||
642 | struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, | ||
643 | struct ib_ucontext *context, | ||
644 | struct ib_udata *udata); | ||
645 | |||
646 | int ipath_destroy_cq(struct ib_cq *ibcq); | ||
647 | |||
648 | int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify); | ||
649 | |||
650 | int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); | ||
651 | |||
652 | struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc); | ||
653 | |||
654 | struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd, | ||
655 | struct ib_phys_buf *buffer_list, | ||
656 | int num_phys_buf, int acc, u64 *iova_start); | ||
657 | |||
658 | struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, | ||
659 | int mr_access_flags, | ||
660 | struct ib_udata *udata); | ||
661 | |||
662 | int ipath_dereg_mr(struct ib_mr *ibmr); | ||
663 | |||
664 | struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags, | ||
665 | struct ib_fmr_attr *fmr_attr); | ||
666 | |||
667 | int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list, | ||
668 | int list_len, u64 iova); | ||
669 | |||
670 | int ipath_unmap_fmr(struct list_head *fmr_list); | ||
671 | |||
672 | int ipath_dealloc_fmr(struct ib_fmr *ibfmr); | ||
673 | |||
674 | void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev); | ||
675 | |||
676 | void ipath_insert_rnr_queue(struct ipath_qp *qp); | ||
677 | |||
678 | int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only); | ||
679 | |||
680 | void ipath_ruc_loopback(struct ipath_qp *sqp, struct ib_wc *wc); | ||
681 | |||
682 | extern const enum ib_wc_opcode ib_ipath_wc_opcode[]; | ||
683 | |||
684 | extern const u8 ipath_cvt_physportstate[]; | ||
685 | |||
686 | extern const int ib_ipath_state_ops[]; | ||
687 | |||
688 | extern unsigned int ib_ipath_lkey_table_size; | ||
689 | |||
690 | extern const u32 ib_ipath_rnr_table[]; | ||
691 | |||
692 | #endif /* IPATH_VERBS_H */ | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c new file mode 100644 index 000000000000..10b31d2c4f20 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c | |||
@@ -0,0 +1,333 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/list.h> | ||
34 | #include <linux/rcupdate.h> | ||
35 | |||
36 | #include "ipath_verbs.h" | ||
37 | |||
38 | /* | ||
39 | * Global table of GID to attached QPs. | ||
40 | * The table is global to all ipath devices since a send from one QP/device | ||
41 | * needs to be locally routed to any locally attached QPs on the same | ||
42 | * or different device. | ||
43 | */ | ||
44 | static struct rb_root mcast_tree; | ||
45 | static DEFINE_SPINLOCK(mcast_lock); | ||
46 | |||
47 | /** | ||
48 | * ipath_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct | ||
49 | * @qp: the QP to link | ||
50 | */ | ||
51 | static struct ipath_mcast_qp *ipath_mcast_qp_alloc(struct ipath_qp *qp) | ||
52 | { | ||
53 | struct ipath_mcast_qp *mqp; | ||
54 | |||
55 | mqp = kmalloc(sizeof *mqp, GFP_KERNEL); | ||
56 | if (!mqp) | ||
57 | goto bail; | ||
58 | |||
59 | mqp->qp = qp; | ||
60 | atomic_inc(&qp->refcount); | ||
61 | |||
62 | bail: | ||
63 | return mqp; | ||
64 | } | ||
65 | |||
66 | static void ipath_mcast_qp_free(struct ipath_mcast_qp *mqp) | ||
67 | { | ||
68 | struct ipath_qp *qp = mqp->qp; | ||
69 | |||
70 | /* Notify ipath_destroy_qp() if it is waiting. */ | ||
71 | if (atomic_dec_and_test(&qp->refcount)) | ||
72 | wake_up(&qp->wait); | ||
73 | |||
74 | kfree(mqp); | ||
75 | } | ||
76 | |||
77 | /** | ||
78 | * ipath_mcast_alloc - allocate the multicast GID structure | ||
79 | * @mgid: the multicast GID | ||
80 | * | ||
81 | * A list of QPs will be attached to this structure. | ||
82 | */ | ||
83 | static struct ipath_mcast *ipath_mcast_alloc(union ib_gid *mgid) | ||
84 | { | ||
85 | struct ipath_mcast *mcast; | ||
86 | |||
87 | mcast = kmalloc(sizeof *mcast, GFP_KERNEL); | ||
88 | if (!mcast) | ||
89 | goto bail; | ||
90 | |||
91 | mcast->mgid = *mgid; | ||
92 | INIT_LIST_HEAD(&mcast->qp_list); | ||
93 | init_waitqueue_head(&mcast->wait); | ||
94 | atomic_set(&mcast->refcount, 0); | ||
95 | |||
96 | bail: | ||
97 | return mcast; | ||
98 | } | ||
99 | |||
100 | static void ipath_mcast_free(struct ipath_mcast *mcast) | ||
101 | { | ||
102 | struct ipath_mcast_qp *p, *tmp; | ||
103 | |||
104 | list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) | ||
105 | ipath_mcast_qp_free(p); | ||
106 | |||
107 | kfree(mcast); | ||
108 | } | ||
109 | |||
110 | /** | ||
111 | * ipath_mcast_find - search the global table for the given multicast GID | ||
112 | * @mgid: the multicast GID to search for | ||
113 | * | ||
114 | * Returns NULL if not found. | ||
115 | * | ||
116 | * The caller is responsible for decrementing the reference count if found. | ||
117 | */ | ||
118 | struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid) | ||
119 | { | ||
120 | struct rb_node *n; | ||
121 | unsigned long flags; | ||
122 | struct ipath_mcast *mcast; | ||
123 | |||
124 | spin_lock_irqsave(&mcast_lock, flags); | ||
125 | n = mcast_tree.rb_node; | ||
126 | while (n) { | ||
127 | int ret; | ||
128 | |||
129 | mcast = rb_entry(n, struct ipath_mcast, rb_node); | ||
130 | |||
131 | ret = memcmp(mgid->raw, mcast->mgid.raw, | ||
132 | sizeof(union ib_gid)); | ||
133 | if (ret < 0) | ||
134 | n = n->rb_left; | ||
135 | else if (ret > 0) | ||
136 | n = n->rb_right; | ||
137 | else { | ||
138 | atomic_inc(&mcast->refcount); | ||
139 | spin_unlock_irqrestore(&mcast_lock, flags); | ||
140 | goto bail; | ||
141 | } | ||
142 | } | ||
143 | spin_unlock_irqrestore(&mcast_lock, flags); | ||
144 | |||
145 | mcast = NULL; | ||
146 | |||
147 | bail: | ||
148 | return mcast; | ||
149 | } | ||
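A hedged sketch of how a receive path might use ipath_mcast_find(). The dispatch function and call site are assumptions; the reference handling mirrors what ipath_multicast_detach() below waits for.

/* Sketch only: deliver a packet to every QP attached to a multicast GID. */
static void example_mcast_deliver(union ib_gid *mgid)
{
	struct ipath_mcast *mcast = ipath_mcast_find(mgid);
	struct ipath_mcast_qp *p;

	if (!mcast)
		return;				/* nobody attached */

	/* The reference taken by ipath_mcast_find() keeps the list alive
	 * while we walk it; detach waits for it to drop before freeing. */
	list_for_each_entry_rcu(p, &mcast->qp_list, list) {
		/* ... hand the packet to p->qp ... */
	}

	/* Drop our reference and notify a waiting ipath_multicast_detach(). */
	if (atomic_dec_return(&mcast->refcount) <= 1)
		wake_up(&mcast->wait);
}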
150 | |||
151 | /** | ||
152 | * ipath_mcast_add - insert mcast GID into table and attach QP struct | ||
153 | * @mcast: the mcast GID table | ||
154 | * @mqp: the QP to attach | ||
155 | * | ||
156 | * Return zero if both were added. Return EEXIST if the GID was already in | ||
157 | * the table but the QP was added. Return ESRCH if the QP was already | ||
158 | * attached and neither structure was added. | ||
159 | */ | ||
160 | static int ipath_mcast_add(struct ipath_mcast *mcast, | ||
161 | struct ipath_mcast_qp *mqp) | ||
162 | { | ||
163 | struct rb_node **n = &mcast_tree.rb_node; | ||
164 | struct rb_node *pn = NULL; | ||
165 | unsigned long flags; | ||
166 | int ret; | ||
167 | |||
168 | spin_lock_irqsave(&mcast_lock, flags); | ||
169 | |||
170 | while (*n) { | ||
171 | struct ipath_mcast *tmcast; | ||
172 | struct ipath_mcast_qp *p; | ||
173 | |||
174 | pn = *n; | ||
175 | tmcast = rb_entry(pn, struct ipath_mcast, rb_node); | ||
176 | |||
177 | ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw, | ||
178 | sizeof(union ib_gid)); | ||
179 | if (ret < 0) { | ||
180 | n = &pn->rb_left; | ||
181 | continue; | ||
182 | } | ||
183 | if (ret > 0) { | ||
184 | n = &pn->rb_right; | ||
185 | continue; | ||
186 | } | ||
187 | |||
188 | /* Search the QP list to see if this is already there. */ | ||
189 | list_for_each_entry_rcu(p, &tmcast->qp_list, list) { | ||
190 | if (p->qp == mqp->qp) { | ||
191 | spin_unlock_irqrestore(&mcast_lock, flags); | ||
192 | ret = ESRCH; | ||
193 | goto bail; | ||
194 | } | ||
195 | } | ||
196 | list_add_tail_rcu(&mqp->list, &tmcast->qp_list); | ||
197 | spin_unlock_irqrestore(&mcast_lock, flags); | ||
198 | ret = EEXIST; | ||
199 | goto bail; | ||
200 | } | ||
201 | |||
202 | list_add_tail_rcu(&mqp->list, &mcast->qp_list); | ||
203 | |||
204 | atomic_inc(&mcast->refcount); | ||
205 | rb_link_node(&mcast->rb_node, pn, n); | ||
206 | rb_insert_color(&mcast->rb_node, &mcast_tree); | ||
207 | |||
208 | spin_unlock_irqrestore(&mcast_lock, flags); | ||
209 | |||
210 | ret = 0; | ||
211 | |||
212 | bail: | ||
213 | return ret; | ||
214 | } | ||
215 | |||
216 | int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) | ||
217 | { | ||
218 | struct ipath_qp *qp = to_iqp(ibqp); | ||
219 | struct ipath_mcast *mcast; | ||
220 | struct ipath_mcast_qp *mqp; | ||
221 | int ret; | ||
222 | |||
223 | /* | ||
224 | * Allocate data structures since it's better to do this outside of | ||
225 | * spin locks and it will most likely be needed. | ||
226 | */ | ||
227 | mcast = ipath_mcast_alloc(gid); | ||
228 | if (mcast == NULL) { | ||
229 | ret = -ENOMEM; | ||
230 | goto bail; | ||
231 | } | ||
232 | mqp = ipath_mcast_qp_alloc(qp); | ||
233 | if (mqp == NULL) { | ||
234 | ipath_mcast_free(mcast); | ||
235 | ret = -ENOMEM; | ||
236 | goto bail; | ||
237 | } | ||
238 | switch (ipath_mcast_add(mcast, mqp)) { | ||
239 | case ESRCH: | ||
240 | /* Neither was used: can't attach the same QP twice. */ | ||
241 | ipath_mcast_qp_free(mqp); | ||
242 | ipath_mcast_free(mcast); | ||
243 | ret = -EINVAL; | ||
244 | goto bail; | ||
245 | case EEXIST: /* The mcast wasn't used */ | ||
246 | ipath_mcast_free(mcast); | ||
247 | break; | ||
248 | default: | ||
249 | break; | ||
250 | } | ||
251 | |||
252 | ret = 0; | ||
253 | |||
254 | bail: | ||
255 | return ret; | ||
256 | } | ||
257 | |||
258 | int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) | ||
259 | { | ||
260 | struct ipath_qp *qp = to_iqp(ibqp); | ||
261 | struct ipath_mcast *mcast = NULL; | ||
262 | struct ipath_mcast_qp *p, *tmp; | ||
263 | struct rb_node *n; | ||
264 | unsigned long flags; | ||
265 | int last = 0; | ||
266 | int ret; | ||
267 | |||
268 | spin_lock_irqsave(&mcast_lock, flags); | ||
269 | |||
270 | /* Find the GID in the mcast table. */ | ||
271 | n = mcast_tree.rb_node; | ||
272 | while (1) { | ||
273 | if (n == NULL) { | ||
274 | spin_unlock_irqrestore(&mcast_lock, flags); | ||
275 | ret = 0; | ||
276 | goto bail; | ||
277 | } | ||
278 | |||
279 | mcast = rb_entry(n, struct ipath_mcast, rb_node); | ||
280 | ret = memcmp(gid->raw, mcast->mgid.raw, | ||
281 | sizeof(union ib_gid)); | ||
282 | if (ret < 0) | ||
283 | n = n->rb_left; | ||
284 | else if (ret > 0) | ||
285 | n = n->rb_right; | ||
286 | else | ||
287 | break; | ||
288 | } | ||
289 | |||
290 | /* Search the QP list. */ | ||
291 | list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) { | ||
292 | if (p->qp != qp) | ||
293 | continue; | ||
294 | /* | ||
295 | * We found it, so remove it, but don't poison the forward | ||
296 | * link until we are sure there are no list walkers. | ||
297 | */ | ||
298 | list_del_rcu(&p->list); | ||
299 | |||
300 | /* If this was the last attached QP, remove the GID too. */ | ||
301 | if (list_empty(&mcast->qp_list)) { | ||
302 | rb_erase(&mcast->rb_node, &mcast_tree); | ||
303 | last = 1; | ||
304 | } | ||
305 | break; | ||
306 | } | ||
307 | |||
308 | spin_unlock_irqrestore(&mcast_lock, flags); | ||
309 | |||
310 | if (p) { | ||
311 | /* | ||
312 | * Wait for any list walkers to finish before freeing the | ||
313 | * list element. | ||
314 | */ | ||
315 | wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); | ||
316 | ipath_mcast_qp_free(p); | ||
317 | } | ||
318 | if (last) { | ||
319 | atomic_dec(&mcast->refcount); | ||
320 | wait_event(mcast->wait, !atomic_read(&mcast->refcount)); | ||
321 | ipath_mcast_free(mcast); | ||
322 | } | ||
323 | |||
324 | ret = 0; | ||
325 | |||
326 | bail: | ||
327 | return ret; | ||
328 | } | ||
329 | |||
330 | int ipath_mcast_tree_empty(void) | ||
331 | { | ||
332 | return mcast_tree.rb_node == NULL; | ||
333 | } | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c new file mode 100644 index 000000000000..adc5322f15c1 --- /dev/null +++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c | |||
@@ -0,0 +1,157 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | /* | ||
34 | * This file is conditionally built on x86_64 only. Otherwise weak symbol | ||
35 | * versions of the functions exported from here are used. | ||
36 | */ | ||
37 | |||
38 | #include <linux/pci.h> | ||
39 | #include <asm/mtrr.h> | ||
40 | #include <asm/processor.h> | ||
41 | |||
42 | #include "ipath_kernel.h" | ||
43 | |||
44 | /** | ||
45 | * ipath_enable_wc - enable write combining for MMIO writes to the device | ||
46 | * @dd: infinipath device | ||
47 | * | ||
48 | * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable | ||
49 | * write combining. | ||
50 | */ | ||
51 | int ipath_enable_wc(struct ipath_devdata *dd) | ||
52 | { | ||
53 | int ret = 0; | ||
54 | u64 pioaddr, piolen; | ||
55 | unsigned bits; | ||
56 | const unsigned long addr = pci_resource_start(dd->pcidev, 0); | ||
57 | const size_t len = pci_resource_len(dd->pcidev, 0); | ||
58 | |||
59 | /* | ||
60 | * Set the PIO buffers to be WCCOMB, so we get HT bursts to the | ||
61 | * chip. Linux (possibly the hardware) requires it to be on a power | ||
62 | * of 2 address matching the length (which has to be a power of 2). | ||
63 | * For rev1, that means the base address, for rev2, it will be just | ||
64 | * the PIO buffers themselves. | ||
65 | */ | ||
66 | pioaddr = addr + dd->ipath_piobufbase; | ||
67 | piolen = (dd->ipath_piobcnt2k + | ||
68 | dd->ipath_piobcnt4k) * | ||
69 | ALIGN(dd->ipath_piobcnt2k + | ||
70 | dd->ipath_piobcnt4k, dd->ipath_palign); | ||
71 | |||
72 | for (bits = 0; !(piolen & (1ULL << bits)); bits++) | ||
73 | /* do nothing */ ; | ||
74 | |||
75 | if (piolen != (1ULL << bits)) { | ||
76 | piolen >>= bits; | ||
77 | while (piolen >>= 1) | ||
78 | bits++; | ||
79 | piolen = 1ULL << (bits + 1); | ||
80 | } | ||
81 | if (pioaddr & (piolen - 1)) { | ||
82 | u64 atmp; | ||
83 | ipath_dbg("pioaddr %llx not on right boundary for size " | ||
84 | "%llx, fixing\n", | ||
85 | (unsigned long long) pioaddr, | ||
86 | (unsigned long long) piolen); | ||
87 | atmp = pioaddr & ~(piolen - 1); | ||
88 | if (atmp < addr || (atmp + piolen) > (addr + len)) { | ||
89 | ipath_dev_err(dd, "No way to align address/size " | ||
90 | "(%llx/%llx), no WC mtrr\n", | ||
91 | (unsigned long long) atmp, | ||
92 | (unsigned long long) piolen << 1); | ||
93 | ret = -ENODEV; | ||
94 | } else { | ||
95 | ipath_dbg("changing WC base from %llx to %llx, " | ||
96 | "len from %llx to %llx\n", | ||
97 | (unsigned long long) pioaddr, | ||
98 | (unsigned long long) atmp, | ||
99 | (unsigned long long) piolen, | ||
100 | (unsigned long long) piolen << 1); | ||
101 | pioaddr = atmp; | ||
102 | piolen <<= 1; | ||
103 | } | ||
104 | } | ||
105 | |||
106 | if (!ret) { | ||
107 | int cookie; | ||
108 | ipath_cdbg(VERBOSE, "Setting mtrr for chip to WC " | ||
109 | "(addr %llx, len=0x%llx)\n", | ||
110 | (unsigned long long) pioaddr, | ||
111 | (unsigned long long) piolen); | ||
112 | cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0); | ||
113 | if (cookie < 0) { | ||
115 | dev_info(&dd->pcidev->dev, | ||
116 | "mtrr_add() WC for PIO bufs " | ||
117 | "failed (%d)\n", | ||
118 | cookie); | ||
119 | ret = -EINVAL; | ||
121 | } else { | ||
122 | ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, " | ||
123 | "cookie is %d\n", cookie); | ||
124 | dd->ipath_wc_cookie = cookie; | ||
125 | } | ||
126 | } | ||
127 | |||
128 | return ret; | ||
129 | } | ||
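The alignment logic above only has to produce a power-of-two length on a matching boundary, because that is what mtrr_add() accepts. A worked example with made-up numbers: a piolen of 0x24000 is not a power of two, so it is rounded up to 0x40000, and pioaddr is then rounded down to a 0x40000 boundary provided the result still lies inside the BAR. A minimal sketch of the round-up step:

/* Sketch (not driver code): round a length up to the next power of two,
 * e.g. 0x24000 -> 0x40000, matching what the loop above computes. */
static u64 example_roundup_pow2(u64 len)
{
	u64 p = 1;

	while (p < len)
		p <<= 1;
	return p;
}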
130 | |||
131 | /** | ||
132 | * ipath_disable_wc - disable write combining for MMIO writes to the device | ||
133 | * @dd: infinipath device | ||
134 | */ | ||
135 | void ipath_disable_wc(struct ipath_devdata *dd) | ||
136 | { | ||
137 | if (dd->ipath_wc_cookie) { | ||
138 | ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n"); | ||
139 | mtrr_del(dd->ipath_wc_cookie, 0, 0); | ||
140 | dd->ipath_wc_cookie = 0; | ||
141 | } | ||
142 | } | ||
143 | |||
144 | /** | ||
145 | * ipath_unordered_wc - indicate whether write combining is ordered | ||
146 | * | ||
147 | * Because our performance depends on our ability to do write combining mmio | ||
148 | * writes in the most efficient way, we need to know if we are on an Intel | ||
149 | * or AMD x86_64 processor. AMD x86_64 processors flush WC buffers out in | ||
150 | * the order completed, and so no special flushing is required to get | ||
151 | * correct ordering. Intel processors, however, will flush write buffers | ||
152 | * out in "random" orders, and so explicit ordering is needed at times. | ||
153 | */ | ||
154 | int ipath_unordered_wc(void) | ||
155 | { | ||
156 | return boot_cpu_data.x86_vendor != X86_VENDOR_AMD; | ||
157 | } | ||
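A hedged example of how a caller might act on this: when write combining is unordered, force the combined writes out before the word that hands the PIO buffer to the chip. The call site and the exact barrier required are assumptions, not taken from the driver.

/* Illustrative only: order PIO writes on CPUs whose WC flush is unordered. */
if (ipath_unordered_wc())
	wmb();		/* assumed barrier; flush WC buffers first */
/* ... then write the final word of the PIO buffer ... */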
diff --git a/drivers/infiniband/hw/ipath/ips_common.h b/drivers/infiniband/hw/ipath/ips_common.h new file mode 100644 index 000000000000..410a764dfcef --- /dev/null +++ b/drivers/infiniband/hw/ipath/ips_common.h | |||
@@ -0,0 +1,263 @@ | |||
1 | #ifndef IPS_COMMON_H | ||
2 | #define IPS_COMMON_H | ||
3 | /* | ||
4 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
5 | * | ||
6 | * This software is available to you under a choice of one of two | ||
7 | * licenses. You may choose to be licensed under the terms of the GNU | ||
8 | * General Public License (GPL) Version 2, available from the file | ||
9 | * COPYING in the main directory of this source tree, or the | ||
10 | * OpenIB.org BSD license below: | ||
11 | * | ||
12 | * Redistribution and use in source and binary forms, with or | ||
13 | * without modification, are permitted provided that the following | ||
14 | * conditions are met: | ||
15 | * | ||
16 | * - Redistributions of source code must retain the above | ||
17 | * copyright notice, this list of conditions and the following | ||
18 | * disclaimer. | ||
19 | * | ||
20 | * - Redistributions in binary form must reproduce the above | ||
21 | * copyright notice, this list of conditions and the following | ||
22 | * disclaimer in the documentation and/or other materials | ||
23 | * provided with the distribution. | ||
24 | * | ||
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
26 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
27 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
28 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
29 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
30 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
31 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
32 | * SOFTWARE. | ||
33 | */ | ||
34 | |||
35 | #include "ipath_common.h" | ||
36 | |||
37 | struct ipath_header { | ||
38 | /* | ||
39 | * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset - | ||
40 | * 14 bits before ECO change ~28 Dec 03. After that, Vers 4, | ||
41 | * Port 3, TID 11, offset 14. | ||
42 | */ | ||
43 | __le32 ver_port_tid_offset; | ||
44 | __le16 chksum; | ||
45 | __le16 pkt_flags; | ||
46 | }; | ||
47 | |||
48 | struct ips_message_header { | ||
49 | __be16 lrh[4]; | ||
50 | __be32 bth[3]; | ||
51 | /* fields below this point are in host byte order */ | ||
52 | struct ipath_header iph; | ||
53 | __u8 sub_opcode; | ||
54 | __u8 flags; | ||
55 | __u16 src_rank; | ||
56 | /* 24 bits. The upper 8 bits are available for other use */ | ||
57 | union { | ||
58 | struct { | ||
59 | unsigned ack_seq_num:24; | ||
60 | unsigned port:4; | ||
61 | unsigned unused:4; | ||
62 | }; | ||
63 | __u32 ack_seq_num_org; | ||
64 | }; | ||
65 | __u8 expected_tid_session_id; | ||
66 | __u8 tinylen; /* to aid MPI */ | ||
67 | union { | ||
68 | __u16 tag; /* to aid MPI */ | ||
69 | __u16 mqhdr; /* for PSM MQ */ | ||
70 | }; | ||
71 | union { | ||
72 | __u32 mpi[4]; /* to aid MPI */ | ||
73 | __u32 data[4]; | ||
74 | __u64 mq[2]; /* for PSM MQ */ | ||
75 | struct { | ||
76 | __u16 mtu; | ||
77 | __u8 major_ver; | ||
78 | __u8 minor_ver; | ||
79 | __u32 not_used; /* free */ | ||
80 | __u32 run_id; | ||
81 | __u32 client_ver; | ||
82 | }; | ||
83 | }; | ||
84 | }; | ||
85 | |||
86 | struct ether_header { | ||
87 | __be16 lrh[4]; | ||
88 | __be32 bth[3]; | ||
89 | struct ipath_header iph; | ||
90 | __u8 sub_opcode; | ||
91 | __u8 cmd; | ||
92 | __be16 lid; | ||
93 | __u16 mac[3]; | ||
94 | __u8 frag_num; | ||
95 | __u8 seq_num; | ||
96 | __le32 len; | ||
97 | /* MUST be of word size due to PIO write requirements */ | ||
98 | __u32 csum; | ||
99 | __le16 csum_offset; | ||
100 | __le16 flags; | ||
101 | __u16 first_2_bytes; | ||
102 | __u8 unused[2]; /* currently unused */ | ||
103 | }; | ||
104 | |||
105 | /* | ||
106 | * The PIO buffer used for sending infinipath messages must only be written | ||
107 | * in 32-bit words, all the data must be written, and no writes can occur | ||
108 | * after the last word is written (which transfers "ownership" of the buffer | ||
109 | * to the chip and triggers the message to be sent). | ||
110 | * Since the Linux sk_buff structure can be recursive, non-aligned, and | ||
111 | * any number of bytes in each segment, we use the following structure | ||
112 | * to keep information about the overall state of the copy operation. | ||
113 | * This is used to save the information needed to store the checksum | ||
114 | * in the right place before sending the last word to the hardware and | ||
115 | * to buffer the last 0-3 bytes of non-word sized segments. | ||
116 | */ | ||
117 | struct copy_data_s { | ||
118 | struct ether_header *hdr; | ||
119 | /* addr of PIO buf to write csum to */ | ||
120 | __u32 __iomem *csum_pio; | ||
121 | __u32 __iomem *to; /* addr of PIO buf to write data to */ | ||
122 | __u32 device; /* which device to allocate PIO bufs from */ | ||
123 | __s32 error; /* set if there is an error. */ | ||
124 | __s32 extra; /* amount of data saved in u.buf below */ | ||
125 | __u32 len; /* total length to send in bytes */ | ||
126 | __u32 flen; /* fragment length in words */ | ||
127 | __u32 csum; /* partial IP checksum */ | ||
128 | __u32 pos; /* position for partial checksum */ | ||
129 | __u32 offset; /* offset to where data currently starts */ | ||
130 | __s32 checksum_calc; /* set to 1 when csum has been calculated */ | ||
131 | struct sk_buff *skb; | ||
132 | union { | ||
133 | __u32 w; | ||
134 | __u8 buf[4]; | ||
135 | } u; | ||
136 | }; | ||
137 | |||
138 | /* IB - LRH header consts */ | ||
139 | #define IPS_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */ | ||
140 | #define IPS_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */ | ||
141 | |||
142 | #define IPS_OFFSET 0 | ||
143 | |||
144 | /* | ||
145 | * defines the cut-off point between the header queue and eager/expected | ||
146 | * TID queue | ||
147 | */ | ||
148 | #define NUM_OF_EXTRA_WORDS_IN_HEADER_QUEUE \ | ||
149 | ((sizeof(struct ips_message_header) - \ | ||
150 | offsetof(struct ips_message_header, iph)) >> 2) | ||
151 | |||
152 | /* OpCodes */ | ||
153 | #define OPCODE_IPS 0xC0 | ||
154 | #define OPCODE_ITH4X 0xC1 | ||
155 | |||
156 | /* OpCode 30 is used by stand-alone test programs */ | ||
157 | #define OPCODE_RAW_DATA 0xDE | ||
158 | /* last OpCode (31) is reserved for test */ | ||
159 | #define OPCODE_TEST 0xDF | ||
160 | |||
161 | /* sub OpCodes - ips */ | ||
162 | #define OPCODE_SEQ_DATA 0x01 | ||
163 | #define OPCODE_SEQ_CTRL 0x02 | ||
164 | |||
165 | #define OPCODE_SEQ_MQ_DATA 0x03 | ||
166 | #define OPCODE_SEQ_MQ_CTRL 0x04 | ||
167 | |||
168 | #define OPCODE_ACK 0x10 | ||
169 | #define OPCODE_NAK 0x11 | ||
170 | |||
171 | #define OPCODE_ERR_CHK 0x20 | ||
172 | #define OPCODE_ERR_CHK_PLS 0x21 | ||
173 | |||
174 | #define OPCODE_STARTUP 0x30 | ||
175 | #define OPCODE_STARTUP_ACK 0x31 | ||
176 | #define OPCODE_STARTUP_NAK 0x32 | ||
177 | |||
178 | #define OPCODE_STARTUP_EXT 0x34 | ||
179 | #define OPCODE_STARTUP_ACK_EXT 0x35 | ||
180 | #define OPCODE_STARTUP_NAK_EXT 0x36 | ||
181 | |||
182 | #define OPCODE_TIDS_RELEASE 0x40 | ||
183 | #define OPCODE_TIDS_RELEASE_CONFIRM 0x41 | ||
184 | |||
185 | #define OPCODE_CLOSE 0x50 | ||
186 | #define OPCODE_CLOSE_ACK 0x51 | ||
187 | /* | ||
188 | * like OPCODE_CLOSE, but no complaint if other side has already closed. | ||
189 | * Used when doing abort(), MPI_Abort(), etc. | ||
190 | */ | ||
191 | #define OPCODE_ABORT 0x52 | ||
192 | |||
193 | /* sub OpCodes - ith4x */ | ||
194 | #define OPCODE_ENCAP 0x81 | ||
195 | #define OPCODE_LID_ARP 0x82 | ||
196 | |||
197 | /* Receive Header Queue: receive type (from infinipath) */ | ||
198 | #define RCVHQ_RCV_TYPE_EXPECTED 0 | ||
199 | #define RCVHQ_RCV_TYPE_EAGER 1 | ||
200 | #define RCVHQ_RCV_TYPE_NON_KD 2 | ||
201 | #define RCVHQ_RCV_TYPE_ERROR 3 | ||
202 | |||
203 | /* misc. */ | ||
204 | #define SIZE_OF_CRC 1 | ||
205 | |||
206 | #define EAGER_TID_ID INFINIPATH_I_TID_MASK | ||
207 | |||
208 | #define IPS_DEFAULT_P_KEY 0xFFFF | ||
209 | |||
210 | #define IPS_PERMISSIVE_LID 0xFFFF | ||
211 | #define IPS_MULTICAST_LID_BASE 0xC000 | ||
212 | |||
213 | #define IPS_AETH_CREDIT_SHIFT 24 | ||
214 | #define IPS_AETH_CREDIT_MASK 0x1F | ||
215 | #define IPS_AETH_CREDIT_INVAL 0x1F | ||
216 | |||
217 | #define IPS_PSN_MASK 0xFFFFFF | ||
218 | #define IPS_MSN_MASK 0xFFFFFF | ||
219 | #define IPS_QPN_MASK 0xFFFFFF | ||
220 | #define IPS_MULTICAST_QPN 0xFFFFFF | ||
221 | |||
222 | /* functions for extracting fields from rcvhdrq entries */ | ||
223 | static inline __u32 ips_get_hdr_err_flags(const __le32 * rbuf) | ||
224 | { | ||
225 | return __le32_to_cpu(rbuf[1]); | ||
226 | } | ||
227 | |||
228 | static inline __u32 ips_get_index(const __le32 * rbuf) | ||
229 | { | ||
230 | return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_EGRINDEX_SHIFT) | ||
231 | & INFINIPATH_RHF_EGRINDEX_MASK; | ||
232 | } | ||
233 | |||
234 | static inline __u32 ips_get_rcv_type(const __le32 * rbuf) | ||
235 | { | ||
236 | return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_RCVTYPE_SHIFT) | ||
237 | & INFINIPATH_RHF_RCVTYPE_MASK; | ||
238 | } | ||
239 | |||
240 | static inline __u32 ips_get_length_in_bytes(const __le32 * rbuf) | ||
241 | { | ||
242 | return ((__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_LENGTH_SHIFT) | ||
243 | & INFINIPATH_RHF_LENGTH_MASK) << 2; | ||
244 | } | ||
245 | |||
246 | static inline void *ips_get_first_protocol_header(const __u32 * rbuf) | ||
247 | { | ||
248 | return (void *)&rbuf[2]; | ||
249 | } | ||
250 | |||
251 | static inline struct ips_message_header *ips_get_ips_header(const __u32 * | ||
252 | rbuf) | ||
253 | { | ||
254 | return (struct ips_message_header *)&rbuf[2]; | ||
255 | } | ||
256 | |||
257 | static inline __u32 ips_get_ipath_ver(__le32 hdrword) | ||
258 | { | ||
259 | return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT) | ||
260 | & INFINIPATH_I_VERS_MASK; | ||
261 | } | ||
262 | |||
263 | #endif /* IPS_COMMON_H */ | ||
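Taken together, a receive handler might decode a receive-header-queue entry with these helpers roughly as follows; the dispatch shown is only a sketch, not the driver's interrupt path.

/* Sketch only: decode one rcvhdrq entry. */
static void example_handle_rcvhdrq_entry(const __le32 *rbuf)
{
	__u32 etype = ips_get_rcv_type(rbuf);
	__u32 tlen = ips_get_length_in_bytes(rbuf);
	struct ips_message_header *hdr =
		ips_get_ips_header((const __u32 *) rbuf);

	if (etype == RCVHQ_RCV_TYPE_ERROR || ips_get_hdr_err_flags(rbuf)) {
		/* count and drop the errored packet */
		return;
	}
	/* ... dispatch hdr and tlen to the eager or expected-TID path ... */
	(void) hdr;
}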
diff --git a/drivers/infiniband/hw/ipath/verbs_debug.h b/drivers/infiniband/hw/ipath/verbs_debug.h new file mode 100644 index 000000000000..40d693cf3f94 --- /dev/null +++ b/drivers/infiniband/hw/ipath/verbs_debug.h | |||
@@ -0,0 +1,107 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #ifndef _VERBS_DEBUG_H | ||
34 | #define _VERBS_DEBUG_H | ||
35 | |||
36 | /* | ||
37 | * This file contains tracing code for the ib_ipath kernel module. | ||
38 | */ | ||
39 | #ifndef _VERBS_DEBUGGING /* tracing enabled or not */ | ||
40 | #define _VERBS_DEBUGGING 1 | ||
41 | #endif | ||
42 | |||
43 | extern unsigned ib_ipath_debug; | ||
44 | |||
45 | #define _VERBS_ERROR(fmt,...) \ | ||
46 | do { \ | ||
47 | printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \ | ||
48 | } while(0) | ||
49 | |||
50 | #define _VERBS_UNIT_ERROR(unit,fmt,...) \ | ||
51 | do { \ | ||
52 | printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \ | ||
53 | } while(0) | ||
54 | |||
55 | #if _VERBS_DEBUGGING | ||
56 | |||
57 | /* | ||
58 | * Mask values for debugging. The scheme allows us to compile out any | ||
59 | * of the debug tracing stuff, and if compiled in, to enable or | ||
60 | * disable dynamically. | ||
61 | * This can be set at modprobe time also: | ||
62 | * modprobe ib_ipath ib_ipath_debug=3 | ||
63 | */ | ||
64 | |||
65 | #define __VERBS_INFO 0x1 /* generic low verbosity stuff */ | ||
66 | #define __VERBS_DBG 0x2 /* generic debug */ | ||
67 | #define __VERBS_VDBG 0x4 /* verbose debug */ | ||
68 | #define __VERBS_SMADBG 0x8000 /* sma packet debug */ | ||
69 | |||
70 | #define _VERBS_INFO(fmt,...) \ | ||
71 | do { \ | ||
72 | if (unlikely(ib_ipath_debug&__VERBS_INFO)) \ | ||
73 | printk(KERN_INFO "%s: " fmt,"ib_ipath", \ | ||
74 | ##__VA_ARGS__); \ | ||
75 | } while(0) | ||
76 | |||
77 | #define _VERBS_DBG(fmt,...) \ | ||
78 | do { \ | ||
79 | if (unlikely(ib_ipath_debug&__VERBS_DBG)) \ | ||
80 | printk(KERN_DEBUG "%s: " fmt, __func__, \ | ||
81 | ##__VA_ARGS__); \ | ||
82 | } while(0) | ||
83 | |||
84 | #define _VERBS_VDBG(fmt,...) \ | ||
85 | do { \ | ||
86 | if (unlikely(ib_ipath_debug&__VERBS_VDBG)) \ | ||
87 | printk(KERN_DEBUG "%s: " fmt, __func__, \ | ||
88 | ##__VA_ARGS__); \ | ||
89 | } while(0) | ||
90 | |||
91 | #define _VERBS_SMADBG(fmt,...) \ | ||
92 | do { \ | ||
93 | if (unlikely(ib_ipath_debug&__VERBS_SMADBG)) \ | ||
94 | printk(KERN_DEBUG "%s: " fmt, __func__, \ | ||
95 | ##__VA_ARGS__); \ | ||
96 | } while(0) | ||
97 | |||
98 | #else /* ! _VERBS_DEBUGGING */ | ||
99 | |||
100 | #define _VERBS_INFO(fmt,...) | ||
101 | #define _VERBS_DBG(fmt,...) | ||
102 | #define _VERBS_VDBG(fmt,...) | ||
103 | #define _VERBS_SMADBG(fmt,...) | ||
104 | |||
105 | #endif /* _VERBS_DEBUGGING */ | ||
106 | |||
107 | #endif /* _VERBS_DEBUG_H */ | ||
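Usage is then just a matter of picking the right verbosity mask; a couple of illustrative calls (the messages and fields printed here are made up):

/* Illustrative only: enabled with "modprobe ib_ipath ib_ipath_debug=3",
 * i.e. __VERBS_INFO | __VERBS_DBG. */
_VERBS_INFO("device %d registered\n", dev->ib_unit);
_VERBS_DBG("QP%u moved to state %u\n", qp->ibqp.qp_num, qp->state);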
diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig index e88be85b3d5c..9aa5a4468a75 100644 --- a/drivers/infiniband/hw/mthca/Kconfig +++ b/drivers/infiniband/hw/mthca/Kconfig | |||
@@ -7,10 +7,11 @@ config INFINIBAND_MTHCA | |||
7 | ("Tavor") and the MT25208 PCI Express HCA ("Arbel"). | 7 | ("Tavor") and the MT25208 PCI Express HCA ("Arbel"). |
8 | 8 | ||
9 | config INFINIBAND_MTHCA_DEBUG | 9 | config INFINIBAND_MTHCA_DEBUG |
10 | bool "Verbose debugging output" | 10 | bool "Verbose debugging output" if EMBEDDED |
11 | depends on INFINIBAND_MTHCA | 11 | depends on INFINIBAND_MTHCA |
12 | default n | 12 | default y |
13 | ---help--- | 13 | ---help--- |
14 | This option causes the mthca driver produce a bunch of debug | 14 | This option causes debugging code to be compiled into the |
15 | messages. Select this is you are developing the driver or | 15 | mthca driver. The output can be turned on via the |
16 | trying to diagnose a problem. | 16 | debug_level module parameter (which can also be set after |
17 | the driver is loaded through sysfs). | ||
diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile index 47ec5a7cba0b..e388d95d0cf1 100644 --- a/drivers/infiniband/hw/mthca/Makefile +++ b/drivers/infiniband/hw/mthca/Makefile | |||
@@ -1,7 +1,3 @@ | |||
1 | ifdef CONFIG_INFINIBAND_MTHCA_DEBUG | ||
2 | EXTRA_CFLAGS += -DDEBUG | ||
3 | endif | ||
4 | |||
5 | obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o | 1 | obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o |
6 | 2 | ||
7 | ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \ | 3 | ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \ |
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index bc5bdcbe51b5..b12aa03be251 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c | |||
@@ -42,6 +42,20 @@ | |||
42 | 42 | ||
43 | #include "mthca_dev.h" | 43 | #include "mthca_dev.h" |
44 | 44 | ||
45 | enum { | ||
46 | MTHCA_RATE_TAVOR_FULL = 0, | ||
47 | MTHCA_RATE_TAVOR_1X = 1, | ||
48 | MTHCA_RATE_TAVOR_4X = 2, | ||
49 | MTHCA_RATE_TAVOR_1X_DDR = 3 | ||
50 | }; | ||
51 | |||
52 | enum { | ||
53 | MTHCA_RATE_MEMFREE_FULL = 0, | ||
54 | MTHCA_RATE_MEMFREE_QUARTER = 1, | ||
55 | MTHCA_RATE_MEMFREE_EIGHTH = 2, | ||
56 | MTHCA_RATE_MEMFREE_HALF = 3 | ||
57 | }; | ||
58 | |||
45 | struct mthca_av { | 59 | struct mthca_av { |
46 | __be32 port_pd; | 60 | __be32 port_pd; |
47 | u8 reserved1; | 61 | u8 reserved1; |
@@ -55,6 +69,90 @@ struct mthca_av { | |||
55 | __be32 dgid[4]; | 69 | __be32 dgid[4]; |
56 | }; | 70 | }; |
57 | 71 | ||
72 | static enum ib_rate memfree_rate_to_ib(u8 mthca_rate, u8 port_rate) | ||
73 | { | ||
74 | switch (mthca_rate) { | ||
75 | case MTHCA_RATE_MEMFREE_EIGHTH: | ||
76 | return mult_to_ib_rate(port_rate >> 3); | ||
77 | case MTHCA_RATE_MEMFREE_QUARTER: | ||
78 | return mult_to_ib_rate(port_rate >> 2); | ||
79 | case MTHCA_RATE_MEMFREE_HALF: | ||
80 | return mult_to_ib_rate(port_rate >> 1); | ||
81 | case MTHCA_RATE_MEMFREE_FULL: | ||
82 | default: | ||
83 | return mult_to_ib_rate(port_rate); | ||
84 | } | ||
85 | } | ||
86 | |||
87 | static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate) | ||
88 | { | ||
89 | switch (mthca_rate) { | ||
90 | case MTHCA_RATE_TAVOR_1X: return IB_RATE_2_5_GBPS; | ||
91 | case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS; | ||
92 | case MTHCA_RATE_TAVOR_4X: return IB_RATE_10_GBPS; | ||
93 | default: return port_rate; | ||
94 | } | ||
95 | } | ||
96 | |||
97 | enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port) | ||
98 | { | ||
99 | if (mthca_is_memfree(dev)) { | ||
100 | /* Handle old Arbel FW */ | ||
101 | if (dev->limits.stat_rate_support == 0x3 && mthca_rate) | ||
102 | return IB_RATE_2_5_GBPS; | ||
103 | |||
104 | return memfree_rate_to_ib(mthca_rate, dev->rate[port - 1]); | ||
105 | } else | ||
106 | return tavor_rate_to_ib(mthca_rate, dev->rate[port - 1]); | ||
107 | } | ||
108 | |||
109 | static u8 ib_rate_to_memfree(u8 req_rate, u8 cur_rate) | ||
110 | { | ||
111 | if (cur_rate <= req_rate) | ||
112 | return 0; | ||
113 | |||
114 | /* | ||
115 | * Inter-packet delay (IPD) to get from rate X down to a rate | ||
116 | * no more than Y is (X - 1) / Y. | ||
117 | */ | ||
118 | switch ((cur_rate - 1) / req_rate) { | ||
119 | case 0: return MTHCA_RATE_MEMFREE_FULL; | ||
120 | case 1: return MTHCA_RATE_MEMFREE_HALF; | ||
121 | case 2: /* fall through */ | ||
122 | case 3: return MTHCA_RATE_MEMFREE_QUARTER; | ||
123 | default: return MTHCA_RATE_MEMFREE_EIGHTH; | ||
124 | } | ||
125 | } | ||
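The inter-packet-delay arithmetic works out as follows, using link-rate multiples (1 = 2.5 Gb/s, 4 = 10 Gb/s, 12 = 30 Gb/s); the concrete numbers are only a worked example, not driver output:

/* Worked example for ib_rate_to_memfree():
 *   port at 4X (cur_rate = 4), request capped at 1X (req_rate = 1):
 *     (4 - 1) / 1 = 3  -> MTHCA_RATE_MEMFREE_QUARTER  (4X / 4 = 1X)
 *   port at 12X (cur_rate = 12), request capped at 4X (req_rate = 4):
 *     (12 - 1) / 4 = 2 -> MTHCA_RATE_MEMFREE_QUARTER  (12X / 4 = 3X <= 4X)
 */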
126 | |||
127 | static u8 ib_rate_to_tavor(u8 static_rate) | ||
128 | { | ||
129 | switch (static_rate) { | ||
130 | case IB_RATE_2_5_GBPS: return MTHCA_RATE_TAVOR_1X; | ||
131 | case IB_RATE_5_GBPS: return MTHCA_RATE_TAVOR_1X_DDR; | ||
132 | case IB_RATE_10_GBPS: return MTHCA_RATE_TAVOR_4X; | ||
133 | default: return MTHCA_RATE_TAVOR_FULL; | ||
134 | } | ||
135 | } | ||
136 | |||
137 | u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port) | ||
138 | { | ||
139 | u8 rate; | ||
140 | |||
141 | if (!static_rate || ib_rate_to_mult(static_rate) >= dev->rate[port - 1]) | ||
142 | return 0; | ||
143 | |||
144 | if (mthca_is_memfree(dev)) | ||
145 | rate = ib_rate_to_memfree(ib_rate_to_mult(static_rate), | ||
146 | dev->rate[port - 1]); | ||
147 | else | ||
148 | rate = ib_rate_to_tavor(static_rate); | ||
149 | |||
150 | if (!(dev->limits.stat_rate_support & (1 << rate))) | ||
151 | rate = 1; | ||
152 | |||
153 | return rate; | ||
154 | } | ||
155 | |||
58 | int mthca_create_ah(struct mthca_dev *dev, | 156 | int mthca_create_ah(struct mthca_dev *dev, |
59 | struct mthca_pd *pd, | 157 | struct mthca_pd *pd, |
60 | struct ib_ah_attr *ah_attr, | 158 | struct ib_ah_attr *ah_attr, |
@@ -107,7 +205,7 @@ on_hca_fail: | |||
107 | av->g_slid = ah_attr->src_path_bits; | 205 | av->g_slid = ah_attr->src_path_bits; |
108 | av->dlid = cpu_to_be16(ah_attr->dlid); | 206 | av->dlid = cpu_to_be16(ah_attr->dlid); |
109 | av->msg_sr = (3 << 4) | /* 2K message */ | 207 | av->msg_sr = (3 << 4) | /* 2K message */ |
110 | ah_attr->static_rate; | 208 | mthca_get_rate(dev, ah_attr->static_rate, ah_attr->port_num); |
111 | av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); | 209 | av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); |
112 | if (ah_attr->ah_flags & IB_AH_GRH) { | 210 | if (ah_attr->ah_flags & IB_AH_GRH) { |
113 | av->g_slid |= 0x80; | 211 | av->g_slid |= 0x80; |
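Note on the rate helpers added above: ib_rate_to_memfree() encodes a requested static-rate limit as an inter-packet delay (IPD), with both rates expressed as multiples of 2.5 Gb/s, and the quotient is what the switch maps onto the MTHCA_RATE_MEMFREE_* encodings. A minimal userspace sketch of that arithmetic (illustrative only, not driver code):

    /* Sketch: the IPD arithmetic from ib_rate_to_memfree(), rates as
     * multiples of 2.5 Gb/s. */
    #include <stdio.h>

    static int ipd(int cur_rate, int req_rate)
    {
            return (cur_rate - 1) / req_rate;
    }

    int main(void)
    {
            /* 4X SDR port (multiplier 4), path limited to 2.5 Gb/s (multiplier 1):
             * (4 - 1) / 1 = 3, mapped to MTHCA_RATE_MEMFREE_QUARTER above. */
            printf("%d\n", ipd(4, 1));
            /* Same port, path limited to 5 Gb/s (multiplier 2):
             * (4 - 1) / 2 = 1, mapped to MTHCA_RATE_MEMFREE_HALF. */
            printf("%d\n", ipd(4, 2));
            return 0;
    }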
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 343eca507870..1985b5dfa481 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c | |||
@@ -965,6 +965,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, | |||
965 | u32 *outbox; | 965 | u32 *outbox; |
966 | u8 field; | 966 | u8 field; |
967 | u16 size; | 967 | u16 size; |
968 | u16 stat_rate; | ||
968 | int err; | 969 | int err; |
969 | 970 | ||
970 | #define QUERY_DEV_LIM_OUT_SIZE 0x100 | 971 | #define QUERY_DEV_LIM_OUT_SIZE 0x100 |
@@ -995,6 +996,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, | |||
995 | #define QUERY_DEV_LIM_MTU_WIDTH_OFFSET 0x36 | 996 | #define QUERY_DEV_LIM_MTU_WIDTH_OFFSET 0x36 |
996 | #define QUERY_DEV_LIM_VL_PORT_OFFSET 0x37 | 997 | #define QUERY_DEV_LIM_VL_PORT_OFFSET 0x37 |
997 | #define QUERY_DEV_LIM_MAX_GID_OFFSET 0x3b | 998 | #define QUERY_DEV_LIM_MAX_GID_OFFSET 0x3b |
999 | #define QUERY_DEV_LIM_RATE_SUPPORT_OFFSET 0x3c | ||
998 | #define QUERY_DEV_LIM_MAX_PKEY_OFFSET 0x3f | 1000 | #define QUERY_DEV_LIM_MAX_PKEY_OFFSET 0x3f |
999 | #define QUERY_DEV_LIM_FLAGS_OFFSET 0x44 | 1001 | #define QUERY_DEV_LIM_FLAGS_OFFSET 0x44 |
1000 | #define QUERY_DEV_LIM_RSVD_UAR_OFFSET 0x48 | 1002 | #define QUERY_DEV_LIM_RSVD_UAR_OFFSET 0x48 |
@@ -1086,6 +1088,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, | |||
1086 | dev_lim->num_ports = field & 0xf; | 1088 | dev_lim->num_ports = field & 0xf; |
1087 | MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_GID_OFFSET); | 1089 | MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_GID_OFFSET); |
1088 | dev_lim->max_gids = 1 << (field & 0xf); | 1090 | dev_lim->max_gids = 1 << (field & 0xf); |
1091 | MTHCA_GET(stat_rate, outbox, QUERY_DEV_LIM_RATE_SUPPORT_OFFSET); | ||
1092 | dev_lim->stat_rate_support = stat_rate; | ||
1089 | MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PKEY_OFFSET); | 1093 | MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PKEY_OFFSET); |
1090 | dev_lim->max_pkeys = 1 << (field & 0xf); | 1094 | dev_lim->max_pkeys = 1 << (field & 0xf); |
1091 | MTHCA_GET(dev_lim->flags, outbox, QUERY_DEV_LIM_FLAGS_OFFSET); | 1095 | MTHCA_GET(dev_lim->flags, outbox, QUERY_DEV_LIM_FLAGS_OFFSET); |
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index e4ec35c40dd3..2f976f2051d6 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h | |||
@@ -146,6 +146,7 @@ struct mthca_dev_lim { | |||
146 | int max_vl; | 146 | int max_vl; |
147 | int num_ports; | 147 | int num_ports; |
148 | int max_gids; | 148 | int max_gids; |
149 | u16 stat_rate_support; | ||
149 | int max_pkeys; | 150 | int max_pkeys; |
150 | u32 flags; | 151 | u32 flags; |
151 | int reserved_uars; | 152 | int reserved_uars; |
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index ad52edbefe98..4c1dcb4c1822 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h | |||
@@ -151,6 +151,7 @@ struct mthca_limits { | |||
151 | int reserved_qps; | 151 | int reserved_qps; |
152 | int num_srqs; | 152 | int num_srqs; |
153 | int max_srq_wqes; | 153 | int max_srq_wqes; |
154 | int max_srq_sge; | ||
154 | int reserved_srqs; | 155 | int reserved_srqs; |
155 | int num_eecs; | 156 | int num_eecs; |
156 | int reserved_eecs; | 157 | int reserved_eecs; |
@@ -172,6 +173,7 @@ struct mthca_limits { | |||
172 | int reserved_pds; | 173 | int reserved_pds; |
173 | u32 page_size_cap; | 174 | u32 page_size_cap; |
174 | u32 flags; | 175 | u32 flags; |
176 | u16 stat_rate_support; | ||
175 | u8 port_width_cap; | 177 | u8 port_width_cap; |
176 | }; | 178 | }; |
177 | 179 | ||
@@ -353,10 +355,24 @@ struct mthca_dev { | |||
353 | struct ib_mad_agent *send_agent[MTHCA_MAX_PORTS][2]; | 355 | struct ib_mad_agent *send_agent[MTHCA_MAX_PORTS][2]; |
354 | struct ib_ah *sm_ah[MTHCA_MAX_PORTS]; | 356 | struct ib_ah *sm_ah[MTHCA_MAX_PORTS]; |
355 | spinlock_t sm_lock; | 357 | spinlock_t sm_lock; |
358 | u8 rate[MTHCA_MAX_PORTS]; | ||
356 | }; | 359 | }; |
357 | 360 | ||
358 | #define mthca_dbg(mdev, format, arg...) \ | 361 | #ifdef CONFIG_INFINIBAND_MTHCA_DEBUG |
359 | dev_dbg(&mdev->pdev->dev, format, ## arg) | 362 | extern int mthca_debug_level; |
363 | |||
364 | #define mthca_dbg(mdev, format, arg...) \ | ||
365 | do { \ | ||
366 | if (mthca_debug_level) \ | ||
367 | dev_printk(KERN_DEBUG, &mdev->pdev->dev, format, ## arg); \ | ||
368 | } while (0) | ||
369 | |||
370 | #else /* CONFIG_INFINIBAND_MTHCA_DEBUG */ | ||
371 | |||
372 | #define mthca_dbg(mdev, format, arg...) do { (void) mdev; } while (0) | ||
373 | |||
374 | #endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */ | ||
375 | |||
360 | #define mthca_err(mdev, format, arg...) \ | 376 | #define mthca_err(mdev, format, arg...) \ |
361 | dev_err(&mdev->pdev->dev, format, ## arg) | 377 | dev_err(&mdev->pdev->dev, format, ## arg) |
362 | #define mthca_info(mdev, format, arg...) \ | 378 | #define mthca_info(mdev, format, arg...) \ |
@@ -492,6 +508,7 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); | |||
492 | int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, | 508 | int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, |
493 | enum ib_srq_attr_mask attr_mask); | 509 | enum ib_srq_attr_mask attr_mask); |
494 | int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); | 510 | int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); |
511 | int mthca_max_srq_sge(struct mthca_dev *dev); | ||
495 | void mthca_srq_event(struct mthca_dev *dev, u32 srqn, | 512 | void mthca_srq_event(struct mthca_dev *dev, u32 srqn, |
496 | enum ib_event_type event_type); | 513 | enum ib_event_type event_type); |
497 | void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); | 514 | void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); |
@@ -542,6 +559,8 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, | |||
542 | struct ib_ud_header *header); | 559 | struct ib_ud_header *header); |
543 | int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr); | 560 | int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr); |
544 | int mthca_ah_grh_present(struct mthca_ah *ah); | 561 | int mthca_ah_grh_present(struct mthca_ah *ah); |
562 | u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port); | ||
563 | enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port); | ||
545 | 564 | ||
546 | int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); | 565 | int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); |
547 | int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); | 566 | int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); |
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index dfb482eac9a2..4730863ece9a 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c | |||
@@ -49,6 +49,30 @@ enum { | |||
49 | MTHCA_VENDOR_CLASS2 = 0xa | 49 | MTHCA_VENDOR_CLASS2 = 0xa |
50 | }; | 50 | }; |
51 | 51 | ||
52 | static int mthca_update_rate(struct mthca_dev *dev, u8 port_num) | ||
53 | { | ||
54 | struct ib_port_attr *tprops = NULL; | ||
55 | int ret; | ||
56 | |||
57 | tprops = kmalloc(sizeof *tprops, GFP_KERNEL); | ||
58 | if (!tprops) | ||
59 | return -ENOMEM; | ||
60 | |||
61 | ret = ib_query_port(&dev->ib_dev, port_num, tprops); | ||
62 | if (ret) { | ||
63 | printk(KERN_WARNING "ib_query_port failed (%d) for %s port %d\n", | ||
64 | ret, dev->ib_dev.name, port_num); | ||
65 | goto out; | ||
66 | } | ||
67 | |||
68 | dev->rate[port_num - 1] = tprops->active_speed * | ||
69 | ib_width_enum_to_int(tprops->active_width); | ||
70 | |||
71 | out: | ||
72 | kfree(tprops); | ||
73 | return ret; | ||
74 | } | ||
75 | |||
52 | static void update_sm_ah(struct mthca_dev *dev, | 76 | static void update_sm_ah(struct mthca_dev *dev, |
53 | u8 port_num, u16 lid, u8 sl) | 77 | u8 port_num, u16 lid, u8 sl) |
54 | { | 78 | { |
@@ -90,6 +114,7 @@ static void smp_snoop(struct ib_device *ibdev, | |||
90 | mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && | 114 | mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && |
91 | mad->mad_hdr.method == IB_MGMT_METHOD_SET) { | 115 | mad->mad_hdr.method == IB_MGMT_METHOD_SET) { |
92 | if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { | 116 | if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { |
117 | mthca_update_rate(to_mdev(ibdev), port_num); | ||
93 | update_sm_ah(to_mdev(ibdev), port_num, | 118 | update_sm_ah(to_mdev(ibdev), port_num, |
94 | be16_to_cpup((__be16 *) (mad->data + 58)), | 119 | be16_to_cpup((__be16 *) (mad->data + 58)), |
95 | (*(u8 *) (mad->data + 76)) & 0xf); | 120 | (*(u8 *) (mad->data + 76)) & 0xf); |
@@ -246,6 +271,7 @@ int mthca_create_agents(struct mthca_dev *dev) | |||
246 | { | 271 | { |
247 | struct ib_mad_agent *agent; | 272 | struct ib_mad_agent *agent; |
248 | int p, q; | 273 | int p, q; |
274 | int ret; | ||
249 | 275 | ||
250 | spin_lock_init(&dev->sm_lock); | 276 | spin_lock_init(&dev->sm_lock); |
251 | 277 | ||
@@ -255,11 +281,23 @@ int mthca_create_agents(struct mthca_dev *dev) | |||
255 | q ? IB_QPT_GSI : IB_QPT_SMI, | 281 | q ? IB_QPT_GSI : IB_QPT_SMI, |
256 | NULL, 0, send_handler, | 282 | NULL, 0, send_handler, |
257 | NULL, NULL); | 283 | NULL, NULL); |
258 | if (IS_ERR(agent)) | 284 | if (IS_ERR(agent)) { |
285 | ret = PTR_ERR(agent); | ||
259 | goto err; | 286 | goto err; |
287 | } | ||
260 | dev->send_agent[p][q] = agent; | 288 | dev->send_agent[p][q] = agent; |
261 | } | 289 | } |
262 | 290 | ||
291 | |||
292 | for (p = 1; p <= dev->limits.num_ports; ++p) { | ||
293 | ret = mthca_update_rate(dev, p); | ||
294 | if (ret) { | ||
295 | mthca_err(dev, "Failed to obtain port %d rate," | ||
296 | " aborting.\n", p); | ||
297 | goto err; | ||
298 | } | ||
299 | } | ||
300 | |||
263 | return 0; | 301 | return 0; |
264 | 302 | ||
265 | err: | 303 | err: |
@@ -268,7 +306,7 @@ err: | |||
268 | if (dev->send_agent[p][q]) | 306 | if (dev->send_agent[p][q]) |
269 | ib_unregister_mad_agent(dev->send_agent[p][q]); | 307 | ib_unregister_mad_agent(dev->send_agent[p][q]); |
270 | 308 | ||
271 | return PTR_ERR(agent); | 309 | return ret; |
272 | } | 310 | } |
273 | 311 | ||
274 | void __devexit mthca_free_agents(struct mthca_dev *dev) | 312 | void __devexit mthca_free_agents(struct mthca_dev *dev) |
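The per-port rate that mthca_update_rate() caches is the active speed times the width multiplier, in 2.5 Gb/s units. A hedged example of that arithmetic, assuming the usual PortInfo encodings (speed 1 = SDR, 2 = DDR; widths 1X/4X/8X/12X map to 1/4/8/12):

    /* Sketch: dev->rate[port - 1] in 2.5 Gb/s units.  Illustrative only. */
    static int port_rate_mult(int active_speed, int width_mult)
    {
            return active_speed * width_mult;   /* SDR 4X: 1 * 4 = 4 (10 Gb/s) */
    }

    /* DDR 4X: port_rate_mult(2, 4) = 8, i.e. 20 Gb/s. */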
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 266f347c6707..9b9ff7bff357 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c | |||
@@ -52,6 +52,14 @@ MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver"); | |||
52 | MODULE_LICENSE("Dual BSD/GPL"); | 52 | MODULE_LICENSE("Dual BSD/GPL"); |
53 | MODULE_VERSION(DRV_VERSION); | 53 | MODULE_VERSION(DRV_VERSION); |
54 | 54 | ||
55 | #ifdef CONFIG_INFINIBAND_MTHCA_DEBUG | ||
56 | |||
57 | int mthca_debug_level = 0; | ||
58 | module_param_named(debug_level, mthca_debug_level, int, 0644); | ||
59 | MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); | ||
60 | |||
61 | #endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */ | ||
62 | |||
55 | #ifdef CONFIG_PCI_MSI | 63 | #ifdef CONFIG_PCI_MSI |
56 | 64 | ||
57 | static int msi_x = 0; | 65 | static int msi_x = 0; |
@@ -69,6 +77,10 @@ MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero"); | |||
69 | 77 | ||
70 | #endif /* CONFIG_PCI_MSI */ | 78 | #endif /* CONFIG_PCI_MSI */ |
71 | 79 | ||
80 | static int tune_pci = 0; | ||
81 | module_param(tune_pci, int, 0444); | ||
82 | MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero"); | ||
83 | |||
72 | static const char mthca_version[] __devinitdata = | 84 | static const char mthca_version[] __devinitdata = |
73 | DRV_NAME ": Mellanox InfiniBand HCA driver v" | 85 | DRV_NAME ": Mellanox InfiniBand HCA driver v" |
74 | DRV_VERSION " (" DRV_RELDATE ")\n"; | 86 | DRV_VERSION " (" DRV_RELDATE ")\n"; |
@@ -90,6 +102,9 @@ static int __devinit mthca_tune_pci(struct mthca_dev *mdev) | |||
90 | int cap; | 102 | int cap; |
91 | u16 val; | 103 | u16 val; |
92 | 104 | ||
105 | if (!tune_pci) | ||
106 | return 0; | ||
107 | |||
93 | /* First try to max out Read Byte Count */ | 108 | /* First try to max out Read Byte Count */ |
94 | cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX); | 109 | cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX); |
95 | if (cap) { | 110 | if (cap) { |
@@ -176,6 +191,7 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim | |||
176 | mdev->limits.reserved_srqs = dev_lim->reserved_srqs; | 191 | mdev->limits.reserved_srqs = dev_lim->reserved_srqs; |
177 | mdev->limits.reserved_eecs = dev_lim->reserved_eecs; | 192 | mdev->limits.reserved_eecs = dev_lim->reserved_eecs; |
178 | mdev->limits.max_desc_sz = dev_lim->max_desc_sz; | 193 | mdev->limits.max_desc_sz = dev_lim->max_desc_sz; |
194 | mdev->limits.max_srq_sge = mthca_max_srq_sge(mdev); | ||
179 | /* | 195 | /* |
180 | * Subtract 1 from the limit because we need to allocate a | 196 | * Subtract 1 from the limit because we need to allocate a |
181 | * spare CQE so the HCA HW can tell the difference between an | 197 | * spare CQE so the HCA HW can tell the difference between an |
@@ -191,6 +207,18 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim | |||
191 | mdev->limits.port_width_cap = dev_lim->max_port_width; | 207 | mdev->limits.port_width_cap = dev_lim->max_port_width; |
192 | mdev->limits.page_size_cap = ~(u32) (dev_lim->min_page_sz - 1); | 208 | mdev->limits.page_size_cap = ~(u32) (dev_lim->min_page_sz - 1); |
193 | mdev->limits.flags = dev_lim->flags; | 209 | mdev->limits.flags = dev_lim->flags; |
210 | /* | ||
211 | * For old FW that doesn't return static rate support, use a | ||
212 | * value of 0x3 (only static rate values of 0 or 1 are handled), | ||
213 | * except on Sinai, where even old FW can handle static rate | ||
214 | * values of 2 and 3. | ||
215 | */ | ||
216 | if (dev_lim->stat_rate_support) | ||
217 | mdev->limits.stat_rate_support = dev_lim->stat_rate_support; | ||
218 | else if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT) | ||
219 | mdev->limits.stat_rate_support = 0xf; | ||
220 | else | ||
221 | mdev->limits.stat_rate_support = 0x3; | ||
194 | 222 | ||
195 | /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. | 223 | /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. |
196 | May be doable since hardware supports it for SRQ. | 224 | May be doable since hardware supports it for SRQ. |
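The stat_rate_support mask selected here is what mthca_get_rate() (in the mthca_av.c hunk earlier) tests before using an encoded rate; a quick sketch of that gate, assuming nothing beyond the code shown:

    /* Sketch: fall back to encoding 1 when the FW mask does not advertise
     * support for the computed encoding. */
    static unsigned int gate_rate(unsigned int rate, unsigned int stat_rate_support)
    {
            return (stat_rate_support & (1u << rate)) ? rate : 1;
    }

    /* Old FW, mask 0x3: encodings 0 and 1 pass, 2 and 3 fall back to 1.
     * Sinai or rate-capable FW, mask 0xf: all four encodings pass. */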
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 2c250bc11c33..565a24b1756f 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c | |||
@@ -106,7 +106,7 @@ static int mthca_query_device(struct ib_device *ibdev, | |||
106 | props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; | 106 | props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; |
107 | props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs; | 107 | props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs; |
108 | props->max_srq_wr = mdev->limits.max_srq_wqes; | 108 | props->max_srq_wr = mdev->limits.max_srq_wqes; |
109 | props->max_srq_sge = mdev->limits.max_sg; | 109 | props->max_srq_sge = mdev->limits.max_srq_sge; |
110 | props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay; | 110 | props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay; |
111 | props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? | 111 | props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? |
112 | IB_ATOMIC_HCA : IB_ATOMIC_NONE; | 112 | IB_ATOMIC_HCA : IB_ATOMIC_NONE; |
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 2e7f52136965..6676a786d690 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h | |||
@@ -257,6 +257,8 @@ struct mthca_qp { | |||
257 | atomic_t refcount; | 257 | atomic_t refcount; |
258 | u32 qpn; | 258 | u32 qpn; |
259 | int is_direct; | 259 | int is_direct; |
260 | u8 port; /* for SQP and memfree use only */ | ||
261 | u8 alt_port; /* for memfree use only */ | ||
260 | u8 transport; | 262 | u8 transport; |
261 | u8 state; | 263 | u8 state; |
262 | u8 atomic_rd_en; | 264 | u8 atomic_rd_en; |
@@ -278,7 +280,6 @@ struct mthca_qp { | |||
278 | 280 | ||
279 | struct mthca_sqp { | 281 | struct mthca_sqp { |
280 | struct mthca_qp qp; | 282 | struct mthca_qp qp; |
281 | int port; | ||
282 | int pkey_index; | 283 | int pkey_index; |
283 | u32 qkey; | 284 | u32 qkey; |
284 | u32 send_psn; | 285 | u32 send_psn; |
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 057c8e6af87b..f37b0e367323 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c | |||
@@ -248,6 +248,9 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn, | |||
248 | return; | 248 | return; |
249 | } | 249 | } |
250 | 250 | ||
251 | if (event_type == IB_EVENT_PATH_MIG) | ||
252 | qp->port = qp->alt_port; | ||
253 | |||
251 | event.device = &dev->ib_dev; | 254 | event.device = &dev->ib_dev; |
252 | event.event = event_type; | 255 | event.event = event_type; |
253 | event.element.qp = &qp->ibqp; | 256 | event.element.qp = &qp->ibqp; |
@@ -392,10 +395,16 @@ static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr, | |||
392 | { | 395 | { |
393 | memset(ib_ah_attr, 0, sizeof *path); | 396 | memset(ib_ah_attr, 0, sizeof *path); |
394 | ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3; | 397 | ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3; |
398 | |||
399 | if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->limits.num_ports) | ||
400 | return; | ||
401 | |||
395 | ib_ah_attr->dlid = be16_to_cpu(path->rlid); | 402 | ib_ah_attr->dlid = be16_to_cpu(path->rlid); |
396 | ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28; | 403 | ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28; |
397 | ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f; | 404 | ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f; |
398 | ib_ah_attr->static_rate = path->static_rate & 0x7; | 405 | ib_ah_attr->static_rate = mthca_rate_to_ib(dev, |
406 | path->static_rate & 0x7, | ||
407 | ib_ah_attr->port_num); | ||
399 | ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0; | 408 | ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0; |
400 | if (ib_ah_attr->ah_flags) { | 409 | if (ib_ah_attr->ah_flags) { |
401 | ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1); | 410 | ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1); |
@@ -455,8 +464,10 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m | |||
455 | qp_attr->cap.max_recv_sge = qp->rq.max_gs; | 464 | qp_attr->cap.max_recv_sge = qp->rq.max_gs; |
456 | qp_attr->cap.max_inline_data = qp->max_inline_data; | 465 | qp_attr->cap.max_inline_data = qp->max_inline_data; |
457 | 466 | ||
458 | to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); | 467 | if (qp->transport == RC || qp->transport == UC) { |
459 | to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); | 468 | to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); |
469 | to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); | ||
470 | } | ||
460 | 471 | ||
461 | qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f; | 472 | qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f; |
462 | qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f; | 473 | qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f; |
@@ -484,11 +495,11 @@ out: | |||
484 | } | 495 | } |
485 | 496 | ||
486 | static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah, | 497 | static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah, |
487 | struct mthca_qp_path *path) | 498 | struct mthca_qp_path *path, u8 port) |
488 | { | 499 | { |
489 | path->g_mylmc = ah->src_path_bits & 0x7f; | 500 | path->g_mylmc = ah->src_path_bits & 0x7f; |
490 | path->rlid = cpu_to_be16(ah->dlid); | 501 | path->rlid = cpu_to_be16(ah->dlid); |
491 | path->static_rate = !!ah->static_rate; | 502 | path->static_rate = mthca_get_rate(dev, ah->static_rate, port); |
492 | 503 | ||
493 | if (ah->ah_flags & IB_AH_GRH) { | 504 | if (ah->ah_flags & IB_AH_GRH) { |
494 | if (ah->grh.sgid_index >= dev->limits.gid_table_len) { | 505 | if (ah->grh.sgid_index >= dev->limits.gid_table_len) { |
@@ -634,7 +645,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) | |||
634 | 645 | ||
635 | if (qp->transport == MLX) | 646 | if (qp->transport == MLX) |
636 | qp_context->pri_path.port_pkey |= | 647 | qp_context->pri_path.port_pkey |= |
637 | cpu_to_be32(to_msqp(qp)->port << 24); | 648 | cpu_to_be32(qp->port << 24); |
638 | else { | 649 | else { |
639 | if (attr_mask & IB_QP_PORT) { | 650 | if (attr_mask & IB_QP_PORT) { |
640 | qp_context->pri_path.port_pkey |= | 651 | qp_context->pri_path.port_pkey |= |
@@ -657,7 +668,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) | |||
657 | } | 668 | } |
658 | 669 | ||
659 | if (attr_mask & IB_QP_AV) { | 670 | if (attr_mask & IB_QP_AV) { |
660 | if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path)) | 671 | if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path, |
672 | attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) | ||
661 | return -EINVAL; | 673 | return -EINVAL; |
662 | 674 | ||
663 | qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); | 675 | qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); |
@@ -681,7 +693,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) | |||
681 | return -EINVAL; | 693 | return -EINVAL; |
682 | } | 694 | } |
683 | 695 | ||
684 | if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path)) | 696 | if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path, |
697 | attr->alt_ah_attr.port_num)) | ||
685 | return -EINVAL; | 698 | return -EINVAL; |
686 | 699 | ||
687 | qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index | | 700 | qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index | |
@@ -791,6 +804,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) | |||
791 | qp->atomic_rd_en = attr->qp_access_flags; | 804 | qp->atomic_rd_en = attr->qp_access_flags; |
792 | if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) | 805 | if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) |
793 | qp->resp_depth = attr->max_dest_rd_atomic; | 806 | qp->resp_depth = attr->max_dest_rd_atomic; |
807 | if (attr_mask & IB_QP_PORT) | ||
808 | qp->port = attr->port_num; | ||
809 | if (attr_mask & IB_QP_ALT_PATH) | ||
810 | qp->alt_port = attr->alt_port_num; | ||
794 | 811 | ||
795 | if (is_sqp(dev, qp)) | 812 | if (is_sqp(dev, qp)) |
796 | store_attrs(to_msqp(qp), attr, attr_mask); | 813 | store_attrs(to_msqp(qp), attr, attr_mask); |
@@ -802,13 +819,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) | |||
802 | if (is_qp0(dev, qp)) { | 819 | if (is_qp0(dev, qp)) { |
803 | if (cur_state != IB_QPS_RTR && | 820 | if (cur_state != IB_QPS_RTR && |
804 | new_state == IB_QPS_RTR) | 821 | new_state == IB_QPS_RTR) |
805 | init_port(dev, to_msqp(qp)->port); | 822 | init_port(dev, qp->port); |
806 | 823 | ||
807 | if (cur_state != IB_QPS_RESET && | 824 | if (cur_state != IB_QPS_RESET && |
808 | cur_state != IB_QPS_ERR && | 825 | cur_state != IB_QPS_ERR && |
809 | (new_state == IB_QPS_RESET || | 826 | (new_state == IB_QPS_RESET || |
810 | new_state == IB_QPS_ERR)) | 827 | new_state == IB_QPS_ERR)) |
811 | mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status); | 828 | mthca_CLOSE_IB(dev, qp->port, &status); |
812 | } | 829 | } |
813 | 830 | ||
814 | /* | 831 | /* |
@@ -1212,6 +1229,9 @@ int mthca_alloc_qp(struct mthca_dev *dev, | |||
1212 | if (qp->qpn == -1) | 1229 | if (qp->qpn == -1) |
1213 | return -ENOMEM; | 1230 | return -ENOMEM; |
1214 | 1231 | ||
1232 | /* initialize port to zero for error-catching. */ | ||
1233 | qp->port = 0; | ||
1234 | |||
1215 | err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, | 1235 | err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, |
1216 | send_policy, qp); | 1236 | send_policy, qp); |
1217 | if (err) { | 1237 | if (err) { |
@@ -1261,7 +1281,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev, | |||
1261 | if (err) | 1281 | if (err) |
1262 | goto err_out; | 1282 | goto err_out; |
1263 | 1283 | ||
1264 | sqp->port = port; | 1284 | sqp->qp.port = port; |
1265 | sqp->qp.qpn = mqpn; | 1285 | sqp->qp.qpn = mqpn; |
1266 | sqp->qp.transport = MLX; | 1286 | sqp->qp.transport = MLX; |
1267 | 1287 | ||
@@ -1404,10 +1424,10 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, | |||
1404 | sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; | 1424 | sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; |
1405 | sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); | 1425 | sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); |
1406 | if (!sqp->qp.ibqp.qp_num) | 1426 | if (!sqp->qp.ibqp.qp_num) |
1407 | ib_get_cached_pkey(&dev->ib_dev, sqp->port, | 1427 | ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, |
1408 | sqp->pkey_index, &pkey); | 1428 | sqp->pkey_index, &pkey); |
1409 | else | 1429 | else |
1410 | ib_get_cached_pkey(&dev->ib_dev, sqp->port, | 1430 | ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, |
1411 | wr->wr.ud.pkey_index, &pkey); | 1431 | wr->wr.ud.pkey_index, &pkey); |
1412 | sqp->ud_header.bth.pkey = cpu_to_be16(pkey); | 1432 | sqp->ud_header.bth.pkey = cpu_to_be16(pkey); |
1413 | sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); | 1433 | sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); |
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 2dd3aea05341..adcaf85355ae 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c | |||
@@ -192,7 +192,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, | |||
192 | 192 | ||
193 | /* Sanity check SRQ size before proceeding */ | 193 | /* Sanity check SRQ size before proceeding */ |
194 | if (attr->max_wr > dev->limits.max_srq_wqes || | 194 | if (attr->max_wr > dev->limits.max_srq_wqes || |
195 | attr->max_sge > dev->limits.max_sg) | 195 | attr->max_sge > dev->limits.max_srq_sge) |
196 | return -EINVAL; | 196 | return -EINVAL; |
197 | 197 | ||
198 | srq->max = attr->max_wr; | 198 | srq->max = attr->max_wr; |
@@ -660,6 +660,31 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, | |||
660 | return err; | 660 | return err; |
661 | } | 661 | } |
662 | 662 | ||
663 | int mthca_max_srq_sge(struct mthca_dev *dev) | ||
664 | { | ||
665 | if (mthca_is_memfree(dev)) | ||
666 | return dev->limits.max_sg; | ||
667 | |||
668 | /* | ||
669 | * SRQ allocations are based on powers of 2 for Tavor, | ||
670 | * (although they only need to be multiples of 16 bytes). | ||
671 | * | ||
672 | * Therefore, we need to base the max number of sg entries on | ||
673 | * the largest power of 2 descriptor size that is <= to the | ||
674 | * actual max WQE descriptor size, rather than return the | ||
675 | * max_sg value given by the firmware (which is based on WQE | ||
676 | * sizes as multiples of 16, not powers of 2). | ||
677 | * | ||
678 | * If SRQ implementation is changed for Tavor to be based on | ||
679 | * multiples of 16, the calculation below can be deleted and | ||
680 | * the FW max_sg value returned. | ||
681 | */ | ||
682 | return min_t(int, dev->limits.max_sg, | ||
683 | ((1 << (fls(dev->limits.max_desc_sz) - 1)) - | ||
684 | sizeof (struct mthca_next_seg)) / | ||
685 | sizeof (struct mthca_data_seg)); | ||
686 | } | ||
687 | |||
663 | int __devinit mthca_init_srq_table(struct mthca_dev *dev) | 688 | int __devinit mthca_init_srq_table(struct mthca_dev *dev) |
664 | { | 689 | { |
665 | int err; | 690 | int err; |
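The Tavor branch of mthca_max_srq_sge() above rounds the maximum descriptor size down to a power of two and then counts how many data segments fit after the next-segment header. A worked sketch, with the segment sizes as illustrative parameters rather than values taken from the driver headers:

    /* Sketch of the Tavor SRQ SGE calculation.  Sizes are hypothetical. */
    #include <stdio.h>

    static int max_srq_sge(int max_desc_sz, int next_seg_sz, int data_seg_sz,
                           int max_sg)
    {
            int pow2 = 1;
            int sge;

            while (pow2 * 2 <= max_desc_sz)     /* largest power of 2 <= max_desc_sz */
                    pow2 *= 2;

            sge = (pow2 - next_seg_sz) / data_seg_sz;
            return sge < max_sg ? sge : max_sg;
    }

    int main(void)
    {
            /* e.g. a 1008-byte max descriptor with 16-byte segments:
             * 512 usable -> (512 - 16) / 16 = 31 SGEs, further capped by max_sg. */
            printf("%d\n", max_srq_sge(1008, 16, 16, 59));
            return 0;
    }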
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index 8d2e04cac68e..13d6d01c72c0 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig | |||
@@ -10,8 +10,9 @@ config INFINIBAND_IPOIB | |||
10 | group: <http://www.ietf.org/html.charters/ipoib-charter.html>. | 10 | group: <http://www.ietf.org/html.charters/ipoib-charter.html>. |
11 | 11 | ||
12 | config INFINIBAND_IPOIB_DEBUG | 12 | config INFINIBAND_IPOIB_DEBUG |
13 | bool "IP-over-InfiniBand debugging" | 13 | bool "IP-over-InfiniBand debugging" if EMBEDDED |
14 | depends on INFINIBAND_IPOIB | 14 | depends on INFINIBAND_IPOIB |
15 | default y | ||
15 | ---help--- | 16 | ---help--- |
16 | This option causes debugging code to be compiled into the | 17 | This option causes debugging code to be compiled into the |
17 | IPoIB driver. The output can be turned on via the | 18 | IPoIB driver. The output can be turned on via the |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index b640107fb732..12a1e0572ef2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h | |||
@@ -65,6 +65,8 @@ enum { | |||
65 | 65 | ||
66 | IPOIB_RX_RING_SIZE = 128, | 66 | IPOIB_RX_RING_SIZE = 128, |
67 | IPOIB_TX_RING_SIZE = 64, | 67 | IPOIB_TX_RING_SIZE = 64, |
68 | IPOIB_MAX_QUEUE_SIZE = 8192, | ||
69 | IPOIB_MIN_QUEUE_SIZE = 2, | ||
68 | 70 | ||
69 | IPOIB_NUM_WC = 4, | 71 | IPOIB_NUM_WC = 4, |
70 | 72 | ||
@@ -230,6 +232,9 @@ static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh) | |||
230 | INFINIBAND_ALEN, sizeof(void *)); | 232 | INFINIBAND_ALEN, sizeof(void *)); |
231 | } | 233 | } |
232 | 234 | ||
235 | struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh); | ||
236 | void ipoib_neigh_free(struct ipoib_neigh *neigh); | ||
237 | |||
233 | extern struct workqueue_struct *ipoib_workqueue; | 238 | extern struct workqueue_struct *ipoib_workqueue; |
234 | 239 | ||
235 | /* functions */ | 240 | /* functions */ |
@@ -329,6 +334,8 @@ static inline void ipoib_unregister_debugfs(void) { } | |||
329 | #define ipoib_warn(priv, format, arg...) \ | 334 | #define ipoib_warn(priv, format, arg...) \ |
330 | ipoib_printk(KERN_WARNING, priv, format , ## arg) | 335 | ipoib_printk(KERN_WARNING, priv, format , ## arg) |
331 | 336 | ||
337 | extern int ipoib_sendq_size; | ||
338 | extern int ipoib_recvq_size; | ||
332 | 339 | ||
333 | #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG | 340 | #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG |
334 | extern int ipoib_debug_level; | 341 | extern int ipoib_debug_level; |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 685258e34034..5dde380e8dbe 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c | |||
@@ -213,7 +213,7 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr) | |||
213 | gid_buf, path.pathrec.dlid ? "yes" : "no"); | 213 | gid_buf, path.pathrec.dlid ? "yes" : "no"); |
214 | 214 | ||
215 | if (path.pathrec.dlid) { | 215 | if (path.pathrec.dlid) { |
216 | rate = ib_sa_rate_enum_to_int(path.pathrec.rate) * 25; | 216 | rate = ib_rate_to_mult(path.pathrec.rate) * 25; |
217 | 217 | ||
218 | seq_printf(file, | 218 | seq_printf(file, |
219 | " DLID: 0x%04x\n" | 219 | " DLID: 0x%04x\n" |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index ed65202878d8..a54da42849ae 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c | |||
@@ -161,7 +161,7 @@ static int ipoib_ib_post_receives(struct net_device *dev) | |||
161 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 161 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
162 | int i; | 162 | int i; |
163 | 163 | ||
164 | for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) { | 164 | for (i = 0; i < ipoib_recvq_size; ++i) { |
165 | if (ipoib_alloc_rx_skb(dev, i)) { | 165 | if (ipoib_alloc_rx_skb(dev, i)) { |
166 | ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); | 166 | ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); |
167 | return -ENOMEM; | 167 | return -ENOMEM; |
@@ -187,7 +187,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev, | |||
187 | if (wr_id & IPOIB_OP_RECV) { | 187 | if (wr_id & IPOIB_OP_RECV) { |
188 | wr_id &= ~IPOIB_OP_RECV; | 188 | wr_id &= ~IPOIB_OP_RECV; |
189 | 189 | ||
190 | if (wr_id < IPOIB_RX_RING_SIZE) { | 190 | if (wr_id < ipoib_recvq_size) { |
191 | struct sk_buff *skb = priv->rx_ring[wr_id].skb; | 191 | struct sk_buff *skb = priv->rx_ring[wr_id].skb; |
192 | dma_addr_t addr = priv->rx_ring[wr_id].mapping; | 192 | dma_addr_t addr = priv->rx_ring[wr_id].mapping; |
193 | 193 | ||
@@ -252,9 +252,9 @@ static void ipoib_ib_handle_wc(struct net_device *dev, | |||
252 | struct ipoib_tx_buf *tx_req; | 252 | struct ipoib_tx_buf *tx_req; |
253 | unsigned long flags; | 253 | unsigned long flags; |
254 | 254 | ||
255 | if (wr_id >= IPOIB_TX_RING_SIZE) { | 255 | if (wr_id >= ipoib_sendq_size) { |
256 | ipoib_warn(priv, "completion event with wrid %d (> %d)\n", | 256 | ipoib_warn(priv, "completion event with wrid %d (> %d)\n", |
257 | wr_id, IPOIB_TX_RING_SIZE); | 257 | wr_id, ipoib_sendq_size); |
258 | return; | 258 | return; |
259 | } | 259 | } |
260 | 260 | ||
@@ -275,7 +275,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev, | |||
275 | spin_lock_irqsave(&priv->tx_lock, flags); | 275 | spin_lock_irqsave(&priv->tx_lock, flags); |
276 | ++priv->tx_tail; | 276 | ++priv->tx_tail; |
277 | if (netif_queue_stopped(dev) && | 277 | if (netif_queue_stopped(dev) && |
278 | priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2) | 278 | priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) |
279 | netif_wake_queue(dev); | 279 | netif_wake_queue(dev); |
280 | spin_unlock_irqrestore(&priv->tx_lock, flags); | 280 | spin_unlock_irqrestore(&priv->tx_lock, flags); |
281 | 281 | ||
@@ -344,13 +344,13 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, | |||
344 | * means we have to make sure everything is properly recorded and | 344 | * means we have to make sure everything is properly recorded and |
345 | * our state is consistent before we call post_send(). | 345 | * our state is consistent before we call post_send(). |
346 | */ | 346 | */ |
347 | tx_req = &priv->tx_ring[priv->tx_head & (IPOIB_TX_RING_SIZE - 1)]; | 347 | tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)]; |
348 | tx_req->skb = skb; | 348 | tx_req->skb = skb; |
349 | addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len, | 349 | addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len, |
350 | DMA_TO_DEVICE); | 350 | DMA_TO_DEVICE); |
351 | pci_unmap_addr_set(tx_req, mapping, addr); | 351 | pci_unmap_addr_set(tx_req, mapping, addr); |
352 | 352 | ||
353 | if (unlikely(post_send(priv, priv->tx_head & (IPOIB_TX_RING_SIZE - 1), | 353 | if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), |
354 | address->ah, qpn, addr, skb->len))) { | 354 | address->ah, qpn, addr, skb->len))) { |
355 | ipoib_warn(priv, "post_send failed\n"); | 355 | ipoib_warn(priv, "post_send failed\n"); |
356 | ++priv->stats.tx_errors; | 356 | ++priv->stats.tx_errors; |
@@ -363,7 +363,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, | |||
363 | address->last_send = priv->tx_head; | 363 | address->last_send = priv->tx_head; |
364 | ++priv->tx_head; | 364 | ++priv->tx_head; |
365 | 365 | ||
366 | if (priv->tx_head - priv->tx_tail == IPOIB_TX_RING_SIZE) { | 366 | if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) { |
367 | ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); | 367 | ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); |
368 | netif_stop_queue(dev); | 368 | netif_stop_queue(dev); |
369 | } | 369 | } |
@@ -488,7 +488,7 @@ static int recvs_pending(struct net_device *dev) | |||
488 | int pending = 0; | 488 | int pending = 0; |
489 | int i; | 489 | int i; |
490 | 490 | ||
491 | for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) | 491 | for (i = 0; i < ipoib_recvq_size; ++i) |
492 | if (priv->rx_ring[i].skb) | 492 | if (priv->rx_ring[i].skb) |
493 | ++pending; | 493 | ++pending; |
494 | 494 | ||
@@ -527,7 +527,7 @@ int ipoib_ib_dev_stop(struct net_device *dev) | |||
527 | */ | 527 | */ |
528 | while ((int) priv->tx_tail - (int) priv->tx_head < 0) { | 528 | while ((int) priv->tx_tail - (int) priv->tx_head < 0) { |
529 | tx_req = &priv->tx_ring[priv->tx_tail & | 529 | tx_req = &priv->tx_ring[priv->tx_tail & |
530 | (IPOIB_TX_RING_SIZE - 1)]; | 530 | (ipoib_sendq_size - 1)]; |
531 | dma_unmap_single(priv->ca->dma_device, | 531 | dma_unmap_single(priv->ca->dma_device, |
532 | pci_unmap_addr(tx_req, mapping), | 532 | pci_unmap_addr(tx_req, mapping), |
533 | tx_req->skb->len, | 533 | tx_req->skb->len, |
@@ -536,7 +536,7 @@ int ipoib_ib_dev_stop(struct net_device *dev) | |||
536 | ++priv->tx_tail; | 536 | ++priv->tx_tail; |
537 | } | 537 | } |
538 | 538 | ||
539 | for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) | 539 | for (i = 0; i < ipoib_recvq_size; ++i) |
540 | if (priv->rx_ring[i].skb) { | 540 | if (priv->rx_ring[i].skb) { |
541 | dma_unmap_single(priv->ca->dma_device, | 541 | dma_unmap_single(priv->ca->dma_device, |
542 | pci_unmap_addr(&priv->rx_ring[i], | 542 | pci_unmap_addr(&priv->rx_ring[i], |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 9b0bd7c746ca..cb078a7d0bf5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/init.h> | 41 | #include <linux/init.h> |
42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
43 | #include <linux/vmalloc.h> | 43 | #include <linux/vmalloc.h> |
44 | #include <linux/kernel.h> | ||
44 | 45 | ||
45 | #include <linux/if_arp.h> /* For ARPHRD_xxx */ | 46 | #include <linux/if_arp.h> /* For ARPHRD_xxx */ |
46 | 47 | ||
@@ -53,6 +54,14 @@ MODULE_AUTHOR("Roland Dreier"); | |||
53 | MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); | 54 | MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); |
54 | MODULE_LICENSE("Dual BSD/GPL"); | 55 | MODULE_LICENSE("Dual BSD/GPL"); |
55 | 56 | ||
57 | int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE; | ||
58 | int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE; | ||
59 | |||
60 | module_param_named(send_queue_size, ipoib_sendq_size, int, 0444); | ||
61 | MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); | ||
62 | module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); | ||
63 | MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); | ||
64 | |||
56 | #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG | 65 | #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG |
57 | int ipoib_debug_level; | 66 | int ipoib_debug_level; |
58 | 67 | ||
@@ -252,8 +261,8 @@ static void path_free(struct net_device *dev, struct ipoib_path *path) | |||
252 | */ | 261 | */ |
253 | if (neigh->ah) | 262 | if (neigh->ah) |
254 | ipoib_put_ah(neigh->ah); | 263 | ipoib_put_ah(neigh->ah); |
255 | *to_ipoib_neigh(neigh->neighbour) = NULL; | 264 | |
256 | kfree(neigh); | 265 | ipoib_neigh_free(neigh); |
257 | } | 266 | } |
258 | 267 | ||
259 | spin_unlock_irqrestore(&priv->lock, flags); | 268 | spin_unlock_irqrestore(&priv->lock, flags); |
@@ -327,9 +336,8 @@ void ipoib_flush_paths(struct net_device *dev) | |||
327 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 336 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
328 | struct ipoib_path *path, *tp; | 337 | struct ipoib_path *path, *tp; |
329 | LIST_HEAD(remove_list); | 338 | LIST_HEAD(remove_list); |
330 | unsigned long flags; | ||
331 | 339 | ||
332 | spin_lock_irqsave(&priv->lock, flags); | 340 | spin_lock_irq(&priv->lock); |
333 | 341 | ||
334 | list_splice(&priv->path_list, &remove_list); | 342 | list_splice(&priv->path_list, &remove_list); |
335 | INIT_LIST_HEAD(&priv->path_list); | 343 | INIT_LIST_HEAD(&priv->path_list); |
@@ -337,14 +345,15 @@ void ipoib_flush_paths(struct net_device *dev) | |||
337 | list_for_each_entry(path, &remove_list, list) | 345 | list_for_each_entry(path, &remove_list, list) |
338 | rb_erase(&path->rb_node, &priv->path_tree); | 346 | rb_erase(&path->rb_node, &priv->path_tree); |
339 | 347 | ||
340 | spin_unlock_irqrestore(&priv->lock, flags); | ||
341 | |||
342 | list_for_each_entry_safe(path, tp, &remove_list, list) { | 348 | list_for_each_entry_safe(path, tp, &remove_list, list) { |
343 | if (path->query) | 349 | if (path->query) |
344 | ib_sa_cancel_query(path->query_id, path->query); | 350 | ib_sa_cancel_query(path->query_id, path->query); |
351 | spin_unlock_irq(&priv->lock); | ||
345 | wait_for_completion(&path->done); | 352 | wait_for_completion(&path->done); |
346 | path_free(dev, path); | 353 | path_free(dev, path); |
354 | spin_lock_irq(&priv->lock); | ||
347 | } | 355 | } |
356 | spin_unlock_irq(&priv->lock); | ||
348 | } | 357 | } |
349 | 358 | ||
350 | static void path_rec_completion(int status, | 359 | static void path_rec_completion(int status, |
@@ -373,16 +382,9 @@ static void path_rec_completion(int status, | |||
373 | struct ib_ah_attr av = { | 382 | struct ib_ah_attr av = { |
374 | .dlid = be16_to_cpu(pathrec->dlid), | 383 | .dlid = be16_to_cpu(pathrec->dlid), |
375 | .sl = pathrec->sl, | 384 | .sl = pathrec->sl, |
376 | .port_num = priv->port | 385 | .port_num = priv->port, |
386 | .static_rate = pathrec->rate | ||
377 | }; | 387 | }; |
378 | int path_rate = ib_sa_rate_enum_to_int(pathrec->rate); | ||
379 | |||
380 | if (path_rate > 0 && priv->local_rate > path_rate) | ||
381 | av.static_rate = (priv->local_rate - 1) / path_rate; | ||
382 | |||
383 | ipoib_dbg(priv, "static_rate %d for local port %dX, path %dX\n", | ||
384 | av.static_rate, priv->local_rate, | ||
385 | ib_sa_rate_enum_to_int(pathrec->rate)); | ||
386 | 388 | ||
387 | ah = ipoib_create_ah(dev, priv->pd, &av); | 389 | ah = ipoib_create_ah(dev, priv->pd, &av); |
388 | } | 390 | } |
@@ -481,7 +483,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) | |||
481 | struct ipoib_path *path; | 483 | struct ipoib_path *path; |
482 | struct ipoib_neigh *neigh; | 484 | struct ipoib_neigh *neigh; |
483 | 485 | ||
484 | neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); | 486 | neigh = ipoib_neigh_alloc(skb->dst->neighbour); |
485 | if (!neigh) { | 487 | if (!neigh) { |
486 | ++priv->stats.tx_dropped; | 488 | ++priv->stats.tx_dropped; |
487 | dev_kfree_skb_any(skb); | 489 | dev_kfree_skb_any(skb); |
@@ -489,8 +491,6 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) | |||
489 | } | 491 | } |
490 | 492 | ||
491 | skb_queue_head_init(&neigh->queue); | 493 | skb_queue_head_init(&neigh->queue); |
492 | neigh->neighbour = skb->dst->neighbour; | ||
493 | *to_ipoib_neigh(skb->dst->neighbour) = neigh; | ||
494 | 494 | ||
495 | /* | 495 | /* |
496 | * We can only be called from ipoib_start_xmit, so we're | 496 | * We can only be called from ipoib_start_xmit, so we're |
@@ -503,7 +503,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) | |||
503 | path = path_rec_create(dev, | 503 | path = path_rec_create(dev, |
504 | (union ib_gid *) (skb->dst->neighbour->ha + 4)); | 504 | (union ib_gid *) (skb->dst->neighbour->ha + 4)); |
505 | if (!path) | 505 | if (!path) |
506 | goto err; | 506 | goto err_path; |
507 | 507 | ||
508 | __path_add(dev, path); | 508 | __path_add(dev, path); |
509 | } | 509 | } |
@@ -521,17 +521,17 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) | |||
521 | __skb_queue_tail(&neigh->queue, skb); | 521 | __skb_queue_tail(&neigh->queue, skb); |
522 | 522 | ||
523 | if (!path->query && path_rec_start(dev, path)) | 523 | if (!path->query && path_rec_start(dev, path)) |
524 | goto err; | 524 | goto err_list; |
525 | } | 525 | } |
526 | 526 | ||
527 | spin_unlock(&priv->lock); | 527 | spin_unlock(&priv->lock); |
528 | return; | 528 | return; |
529 | 529 | ||
530 | err: | 530 | err_list: |
531 | *to_ipoib_neigh(skb->dst->neighbour) = NULL; | ||
532 | list_del(&neigh->list); | 531 | list_del(&neigh->list); |
533 | kfree(neigh); | ||
534 | 532 | ||
533 | err_path: | ||
534 | ipoib_neigh_free(neigh); | ||
535 | ++priv->stats.tx_dropped; | 535 | ++priv->stats.tx_dropped; |
536 | dev_kfree_skb_any(skb); | 536 | dev_kfree_skb_any(skb); |
537 | 537 | ||
@@ -763,8 +763,7 @@ static void ipoib_neigh_destructor(struct neighbour *n) | |||
763 | if (neigh->ah) | 763 | if (neigh->ah) |
764 | ah = neigh->ah; | 764 | ah = neigh->ah; |
765 | list_del(&neigh->list); | 765 | list_del(&neigh->list); |
766 | *to_ipoib_neigh(n) = NULL; | 766 | ipoib_neigh_free(neigh); |
767 | kfree(neigh); | ||
768 | } | 767 | } |
769 | 768 | ||
770 | spin_unlock_irqrestore(&priv->lock, flags); | 769 | spin_unlock_irqrestore(&priv->lock, flags); |
@@ -773,6 +772,26 @@ static void ipoib_neigh_destructor(struct neighbour *n) | |||
773 | ipoib_put_ah(ah); | 772 | ipoib_put_ah(ah); |
774 | } | 773 | } |
775 | 774 | ||
775 | struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour) | ||
776 | { | ||
777 | struct ipoib_neigh *neigh; | ||
778 | |||
779 | neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); | ||
780 | if (!neigh) | ||
781 | return NULL; | ||
782 | |||
783 | neigh->neighbour = neighbour; | ||
784 | *to_ipoib_neigh(neighbour) = neigh; | ||
785 | |||
786 | return neigh; | ||
787 | } | ||
788 | |||
789 | void ipoib_neigh_free(struct ipoib_neigh *neigh) | ||
790 | { | ||
791 | *to_ipoib_neigh(neigh->neighbour) = NULL; | ||
792 | kfree(neigh); | ||
793 | } | ||
794 | |||
776 | static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms) | 795 | static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms) |
777 | { | 796 | { |
778 | parms->neigh_destructor = ipoib_neigh_destructor; | 797 | parms->neigh_destructor = ipoib_neigh_destructor; |
@@ -785,20 +804,19 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) | |||
785 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 804 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
786 | 805 | ||
787 | /* Allocate RX/TX "rings" to hold queued skbs */ | 806 | /* Allocate RX/TX "rings" to hold queued skbs */ |
788 | 807 | priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, | |
789 | priv->rx_ring = kzalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf), | ||
790 | GFP_KERNEL); | 808 | GFP_KERNEL); |
791 | if (!priv->rx_ring) { | 809 | if (!priv->rx_ring) { |
792 | printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", | 810 | printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", |
793 | ca->name, IPOIB_RX_RING_SIZE); | 811 | ca->name, ipoib_recvq_size); |
794 | goto out; | 812 | goto out; |
795 | } | 813 | } |
796 | 814 | ||
797 | priv->tx_ring = kzalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf), | 815 | priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, |
798 | GFP_KERNEL); | 816 | GFP_KERNEL); |
799 | if (!priv->tx_ring) { | 817 | if (!priv->tx_ring) { |
800 | printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", | 818 | printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", |
801 | ca->name, IPOIB_TX_RING_SIZE); | 819 | ca->name, ipoib_sendq_size); |
802 | goto out_rx_ring_cleanup; | 820 | goto out_rx_ring_cleanup; |
803 | } | 821 | } |
804 | 822 | ||
@@ -866,7 +884,7 @@ static void ipoib_setup(struct net_device *dev) | |||
866 | dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN; | 884 | dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN; |
867 | dev->addr_len = INFINIBAND_ALEN; | 885 | dev->addr_len = INFINIBAND_ALEN; |
868 | dev->type = ARPHRD_INFINIBAND; | 886 | dev->type = ARPHRD_INFINIBAND; |
869 | dev->tx_queue_len = IPOIB_TX_RING_SIZE * 2; | 887 | dev->tx_queue_len = ipoib_sendq_size * 2; |
870 | dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; | 888 | dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; |
871 | 889 | ||
872 | /* MTU will be reset when mcast join happens */ | 890 | /* MTU will be reset when mcast join happens */ |
@@ -1118,6 +1136,14 @@ static int __init ipoib_init_module(void) | |||
1118 | { | 1136 | { |
1119 | int ret; | 1137 | int ret; |
1120 | 1138 | ||
1139 | ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size); | ||
1140 | ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE); | ||
1141 | ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE); | ||
1142 | |||
1143 | ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size); | ||
1144 | ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE); | ||
1145 | ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE); | ||
1146 | |||
1121 | ret = ipoib_register_debugfs(); | 1147 | ret = ipoib_register_debugfs(); |
1122 | if (ret) | 1148 | if (ret) |
1123 | return ret; | 1149 | return ret; |
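The module-init clamping above keeps both ring sizes a power of two (the send path indexes with priv->tx_head & (ipoib_sendq_size - 1)) and bounds them to [IPOIB_MIN_QUEUE_SIZE, IPOIB_MAX_QUEUE_SIZE] = [2, 8192]. A small sketch of the resulting mapping:

    /* Sketch: the clamping applied to send_queue_size/recv_queue_size at
     * module load. */
    static int clamp_queue_size(int n)
    {
            int p = 1;

            while (p < n)                   /* roundup_pow_of_two() */
                    p <<= 1;
            if (p > 8192)
                    p = 8192;
            if (p < 2)
                    p = 2;
            return p;
    }

    /* Examples: 100 -> 128; 10000 -> 16384, clamped to 8192; 1 -> 2. */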
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 93c462eaf4fd..1dae4b238252 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |||
@@ -114,8 +114,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) | |||
114 | */ | 114 | */ |
115 | if (neigh->ah) | 115 | if (neigh->ah) |
116 | ipoib_put_ah(neigh->ah); | 116 | ipoib_put_ah(neigh->ah); |
117 | *to_ipoib_neigh(neigh->neighbour) = NULL; | 117 | ipoib_neigh_free(neigh); |
118 | kfree(neigh); | ||
119 | } | 118 | } |
120 | 119 | ||
121 | spin_unlock_irqrestore(&priv->lock, flags); | 120 | spin_unlock_irqrestore(&priv->lock, flags); |
@@ -251,6 +250,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, | |||
251 | .port_num = priv->port, | 250 | .port_num = priv->port, |
252 | .sl = mcast->mcmember.sl, | 251 | .sl = mcast->mcmember.sl, |
253 | .ah_flags = IB_AH_GRH, | 252 | .ah_flags = IB_AH_GRH, |
253 | .static_rate = mcast->mcmember.rate, | ||
254 | .grh = { | 254 | .grh = { |
255 | .flow_label = be32_to_cpu(mcast->mcmember.flow_label), | 255 | .flow_label = be32_to_cpu(mcast->mcmember.flow_label), |
256 | .hop_limit = mcast->mcmember.hop_limit, | 256 | .hop_limit = mcast->mcmember.hop_limit, |
@@ -258,17 +258,8 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, | |||
258 | .traffic_class = mcast->mcmember.traffic_class | 258 | .traffic_class = mcast->mcmember.traffic_class |
259 | } | 259 | } |
260 | }; | 260 | }; |
261 | int path_rate = ib_sa_rate_enum_to_int(mcast->mcmember.rate); | ||
262 | |||
263 | av.grh.dgid = mcast->mcmember.mgid; | 261 | av.grh.dgid = mcast->mcmember.mgid; |
264 | 262 | ||
265 | if (path_rate > 0 && priv->local_rate > path_rate) | ||
266 | av.static_rate = (priv->local_rate - 1) / path_rate; | ||
267 | |||
268 | ipoib_dbg_mcast(priv, "static_rate %d for local port %dX, mcmember %dX\n", | ||
269 | av.static_rate, priv->local_rate, | ||
270 | ib_sa_rate_enum_to_int(mcast->mcmember.rate)); | ||
271 | |||
272 | ah = ipoib_create_ah(dev, priv->pd, &av); | 263 | ah = ipoib_create_ah(dev, priv->pd, &av); |
273 | if (!ah) { | 264 | if (!ah) { |
274 | ipoib_warn(priv, "ib_address_create failed\n"); | 265 | ipoib_warn(priv, "ib_address_create failed\n"); |
@@ -618,6 +609,22 @@ int ipoib_mcast_start_thread(struct net_device *dev) | |||
618 | return 0; | 609 | return 0; |
619 | } | 610 | } |
620 | 611 | ||
612 | static void wait_for_mcast_join(struct ipoib_dev_priv *priv, | ||
613 | struct ipoib_mcast *mcast) | ||
614 | { | ||
615 | spin_lock_irq(&priv->lock); | ||
616 | if (mcast && mcast->query) { | ||
617 | ib_sa_cancel_query(mcast->query_id, mcast->query); | ||
618 | mcast->query = NULL; | ||
619 | spin_unlock_irq(&priv->lock); | ||
620 | ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n", | ||
621 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | ||
622 | wait_for_completion(&mcast->done); | ||
623 | } | ||
624 | else | ||
625 | spin_unlock_irq(&priv->lock); | ||
626 | } | ||
627 | |||
621 | int ipoib_mcast_stop_thread(struct net_device *dev, int flush) | 628 | int ipoib_mcast_stop_thread(struct net_device *dev, int flush) |
622 | { | 629 | { |
623 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 630 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
@@ -637,28 +644,10 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush) | |||
637 | if (flush) | 644 | if (flush) |
638 | flush_workqueue(ipoib_workqueue); | 645 | flush_workqueue(ipoib_workqueue); |
639 | 646 | ||
640 | spin_lock_irq(&priv->lock); | 647 | wait_for_mcast_join(priv, priv->broadcast); |
641 | if (priv->broadcast && priv->broadcast->query) { | ||
642 | ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query); | ||
643 | priv->broadcast->query = NULL; | ||
644 | spin_unlock_irq(&priv->lock); | ||
645 | ipoib_dbg_mcast(priv, "waiting for bcast\n"); | ||
646 | wait_for_completion(&priv->broadcast->done); | ||
647 | } else | ||
648 | spin_unlock_irq(&priv->lock); | ||
649 | 648 | ||
650 | list_for_each_entry(mcast, &priv->multicast_list, list) { | 649 | list_for_each_entry(mcast, &priv->multicast_list, list) |
651 | spin_lock_irq(&priv->lock); | 650 | wait_for_mcast_join(priv, mcast); |
652 | if (mcast->query) { | ||
653 | ib_sa_cancel_query(mcast->query_id, mcast->query); | ||
654 | mcast->query = NULL; | ||
655 | spin_unlock_irq(&priv->lock); | ||
656 | ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n", | ||
657 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | ||
658 | wait_for_completion(&mcast->done); | ||
659 | } else | ||
660 | spin_unlock_irq(&priv->lock); | ||
661 | } | ||
662 | 651 | ||
663 | return 0; | 652 | return 0; |
664 | } | 653 | } |
@@ -772,13 +761,11 @@ out: | |||
772 | if (skb->dst && | 761 | if (skb->dst && |
773 | skb->dst->neighbour && | 762 | skb->dst->neighbour && |
774 | !*to_ipoib_neigh(skb->dst->neighbour)) { | 763 | !*to_ipoib_neigh(skb->dst->neighbour)) { |
775 | struct ipoib_neigh *neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); | 764 | struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour); |
776 | 765 | ||
777 | if (neigh) { | 766 | if (neigh) { |
778 | kref_get(&mcast->ah->ref); | 767 | kref_get(&mcast->ah->ref); |
779 | neigh->ah = mcast->ah; | 768 | neigh->ah = mcast->ah; |
780 | neigh->neighbour = skb->dst->neighbour; | ||
781 | *to_ipoib_neigh(skb->dst->neighbour) = neigh; | ||
782 | list_add_tail(&neigh->list, &mcast->neigh_list); | 769 | list_add_tail(&neigh->list, &mcast->neigh_list); |
783 | } | 770 | } |
784 | } | 771 | } |
@@ -913,6 +900,7 @@ void ipoib_mcast_restart_task(void *dev_ptr) | |||
913 | 900 | ||
914 | /* We have to cancel outside of the spinlock */ | 901 | /* We have to cancel outside of the spinlock */ |
915 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { | 902 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { |
903 | wait_for_mcast_join(priv, mcast); | ||
916 | ipoib_mcast_leave(mcast->dev, mcast); | 904 | ipoib_mcast_leave(mcast->dev, mcast); |
917 | ipoib_mcast_free(mcast); | 905 | ipoib_mcast_free(mcast); |
918 | } | 906 | } |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 5f0388027b25..1d49d1643c59 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c | |||
@@ -159,8 +159,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) | |||
159 | struct ipoib_dev_priv *priv = netdev_priv(dev); | 159 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
160 | struct ib_qp_init_attr init_attr = { | 160 | struct ib_qp_init_attr init_attr = { |
161 | .cap = { | 161 | .cap = { |
162 | .max_send_wr = IPOIB_TX_RING_SIZE, | 162 | .max_send_wr = ipoib_sendq_size, |
163 | .max_recv_wr = IPOIB_RX_RING_SIZE, | 163 | .max_recv_wr = ipoib_recvq_size, |
164 | .max_send_sge = 1, | 164 | .max_send_sge = 1, |
165 | .max_recv_sge = 1 | 165 | .max_recv_sge = 1 |
166 | }, | 166 | }, |
@@ -175,7 +175,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) | |||
175 | } | 175 | } |
176 | 176 | ||
177 | priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, | 177 | priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, |
178 | IPOIB_TX_RING_SIZE + IPOIB_RX_RING_SIZE + 1); | 178 | ipoib_sendq_size + ipoib_recvq_size + 1); |
179 | if (IS_ERR(priv->cq)) { | 179 | if (IS_ERR(priv->cq)) { |
180 | printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); | 180 | printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); |
181 | goto out_free_pd; | 181 | goto out_free_pd; |
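
Note: the fixed IPOIB_TX_RING_SIZE/IPOIB_RX_RING_SIZE constants give way to the variables ipoib_sendq_size and ipoib_recvq_size, which are defined elsewhere in this series and not shown here. Presumably they are exposed as module parameters along the following lines; the parameter names, defaults, and permissions below are assumptions, only the variable names appear in the diff:

	int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE;
	int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE;

	/* Assumed parameter names and 0444 (read-only) permissions. */
	module_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
	MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
	module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
	MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");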
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index fd8a95a9c5d3..5bb55742ada6 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c | |||
@@ -617,6 +617,14 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd, | |||
617 | scmnd->sc_data_direction); | 617 | scmnd->sc_data_direction); |
618 | } | 618 | } |
619 | 619 | ||
620 | static void srp_remove_req(struct srp_target_port *target, struct srp_request *req, | ||
621 | int index) | ||
622 | { | ||
623 | list_del(&req->list); | ||
624 | req->next = target->req_head; | ||
625 | target->req_head = index; | ||
626 | } | ||
627 | |||
620 | static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) | 628 | static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) |
621 | { | 629 | { |
622 | struct srp_request *req; | 630 | struct srp_request *req; |
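
Note: srp_remove_req() centralizes the sequence that unlinks a request from the pending list and pushes its slot index back onto the free list headed by target->req_head, which chains indices through req->next. For context, the consumer side of that free list presumably looks something like the sketch below; srp_get_req() and req_ring are illustrative names, not taken from this diff:

	/* Conceptual consumer of the free list that srp_remove_req() refills:
	 * pop the head index and hand out that request slot. */
	static struct srp_request *srp_get_req(struct srp_target_port *target)
	{
		int index = target->req_head;
		struct srp_request *req = &target->req_ring[index];

		target->req_head = req->next;	/* pop the index off the free list */
		return req;
	}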
@@ -664,9 +672,7 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) | |||
664 | scmnd->host_scribble = (void *) -1L; | 672 | scmnd->host_scribble = (void *) -1L; |
665 | scmnd->scsi_done(scmnd); | 673 | scmnd->scsi_done(scmnd); |
666 | 674 | ||
667 | list_del(&req->list); | 675 | srp_remove_req(target, req, rsp->tag & ~SRP_TAG_TSK_MGMT); |
668 | req->next = target->req_head; | ||
669 | target->req_head = rsp->tag & ~SRP_TAG_TSK_MGMT; | ||
670 | } else | 676 | } else |
671 | req->cmd_done = 1; | 677 | req->cmd_done = 1; |
672 | } | 678 | } |
@@ -1188,12 +1194,10 @@ static int srp_send_tsk_mgmt(struct scsi_cmnd *scmnd, u8 func) | |||
1188 | spin_lock_irq(target->scsi_host->host_lock); | 1194 | spin_lock_irq(target->scsi_host->host_lock); |
1189 | 1195 | ||
1190 | if (req->cmd_done) { | 1196 | if (req->cmd_done) { |
1191 | list_del(&req->list); | 1197 | srp_remove_req(target, req, req_index); |
1192 | req->next = target->req_head; | ||
1193 | target->req_head = req_index; | ||
1194 | |||
1195 | scmnd->scsi_done(scmnd); | 1198 | scmnd->scsi_done(scmnd); |
1196 | } else if (!req->tsk_status) { | 1199 | } else if (!req->tsk_status) { |
1200 | srp_remove_req(target, req, req_index); | ||
1197 | scmnd->result = DID_ABORT << 16; | 1201 | scmnd->result = DID_ABORT << 16; |
1198 | ret = SUCCESS; | 1202 | ret = SUCCESS; |
1199 | } | 1203 | } |
@@ -1434,6 +1438,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) | |||
1434 | p = match_strdup(args); | 1438 | p = match_strdup(args); |
1435 | if (strlen(p) != 32) { | 1439 | if (strlen(p) != 32) { |
1436 | printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p); | 1440 | printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p); |
1441 | kfree(p); | ||
1437 | goto out; | 1442 | goto out; |
1438 | } | 1443 | } |
1439 | 1444 | ||
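
Note: the added kfree(p) plugs a small leak in srp_parse_options(). match_strdup() hands back a kmalloc()'d copy of the matched token, so the early "goto out" on a malformed GID previously dropped that allocation. The pattern after the fix, reassembled from the hunk with the surrounding parsing code elided:

	p = match_strdup(args);
	if (strlen(p) != 32) {
		printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p);
		kfree(p);	/* free the strdup'd copy on the error path too */
		goto out;
	}
	/* ... parse the 32 hex digits into the destination GID ... */
	kfree(p);		/* the success path must free the copy as well */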