aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/Kconfig1
-rw-r--r--drivers/infiniband/Makefile1
-rw-r--r--drivers/infiniband/core/cache.c2
-rw-r--r--drivers/infiniband/core/mad.c7
-rw-r--r--drivers/infiniband/core/verbs.c34
-rw-r--r--drivers/infiniband/hw/ipath/Kconfig16
-rw-r--r--drivers/infiniband/hw/ipath/Makefile36
-rw-r--r--drivers/infiniband/hw/ipath/ipath_common.h616
-rw-r--r--drivers/infiniband/hw/ipath/ipath_cq.c295
-rw-r--r--drivers/infiniband/hw/ipath/ipath_debug.h96
-rw-r--r--drivers/infiniband/hw/ipath/ipath_diag.c367
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c1983
-rw-r--r--drivers/infiniband/hw/ipath/ipath_eeprom.c613
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c1910
-rw-r--r--drivers/infiniband/hw/ipath/ipath_fs.c605
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ht400.c1586
-rw-r--r--drivers/infiniband/hw/ipath/ipath_init_chip.c951
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c841
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h883
-rw-r--r--drivers/infiniband/hw/ipath/ipath_keys.c236
-rw-r--r--drivers/infiniband/hw/ipath/ipath_layer.c1515
-rw-r--r--drivers/infiniband/hw/ipath/ipath_layer.h181
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mad.c1352
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mr.c383
-rw-r--r--drivers/infiniband/hw/ipath/ipath_pe800.c1247
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c913
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c1857
-rw-r--r--drivers/infiniband/hw/ipath/ipath_registers.h446
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c552
-rw-r--r--drivers/infiniband/hw/ipath/ipath_srq.c273
-rw-r--r--drivers/infiniband/hw/ipath/ipath_stats.c303
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sysfs.c778
-rw-r--r--drivers/infiniband/hw/ipath/ipath_uc.c645
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ud.c621
-rw-r--r--drivers/infiniband/hw/ipath/ipath_user_pages.c207
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c1222
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.h692
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs_mcast.c333
-rw-r--r--drivers/infiniband/hw/ipath/ipath_wc_x86_64.c157
-rw-r--r--drivers/infiniband/hw/ipath/ips_common.h263
-rw-r--r--drivers/infiniband/hw/ipath/verbs_debug.h107
-rw-r--r--drivers/infiniband/hw/mthca/Kconfig11
-rw-r--r--drivers/infiniband/hw/mthca/Makefile4
-rw-r--r--drivers/infiniband/hw/mthca/mthca_av.c100
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c4
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.h1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h23
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c42
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c28
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h3
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c46
-rw-r--r--drivers/infiniband/hw/mthca/mthca_srq.c27
-rw-r--r--drivers/infiniband/ulp/ipoib/Kconfig3
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_fs.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c22
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c88
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c58
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c6
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c19
61 files changed, 25497 insertions, 125 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index bdf0891a92dd..afc612b8577d 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -30,6 +30,7 @@ config INFINIBAND_USER_ACCESS
30 <http://www.openib.org>. 30 <http://www.openib.org>.
31 31
32source "drivers/infiniband/hw/mthca/Kconfig" 32source "drivers/infiniband/hw/mthca/Kconfig"
33source "drivers/infiniband/hw/ipath/Kconfig"
33 34
34source "drivers/infiniband/ulp/ipoib/Kconfig" 35source "drivers/infiniband/ulp/ipoib/Kconfig"
35 36
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index a43fb34cca94..eea27322a22d 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -1,4 +1,5 @@
1obj-$(CONFIG_INFINIBAND) += core/ 1obj-$(CONFIG_INFINIBAND) += core/
2obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/ 2obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/
3obj-$(CONFIG_IPATH_CORE) += hw/ipath/
3obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ 4obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/
4obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ 5obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index c57a3871184c..50364c0b090c 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -302,7 +302,7 @@ static void ib_cache_setup_one(struct ib_device *device)
302 kmalloc(sizeof *device->cache.pkey_cache * 302 kmalloc(sizeof *device->cache.pkey_cache *
303 (end_port(device) - start_port(device) + 1), GFP_KERNEL); 303 (end_port(device) - start_port(device) + 1), GFP_KERNEL);
304 device->cache.gid_cache = 304 device->cache.gid_cache =
305 kmalloc(sizeof *device->cache.pkey_cache * 305 kmalloc(sizeof *device->cache.gid_cache *
306 (end_port(device) - start_port(device) + 1), GFP_KERNEL); 306 (end_port(device) - start_port(device) + 1), GFP_KERNEL);
307 307
308 if (!device->cache.pkey_cache || !device->cache.gid_cache) { 308 if (!device->cache.pkey_cache || !device->cache.gid_cache) {
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ba54c856b0e5..469b6923a2e2 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -228,10 +228,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
228 goto error1; 228 goto error1;
229 } 229 }
230 /* Make sure class supplied is consistent with RMPP */ 230 /* Make sure class supplied is consistent with RMPP */
231 if (ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) { 231 if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
232 if (!rmpp_version)
233 goto error1;
234 } else {
235 if (rmpp_version) 232 if (rmpp_version)
236 goto error1; 233 goto error1;
237 } 234 }
@@ -2311,6 +2308,7 @@ static void local_completions(void *data)
2311 local = list_entry(mad_agent_priv->local_list.next, 2308 local = list_entry(mad_agent_priv->local_list.next,
2312 struct ib_mad_local_private, 2309 struct ib_mad_local_private,
2313 completion_list); 2310 completion_list);
2311 list_del(&local->completion_list);
2314 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 2312 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2315 if (local->mad_priv) { 2313 if (local->mad_priv) {
2316 recv_mad_agent = local->recv_mad_agent; 2314 recv_mad_agent = local->recv_mad_agent;
@@ -2362,7 +2360,6 @@ local_send_completion:
2362 &mad_send_wc); 2360 &mad_send_wc);
2363 2361
2364 spin_lock_irqsave(&mad_agent_priv->lock, flags); 2362 spin_lock_irqsave(&mad_agent_priv->lock, flags);
2365 list_del(&local->completion_list);
2366 atomic_dec(&mad_agent_priv->refcount); 2363 atomic_dec(&mad_agent_priv->refcount);
2367 if (!recv) 2364 if (!recv)
2368 kmem_cache_free(ib_mad_cache, local->mad_priv); 2365 kmem_cache_free(ib_mad_cache, local->mad_priv);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index cae0845f472a..b78e7dc69330 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -45,6 +45,40 @@
45#include <rdma/ib_verbs.h> 45#include <rdma/ib_verbs.h>
46#include <rdma/ib_cache.h> 46#include <rdma/ib_cache.h>
47 47
48int ib_rate_to_mult(enum ib_rate rate)
49{
50 switch (rate) {
51 case IB_RATE_2_5_GBPS: return 1;
52 case IB_RATE_5_GBPS: return 2;
53 case IB_RATE_10_GBPS: return 4;
54 case IB_RATE_20_GBPS: return 8;
55 case IB_RATE_30_GBPS: return 12;
56 case IB_RATE_40_GBPS: return 16;
57 case IB_RATE_60_GBPS: return 24;
58 case IB_RATE_80_GBPS: return 32;
59 case IB_RATE_120_GBPS: return 48;
60 default: return -1;
61 }
62}
63EXPORT_SYMBOL(ib_rate_to_mult);
64
65enum ib_rate mult_to_ib_rate(int mult)
66{
67 switch (mult) {
68 case 1: return IB_RATE_2_5_GBPS;
69 case 2: return IB_RATE_5_GBPS;
70 case 4: return IB_RATE_10_GBPS;
71 case 8: return IB_RATE_20_GBPS;
72 case 12: return IB_RATE_30_GBPS;
73 case 16: return IB_RATE_40_GBPS;
74 case 24: return IB_RATE_60_GBPS;
75 case 32: return IB_RATE_80_GBPS;
76 case 48: return IB_RATE_120_GBPS;
77 default: return IB_RATE_PORT_CURRENT;
78 }
79}
80EXPORT_SYMBOL(mult_to_ib_rate);
81
48/* Protection domains */ 82/* Protection domains */
49 83
50struct ib_pd *ib_alloc_pd(struct ib_device *device) 84struct ib_pd *ib_alloc_pd(struct ib_device *device)
diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig
new file mode 100644
index 000000000000..9ea67c409b6d
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/Kconfig
@@ -0,0 +1,16 @@
1config IPATH_CORE
2 tristate "PathScale InfiniPath Driver"
3 depends on 64BIT && PCI_MSI && NET
4 ---help---
5 This is a low-level driver for PathScale InfiniPath host channel
6 adapters (HCAs) based on the HT-400 and PE-800 chips.
7
8config INFINIBAND_IPATH
9 tristate "PathScale InfiniPath Verbs Driver"
10 depends on IPATH_CORE && INFINIBAND
11 ---help---
12 This is a driver that provides InfiniBand verbs support for
13 PathScale InfiniPath host channel adapters (HCAs). This
14 allows these devices to be used with both kernel upper level
15 protocols such as IP-over-InfiniBand as well as with userspace
16 applications (in conjunction with InfiniBand userspace access).
diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
new file mode 100644
index 000000000000..b4d084abfd22
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -0,0 +1,36 @@
1EXTRA_CFLAGS += -DIPATH_IDSTR='"PathScale kernel.org driver"' \
2 -DIPATH_KERN_TYPE=0
3
4obj-$(CONFIG_IPATH_CORE) += ipath_core.o
5obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
6
7ipath_core-y := \
8 ipath_diag.o \
9 ipath_driver.o \
10 ipath_eeprom.o \
11 ipath_file_ops.o \
12 ipath_fs.o \
13 ipath_ht400.o \
14 ipath_init_chip.o \
15 ipath_intr.o \
16 ipath_layer.o \
17 ipath_pe800.o \
18 ipath_stats.o \
19 ipath_sysfs.o \
20 ipath_user_pages.o
21
22ipath_core-$(CONFIG_X86_64) += ipath_wc_x86_64.o
23
24ib_ipath-y := \
25 ipath_cq.o \
26 ipath_keys.o \
27 ipath_mad.o \
28 ipath_mr.o \
29 ipath_qp.o \
30 ipath_rc.o \
31 ipath_ruc.o \
32 ipath_srq.o \
33 ipath_uc.o \
34 ipath_ud.o \
35 ipath_verbs.o \
36 ipath_verbs_mcast.o
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
new file mode 100644
index 000000000000..48a55247b832
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -0,0 +1,616 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef _IPATH_COMMON_H
34#define _IPATH_COMMON_H
35
36/*
37 * This file contains defines, structures, etc. that are used
38 * to communicate between kernel and user code.
39 */
40
41/* This is the IEEE-assigned OUI for PathScale, Inc. */
42#define IPATH_SRC_OUI_1 0x00
43#define IPATH_SRC_OUI_2 0x11
44#define IPATH_SRC_OUI_3 0x75
45
46/* version of protocol header (known to chip also). In the long run,
47 * we should be able to generate and accept a range of version numbers;
48 * for now we only accept one, and it's compiled in.
49 */
50#define IPS_PROTO_VERSION 2
51
52/*
53 * These are compile time constants that you may want to enable or disable
54 * if you are trying to debug problems with code or performance.
55 * IPATH_VERBOSE_TRACING define as 1 if you want additional tracing in
56 * fastpath code
57 * IPATH_TRACE_REGWRITES define as 1 if you want register writes to be
58 * traced in faspath code
59 * _IPATH_TRACING define as 0 if you want to remove all tracing in a
60 * compilation unit
61 * _IPATH_DEBUGGING define as 0 if you want to remove debug prints
62 */
63
64/*
65 * The value in the BTH QP field that InfiniPath uses to differentiate
66 * an infinipath protocol IB packet vs standard IB transport
67 */
68#define IPATH_KD_QP 0x656b79
69
70/*
71 * valid states passed to ipath_set_linkstate() user call
72 */
73#define IPATH_IB_LINKDOWN 0
74#define IPATH_IB_LINKARM 1
75#define IPATH_IB_LINKACTIVE 2
76#define IPATH_IB_LINKINIT 3
77#define IPATH_IB_LINKDOWN_SLEEP 4
78#define IPATH_IB_LINKDOWN_DISABLE 5
79
80/*
81 * stats maintained by the driver. For now, at least, this is global
82 * to all minor devices.
83 */
84struct infinipath_stats {
85 /* number of interrupts taken */
86 __u64 sps_ints;
87 /* number of interrupts for errors */
88 __u64 sps_errints;
89 /* number of errors from chip (not incl. packet errors or CRC) */
90 __u64 sps_errs;
91 /* number of packet errors from chip other than CRC */
92 __u64 sps_pkterrs;
93 /* number of packets with CRC errors (ICRC and VCRC) */
94 __u64 sps_crcerrs;
95 /* number of hardware errors reported (parity, etc.) */
96 __u64 sps_hwerrs;
97 /* number of times IB link changed state unexpectedly */
98 __u64 sps_iblink;
99 /* no longer used; left for compatibility */
100 __u64 sps_unused3;
101 /* number of kernel (port0) packets received */
102 __u64 sps_port0pkts;
103 /* number of "ethernet" packets sent by driver */
104 __u64 sps_ether_spkts;
105 /* number of "ethernet" packets received by driver */
106 __u64 sps_ether_rpkts;
107 /* number of SMA packets sent by driver */
108 __u64 sps_sma_spkts;
109 /* number of SMA packets received by driver */
110 __u64 sps_sma_rpkts;
111 /* number of times all ports rcvhdrq was full and packet dropped */
112 __u64 sps_hdrqfull;
113 /* number of times all ports egrtid was full and packet dropped */
114 __u64 sps_etidfull;
115 /*
116 * number of times we tried to send from driver, but no pio buffers
117 * avail
118 */
119 __u64 sps_nopiobufs;
120 /* number of ports currently open */
121 __u64 sps_ports;
122 /* list of pkeys (other than default) accepted (0 means not set) */
123 __u16 sps_pkeys[4];
124 /* lids for up to 4 infinipaths, indexed by infinipath # */
125 __u16 sps_lid[4];
126 /* number of user ports per chip (not IB ports) */
127 __u32 sps_nports;
128 /* not our interrupt, or already handled */
129 __u32 sps_nullintr;
130 /* max number of packets handled per receive call */
131 __u32 sps_maxpkts_call;
132 /* avg number of packets handled per receive call */
133 __u32 sps_avgpkts_call;
134 /* total number of pages locked */
135 __u64 sps_pagelocks;
136 /* total number of pages unlocked */
137 __u64 sps_pageunlocks;
138 /*
139 * Number of packets dropped in kernel other than errors (ether
140 * packets if ipath not configured, sma/mad, etc.)
141 */
142 __u64 sps_krdrops;
143 /* mlids for up to 4 infinipaths, indexed by infinipath # */
144 __u16 sps_mlid[4];
145 /* pad for future growth */
146 __u64 __sps_pad[45];
147};
148
149/*
150 * These are the status bits readable (in ascii form, 64bit value)
151 * from the "status" sysfs file.
152 */
153#define IPATH_STATUS_INITTED 0x1 /* basic initialization done */
154#define IPATH_STATUS_DISABLED 0x2 /* hardware disabled */
155/* Device has been disabled via admin request */
156#define IPATH_STATUS_ADMIN_DISABLED 0x4
157#define IPATH_STATUS_OIB_SMA 0x8 /* ipath_mad kernel SMA running */
158#define IPATH_STATUS_SMA 0x10 /* user SMA running */
159/* Chip has been found and initted */
160#define IPATH_STATUS_CHIP_PRESENT 0x20
161/* IB link is at ACTIVE, usable for data traffic */
162#define IPATH_STATUS_IB_READY 0x40
163/* link is configured, LID, MTU, etc. have been set */
164#define IPATH_STATUS_IB_CONF 0x80
165/* no link established, probably no cable */
166#define IPATH_STATUS_IB_NOCABLE 0x100
167/* A Fatal hardware error has occurred. */
168#define IPATH_STATUS_HWERROR 0x200
169
170/*
171 * The list of usermode accessible registers. Also see Reg_* later in file.
172 */
173typedef enum _ipath_ureg {
174 /* (RO) DMA RcvHdr to be used next. */
175 ur_rcvhdrtail = 0,
176 /* (RW) RcvHdr entry to be processed next by host. */
177 ur_rcvhdrhead = 1,
178 /* (RO) Index of next Eager index to use. */
179 ur_rcvegrindextail = 2,
180 /* (RW) Eager TID to be processed next */
181 ur_rcvegrindexhead = 3,
182 /* For internal use only; max register number. */
183 _IPATH_UregMax
184} ipath_ureg;
185
186/* bit values for spi_runtime_flags */
187#define IPATH_RUNTIME_HT 0x1
188#define IPATH_RUNTIME_PCIE 0x2
189#define IPATH_RUNTIME_FORCE_WC_ORDER 0x4
190#define IPATH_RUNTIME_RCVHDR_COPY 0x8
191
192/*
193 * This structure is returned by ipath_userinit() immediately after
194 * open to get implementation-specific info, and info specific to this
195 * instance.
196 *
197 * This struct must have explict pad fields where type sizes
198 * may result in different alignments between 32 and 64 bit
199 * programs, since the 64 bit * bit kernel requires the user code
200 * to have matching offsets
201 */
202struct ipath_base_info {
203 /* version of hardware, for feature checking. */
204 __u32 spi_hw_version;
205 /* version of software, for feature checking. */
206 __u32 spi_sw_version;
207 /* InfiniPath port assigned, goes into sent packets */
208 __u32 spi_port;
209 /*
210 * IB MTU, packets IB data must be less than this.
211 * The MTU is in bytes, and will be a multiple of 4 bytes.
212 */
213 __u32 spi_mtu;
214 /*
215 * Size of a PIO buffer. Any given packet's total size must be less
216 * than this (in words). Included is the starting control word, so
217 * if 513 is returned, then total pkt size is 512 words or less.
218 */
219 __u32 spi_piosize;
220 /* size of the TID cache in infinipath, in entries */
221 __u32 spi_tidcnt;
222 /* size of the TID Eager list in infinipath, in entries */
223 __u32 spi_tidegrcnt;
224 /* size of a single receive header queue entry. */
225 __u32 spi_rcvhdrent_size;
226 /*
227 * Count of receive header queue entries allocated.
228 * This may be less than the spu_rcvhdrcnt passed in!.
229 */
230 __u32 spi_rcvhdr_cnt;
231
232 /* per-chip and other runtime features bitmap (IPATH_RUNTIME_*) */
233 __u32 spi_runtime_flags;
234
235 /* address where receive buffer queue is mapped into */
236 __u64 spi_rcvhdr_base;
237
238 /* user program. */
239
240 /* base address of eager TID receive buffers. */
241 __u64 spi_rcv_egrbufs;
242
243 /* Allocated by initialization code, not by protocol. */
244
245 /*
246 * Size of each TID buffer in host memory, starting at
247 * spi_rcv_egrbufs. The buffers are virtually contiguous.
248 */
249 __u32 spi_rcv_egrbufsize;
250 /*
251 * The special QP (queue pair) value that identifies an infinipath
252 * protocol packet from standard IB packets. More, probably much
253 * more, to be added.
254 */
255 __u32 spi_qpair;
256
257 /*
258 * User register base for init code, not to be used directly by
259 * protocol or applications.
260 */
261 __u64 __spi_uregbase;
262 /*
263 * Maximum buffer size in bytes that can be used in a single TID
264 * entry (assuming the buffer is aligned to this boundary). This is
265 * the minimum of what the hardware and software support Guaranteed
266 * to be a power of 2.
267 */
268 __u32 spi_tid_maxsize;
269 /*
270 * alignment of each pio send buffer (byte count
271 * to add to spi_piobufbase to get to second buffer)
272 */
273 __u32 spi_pioalign;
274 /*
275 * The index of the first pio buffer available to this process;
276 * needed to do lookup in spi_pioavailaddr; not added to
277 * spi_piobufbase.
278 */
279 __u32 spi_pioindex;
280 /* number of buffers mapped for this process */
281 __u32 spi_piocnt;
282
283 /*
284 * Base address of writeonly pio buffers for this process.
285 * Each buffer has spi_piosize words, and is aligned on spi_pioalign
286 * boundaries. spi_piocnt buffers are mapped from this address
287 */
288 __u64 spi_piobufbase;
289
290 /*
291 * Base address of readonly memory copy of the pioavail registers.
292 * There are 2 bits for each buffer.
293 */
294 __u64 spi_pioavailaddr;
295
296 /*
297 * Address where driver updates a copy of the interface and driver
298 * status (IPATH_STATUS_*) as a 64 bit value. It's followed by a
299 * string indicating hardware error, if there was one.
300 */
301 __u64 spi_status;
302
303 /* number of chip ports available to user processes */
304 __u32 spi_nports;
305 /* unit number of chip we are using */
306 __u32 spi_unit;
307 /* num bufs in each contiguous set */
308 __u32 spi_rcv_egrperchunk;
309 /* size in bytes of each contiguous set */
310 __u32 spi_rcv_egrchunksize;
311 /* total size of mmap to cover full rcvegrbuffers */
312 __u32 spi_rcv_egrbuftotlen;
313} __attribute__ ((aligned(8)));
314
315
316/*
317 * This version number is given to the driver by the user code during
318 * initialization in the spu_userversion field of ipath_user_info, so
319 * the driver can check for compatibility with user code.
320 *
321 * The major version changes when data structures
322 * change in an incompatible way. The driver must be the same or higher
323 * for initialization to succeed. In some cases, a higher version
324 * driver will not interoperate with older software, and initialization
325 * will return an error.
326 */
327#define IPATH_USER_SWMAJOR 1
328
329/*
330 * Minor version differences are always compatible
331 * a within a major version, however if if user software is larger
332 * than driver software, some new features and/or structure fields
333 * may not be implemented; the user code must deal with this if it
334 * cares, or it must abort after initialization reports the difference
335 */
336#define IPATH_USER_SWMINOR 2
337
338#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
339
340#define IPATH_KERN_TYPE 0
341
342/*
343 * Similarly, this is the kernel version going back to the user. It's
344 * slightly different, in that we want to tell if the driver was built as
345 * part of a PathScale release, or from the driver from OpenIB, kernel.org,
346 * or a standard distribution, for support reasons. The high bit is 0 for
347 * non-PathScale, and 1 for PathScale-built/supplied.
348 *
349 * It's returned by the driver to the user code during initialization in the
350 * spi_sw_version field of ipath_base_info, so the user code can in turn
351 * check for compatibility with the kernel.
352*/
353#define IPATH_KERN_SWVERSION ((IPATH_KERN_TYPE<<31) | IPATH_USER_SWVERSION)
354
355/*
356 * This structure is passed to ipath_userinit() to tell the driver where
357 * user code buffers are, sizes, etc. The offsets and sizes of the
358 * fields must remain unchanged, for binary compatibility. It can
359 * be extended, if userversion is changed so user code can tell, if needed
360 */
361struct ipath_user_info {
362 /*
363 * version of user software, to detect compatibility issues.
364 * Should be set to IPATH_USER_SWVERSION.
365 */
366 __u32 spu_userversion;
367
368 /* desired number of receive header queue entries */
369 __u32 spu_rcvhdrcnt;
370
371 /* size of struct base_info to write to */
372 __u32 spu_base_info_size;
373
374 /*
375 * number of words in KD protocol header
376 * This tells InfiniPath how many words to copy to rcvhdrq. If 0,
377 * kernel uses a default. Once set, attempts to set any other value
378 * are an error (EAGAIN) until driver is reloaded.
379 */
380 __u32 spu_rcvhdrsize;
381
382 /*
383 * cache line aligned (64 byte) user address to
384 * which the rcvhdrtail register will be written by infinipath
385 * whenever it changes, so that no chip registers are read in
386 * the performance path.
387 */
388 __u64 spu_rcvhdraddr;
389
390 /*
391 * address of struct base_info to write to
392 */
393 __u64 spu_base_info;
394
395} __attribute__ ((aligned(8)));
396
397/* User commands. */
398
399#define IPATH_CMD_MIN 16
400
401#define IPATH_CMD_USER_INIT 16 /* set up userspace */
402#define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */
403#define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */
404#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */
405#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */
406#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */
407
408#define IPATH_CMD_MAX 21
409
410struct ipath_port_info {
411 __u32 num_active; /* number of active units */
412 __u32 unit; /* unit (chip) assigned to caller */
413 __u32 port; /* port on unit assigned to caller */
414};
415
416struct ipath_tid_info {
417 __u32 tidcnt;
418 /* make structure same size in 32 and 64 bit */
419 __u32 tid__unused;
420 /* virtual address of first page in transfer */
421 __u64 tidvaddr;
422 /* pointer (same size 32/64 bit) to __u16 tid array */
423 __u64 tidlist;
424
425 /*
426 * pointer (same size 32/64 bit) to bitmap of TIDs used
427 * for this call; checked for being large enough at open
428 */
429 __u64 tidmap;
430};
431
432struct ipath_cmd {
433 __u32 type; /* command type */
434 union {
435 struct ipath_tid_info tid_info;
436 struct ipath_user_info user_info;
437 /* address in userspace of struct ipath_port_info to
438 write result to */
439 __u64 port_info;
440 /* enable/disable receipt of packets */
441 __u32 recv_ctrl;
442 /* partition key to set */
443 __u16 part_key;
444 } cmd;
445};
446
447struct ipath_iovec {
448 /* Pointer to data, but same size 32 and 64 bit */
449 __u64 iov_base;
450
451 /*
452 * Length of data; don't need 64 bits, but want
453 * ipath_sendpkt to remain same size as before 32 bit changes, so...
454 */
455 __u64 iov_len;
456};
457
458/*
459 * Describes a single packet for send. Each packet can have one or more
460 * buffers, but the total length (exclusive of IB headers) must be less
461 * than the MTU, and if using the PIO method, entire packet length,
462 * including IB headers, must be less than the ipath_piosize value (words).
463 * Use of this necessitates including sys/uio.h
464 */
465struct __ipath_sendpkt {
466 __u32 sps_flags; /* flags for packet (TBD) */
467 __u32 sps_cnt; /* number of entries to use in sps_iov */
468 /* array of iov's describing packet. TEMPORARY */
469 struct ipath_iovec sps_iov[4];
470};
471
472/* Passed into SMA special file's ->read and ->write methods. */
473struct ipath_sma_pkt
474{
475 __u32 unit; /* unit on which to send packet */
476 __u64 data; /* address of payload in userspace */
477 __u32 len; /* length of payload */
478};
479
480/*
481 * Data layout in I2C flash (for GUID, etc.)
482 * All fields are little-endian binary unless otherwise stated
483 */
484#define IPATH_FLASH_VERSION 1
485struct ipath_flash {
486 /* flash layout version (IPATH_FLASH_VERSION) */
487 __u8 if_fversion;
488 /* checksum protecting if_length bytes */
489 __u8 if_csum;
490 /*
491 * valid length (in use, protected by if_csum), including
492 * if_fversion and if_sum themselves)
493 */
494 __u8 if_length;
495 /* the GUID, in network order */
496 __u8 if_guid[8];
497 /* number of GUIDs to use, starting from if_guid */
498 __u8 if_numguid;
499 /* the board serial number, in ASCII */
500 char if_serial[12];
501 /* board mfg date (YYYYMMDD ASCII) */
502 char if_mfgdate[8];
503 /* last board rework/test date (YYYYMMDD ASCII) */
504 char if_testdate[8];
505 /* logging of error counts, TBD */
506 __u8 if_errcntp[4];
507 /* powered on hours, updated at driver unload */
508 __u8 if_powerhour[2];
509 /* ASCII free-form comment field */
510 char if_comment[32];
511 /* 78 bytes used, min flash size is 128 bytes */
512 __u8 if_future[50];
513};
514
515/*
516 * These are the counters implemented in the chip, and are listed in order.
517 * The InterCaps naming is taken straight from the chip spec.
518 */
519struct infinipath_counters {
520 __u64 LBIntCnt;
521 __u64 LBFlowStallCnt;
522 __u64 Reserved1;
523 __u64 TxUnsupVLErrCnt;
524 __u64 TxDataPktCnt;
525 __u64 TxFlowPktCnt;
526 __u64 TxDwordCnt;
527 __u64 TxLenErrCnt;
528 __u64 TxMaxMinLenErrCnt;
529 __u64 TxUnderrunCnt;
530 __u64 TxFlowStallCnt;
531 __u64 TxDroppedPktCnt;
532 __u64 RxDroppedPktCnt;
533 __u64 RxDataPktCnt;
534 __u64 RxFlowPktCnt;
535 __u64 RxDwordCnt;
536 __u64 RxLenErrCnt;
537 __u64 RxMaxMinLenErrCnt;
538 __u64 RxICRCErrCnt;
539 __u64 RxVCRCErrCnt;
540 __u64 RxFlowCtrlErrCnt;
541 __u64 RxBadFormatCnt;
542 __u64 RxLinkProblemCnt;
543 __u64 RxEBPCnt;
544 __u64 RxLPCRCErrCnt;
545 __u64 RxBufOvflCnt;
546 __u64 RxTIDFullErrCnt;
547 __u64 RxTIDValidErrCnt;
548 __u64 RxPKeyMismatchCnt;
549 __u64 RxP0HdrEgrOvflCnt;
550 __u64 RxP1HdrEgrOvflCnt;
551 __u64 RxP2HdrEgrOvflCnt;
552 __u64 RxP3HdrEgrOvflCnt;
553 __u64 RxP4HdrEgrOvflCnt;
554 __u64 RxP5HdrEgrOvflCnt;
555 __u64 RxP6HdrEgrOvflCnt;
556 __u64 RxP7HdrEgrOvflCnt;
557 __u64 RxP8HdrEgrOvflCnt;
558 __u64 Reserved6;
559 __u64 Reserved7;
560 __u64 IBStatusChangeCnt;
561 __u64 IBLinkErrRecoveryCnt;
562 __u64 IBLinkDownedCnt;
563 __u64 IBSymbolErrCnt;
564};
565
566/*
567 * The next set of defines are for packet headers, and chip register
568 * and memory bits that are visible to and/or used by user-mode software
569 * The other bits that are used only by the driver or diags are in
570 * ipath_registers.h
571 */
572
573/* RcvHdrFlags bits */
574#define INFINIPATH_RHF_LENGTH_MASK 0x7FF
575#define INFINIPATH_RHF_LENGTH_SHIFT 0
576#define INFINIPATH_RHF_RCVTYPE_MASK 0x7
577#define INFINIPATH_RHF_RCVTYPE_SHIFT 11
578#define INFINIPATH_RHF_EGRINDEX_MASK 0x7FF
579#define INFINIPATH_RHF_EGRINDEX_SHIFT 16
580#define INFINIPATH_RHF_H_ICRCERR 0x80000000
581#define INFINIPATH_RHF_H_VCRCERR 0x40000000
582#define INFINIPATH_RHF_H_PARITYERR 0x20000000
583#define INFINIPATH_RHF_H_LENERR 0x10000000
584#define INFINIPATH_RHF_H_MTUERR 0x08000000
585#define INFINIPATH_RHF_H_IHDRERR 0x04000000
586#define INFINIPATH_RHF_H_TIDERR 0x02000000
587#define INFINIPATH_RHF_H_MKERR 0x01000000
588#define INFINIPATH_RHF_H_IBERR 0x00800000
589#define INFINIPATH_RHF_L_SWA 0x00008000
590#define INFINIPATH_RHF_L_SWB 0x00004000
591
592/* infinipath header fields */
593#define INFINIPATH_I_VERS_MASK 0xF
594#define INFINIPATH_I_VERS_SHIFT 28
595#define INFINIPATH_I_PORT_MASK 0xF
596#define INFINIPATH_I_PORT_SHIFT 24
597#define INFINIPATH_I_TID_MASK 0x7FF
598#define INFINIPATH_I_TID_SHIFT 13
599#define INFINIPATH_I_OFFSET_MASK 0x1FFF
600#define INFINIPATH_I_OFFSET_SHIFT 0
601
602/* K_PktFlags bits */
603#define INFINIPATH_KPF_INTR 0x1
604
605/* SendPIO per-buffer control */
606#define INFINIPATH_SP_LENGTHP1_MASK 0x3FF
607#define INFINIPATH_SP_LENGTHP1_SHIFT 0
608#define INFINIPATH_SP_INTR 0x80000000
609#define INFINIPATH_SP_TEST 0x40000000
610#define INFINIPATH_SP_TESTEBP 0x20000000
611
612/* SendPIOAvail bits */
613#define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1
614#define INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 0
615
616#endif /* _IPATH_COMMON_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
new file mode 100644
index 000000000000..7ece1135ddfe
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -0,0 +1,295 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/err.h>
34#include <linux/vmalloc.h>
35
36#include "ipath_verbs.h"
37
/**
 * ipath_cq_enter - add a new entry to the completion queue
 * @cq: completion queue
 * @entry: work completion entry to add
 * @solicited: true if @entry is a solicited entry
 *
 * This may be called with one of the qp->s_lock or qp->r_rq.lock held.
 */
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
{
	unsigned long flags;
	u32 next;

	spin_lock_irqsave(&cq->lock, flags);

	/*
	 * Compute the slot after head; the ring holds ibcq.cqe + 1 entries
	 * (see ipath_create_cq), so head wraps after reaching ibcq.cqe.
	 */
	if (cq->head == cq->ibcq.cqe)
		next = 0;
	else
		next = cq->head + 1;
	/* next == tail means the ring is full: report a CQ error to the
	 * consumer (if it registered a handler) and drop this entry. */
	if (unlikely(next == cq->tail)) {
		spin_unlock_irqrestore(&cq->lock, flags);
		if (cq->ibcq.event_handler) {
			struct ib_event ev;

			ev.device = cq->ibcq.device;
			ev.element.cq = &cq->ibcq;
			ev.event = IB_EVENT_CQ_ERR;
			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
		}
		return;
	}
	cq->queue[cq->head] = *entry;
	cq->head = next;

	if (cq->notify == IB_CQ_NEXT_COMP ||
	    (cq->notify == IB_CQ_SOLICITED && solicited)) {
		/* One-shot: disarm until ipath_req_notify_cq() rearms. */
		cq->notify = IB_CQ_NONE;
		cq->triggered++;
		/*
		 * This will cause send_complete() to be called in
		 * another thread.
		 */
		tasklet_hi_schedule(&cq->comptask);
	}

	spin_unlock_irqrestore(&cq->lock, flags);

	/* Per-device error counter; updated outside the CQ lock. */
	if (entry->status != IB_WC_SUCCESS)
		to_idev(cq->ibcq.device)->n_wqe_errs++;
}
88
89/**
90 * ipath_poll_cq - poll for work completion entries
91 * @ibcq: the completion queue to poll
92 * @num_entries: the maximum number of entries to return
93 * @entry: pointer to array where work completions are placed
94 *
95 * Returns the number of completion entries polled.
96 *
97 * This may be called from interrupt context. Also called by ib_poll_cq()
98 * in the generic verbs code.
99 */
100int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
101{
102 struct ipath_cq *cq = to_icq(ibcq);
103 unsigned long flags;
104 int npolled;
105
106 spin_lock_irqsave(&cq->lock, flags);
107
108 for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
109 if (cq->tail == cq->head)
110 break;
111 *entry = cq->queue[cq->tail];
112 if (cq->tail == cq->ibcq.cqe)
113 cq->tail = 0;
114 else
115 cq->tail++;
116 }
117
118 spin_unlock_irqrestore(&cq->lock, flags);
119
120 return npolled;
121}
122
123static void send_complete(unsigned long data)
124{
125 struct ipath_cq *cq = (struct ipath_cq *)data;
126
127 /*
128 * The completion handler will most likely rearm the notification
129 * and poll for all pending entries. If a new completion entry
130 * is added while we are in this routine, tasklet_hi_schedule()
131 * won't call us again until we return so we check triggered to
132 * see if we need to call the handler again.
133 */
134 for (;;) {
135 u8 triggered = cq->triggered;
136
137 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
138
139 if (cq->triggered == triggered)
140 return;
141 }
142}
143
144/**
145 * ipath_create_cq - create a completion queue
146 * @ibdev: the device this completion queue is attached to
147 * @entries: the minimum size of the completion queue
148 * @context: unused by the InfiniPath driver
149 * @udata: unused by the InfiniPath driver
150 *
151 * Returns a pointer to the completion queue or negative errno values
152 * for failure.
153 *
154 * Called by ib_create_cq() in the generic verbs code.
155 */
156struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
157 struct ib_ucontext *context,
158 struct ib_udata *udata)
159{
160 struct ipath_cq *cq;
161 struct ib_wc *wc;
162 struct ib_cq *ret;
163
164 /*
165 * Need to use vmalloc() if we want to support large #s of
166 * entries.
167 */
168 cq = kmalloc(sizeof(*cq), GFP_KERNEL);
169 if (!cq) {
170 ret = ERR_PTR(-ENOMEM);
171 goto bail;
172 }
173
174 /*
175 * Need to use vmalloc() if we want to support large #s of entries.
176 */
177 wc = vmalloc(sizeof(*wc) * (entries + 1));
178 if (!wc) {
179 kfree(cq);
180 ret = ERR_PTR(-ENOMEM);
181 goto bail;
182 }
183 /*
184 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
185 * The number of entries should be >= the number requested or return
186 * an error.
187 */
188 cq->ibcq.cqe = entries;
189 cq->notify = IB_CQ_NONE;
190 cq->triggered = 0;
191 spin_lock_init(&cq->lock);
192 tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
193 cq->head = 0;
194 cq->tail = 0;
195 cq->queue = wc;
196
197 ret = &cq->ibcq;
198
199bail:
200 return ret;
201}
202
203/**
204 * ipath_destroy_cq - destroy a completion queue
205 * @ibcq: the completion queue to destroy.
206 *
207 * Returns 0 for success.
208 *
209 * Called by ib_destroy_cq() in the generic verbs code.
210 */
211int ipath_destroy_cq(struct ib_cq *ibcq)
212{
213 struct ipath_cq *cq = to_icq(ibcq);
214
215 tasklet_kill(&cq->comptask);
216 vfree(cq->queue);
217 kfree(cq);
218
219 return 0;
220}
221
222/**
223 * ipath_req_notify_cq - change the notification type for a completion queue
224 * @ibcq: the completion queue
225 * @notify: the type of notification to request
226 *
227 * Returns 0 for success.
228 *
229 * This may be called from interrupt context. Also called by
230 * ib_req_notify_cq() in the generic verbs code.
231 */
232int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
233{
234 struct ipath_cq *cq = to_icq(ibcq);
235 unsigned long flags;
236
237 spin_lock_irqsave(&cq->lock, flags);
238 /*
239 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
240 * any other transitions.
241 */
242 if (cq->notify != IB_CQ_NEXT_COMP)
243 cq->notify = notify;
244 spin_unlock_irqrestore(&cq->lock, flags);
245 return 0;
246}
247
248int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
249{
250 struct ipath_cq *cq = to_icq(ibcq);
251 struct ib_wc *wc, *old_wc;
252 u32 n;
253 int ret;
254
255 /*
256 * Need to use vmalloc() if we want to support large #s of entries.
257 */
258 wc = vmalloc(sizeof(*wc) * (cqe + 1));
259 if (!wc) {
260 ret = -ENOMEM;
261 goto bail;
262 }
263
264 spin_lock_irq(&cq->lock);
265 if (cq->head < cq->tail)
266 n = cq->ibcq.cqe + 1 + cq->head - cq->tail;
267 else
268 n = cq->head - cq->tail;
269 if (unlikely((u32)cqe < n)) {
270 spin_unlock_irq(&cq->lock);
271 vfree(wc);
272 ret = -EOVERFLOW;
273 goto bail;
274 }
275 for (n = 0; cq->tail != cq->head; n++) {
276 wc[n] = cq->queue[cq->tail];
277 if (cq->tail == cq->ibcq.cqe)
278 cq->tail = 0;
279 else
280 cq->tail++;
281 }
282 cq->ibcq.cqe = cqe;
283 cq->head = n;
284 cq->tail = 0;
285 old_wc = cq->queue;
286 cq->queue = wc;
287 spin_unlock_irq(&cq->lock);
288
289 vfree(old_wc);
290
291 ret = 0;
292
293bail:
294 return ret;
295}
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
new file mode 100644
index 000000000000..593e28969c69
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -0,0 +1,96 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef _IPATH_DEBUG_H
34#define _IPATH_DEBUG_H
35
36#ifndef _IPATH_DEBUGGING /* debugging enabled or not */
37#define _IPATH_DEBUGGING 1
38#endif
39
40#if _IPATH_DEBUGGING
41
42/*
43 * Mask values for debugging. The scheme allows us to compile out any
44 * of the debug tracing stuff, and if compiled in, to enable or disable
45 * dynamically. This can be set at modprobe time also:
46 * modprobe infinipath.ko infinipath_debug=7
47 */
48
49#define __IPATH_INFO 0x1 /* generic low verbosity stuff */
50#define __IPATH_DBG 0x2 /* generic debug */
51#define __IPATH_TRSAMPLE 0x8 /* generate trace buffer sample entries */
52/* leave some low verbosity spots open */
53#define __IPATH_VERBDBG 0x40 /* very verbose debug */
54#define __IPATH_PKTDBG 0x80 /* print packet data */
55/* print process startup (init)/exit messages */
56#define __IPATH_PROCDBG 0x100
57/* print mmap/nopage stuff, not using VDBG any more */
58#define __IPATH_MMDBG 0x200
59#define __IPATH_USER_SEND 0x1000 /* use user mode send */
60#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */
61#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */
62#define __IPATH_SMADBG 0x8000 /* sma packet debug */
63#define __IPATH_IPATHDBG 0x10000 /* Ethernet (IPATH) general debug on */
64#define __IPATH_IPATHWARN 0x20000 /* Ethernet (IPATH) warnings on */
65#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors on */
66#define __IPATH_IPATHPD 0x80000 /* Ethernet (IPATH) packet dump on */
67#define __IPATH_IPATHTABLE 0x100000 /* Ethernet (IPATH) table dump on */
68
69#else /* _IPATH_DEBUGGING */
70
71/*
72 * define all of these even with debugging off, for the few places that do
73 * if(infinipath_debug & _IPATH_xyzzy), but in a way that will make the
74 * compiler eliminate the code
75 */
76
#define __IPATH_INFO 0x0	/* generic low verbosity stuff */
#define __IPATH_DBG 0x0	/* generic debug */
#define __IPATH_TRSAMPLE 0x0	/* generate trace buffer sample entries */
#define __IPATH_VERBDBG 0x0	/* very verbose debug */
#define __IPATH_PKTDBG 0x0	/* print packet data */
#define __IPATH_PROCDBG 0x0	/* print process startup (init)/exit messages */
/* print mmap/nopage stuff, not using VDBG any more */
#define __IPATH_MMDBG 0x0
#define __IPATH_EPKTDBG 0x0	/* print ethernet packet data */
#define __IPATH_SMADBG 0x0	/* sma packet debug */
#define __IPATH_IPATHDBG 0x0	/* Ethernet (IPATH) general debug on */
#define __IPATH_IPATHWARN 0x0	/* Ethernet (IPATH) warnings on */
#define __IPATH_IPATHERR 0x0	/* Ethernet (IPATH) errors on */
#define __IPATH_IPATHPD 0x0	/* Ethernet (IPATH) packet dump on */
#define __IPATH_IPATHTABLE 0x0	/* Ethernet (IPATH) table dump on */
91
92#endif /* _IPATH_DEBUGGING */
93
94#define __IPATH_VERBOSEDBG __IPATH_VERBDBG
95
96#endif /* _IPATH_DEBUG_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
new file mode 100644
index 000000000000..7d3fb6996b41
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -0,0 +1,367 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33/*
34 * This file contains support for diagnostic functions. It is accessed by
35 * opening the ipath_diag device, normally minor number 129. Diagnostic use
36 * of the InfiniPath chip may render the chip or board unusable until the
37 * driver is unloaded, or in some cases, until the system is rebooted.
38 *
39 * Accesses to the chip through this interface are not similar to going
40 * through the /sys/bus/pci resource mmap interface.
41 */
42
43#include <linux/pci.h>
44#include <asm/uaccess.h>
45
46#include "ipath_common.h"
47#include "ipath_kernel.h"
48#include "ips_common.h"
49#include "ipath_layer.h"
50
51int ipath_diag_inuse;
52static int diag_set_link;
53
54static int ipath_diag_open(struct inode *in, struct file *fp);
55static int ipath_diag_release(struct inode *in, struct file *fp);
56static ssize_t ipath_diag_read(struct file *fp, char __user *data,
57 size_t count, loff_t *off);
58static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
59 size_t count, loff_t *off);
60
61static struct file_operations diag_file_ops = {
62 .owner = THIS_MODULE,
63 .write = ipath_diag_write,
64 .read = ipath_diag_read,
65 .open = ipath_diag_open,
66 .release = ipath_diag_release
67};
68
69static struct cdev *diag_cdev;
70static struct class_device *diag_class_dev;
71
72int ipath_diag_init(void)
73{
74 return ipath_cdev_init(IPATH_DIAG_MINOR, "ipath_diag",
75 &diag_file_ops, &diag_cdev, &diag_class_dev);
76}
77
78void ipath_diag_cleanup(void)
79{
80 ipath_cdev_cleanup(&diag_cdev, &diag_class_dev);
81}
82
83/**
84 * ipath_read_umem64 - read a 64-bit quantity from the chip into user space
85 * @dd: the infinipath device
86 * @uaddr: the location to store the data in user memory
87 * @caddr: the source chip address (full pointer, not offset)
88 * @count: number of bytes to copy (multiple of 32 bits)
89 *
90 * This function also localizes all chip memory accesses.
91 * The copy should be written such that we read full cacheline packets
92 * from the chip. This is usually used for a single qword
93 *
94 * NOTE: This assumes the chip address is 64-bit aligned.
95 */
96static int ipath_read_umem64(struct ipath_devdata *dd, void __user *uaddr,
97 const void __iomem *caddr, size_t count)
98{
99 const u64 __iomem *reg_addr = caddr;
100 const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
101 int ret;
102
103 /* not very efficient, but it works for now */
104 if (reg_addr < dd->ipath_kregbase ||
105 reg_end > dd->ipath_kregend) {
106 ret = -EINVAL;
107 goto bail;
108 }
109 while (reg_addr < reg_end) {
110 u64 data = readq(reg_addr);
111 if (copy_to_user(uaddr, &data, sizeof(u64))) {
112 ret = -EFAULT;
113 goto bail;
114 }
115 reg_addr++;
116 uaddr++;
117 }
118 ret = 0;
119bail:
120 return ret;
121}
122
123/**
124 * ipath_write_umem64 - write a 64-bit quantity to the chip from user space
125 * @dd: the infinipath device
126 * @caddr: the destination chip address (full pointer, not offset)
127 * @uaddr: the source of the data in user memory
128 * @count: the number of bytes to copy (multiple of 32 bits)
129 *
130 * This is usually used for a single qword
131 * NOTE: This assumes the chip address is 64-bit aligned.
132 */
133
134static int ipath_write_umem64(struct ipath_devdata *dd, void __iomem *caddr,
135 const void __user *uaddr, size_t count)
136{
137 u64 __iomem *reg_addr = caddr;
138 const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
139 int ret;
140
141 /* not very efficient, but it works for now */
142 if (reg_addr < dd->ipath_kregbase ||
143 reg_end > dd->ipath_kregend) {
144 ret = -EINVAL;
145 goto bail;
146 }
147 while (reg_addr < reg_end) {
148 u64 data;
149 if (copy_from_user(&data, uaddr, sizeof(data))) {
150 ret = -EFAULT;
151 goto bail;
152 }
153 writeq(data, reg_addr);
154
155 reg_addr++;
156 uaddr++;
157 }
158 ret = 0;
159bail:
160 return ret;
161}
162
163/**
164 * ipath_read_umem32 - read a 32-bit quantity from the chip into user space
165 * @dd: the infinipath device
166 * @uaddr: the location to store the data in user memory
167 * @caddr: the source chip address (full pointer, not offset)
168 * @count: number of bytes to copy
169 *
170 * read 32 bit values, not 64 bit; for memories that only
171 * support 32 bit reads; usually a single dword.
172 */
173static int ipath_read_umem32(struct ipath_devdata *dd, void __user *uaddr,
174 const void __iomem *caddr, size_t count)
175{
176 const u32 __iomem *reg_addr = caddr;
177 const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
178 int ret;
179
180 if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
181 reg_end > (u32 __iomem *) dd->ipath_kregend) {
182 ret = -EINVAL;
183 goto bail;
184 }
185 /* not very efficient, but it works for now */
186 while (reg_addr < reg_end) {
187 u32 data = readl(reg_addr);
188 if (copy_to_user(uaddr, &data, sizeof(data))) {
189 ret = -EFAULT;
190 goto bail;
191 }
192
193 reg_addr++;
194 uaddr++;
195 }
196 ret = 0;
197bail:
198 return ret;
199}
200
201/**
202 * ipath_write_umem32 - write a 32-bit quantity to the chip from user space
203 * @dd: the infinipath device
204 * @caddr: the destination chip address (full pointer, not offset)
205 * @uaddr: the source of the data in user memory
206 * @count: number of bytes to copy
207 *
208 * write 32 bit values, not 64 bit; for memories that only
209 * support 32 bit write; usually a single dword.
210 */
211
212static int ipath_write_umem32(struct ipath_devdata *dd, void __iomem *caddr,
213 const void __user *uaddr, size_t count)
214{
215 u32 __iomem *reg_addr = caddr;
216 const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
217 int ret;
218
219 if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
220 reg_end > (u32 __iomem *) dd->ipath_kregend) {
221 ret = -EINVAL;
222 goto bail;
223 }
224 while (reg_addr < reg_end) {
225 u32 data;
226 if (copy_from_user(&data, uaddr, sizeof(data))) {
227 ret = -EFAULT;
228 goto bail;
229 }
230 writel(data, reg_addr);
231
232 reg_addr++;
233 uaddr++;
234 }
235 ret = 0;
236bail:
237 return ret;
238}
239
240static int ipath_diag_open(struct inode *in, struct file *fp)
241{
242 struct ipath_devdata *dd;
243 int unit = 0; /* XXX this is bogus */
244 unsigned long flags;
245 int ret;
246
247 dd = ipath_lookup(unit);
248
249 mutex_lock(&ipath_mutex);
250 spin_lock_irqsave(&ipath_devs_lock, flags);
251
252 if (ipath_diag_inuse) {
253 ret = -EBUSY;
254 goto bail;
255 }
256
257 list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
258 /*
259 * we need at least one infinipath device to be present
260 * (don't use INITTED, because we want to be able to open
261 * even if device is in freeze mode, which cleared INITTED).
262 * There is a small amount of risk to this, which is why we
263 * also verify kregbase is set.
264 */
265
266 if (!(dd->ipath_flags & IPATH_PRESENT) ||
267 !dd->ipath_kregbase)
268 continue;
269
270 ipath_diag_inuse = 1;
271 diag_set_link = 0;
272 ret = 0;
273 goto bail;
274 }
275
276 ret = -ENODEV;
277
278bail:
279 spin_unlock_irqrestore(&ipath_devs_lock, flags);
280 mutex_unlock(&ipath_mutex);
281
282 /* Only expose a way to reset the device if we
283 make it into diag mode. */
284 if (ret == 0)
285 ipath_expose_reset(&dd->pcidev->dev);
286
287 return ret;
288}
289
290static int ipath_diag_release(struct inode *i, struct file *f)
291{
292 mutex_lock(&ipath_mutex);
293 ipath_diag_inuse = 0;
294 mutex_unlock(&ipath_mutex);
295 return 0;
296}
297
298static ssize_t ipath_diag_read(struct file *fp, char __user *data,
299 size_t count, loff_t *off)
300{
301 int unit = 0; /* XXX provide for reads on other units some day */
302 struct ipath_devdata *dd;
303 void __iomem *kreg_base;
304 ssize_t ret;
305
306 dd = ipath_lookup(unit);
307 if (!dd) {
308 ret = -ENODEV;
309 goto bail;
310 }
311
312 kreg_base = dd->ipath_kregbase;
313
314 if (count == 0)
315 ret = 0;
316 else if ((count % 4) || (*off % 4))
317 /* address or length is not 32-bit aligned, hence invalid */
318 ret = -EINVAL;
319 else if ((count % 8) || (*off % 8))
320 /* address or length not 64-bit aligned; do 32-bit reads */
321 ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
322 else
323 ret = ipath_read_umem64(dd, data, kreg_base + *off, count);
324
325 if (ret >= 0) {
326 *off += count;
327 ret = count;
328 }
329
330bail:
331 return ret;
332}
333
334static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
335 size_t count, loff_t *off)
336{
337 int unit = 0; /* XXX this is bogus */
338 struct ipath_devdata *dd;
339 void __iomem *kreg_base;
340 ssize_t ret;
341
342 dd = ipath_lookup(unit);
343 if (!dd) {
344 ret = -ENODEV;
345 goto bail;
346 }
347 kreg_base = dd->ipath_kregbase;
348
349 if (count == 0)
350 ret = 0;
351 else if ((count % 4) || (*off % 4))
352 /* address or length is not 32-bit aligned, hence invalid */
353 ret = -EINVAL;
354 else if ((count % 8) || (*off % 8))
355 /* address or length not 64-bit aligned; do 32-bit writes */
356 ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
357 else
358 ret = ipath_write_umem64(dd, kreg_base + *off, data, count);
359
360 if (ret >= 0) {
361 *off += count;
362 ret = count;
363 }
364
365bail:
366 return ret;
367}
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
new file mode 100644
index 000000000000..e7617c3982ea
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -0,0 +1,1983 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/spinlock.h>
34#include <linux/idr.h>
35#include <linux/pci.h>
36#include <linux/delay.h>
37#include <linux/netdevice.h>
38#include <linux/vmalloc.h>
39
40#include "ipath_kernel.h"
41#include "ips_common.h"
42#include "ipath_layer.h"
43
44static void ipath_update_pio_bufs(struct ipath_devdata *);
45
const char *ipath_get_unit_name(int unit)
{
	/*
	 * NOTE: returns a pointer to a static buffer, so the result is
	 * overwritten by the next call — not reentrant.
	 */
	static char iname[16];

	snprintf(iname, sizeof(iname), "infinipath%u", unit);
	return iname;
}
52
53EXPORT_SYMBOL_GPL(ipath_get_unit_name);
54
55#define DRIVER_LOAD_MSG "PathScale " IPATH_DRV_NAME " loaded: "
56#define PFX IPATH_DRV_NAME ": "
57
58/*
59 * The size has to be longer than this string, so we can append
60 * board/chip information to it in the init code.
61 */
62const char ipath_core_version[] = IPATH_IDSTR "\n";
63
64static struct idr unit_table;
65DEFINE_SPINLOCK(ipath_devs_lock);
66LIST_HEAD(ipath_dev_list);
67
68wait_queue_head_t ipath_sma_state_wait;
69
70unsigned ipath_debug = __IPATH_INFO;
71
72module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
73MODULE_PARM_DESC(debug, "mask for debug prints");
74EXPORT_SYMBOL_GPL(ipath_debug);
75
76MODULE_LICENSE("GPL");
77MODULE_AUTHOR("PathScale <support@pathscale.com>");
78MODULE_DESCRIPTION("Pathscale InfiniPath driver");
79
80const char *ipath_ibcstatus_str[] = {
81 "Disabled",
82 "LinkUp",
83 "PollActive",
84 "PollQuiet",
85 "SleepDelay",
86 "SleepQuiet",
87 "LState6", /* unused */
88 "LState7", /* unused */
89 "CfgDebounce",
90 "CfgRcvfCfg",
91 "CfgWaitRmt",
92 "CfgIdle",
93 "RecovRetrain",
94 "LState0xD", /* unused */
95 "RecovWaitRmt",
96 "RecovIdle",
97};
98
99/*
100 * These variables are initialized in the chip-specific files
101 * but are defined here.
102 */
103u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
104u64 ipath_gpio_sda, ipath_gpio_scl;
105u64 infinipath_i_bitsextant;
106ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
107u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
108
109static void __devexit ipath_remove_one(struct pci_dev *);
110static int __devinit ipath_init_one(struct pci_dev *,
111 const struct pci_device_id *);
112
113/* Only needed for registration, nothing else needs this info */
114#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
115#define PCI_DEVICE_ID_INFINIPATH_HT 0xd
116#define PCI_DEVICE_ID_INFINIPATH_PE800 0x10
117
118static const struct pci_device_id ipath_pci_tbl[] = {
119 {PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE,
120 PCI_DEVICE_ID_INFINIPATH_HT)},
121 {PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE,
122 PCI_DEVICE_ID_INFINIPATH_PE800)},
123};
124
125MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);
126
127static struct pci_driver ipath_driver = {
128 .name = IPATH_DRV_NAME,
129 .probe = ipath_init_one,
130 .remove = __devexit_p(ipath_remove_one),
131 .id_table = ipath_pci_tbl,
132};
133
134/*
135 * This is where port 0's rcvhdrtail register is written back; we also
136 * want nothing else sharing the cache line, so make it a cache line
137 * in size. Used for all units.
138 */
139volatile __le64 *ipath_port0_rcvhdrtail;
140dma_addr_t ipath_port0_rcvhdrtail_dma;
141static int port0_rcvhdrtail_refs;
142
143static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
144 u32 *bar0, u32 *bar1)
145{
146 int ret;
147
148 ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);
149 if (ret)
150 ipath_dev_err(dd, "failed to read bar0 before enable: "
151 "error %d\n", -ret);
152
153 ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);
154 if (ret)
155 ipath_dev_err(dd, "failed to read bar1 before enable: "
156 "error %d\n", -ret);
157
158 ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);
159}
160
/*
 * Undo ipath_alloc_devdata(): detach dd from the pci_dev, drop it from
 * the unit idr and the global device list (both under ipath_devs_lock),
 * then release the coherent DMA memory backing the structure itself.
 */
static void ipath_free_devdata(struct pci_dev *pdev,
			       struct ipath_devdata *dd)
{
	unsigned long flags;

	pci_set_drvdata(pdev, NULL);

	/* ipath_unit == -1 means no unit ID was ever assigned (allocation
	 * failed before idr_get_new succeeded); skip idr/list teardown. */
	if (dd->ipath_unit != -1) {
		spin_lock_irqsave(&ipath_devs_lock, flags);
		idr_remove(&unit_table, dd->ipath_unit);
		list_del(&dd->ipath_list);
		spin_unlock_irqrestore(&ipath_devs_lock, flags);
	}
	dma_free_coherent(&pdev->dev, sizeof(*dd), dd, dd->ipath_dma_addr);
}
176
/*
 * Allocate a per-device data structure, assign it a unit number from
 * the idr, and link it onto the global device list.  Returns an
 * ERR_PTR() on failure.
 */
static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
{
	unsigned long flags;
	struct ipath_devdata *dd;
	dma_addr_t dma_addr;
	int ret;

	/* Preload idr memory now, while we can still sleep (GFP_KERNEL);
	 * idr_get_new() below runs under a spinlock. */
	if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
		dd = ERR_PTR(-ENOMEM);
		goto bail;
	}

	/* NOTE(review): dd is allocated with dma_alloc_coherent —
	 * presumably some of its fields are hardware DMA targets;
	 * confirm before changing the allocation. */
	dd = dma_alloc_coherent(&pdev->dev, sizeof(*dd), &dma_addr,
				GFP_KERNEL);

	if (!dd) {
		dd = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dd->ipath_dma_addr = dma_addr;
	/* -1 tells ipath_free_devdata() no unit ID was registered. */
	dd->ipath_unit = -1;

	spin_lock_irqsave(&ipath_devs_lock, flags);

	ret = idr_get_new(&unit_table, dd, &dd->ipath_unit);
	if (ret < 0) {
		printk(KERN_ERR IPATH_DRV_NAME
		       ": Could not allocate unit ID: error %d\n", -ret);
		ipath_free_devdata(pdev, dd);
		dd = ERR_PTR(ret);
		goto bail_unlock;
	}

	dd->pcidev = pdev;
	pci_set_drvdata(pdev, dd);

	list_add(&dd->ipath_list, &ipath_dev_list);

bail_unlock:
	spin_unlock_irqrestore(&ipath_devs_lock, flags);

bail:
	return dd;
}
222
/*
 * Map a unit number to its devdata.  Caller must hold ipath_devs_lock
 * (see ipath_lookup(), which wraps this call with the lock held).
 */
static inline struct ipath_devdata *__ipath_lookup(int unit)
{
	return idr_find(&unit_table, unit);
}
227
228struct ipath_devdata *ipath_lookup(int unit)
229{
230 struct ipath_devdata *dd;
231 unsigned long flags;
232
233 spin_lock_irqsave(&ipath_devs_lock, flags);
234 dd = __ipath_lookup(unit);
235 spin_unlock_irqrestore(&ipath_devs_lock, flags);
236
237 return dd;
238}
239
240int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp)
241{
242 int nunits, npresent, nup;
243 struct ipath_devdata *dd;
244 unsigned long flags;
245 u32 maxports;
246
247 nunits = npresent = nup = maxports = 0;
248
249 spin_lock_irqsave(&ipath_devs_lock, flags);
250
251 list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
252 nunits++;
253 if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)
254 npresent++;
255 if (dd->ipath_lid &&
256 !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN
257 | IPATH_LINKUNK)))
258 nup++;
259 if (dd->ipath_cfgports > maxports)
260 maxports = dd->ipath_cfgports;
261 }
262
263 spin_unlock_irqrestore(&ipath_devs_lock, flags);
264
265 if (npresentp)
266 *npresentp = npresent;
267 if (nupp)
268 *nupp = nup;
269 if (maxportsp)
270 *maxportsp = maxports;
271
272 return nunits;
273}
274
275static int init_port0_rcvhdrtail(struct pci_dev *pdev)
276{
277 int ret;
278
279 mutex_lock(&ipath_mutex);
280
281 if (!ipath_port0_rcvhdrtail) {
282 ipath_port0_rcvhdrtail =
283 dma_alloc_coherent(&pdev->dev,
284 IPATH_PORT0_RCVHDRTAIL_SIZE,
285 &ipath_port0_rcvhdrtail_dma,
286 GFP_KERNEL);
287
288 if (!ipath_port0_rcvhdrtail) {
289 ret = -ENOMEM;
290 goto bail;
291 }
292 }
293 port0_rcvhdrtail_refs++;
294 ret = 0;
295
296bail:
297 mutex_unlock(&ipath_mutex);
298
299 return ret;
300}
301
302static void cleanup_port0_rcvhdrtail(struct pci_dev *pdev)
303{
304 mutex_lock(&ipath_mutex);
305
306 if (!--port0_rcvhdrtail_refs) {
307 dma_free_coherent(&pdev->dev, IPATH_PORT0_RCVHDRTAIL_SIZE,
308 (void *) ipath_port0_rcvhdrtail,
309 ipath_port0_rcvhdrtail_dma);
310 ipath_port0_rcvhdrtail = NULL;
311 }
312
313 mutex_unlock(&ipath_mutex);
314}
315
316/*
317 * These next two routines are placeholders in case we don't have per-arch
318 * code for controlling write combining. If explicit control of write
319 * combining is not available, performance will probably be awful.
320 */
321
/*
 * Weak default for enabling write combining on the PIO buffers;
 * per-arch code may provide a strong override.  This fallback simply
 * reports "not supported" so the caller can warn and continue.
 */
int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd)
{
	return -EOPNOTSUPP;
}
326
/*
 * Weak default for disabling write combining; no-op unless a
 * per-arch strong override exists.
 */
void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
{
}
330
331static int __devinit ipath_init_one(struct pci_dev *pdev,
332 const struct pci_device_id *ent)
333{
334 int ret, len, j;
335 struct ipath_devdata *dd;
336 unsigned long long addr;
337 u32 bar0 = 0, bar1 = 0;
338 u8 rev;
339
340 ret = init_port0_rcvhdrtail(pdev);
341 if (ret < 0) {
342 printk(KERN_ERR IPATH_DRV_NAME
343 ": Could not allocate port0_rcvhdrtail: error %d\n",
344 -ret);
345 goto bail;
346 }
347
348 dd = ipath_alloc_devdata(pdev);
349 if (IS_ERR(dd)) {
350 ret = PTR_ERR(dd);
351 printk(KERN_ERR IPATH_DRV_NAME
352 ": Could not allocate devdata: error %d\n", -ret);
353 goto bail_rcvhdrtail;
354 }
355
356 ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
357
358 read_bars(dd, pdev, &bar0, &bar1);
359
360 ret = pci_enable_device(pdev);
361 if (ret) {
362 /* This can happen iff:
363 *
364 * We did a chip reset, and then failed to reprogram the
365 * BAR, or the chip reset due to an internal error. We then
366 * unloaded the driver and reloaded it.
367 *
368 * Both reset cases set the BAR back to initial state. For
369 * the latter case, the AER sticky error bit at offset 0x718
370 * should be set, but the Linux kernel doesn't yet know
371 * about that, it appears. If the original BAR was retained
372 * in the kernel data structures, this may be OK.
373 */
374 ipath_dev_err(dd, "enable unit %d failed: error %d\n",
375 dd->ipath_unit, -ret);
376 goto bail_devdata;
377 }
378 addr = pci_resource_start(pdev, 0);
379 len = pci_resource_len(pdev, 0);
380 ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %x, vend %x/%x "
381 "driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
382 ent->device, ent->driver_data);
383
384 read_bars(dd, pdev, &bar0, &bar1);
385
386 if (!bar1 && !(bar0 & ~0xf)) {
387 if (addr) {
388 dev_info(&pdev->dev, "BAR is 0 (probable RESET), "
389 "rewriting as %llx\n", addr);
390 ret = pci_write_config_dword(
391 pdev, PCI_BASE_ADDRESS_0, addr);
392 if (ret) {
393 ipath_dev_err(dd, "rewrite of BAR0 "
394 "failed: err %d\n", -ret);
395 goto bail_disable;
396 }
397 ret = pci_write_config_dword(
398 pdev, PCI_BASE_ADDRESS_1, addr >> 32);
399 if (ret) {
400 ipath_dev_err(dd, "rewrite of BAR1 "
401 "failed: err %d\n", -ret);
402 goto bail_disable;
403 }
404 } else {
405 ipath_dev_err(dd, "BAR is 0 (probable RESET), "
406 "not usable until reboot\n");
407 ret = -ENODEV;
408 goto bail_disable;
409 }
410 }
411
412 ret = pci_request_regions(pdev, IPATH_DRV_NAME);
413 if (ret) {
414 dev_info(&pdev->dev, "pci_request_regions unit %u fails: "
415 "err %d\n", dd->ipath_unit, -ret);
416 goto bail_disable;
417 }
418
419 ret = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
420 if (ret) {
421 dev_info(&pdev->dev, "pci_set_dma_mask unit %u "
422 "fails: %d\n", dd->ipath_unit, ret);
423 goto bail_regions;
424 }
425
426 pci_set_master(pdev);
427
428 /*
429 * Save BARs to rewrite after device reset. Save all 64 bits of
430 * BAR, just in case.
431 */
432 dd->ipath_pcibar0 = addr;
433 dd->ipath_pcibar1 = addr >> 32;
434 dd->ipath_deviceid = ent->device; /* save for later use */
435 dd->ipath_vendorid = ent->vendor;
436
437 /* setup the chip-specific functions, as early as possible. */
438 switch (ent->device) {
439 case PCI_DEVICE_ID_INFINIPATH_HT:
440 ipath_init_ht400_funcs(dd);
441 break;
442 case PCI_DEVICE_ID_INFINIPATH_PE800:
443 ipath_init_pe800_funcs(dd);
444 break;
445 default:
446 ipath_dev_err(dd, "Found unknown PathScale deviceid 0x%x, "
447 "failing\n", ent->device);
448 return -ENODEV;
449 }
450
451 for (j = 0; j < 6; j++) {
452 if (!pdev->resource[j].start)
453 continue;
454 ipath_cdbg(VERBOSE, "BAR %d start %lx, end %lx, len %lx\n",
455 j, pdev->resource[j].start,
456 pdev->resource[j].end,
457 pci_resource_len(pdev, j));
458 }
459
460 if (!addr) {
461 ipath_dev_err(dd, "No valid address in BAR 0!\n");
462 ret = -ENODEV;
463 goto bail_regions;
464 }
465
466 dd->ipath_deviceid = ent->device; /* save for later use */
467 dd->ipath_vendorid = ent->vendor;
468
469 ret = pci_read_config_byte(pdev, PCI_REVISION_ID, &rev);
470 if (ret) {
471 ipath_dev_err(dd, "Failed to read PCI revision ID unit "
472 "%u: err %d\n", dd->ipath_unit, -ret);
473 goto bail_regions; /* shouldn't ever happen */
474 }
475 dd->ipath_pcirev = rev;
476
477 dd->ipath_kregbase = ioremap_nocache(addr, len);
478
479 if (!dd->ipath_kregbase) {
480 ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
481 addr);
482 ret = -ENOMEM;
483 goto bail_iounmap;
484 }
485 dd->ipath_kregend = (u64 __iomem *)
486 ((void __iomem *)dd->ipath_kregbase + len);
487 dd->ipath_physaddr = addr; /* used for io_remap, etc. */
488 /* for user mmap */
489 dd->ipath_kregvirt = (u64 __iomem *) phys_to_virt(addr);
490 ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p "
491 "kregvirt %p\n", addr, dd->ipath_kregbase,
492 dd->ipath_kregvirt);
493
494 /*
495 * clear ipath_flags here instead of in ipath_init_chip as it is set
496 * by ipath_setup_htconfig.
497 */
498 dd->ipath_flags = 0;
499
500 if (dd->ipath_f_bus(dd, pdev))
501 ipath_dev_err(dd, "Failed to setup config space; "
502 "continuing anyway\n");
503
504 /*
505 * set up our interrupt handler; SA_SHIRQ probably not needed,
506 * since MSI interrupts shouldn't be shared but won't hurt for now.
507 * check 0 irq after we return from chip-specific bus setup, since
508 * that can affect this due to setup
509 */
510 if (!pdev->irq)
511 ipath_dev_err(dd, "irq is 0, BIOS error? Interrupts won't "
512 "work\n");
513 else {
514 ret = request_irq(pdev->irq, ipath_intr, SA_SHIRQ,
515 IPATH_DRV_NAME, dd);
516 if (ret) {
517 ipath_dev_err(dd, "Couldn't setup irq handler, "
518 "irq=%u: %d\n", pdev->irq, ret);
519 goto bail_iounmap;
520 }
521 }
522
523 ret = ipath_init_chip(dd, 0); /* do the chip-specific init */
524 if (ret)
525 goto bail_iounmap;
526
527 ret = ipath_enable_wc(dd);
528
529 if (ret) {
530 ipath_dev_err(dd, "Write combining not enabled "
531 "(err %d): performance may be poor\n",
532 -ret);
533 ret = 0;
534 }
535
536 ipath_device_create_group(&pdev->dev, dd);
537 ipathfs_add_device(dd);
538 ipath_user_add(dd);
539 ipath_layer_add(dd);
540
541 goto bail;
542
543bail_iounmap:
544 iounmap((volatile void __iomem *) dd->ipath_kregbase);
545
546bail_regions:
547 pci_release_regions(pdev);
548
549bail_disable:
550 pci_disable_device(pdev);
551
552bail_devdata:
553 ipath_free_devdata(pdev, dd);
554
555bail_rcvhdrtail:
556 cleanup_port0_rcvhdrtail(pdev);
557
558bail:
559 return ret;
560}
561
/*
 * PCI remove routine: tear down in the reverse order of ipath_init_one —
 * upper layers first, then sysfs/ipathfs entries, register mapping, PCI
 * regions, and finally the devdata and the shared rcvhdrtail reference.
 *
 * NOTE(review): dd is used without a NULL check after pci_get_drvdata();
 * presumably drvdata is always set by a successful probe — confirm no
 * path reaches here with it unset.
 */
static void __devexit ipath_remove_one(struct pci_dev *pdev)
{
	struct ipath_devdata *dd;

	ipath_cdbg(VERBOSE, "removing, pdev=%p\n", pdev);
	if (!pdev)
		return;

	dd = pci_get_drvdata(pdev);
	/* detach the layered (ethernet) and user-space interfaces first */
	ipath_layer_del(dd);
	ipath_user_del(dd);
	ipathfs_remove_device(dd);
	ipath_device_remove_group(&pdev->dev, dd);
	ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
		   "unit %u\n", dd, (u32) dd->ipath_unit);
	if (dd->ipath_kregbase) {
		ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n",
			   dd->ipath_kregbase);
		iounmap((volatile void __iomem *) dd->ipath_kregbase);
		dd->ipath_kregbase = NULL;
	}
	pci_release_regions(pdev);
	ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
	pci_disable_device(pdev);

	ipath_free_devdata(pdev, dd);
	cleanup_port0_rcvhdrtail(pdev);
}
590
/* general driver use */
/* serializes the shared port0 rcvhdrtail refcount/allocation */
DEFINE_MUTEX(ipath_mutex);

/* protects updates to the PIO-availability shadow bitmaps */
static DEFINE_SPINLOCK(ipath_pioavail_lock);
595
596/**
597 * ipath_disarm_piobufs - cancel a range of PIO buffers
598 * @dd: the infinipath device
599 * @first: the first PIO buffer to cancel
600 * @cnt: the number of PIO buffers to cancel
601 *
602 * cancel a range of PIO buffers, used when they might be armed, but
603 * not triggered. Used at init to ensure buffer state, and also user
604 * process close, in case it died while writing to a PIO buffer
605 * Also after errors.
606 */
607void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
608 unsigned cnt)
609{
610 unsigned i, last = first + cnt;
611 u64 sendctrl, sendorig;
612
613 ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
614 sendorig = dd->ipath_sendctrl | INFINIPATH_S_DISARM;
615 for (i = first; i < last; i++) {
616 sendctrl = sendorig |
617 (i << INFINIPATH_S_DISARMPIOBUF_SHIFT);
618 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
619 sendctrl);
620 }
621
622 /*
623 * Write it again with current value, in case ipath_sendctrl changed
624 * while we were looping; no critical bits that would require
625 * locking.
626 *
627 * Write a 0, and then the original value, reading scratch in
628 * between. This seems to avoid a chip timing race that causes
629 * pioavail updates to memory to stop.
630 */
631 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
632 0);
633 sendorig = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
634 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
635 dd->ipath_sendctrl);
636}
637
638/**
639 * ipath_wait_linkstate - wait for an IB link state change to occur
640 * @dd: the infinipath device
641 * @state: the state to wait for
642 * @msecs: the number of milliseconds to wait
643 *
644 * wait up to msecs milliseconds for IB link state change to occur for
645 * now, take the easy polling route. Currently used only by
646 * ipath_layer_set_linkstate. Returns 0 if state reached, otherwise
647 * -ETIMEDOUT state can have multiple states set, for any of several
648 * transitions.
649 */
650int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
651{
652 dd->ipath_sma_state_wanted = state;
653 wait_event_interruptible_timeout(ipath_sma_state_wait,
654 (dd->ipath_flags & state),
655 msecs_to_jiffies(msecs));
656 dd->ipath_sma_state_wanted = 0;
657
658 if (!(dd->ipath_flags & state)) {
659 u64 val;
660 ipath_cdbg(SMA, "Didn't reach linkstate %s within %u ms\n",
661 /* test INIT ahead of DOWN, both can be set */
662 (state & IPATH_LINKINIT) ? "INIT" :
663 ((state & IPATH_LINKDOWN) ? "DOWN" :
664 ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),
665 msecs);
666 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
667 ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",
668 (unsigned long long) ipath_read_kreg64(
669 dd, dd->ipath_kregs->kr_ibcctrl),
670 (unsigned long long) val,
671 ipath_ibcstatus_str[val & 0xf]);
672 }
673 return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
674}
675
676void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
677{
678 *buf = '\0';
679 if (err & INFINIPATH_E_RHDRLEN)
680 strlcat(buf, "rhdrlen ", blen);
681 if (err & INFINIPATH_E_RBADTID)
682 strlcat(buf, "rbadtid ", blen);
683 if (err & INFINIPATH_E_RBADVERSION)
684 strlcat(buf, "rbadversion ", blen);
685 if (err & INFINIPATH_E_RHDR)
686 strlcat(buf, "rhdr ", blen);
687 if (err & INFINIPATH_E_RLONGPKTLEN)
688 strlcat(buf, "rlongpktlen ", blen);
689 if (err & INFINIPATH_E_RSHORTPKTLEN)
690 strlcat(buf, "rshortpktlen ", blen);
691 if (err & INFINIPATH_E_RMAXPKTLEN)
692 strlcat(buf, "rmaxpktlen ", blen);
693 if (err & INFINIPATH_E_RMINPKTLEN)
694 strlcat(buf, "rminpktlen ", blen);
695 if (err & INFINIPATH_E_RFORMATERR)
696 strlcat(buf, "rformaterr ", blen);
697 if (err & INFINIPATH_E_RUNSUPVL)
698 strlcat(buf, "runsupvl ", blen);
699 if (err & INFINIPATH_E_RUNEXPCHAR)
700 strlcat(buf, "runexpchar ", blen);
701 if (err & INFINIPATH_E_RIBFLOW)
702 strlcat(buf, "ribflow ", blen);
703 if (err & INFINIPATH_E_REBP)
704 strlcat(buf, "EBP ", blen);
705 if (err & INFINIPATH_E_SUNDERRUN)
706 strlcat(buf, "sunderrun ", blen);
707 if (err & INFINIPATH_E_SPIOARMLAUNCH)
708 strlcat(buf, "spioarmlaunch ", blen);
709 if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
710 strlcat(buf, "sunexperrpktnum ", blen);
711 if (err & INFINIPATH_E_SDROPPEDDATAPKT)
712 strlcat(buf, "sdroppeddatapkt ", blen);
713 if (err & INFINIPATH_E_SDROPPEDSMPPKT)
714 strlcat(buf, "sdroppedsmppkt ", blen);
715 if (err & INFINIPATH_E_SMAXPKTLEN)
716 strlcat(buf, "smaxpktlen ", blen);
717 if (err & INFINIPATH_E_SMINPKTLEN)
718 strlcat(buf, "sminpktlen ", blen);
719 if (err & INFINIPATH_E_SUNSUPVL)
720 strlcat(buf, "sunsupVL ", blen);
721 if (err & INFINIPATH_E_SPKTLEN)
722 strlcat(buf, "spktlen ", blen);
723 if (err & INFINIPATH_E_INVALIDADDR)
724 strlcat(buf, "invalidaddr ", blen);
725 if (err & INFINIPATH_E_RICRC)
726 strlcat(buf, "CRC ", blen);
727 if (err & INFINIPATH_E_RVCRC)
728 strlcat(buf, "VCRC ", blen);
729 if (err & INFINIPATH_E_RRCVEGRFULL)
730 strlcat(buf, "rcvegrfull ", blen);
731 if (err & INFINIPATH_E_RRCVHDRFULL)
732 strlcat(buf, "rcvhdrfull ", blen);
733 if (err & INFINIPATH_E_IBSTATUSCHANGED)
734 strlcat(buf, "ibcstatuschg ", blen);
735 if (err & INFINIPATH_E_RIBLOSTLINK)
736 strlcat(buf, "riblostlink ", blen);
737 if (err & INFINIPATH_E_HARDWARE)
738 strlcat(buf, "hardware ", blen);
739 if (err & INFINIPATH_E_RESET)
740 strlcat(buf, "reset ", blen);
741}
742
743/**
744 * get_rhf_errstring - decode RHF errors
745 * @err: the err number
746 * @msg: the output buffer
747 * @len: the length of the output buffer
748 *
749 * only used one place now, may want more later
750 */
751static void get_rhf_errstring(u32 err, char *msg, size_t len)
752{
753 /* if no errors, and so don't need to check what's first */
754 *msg = '\0';
755
756 if (err & INFINIPATH_RHF_H_ICRCERR)
757 strlcat(msg, "icrcerr ", len);
758 if (err & INFINIPATH_RHF_H_VCRCERR)
759 strlcat(msg, "vcrcerr ", len);
760 if (err & INFINIPATH_RHF_H_PARITYERR)
761 strlcat(msg, "parityerr ", len);
762 if (err & INFINIPATH_RHF_H_LENERR)
763 strlcat(msg, "lenerr ", len);
764 if (err & INFINIPATH_RHF_H_MTUERR)
765 strlcat(msg, "mtuerr ", len);
766 if (err & INFINIPATH_RHF_H_IHDRERR)
767 /* infinipath hdr checksum error */
768 strlcat(msg, "ipathhdrerr ", len);
769 if (err & INFINIPATH_RHF_H_TIDERR)
770 strlcat(msg, "tiderr ", len);
771 if (err & INFINIPATH_RHF_H_MKERR)
772 /* bad port, offset, etc. */
773 strlcat(msg, "invalid ipathhdr ", len);
774 if (err & INFINIPATH_RHF_H_IBERR)
775 strlcat(msg, "iberr ", len);
776 if (err & INFINIPATH_RHF_L_SWA)
777 strlcat(msg, "swA ", len);
778 if (err & INFINIPATH_RHF_L_SWB)
779 strlcat(msg, "swB ", len);
780}
781
782/**
783 * ipath_get_egrbuf - get an eager buffer
784 * @dd: the infinipath device
785 * @bufnum: the eager buffer to get
786 * @err: unused
787 *
788 * must only be called if ipath_pd[port] is known to be allocated
789 */
790static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum,
791 int err)
792{
793 return dd->ipath_port0_skbs ?
794 (void *)dd->ipath_port0_skbs[bufnum]->data : NULL;
795}
796
797/**
798 * ipath_alloc_skb - allocate an skb and buffer with possible constraints
799 * @dd: the infinipath device
800 * @gfp_mask: the sk_buff SFP mask
801 */
802struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
803 gfp_t gfp_mask)
804{
805 struct sk_buff *skb;
806 u32 len;
807
808 /*
809 * Only fully supported way to handle this is to allocate lots
810 * extra, align as needed, and then do skb_reserve(). That wastes
811 * a lot of memory... I'll have to hack this into infinipath_copy
812 * also.
813 */
814
815 /*
816 * We need 4 extra bytes for unaligned transfer copying
817 */
818 if (dd->ipath_flags & IPATH_4BYTE_TID) {
819 /* we need a 4KB multiple alignment, and there is no way
820 * to do it except to allocate extra and then skb_reserve
821 * enough to bring it up to the right alignment.
822 */
823 len = dd->ipath_ibmaxlen + 4 + (1 << 11) - 1;
824 }
825 else
826 len = dd->ipath_ibmaxlen + 4;
827 skb = __dev_alloc_skb(len, gfp_mask);
828 if (!skb) {
829 ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
830 len);
831 goto bail;
832 }
833 if (dd->ipath_flags & IPATH_4BYTE_TID) {
834 u32 una = ((1 << 11) - 1) & (unsigned long)(skb->data + 4);
835 if (una)
836 skb_reserve(skb, 4 + (1 << 11) - una);
837 else
838 skb_reserve(skb, 4);
839 } else
840 skb_reserve(skb, 4);
841
842bail:
843 return skb;
844}
845
846/**
847 * ipath_rcv_layer - receive a packet for the layered (ethernet) driver
848 * @dd: the infinipath device
849 * @etail: the sk_buff number
850 * @tlen: the total packet length
851 * @hdr: the ethernet header
852 *
853 * Separate routine for better overall optimization
854 */
855static void ipath_rcv_layer(struct ipath_devdata *dd, u32 etail,
856 u32 tlen, struct ether_header *hdr)
857{
858 u32 elen;
859 u8 pad, *bthbytes;
860 struct sk_buff *skb, *nskb;
861
862 if (dd->ipath_port0_skbs && hdr->sub_opcode == OPCODE_ENCAP) {
863 /*
864 * Allocate a new sk_buff to replace the one we give
865 * to the network stack.
866 */
867 nskb = ipath_alloc_skb(dd, GFP_ATOMIC);
868 if (!nskb) {
869 /* count OK packets that we drop */
870 ipath_stats.sps_krdrops++;
871 return;
872 }
873
874 bthbytes = (u8 *) hdr->bth;
875 pad = (bthbytes[1] >> 4) & 3;
876 /* +CRC32 */
877 elen = tlen - (sizeof(*hdr) + pad + sizeof(u32));
878
879 skb = dd->ipath_port0_skbs[etail];
880 dd->ipath_port0_skbs[etail] = nskb;
881 skb_put(skb, elen);
882
883 dd->ipath_f_put_tid(dd, etail + (u64 __iomem *)
884 ((char __iomem *) dd->ipath_kregbase
885 + dd->ipath_rcvegrbase), 0,
886 virt_to_phys(nskb->data));
887
888 __ipath_layer_rcv(dd, hdr, skb);
889
890 /* another ether packet received */
891 ipath_stats.sps_ether_rpkts++;
892 }
893 else if (hdr->sub_opcode == OPCODE_LID_ARP)
894 __ipath_layer_rcv_lid(dd, hdr);
895}
896
897/*
898 * ipath_kreceive - receive a packet
899 * @dd: the infinipath device
900 *
901 * called from interrupt handler for errors or receive interrupt
902 */
903void ipath_kreceive(struct ipath_devdata *dd)
904{
905 u64 *rc;
906 void *ebuf;
907 const u32 rsize = dd->ipath_rcvhdrentsize; /* words */
908 const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */
909 u32 etail = -1, l, hdrqtail;
910 struct ips_message_header *hdr;
911 u32 eflags, i, etype, tlen, pkttot = 0;
912 static u64 totcalls; /* stats, may eventually remove */
913 char emsg[128];
914
915 if (!dd->ipath_hdrqtailptr) {
916 ipath_dev_err(dd,
917 "hdrqtailptr not set, can't do receives\n");
918 goto bail;
919 }
920
921 /* There is already a thread processing this queue. */
922 if (test_and_set_bit(0, &dd->ipath_rcv_pending))
923 goto bail;
924
925 if (dd->ipath_port0head ==
926 (u32)le64_to_cpu(*dd->ipath_hdrqtailptr))
927 goto done;
928
929gotmore:
930 /*
931 * read only once at start. If in flood situation, this helps
932 * performance slightly. If more arrive while we are processing,
933 * we'll come back here and do them
934 */
935 hdrqtail = (u32)le64_to_cpu(*dd->ipath_hdrqtailptr);
936
937 for (i = 0, l = dd->ipath_port0head; l != hdrqtail; i++) {
938 u32 qp;
939 u8 *bthbytes;
940
941 rc = (u64 *) (dd->ipath_pd[0]->port_rcvhdrq + (l << 2));
942 hdr = (struct ips_message_header *)&rc[1];
943 /*
944 * could make a network order version of IPATH_KD_QP, and
945 * do the obvious shift before masking to speed this up.
946 */
947 qp = ntohl(hdr->bth[1]) & 0xffffff;
948 bthbytes = (u8 *) hdr->bth;
949
950 eflags = ips_get_hdr_err_flags((__le32 *) rc);
951 etype = ips_get_rcv_type((__le32 *) rc);
952 /* total length */
953 tlen = ips_get_length_in_bytes((__le32 *) rc);
954 ebuf = NULL;
955 if (etype != RCVHQ_RCV_TYPE_EXPECTED) {
956 /*
957 * it turns out that the chips uses an eager buffer
958 * for all non-expected packets, whether it "needs"
959 * one or not. So always get the index, but don't
960 * set ebuf (so we try to copy data) unless the
961 * length requires it.
962 */
963 etail = ips_get_index((__le32 *) rc);
964 if (tlen > sizeof(*hdr) ||
965 etype == RCVHQ_RCV_TYPE_NON_KD)
966 ebuf = ipath_get_egrbuf(dd, etail, 0);
967 }
968
969 /*
970 * both tiderr and ipathhdrerr are set for all plain IB
971 * packets; only ipathhdrerr should be set.
972 */
973
974 if (etype != RCVHQ_RCV_TYPE_NON_KD && etype !=
975 RCVHQ_RCV_TYPE_ERROR && ips_get_ipath_ver(
976 hdr->iph.ver_port_tid_offset) !=
977 IPS_PROTO_VERSION) {
978 ipath_cdbg(PKT, "Bad InfiniPath protocol version "
979 "%x\n", etype);
980 }
981
982 if (eflags & ~(INFINIPATH_RHF_H_TIDERR |
983 INFINIPATH_RHF_H_IHDRERR)) {
984 get_rhf_errstring(eflags, emsg, sizeof emsg);
985 ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
986 "tlen=%x opcode=%x egridx=%x: %s\n",
987 eflags, l, etype, tlen, bthbytes[0],
988 ips_get_index((__le32 *) rc), emsg);
989 } else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
990 int ret = __ipath_verbs_rcv(dd, rc + 1,
991 ebuf, tlen);
992 if (ret == -ENODEV)
993 ipath_cdbg(VERBOSE,
994 "received IB packet, "
995 "not SMA (QP=%x)\n", qp);
996 } else if (etype == RCVHQ_RCV_TYPE_EAGER) {
997 if (qp == IPATH_KD_QP &&
998 bthbytes[0] == ipath_layer_rcv_opcode &&
999 ebuf)
1000 ipath_rcv_layer(dd, etail, tlen,
1001 (struct ether_header *)hdr);
1002 else
1003 ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
1004 "qp=%x), len %x; ignored\n",
1005 etype, bthbytes[0], qp, tlen);
1006 }
1007 else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
1008 ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
1009 be32_to_cpu(hdr->bth[0]) & 0xff);
1010 else if (eflags & (INFINIPATH_RHF_H_TIDERR |
1011 INFINIPATH_RHF_H_IHDRERR)) {
1012 /*
1013 * This is a type 3 packet, only the LRH is in the
1014 * rcvhdrq, the rest of the header is in the eager
1015 * buffer.
1016 */
1017 u8 opcode;
1018 if (ebuf) {
1019 bthbytes = (u8 *) ebuf;
1020 opcode = *bthbytes;
1021 }
1022 else
1023 opcode = 0;
1024 get_rhf_errstring(eflags, emsg, sizeof emsg);
1025 ipath_dbg("Err %x (%s), opcode %x, egrbuf %x, "
1026 "len %x\n", eflags, emsg, opcode, etail,
1027 tlen);
1028 } else {
1029 /*
1030 * error packet, type of error unknown.
1031 * Probably type 3, but we don't know, so don't
1032 * even try to print the opcode, etc.
1033 */
1034 ipath_dbg("Error Pkt, but no eflags! egrbuf %x, "
1035 "len %x\nhdrq@%lx;hdrq+%x rhf: %llx; "
1036 "hdr %llx %llx %llx %llx %llx\n",
1037 etail, tlen, (unsigned long) rc, l,
1038 (unsigned long long) rc[0],
1039 (unsigned long long) rc[1],
1040 (unsigned long long) rc[2],
1041 (unsigned long long) rc[3],
1042 (unsigned long long) rc[4],
1043 (unsigned long long) rc[5]);
1044 }
1045 l += rsize;
1046 if (l >= maxcnt)
1047 l = 0;
1048 /*
1049 * update for each packet, to help prevent overflows if we
1050 * have lots of packets.
1051 */
1052 (void)ipath_write_ureg(dd, ur_rcvhdrhead,
1053 dd->ipath_rhdrhead_intr_off | l, 0);
1054 if (etype != RCVHQ_RCV_TYPE_EXPECTED)
1055 (void)ipath_write_ureg(dd, ur_rcvegrindexhead,
1056 etail, 0);
1057 }
1058
1059 pkttot += i;
1060
1061 dd->ipath_port0head = l;
1062
1063 if (hdrqtail != (u32)le64_to_cpu(*dd->ipath_hdrqtailptr))
1064 /* more arrived while we handled first batch */
1065 goto gotmore;
1066
1067 if (pkttot > ipath_stats.sps_maxpkts_call)
1068 ipath_stats.sps_maxpkts_call = pkttot;
1069 ipath_stats.sps_port0pkts += pkttot;
1070 ipath_stats.sps_avgpkts_call =
1071 ipath_stats.sps_port0pkts / ++totcalls;
1072
1073done:
1074 clear_bit(0, &dd->ipath_rcv_pending);
1075 smp_mb__after_clear_bit();
1076
1077bail:;
1078}
1079
1080/**
1081 * ipath_update_pio_bufs - update shadow copy of the PIO availability map
1082 * @dd: the infinipath device
1083 *
1084 * called whenever our local copy indicates we have run out of send buffers
1085 * NOTE: This can be called from interrupt context by some code
1086 * and from non-interrupt context by ipath_getpiobuf().
1087 */
1088
1089static void ipath_update_pio_bufs(struct ipath_devdata *dd)
1090{
1091 unsigned long flags;
1092 int i;
1093 const unsigned piobregs = (unsigned)dd->ipath_pioavregs;
1094
1095 /* If the generation (check) bits have changed, then we update the
1096 * busy bit for the corresponding PIO buffer. This algorithm will
1097 * modify positions to the value they already have in some cases
1098 * (i.e., no change), but it's faster than changing only the bits
1099 * that have changed.
1100 *
1101 * We would like to do this atomicly, to avoid spinlocks in the
1102 * critical send path, but that's not really possible, given the
1103 * type of changes, and that this routine could be called on
1104 * multiple cpu's simultaneously, so we lock in this routine only,
1105 * to avoid conflicting updates; all we change is the shadow, and
1106 * it's a single 64 bit memory location, so by definition the update
1107 * is atomic in terms of what other cpu's can see in testing the
1108 * bits. The spin_lock overhead isn't too bad, since it only
1109 * happens when all buffers are in use, so only cpu overhead, not
1110 * latency or bandwidth is affected.
1111 */
1112#define _IPATH_ALL_CHECKBITS 0x5555555555555555ULL
1113 if (!dd->ipath_pioavailregs_dma) {
1114 ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
1115 return;
1116 }
1117 if (ipath_debug & __IPATH_VERBDBG) {
1118 /* only if packet debug and verbose */
1119 volatile __le64 *dma = dd->ipath_pioavailregs_dma;
1120 unsigned long *shadow = dd->ipath_pioavailshadow;
1121
1122 ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "
1123 "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "
1124 "s3=%lx\n",
1125 (unsigned long long) le64_to_cpu(dma[0]),
1126 shadow[0],
1127 (unsigned long long) le64_to_cpu(dma[1]),
1128 shadow[1],
1129 (unsigned long long) le64_to_cpu(dma[2]),
1130 shadow[2],
1131 (unsigned long long) le64_to_cpu(dma[3]),
1132 shadow[3]);
1133 if (piobregs > 4)
1134 ipath_cdbg(
1135 PKT, "2nd group, dma4=%llx shad4=%lx, "
1136 "d5=%llx s5=%lx, d6=%llx s6=%lx, "
1137 "d7=%llx s7=%lx\n",
1138 (unsigned long long) le64_to_cpu(dma[4]),
1139 shadow[4],
1140 (unsigned long long) le64_to_cpu(dma[5]),
1141 shadow[5],
1142 (unsigned long long) le64_to_cpu(dma[6]),
1143 shadow[6],
1144 (unsigned long long) le64_to_cpu(dma[7]),
1145 shadow[7]);
1146 }
1147 spin_lock_irqsave(&ipath_pioavail_lock, flags);
1148 for (i = 0; i < piobregs; i++) {
1149 u64 pchbusy, pchg, piov, pnew;
1150 /*
1151 * Chip Errata: bug 6641; even and odd qwords>3 are swapped
1152 */
1153 if (i > 3) {
1154 if (i & 1)
1155 piov = le64_to_cpu(
1156 dd->ipath_pioavailregs_dma[i - 1]);
1157 else
1158 piov = le64_to_cpu(
1159 dd->ipath_pioavailregs_dma[i + 1]);
1160 } else
1161 piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
1162 pchg = _IPATH_ALL_CHECKBITS &
1163 ~(dd->ipath_pioavailshadow[i] ^ piov);
1164 pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
1165 if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
1166 pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;
1167 pnew |= piov & pchbusy;
1168 dd->ipath_pioavailshadow[i] = pnew;
1169 }
1170 }
1171 spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1172}
1173
1174/**
1175 * ipath_setrcvhdrsize - set the receive header size
1176 * @dd: the infinipath device
1177 * @rhdrsize: the receive header size
1178 *
1179 * called from user init code, and also layered driver init
1180 */
1181int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
1182{
1183 int ret = 0;
1184
1185 if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
1186 if (dd->ipath_rcvhdrsize != rhdrsize) {
1187 dev_info(&dd->pcidev->dev,
1188 "Error: can't set protocol header "
1189 "size %u, already %u\n",
1190 rhdrsize, dd->ipath_rcvhdrsize);
1191 ret = -EAGAIN;
1192 } else
1193 ipath_cdbg(VERBOSE, "Reuse same protocol header "
1194 "size %u\n", dd->ipath_rcvhdrsize);
1195 } else if (rhdrsize > (dd->ipath_rcvhdrentsize -
1196 (sizeof(u64) / sizeof(u32)))) {
1197 ipath_dbg("Error: can't set protocol header size %u "
1198 "(> max %u)\n", rhdrsize,
1199 dd->ipath_rcvhdrentsize -
1200 (u32) (sizeof(u64) / sizeof(u32)));
1201 ret = -EOVERFLOW;
1202 } else {
1203 dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
1204 dd->ipath_rcvhdrsize = rhdrsize;
1205 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
1206 dd->ipath_rcvhdrsize);
1207 ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",
1208 dd->ipath_rcvhdrsize);
1209 }
1210 return ret;
1211}
1212
1213/**
1214 * ipath_getpiobuf - find an available pio buffer
1215 * @dd: the infinipath device
1216 * @pbufnum: the buffer number is placed here
1217 *
1218 * do appropriate marking as busy, etc.
1219 * returns buffer number if one found (>=0), negative number is error.
1220 * Used by ipath_sma_send_pkt and ipath_layer_send
1221 */
1222u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
1223{
1224 int i, j, starti, updated = 0;
1225 unsigned piobcnt, iter;
1226 unsigned long flags;
1227 unsigned long *shadow = dd->ipath_pioavailshadow;
1228 u32 __iomem *buf;
1229
1230 piobcnt = (unsigned)(dd->ipath_piobcnt2k
1231 + dd->ipath_piobcnt4k);
1232 starti = dd->ipath_lastport_piobuf;
1233 iter = piobcnt - starti;
1234 if (dd->ipath_upd_pio_shadow) {
1235 /*
1236 * Minor optimization. If we had no buffers on last call,
1237 * start out by doing the update; continue and do scan even
1238 * if no buffers were updated, to be paranoid
1239 */
1240 ipath_update_pio_bufs(dd);
1241 /* we scanned here, don't do it at end of scan */
1242 updated = 1;
1243 i = starti;
1244 } else
1245 i = dd->ipath_lastpioindex;
1246
1247rescan:
1248 /*
1249 * while test_and_set_bit() is atomic, we do that and then the
1250 * change_bit(), and the pair is not. See if this is the cause
1251 * of the remaining armlaunch errors.
1252 */
1253 spin_lock_irqsave(&ipath_pioavail_lock, flags);
1254 for (j = 0; j < iter; j++, i++) {
1255 if (i >= piobcnt)
1256 i = starti;
1257 /*
1258 * To avoid bus lock overhead, we first find a candidate
1259 * buffer, then do the test and set, and continue if that
1260 * fails.
1261 */
1262 if (test_bit((2 * i) + 1, shadow) ||
1263 test_and_set_bit((2 * i) + 1, shadow))
1264 continue;
1265 /* flip generation bit */
1266 change_bit(2 * i, shadow);
1267 break;
1268 }
1269 spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1270
1271 if (j == iter) {
1272 volatile __le64 *dma = dd->ipath_pioavailregs_dma;
1273
1274 /*
1275 * first time through; shadow exhausted, but may be real
1276 * buffers available, so go see; if any updated, rescan
1277 * (once)
1278 */
1279 if (!updated) {
1280 ipath_update_pio_bufs(dd);
1281 updated = 1;
1282 i = starti;
1283 goto rescan;
1284 }
1285 dd->ipath_upd_pio_shadow = 1;
1286 /*
1287 * not atomic, but if we lose one once in a while, that's OK
1288 */
1289 ipath_stats.sps_nopiobufs++;
1290 if (!(++dd->ipath_consec_nopiobuf % 100000)) {
1291 ipath_dbg(
1292 "%u pio sends with no bufavail; dmacopy: "
1293 "%llx %llx %llx %llx; shadow: "
1294 "%lx %lx %lx %lx\n",
1295 dd->ipath_consec_nopiobuf,
1296 (unsigned long long) le64_to_cpu(dma[0]),
1297 (unsigned long long) le64_to_cpu(dma[1]),
1298 (unsigned long long) le64_to_cpu(dma[2]),
1299 (unsigned long long) le64_to_cpu(dma[3]),
1300 shadow[0], shadow[1], shadow[2],
1301 shadow[3]);
1302 /*
1303 * 4 buffers per byte, 4 registers above, cover rest
1304 * below
1305 */
1306 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
1307 (sizeof(shadow[0]) * 4 * 4))
1308 ipath_dbg("2nd group: dmacopy: %llx %llx "
1309 "%llx %llx; shadow: %lx %lx "
1310 "%lx %lx\n",
1311 (unsigned long long)
1312 le64_to_cpu(dma[4]),
1313 (unsigned long long)
1314 le64_to_cpu(dma[5]),
1315 (unsigned long long)
1316 le64_to_cpu(dma[6]),
1317 (unsigned long long)
1318 le64_to_cpu(dma[7]),
1319 shadow[4], shadow[5],
1320 shadow[6], shadow[7]);
1321 }
1322 buf = NULL;
1323 goto bail;
1324 }
1325
1326 if (updated)
1327 /*
1328 * ran out of bufs, now some (at least this one we just
1329 * got) are now available, so tell the layered driver.
1330 */
1331 __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
1332
1333 /*
1334 * set next starting place. Since it's just an optimization,
1335 * it doesn't matter who wins on this, so no locking
1336 */
1337 dd->ipath_lastpioindex = i + 1;
1338 if (dd->ipath_upd_pio_shadow)
1339 dd->ipath_upd_pio_shadow = 0;
1340 if (dd->ipath_consec_nopiobuf)
1341 dd->ipath_consec_nopiobuf = 0;
1342 if (i < dd->ipath_piobcnt2k)
1343 buf = (u32 __iomem *) (dd->ipath_pio2kbase +
1344 i * dd->ipath_palign);
1345 else
1346 buf = (u32 __iomem *)
1347 (dd->ipath_pio4kbase +
1348 (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
1349 ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
1350 i, (i < dd->ipath_piobcnt2k) ? 2 : 4, buf);
1351 if (pbufnum)
1352 *pbufnum = i;
1353
1354bail:
1355 return buf;
1356}
1357
/**
 * ipath_create_rcvhdrq - create a receive header queue
 * @dd: the infinipath device
 * @pd: the port data
 *
 * this *must* be physically contiguous memory, and for now,
 * that limits it to what kmalloc can do.
 *
 * Allocates (first call) or re-zeroes (reuse) the queue, then programs
 * its physical address into the chip's per-port rcvhdraddr register.
 * Returns 0 on success, -ENOMEM if the DMA allocation fails.
 */
int ipath_create_rcvhdrq(struct ipath_devdata *dd,
			 struct ipath_portdata *pd)
{
	int ret = 0, amt;

	/* queue size in bytes, rounded up to a whole page */
	amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
		    sizeof(u32), PAGE_SIZE);
	if (!pd->port_rcvhdrq) {
		/*
		 * not using REPEAT isn't viable; at 128KB, we can easily
		 * fail this. The problem with REPEAT is we can block here
		 * "forever". There isn't an inbetween, unfortunately. We
		 * could reduce the risk by never freeing the rcvhdrq except
		 * at unload, but even then, the first time a port is used,
		 * we could delay for some time...
		 */
		gfp_t gfp_flags = GFP_USER | __GFP_COMP;

		pd->port_rcvhdrq = dma_alloc_coherent(
			&dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
			gfp_flags);

		if (!pd->port_rcvhdrq) {
			ipath_dev_err(dd, "attempt to allocate %d bytes "
				      "for port %u rcvhdrq failed\n",
				      amt, pd->port_port);
			ret = -ENOMEM;
			goto bail;
		}

		pd->port_rcvhdrq_size = amt;

		ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "
			   "for port %u rcvhdr Q\n",
			   amt >> PAGE_SHIFT, pd->port_rcvhdrq,
			   (unsigned long) pd->port_rcvhdrq_phys,
			   (unsigned long) pd->port_rcvhdrq_size,
			   pd->port_port);
	} else {
		/*
		 * clear for security, sanity, and/or debugging, each
		 * time we reuse
		 */
		memset(pd->port_rcvhdrq, 0, amt);
	}

	/*
	 * tell chip each time we init it, even if we are re-using previous
	 * memory (we zero it at process close)
	 */
	ipath_cdbg(VERBOSE, "writing port %d rcvhdraddr as %lx\n",
		   pd->port_port, (unsigned long) pd->port_rcvhdrq_phys);
	ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
			      pd->port_port, pd->port_rcvhdrq_phys);

	ret = 0;
bail:
	return ret;
}
1425
1426int ipath_waitfor_complete(struct ipath_devdata *dd, ipath_kreg reg_id,
1427 u64 bits_to_wait_for, u64 * valp)
1428{
1429 unsigned long timeout;
1430 u64 lastval, val;
1431 int ret;
1432
1433 lastval = ipath_read_kreg64(dd, reg_id);
1434 /* wait a ridiculously long time */
1435 timeout = jiffies + msecs_to_jiffies(5);
1436 do {
1437 val = ipath_read_kreg64(dd, reg_id);
1438 /* set so they have something, even on failures. */
1439 *valp = val;
1440 if ((val & bits_to_wait_for) == bits_to_wait_for) {
1441 ret = 0;
1442 break;
1443 }
1444 if (val != lastval)
1445 ipath_cdbg(VERBOSE, "Changed from %llx to %llx, "
1446 "waiting for %llx bits\n",
1447 (unsigned long long) lastval,
1448 (unsigned long long) val,
1449 (unsigned long long) bits_to_wait_for);
1450 cond_resched();
1451 if (time_after(jiffies, timeout)) {
1452 ipath_dbg("Didn't get bits %llx in register 0x%x, "
1453 "got %llx\n",
1454 (unsigned long long) bits_to_wait_for,
1455 reg_id, (unsigned long long) *valp);
1456 ret = -ENODEV;
1457 break;
1458 }
1459 } while (1);
1460
1461 return ret;
1462}
1463
1464/**
1465 * ipath_waitfor_mdio_cmdready - wait for last command to complete
1466 * @dd: the infinipath device
1467 *
1468 * Like ipath_waitfor_complete(), but we wait for the CMDVALID bit to go
1469 * away indicating the last command has completed. It doesn't return data
1470 */
1471int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
1472{
1473 unsigned long timeout;
1474 u64 val;
1475 int ret;
1476
1477 /* wait a ridiculously long time */
1478 timeout = jiffies + msecs_to_jiffies(5);
1479 do {
1480 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_mdio);
1481 if (!(val & IPATH_MDIO_CMDVALID)) {
1482 ret = 0;
1483 break;
1484 }
1485 cond_resched();
1486 if (time_after(jiffies, timeout)) {
1487 ipath_dbg("CMDVALID stuck in mdio reg? (%llx)\n",
1488 (unsigned long long) val);
1489 ret = -ENODEV;
1490 break;
1491 }
1492 } while (1);
1493
1494 return ret;
1495}
1496
/**
 * ipath_set_ib_lstate - request an IB link state transition
 * @dd: the infinipath device
 * @which: pre-shifted LINKCMD/LINKINITCMD bits to OR into the cached
 *	ibcctrl value before writing it to the chip
 */
void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
{
	/* names for the LINKCMD codes, indexed by command value */
	static const char *what[4] = {
		[0] = "DOWN",
		[INFINIPATH_IBCC_LINKCMD_INIT] = "INIT",
		[INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
		[INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
	};
	/*
	 * log the requested command together with the current link
	 * training state read back from the IBC status register
	 */
	ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate "
		   "is %s\n", dd->ipath_unit,
		   what[(which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
			INFINIPATH_IBCC_LINKCMD_MASK],
		   ipath_ibcstatus_str[
			   (ipath_read_kreg64
			    (dd, dd->ipath_kregs->kr_ibcstatus) >>
			    INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
			   INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);

	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
			 dd->ipath_ibcctrl | which);
}
1518
/**
 * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
 * @dd: the infinipath device
 * @regno: the register number to read
 * @port: the port containing the register
 *
 * Registers that vary with the chip implementation constants (port)
 * use this routine.
 */
u64 ipath_read_kreg64_port(const struct ipath_devdata *dd, ipath_kreg regno,
			   unsigned port)
{
	u16 where;

	/* only the two rcvhdr address registers are per-port here */
	if (port < dd->ipath_portcnt &&
	    (regno == dd->ipath_kregs->kr_rcvhdraddr ||
	     regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
		/* per-port copies are laid out contiguously after regno */
		where = regno + port;
	else
		/*
		 * NOTE(review): -1 truncates to 0xffff in a u16;
		 * presumably ipath_read_kreg64() rejects this
		 * out-of-range index — confirm against its definition
		 */
		where = -1;

	return ipath_read_kreg64(dd, where);
}
1542
/**
 * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
 * @dd: the infinipath device
 * @regno: the register number to write
 * @port: the port containing the register
 * @value: the value to write
 *
 * Registers that vary with the chip implementation constants (port)
 * use this routine.
 */
void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
			   unsigned port, u64 value)
{
	u16 where;

	/* only the two rcvhdr address registers are per-port here */
	if (port < dd->ipath_portcnt &&
	    (regno == dd->ipath_kregs->kr_rcvhdraddr ||
	     regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
		/* per-port copies are laid out contiguously after regno */
		where = regno + port;
	else
		/*
		 * NOTE(review): -1 truncates to 0xffff in a u16;
		 * presumably ipath_write_kreg() rejects this
		 * out-of-range index — confirm against its definition
		 */
		where = -1;

	ipath_write_kreg(dd, where, value);
}
1567
/**
 * ipath_shutdown_device - shut down a device
 * @dd: the infinipath device
 *
 * This is called to make the device quiet when we are about to
 * unload the driver, and also when the device is administratively
 * disabled. It does not free any data structures.
 * Everything it does has to be setup again by ipath_init_chip(dd,1)
 *
 * The register writes below are order-sensitive: receive is stopped,
 * sends are drained then aborted, the link is disabled, and finally
 * pending interrupt/error state is cleared.
 */
void ipath_shutdown_device(struct ipath_devdata *dd)
{
	u64 val;

	ipath_dbg("Shutting down the device\n");

	/* mark link state unknown and drop all "up"/initialized flags */
	dd->ipath_flags |= IPATH_LINKUNK;
	dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
			     IPATH_LINKINIT | IPATH_LINKARMED |
			     IPATH_LINKACTIVE);
	*dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
				IPATH_STATUS_IB_READY);

	/* mask interrupts, but not errors */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);

	/* disable all receive traffic */
	dd->ipath_rcvctrl = 0;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl);

	/*
	 * gracefully stop all sends allowing any in progress to trickle out
	 * first.
	 */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0ULL);
	/* flush it */
	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
	/*
	 * enough for anything that's going to trickle out to have actually
	 * done so.
	 */
	udelay(5);

	/*
	 * abort any armed or launched PIO buffers that didn't go. (self
	 * clearing). Will cause any packet currently being transmitted to
	 * go out with an EBP, and may also cause a short packet error on
	 * the receiver.
	 */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
			 INFINIPATH_S_ABORT);

	ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
			    INFINIPATH_IBCC_LINKINITCMD_SHIFT);

	/*
	 * we are shutting down, so tell the layered driver. We don't do
	 * this on just a link state change, much like ethernet, a cable
	 * unplug, etc. doesn't change driver state
	 */
	ipath_layer_intr(dd, IPATH_LAYER_INT_IF_DOWN);

	/* disable IBC */
	dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
			 dd->ipath_control);

	/*
	 * clear SerdesEnable and turn the leds off; do this here because
	 * we are unloading, so don't count on interrupts to move along
	 * Turn the LEDs off explictly for the same reason.
	 */
	dd->ipath_f_quiet_serdes(dd);
	dd->ipath_f_setextled(dd, 0, 0);

	if (dd->ipath_stats_timer_active) {
		del_timer_sync(&dd->ipath_stats_timer);
		dd->ipath_stats_timer_active = 0;
	}

	/*
	 * clear all interrupts and errors, so that the next time the driver
	 * is loaded or device is enabled, we know that whatever is set
	 * happened while we were unloaded
	 */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
			 ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
}
1657
1658/**
1659 * ipath_free_pddata - free a port's allocated data
1660 * @dd: the infinipath device
1661 * @port: the port
1662 * @freehdrq: free the port data structure if true
1663 *
1664 * when closing, free up any allocated data for a port, if the
1665 * reference count goes to zero
1666 * Note: this also optionally frees the portdata itself!
1667 * Any changes here have to be matched up with the reinit case
1668 * of ipath_init_chip(), which calls this routine on reinit after reset.
1669 */
1670void ipath_free_pddata(struct ipath_devdata *dd, u32 port, int freehdrq)
1671{
1672 struct ipath_portdata *pd = dd->ipath_pd[port];
1673
1674 if (!pd)
1675 return;
1676 if (freehdrq)
1677 /*
1678 * only clear and free portdata if we are going to also
1679 * release the hdrq, otherwise we leak the hdrq on each
1680 * open/close cycle
1681 */
1682 dd->ipath_pd[port] = NULL;
1683 if (freehdrq && pd->port_rcvhdrq) {
1684 ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
1685 "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
1686 (unsigned long) pd->port_rcvhdrq_size);
1687 dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
1688 pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
1689 pd->port_rcvhdrq = NULL;
1690 }
1691 if (port && pd->port_rcvegrbuf) {
1692 /* always free this */
1693 if (pd->port_rcvegrbuf) {
1694 unsigned e;
1695
1696 for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
1697 void *base = pd->port_rcvegrbuf[e];
1698 size_t size = pd->port_rcvegrbuf_size;
1699
1700 ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
1701 "chunk %u/%u\n", base,
1702 (unsigned long) size,
1703 e, pd->port_rcvegrbuf_chunks);
1704 dma_free_coherent(
1705 &dd->pcidev->dev, size, base,
1706 pd->port_rcvegrbuf_phys[e]);
1707 }
1708 vfree(pd->port_rcvegrbuf);
1709 pd->port_rcvegrbuf = NULL;
1710 vfree(pd->port_rcvegrbuf_phys);
1711 pd->port_rcvegrbuf_phys = NULL;
1712 }
1713 pd->port_rcvegrbuf_chunks = 0;
1714 } else if (port == 0 && dd->ipath_port0_skbs) {
1715 unsigned e;
1716 struct sk_buff **skbs = dd->ipath_port0_skbs;
1717
1718 dd->ipath_port0_skbs = NULL;
1719 ipath_cdbg(VERBOSE, "free closed port %d ipath_port0_skbs "
1720 "@ %p\n", pd->port_port, skbs);
1721 for (e = 0; e < dd->ipath_rcvegrcnt; e++)
1722 if (skbs[e])
1723 dev_kfree_skb(skbs[e]);
1724 vfree(skbs);
1725 }
1726 if (freehdrq) {
1727 kfree(pd->port_tid_pg_list);
1728 kfree(pd);
1729 }
1730}
1731
/*
 * Module entry point: set up the unit-number idr, register the PCI
 * driver, and create the driver-level sysfs and ipathfs entries.
 * Unwinds in reverse order via the goto chain on any failure.
 */
static int __init infinipath_init(void)
{
	int ret;

	ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ipath_core_version);

	/*
	 * These must be called before the driver is registered with
	 * the PCI subsystem.
	 */
	idr_init(&unit_table);
	if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto bail;
	}

	ret = pci_register_driver(&ipath_driver);
	if (ret < 0) {
		printk(KERN_ERR IPATH_DRV_NAME
		       ": Unable to register driver: error %d\n", -ret);
		goto bail_unit;
	}

	ret = ipath_driver_create_group(&ipath_driver.driver);
	if (ret < 0) {
		printk(KERN_ERR IPATH_DRV_NAME ": Unable to create driver "
		       "sysfs entries: error %d\n", -ret);
		goto bail_pci;
	}

	ret = ipath_init_ipathfs();
	if (ret < 0) {
		printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
		       "ipathfs: error %d\n", -ret);
		goto bail_group;
	}

	goto bail;

bail_group:
	ipath_driver_remove_group(&ipath_driver.driver);

bail_pci:
	pci_unregister_driver(&ipath_driver);

bail_unit:
	idr_destroy(&unit_table);

bail:
	return ret;
}
1783
/*
 * Tear down one device at module unload: quiesce the chip, release
 * DMA memory, unpin any expected-TID user pages still locked, and
 * free all per-port data.  Does not free the devdata itself.
 */
static void cleanup_device(struct ipath_devdata *dd)
{
	int port;

	ipath_shutdown_device(dd);

	if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
		/* can't do anything more with chip; needs re-init */
		*dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
		if (dd->ipath_kregbase) {
			/*
			 * if we haven't already cleaned up before these are
			 * to ensure any register reads/writes "fail" until
			 * re-init
			 */
			dd->ipath_kregbase = NULL;
			dd->ipath_kregvirt = NULL;
			dd->ipath_uregbase = 0;
			dd->ipath_sregbase = 0;
			dd->ipath_cregbase = 0;
			dd->ipath_kregsize = 0;
		}
		ipath_disable_wc(dd);
	}

	/* free the DMA'ed copy of the pioavail registers */
	if (dd->ipath_pioavailregs_dma) {
		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
				  (void *) dd->ipath_pioavailregs_dma,
				  dd->ipath_pioavailregs_phys);
		dd->ipath_pioavailregs_dma = NULL;
	}

	if (dd->ipath_pageshadow) {
		struct page **tmpp = dd->ipath_pageshadow;
		int i, cnt = 0;

		/*
		 * release any user pages still pinned for expected-TID
		 * receive; the shadow array tracks one entry per TID
		 * per port
		 */
		ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
			   "locked\n");
		for (port = 0; port < dd->ipath_cfgports; port++) {
			int port_tidbase = port * dd->ipath_rcvtidcnt;
			int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
			for (i = port_tidbase; i < maxtid; i++) {
				if (!tmpp[i])
					continue;
				ipath_release_user_pages(&tmpp[i], 1);
				tmpp[i] = NULL;
				cnt++;
			}
		}
		if (cnt) {
			ipath_stats.sps_pageunlocks += cnt;
			ipath_cdbg(VERBOSE, "There were still %u expTID "
				   "entries locked\n", cnt);
		}
		if (ipath_stats.sps_pagelocks ||
		    ipath_stats.sps_pageunlocks)
			ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
				   "unlocked via ipath_m{un}lock\n",
				   (unsigned long long)
				   ipath_stats.sps_pagelocks,
				   (unsigned long long)
				   ipath_stats.sps_pageunlocks);

		ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
			   dd->ipath_pageshadow);
		vfree(dd->ipath_pageshadow);
		dd->ipath_pageshadow = NULL;
	}

	/*
	 * free any resources still in use (usually just kernel ports)
	 * at unload
	 */
	for (port = 0; port < dd->ipath_cfgports; port++)
		ipath_free_pddata(dd, port, 1);
	kfree(dd->ipath_pd);
	/*
	 * debuggability, in case some cleanup path tries to use it
	 * after this
	 */
	dd->ipath_pd = NULL;
}
1866
/*
 * Module exit: remove ipathfs and sysfs entries, clean up every
 * device on the global list, then unregister the PCI driver and
 * destroy the unit idr.
 */
static void __exit infinipath_cleanup(void)
{
	struct ipath_devdata *dd, *tmp;
	unsigned long flags;

	ipath_exit_ipathfs();

	ipath_driver_remove_group(&ipath_driver.driver);

	spin_lock_irqsave(&ipath_devs_lock, flags);

	/*
	 * turn off rcv, send, and interrupts for all ports, all drivers
	 * should also hard reset the chip here?
	 * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
	 * for all versions of the driver, if they were allocated
	 */
	list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
		/*
		 * drop the lock while tearing down this device;
		 * cleanup_device() sleeps (DMA frees, del_timer_sync),
		 * so it cannot run with the spinlock held
		 */
		spin_unlock_irqrestore(&ipath_devs_lock, flags);

		if (dd->ipath_kregbase)
			cleanup_device(dd);

		if (dd->pcidev) {
			if (dd->pcidev->irq) {
				ipath_cdbg(VERBOSE,
					   "unit %u free_irq of irq %x\n",
					   dd->ipath_unit, dd->pcidev->irq);
				free_irq(dd->pcidev->irq, dd);
			} else
				ipath_dbg("irq is 0, not doing free_irq "
					  "for unit %u\n", dd->ipath_unit);
			dd->pcidev = NULL;
		}

		/*
		 * we check for NULL here, because it's outside the kregbase
		 * check, and we need to call it after the free_irq. Thus
		 * it's possible that the function pointers were never
		 * initialized.
		 */
		if (dd->ipath_f_cleanup)
			/* clean up chip-specific stuff */
			dd->ipath_f_cleanup(dd);

		spin_lock_irqsave(&ipath_devs_lock, flags);
	}

	spin_unlock_irqrestore(&ipath_devs_lock, flags);

	ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
	pci_unregister_driver(&ipath_driver);

	idr_destroy(&unit_table);
}
1922
/**
 * ipath_reset_device - reset the chip if possible
 * @unit: the device to reset
 *
 * Whether or not reset is successful, we attempt to re-initialize the chip
 * (that is, much like a driver unload/reload). We clear the INITTED flag
 * so that the various entry points will fail until we reinitialize. For
 * now, we only allow this if no user ports are open that use chip resources
 *
 * Returns 0 on success; -ENODEV if the unit doesn't exist, -ENXIO if
 * it isn't usable, -EBUSY if a user port is open, or the error from
 * ipath_init_chip().
 */
int ipath_reset_device(int unit)
{
	int ret, i;
	struct ipath_devdata *dd = ipath_lookup(unit);

	if (!dd) {
		ret = -ENODEV;
		goto bail;
	}

	dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);

	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
		dev_info(&dd->pcidev->dev, "Invalid unit number %u or "
			 "not initialized or not present\n", unit);
		ret = -ENXIO;
		goto bail;
	}

	/* refuse to reset while any user port (1..N) is in use */
	if (dd->ipath_pd)
		for (i = 1; i < dd->ipath_portcnt; i++) {
			if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) {
				ipath_dbg("unit %u port %d is in use "
					  "(PID %u cmd %s), can't reset\n",
					  unit, i,
					  dd->ipath_pd[i]->port_pid,
					  dd->ipath_pd[i]->port_comm);
				ret = -EBUSY;
				goto bail;
			}
		}

	/* block entry points until re-init completes */
	dd->ipath_flags &= ~IPATH_INITTED;
	ret = dd->ipath_f_reset(dd);
	if (ret != 1)
		ipath_dbg("reset was not successful\n");
	ipath_dbg("Trying to reinitialize unit %u after reset attempt\n",
		  unit);
	/* reinit regardless of whether the hardware reset worked */
	ret = ipath_init_chip(dd, 1);
	if (ret)
		ipath_dev_err(dd, "Reinitialize unit %u after "
			      "reset failed with %d\n", unit, ret);
	else
		dev_info(&dd->pcidev->dev, "Reinitialized unit %u after "
			 "resetting\n", unit);

bail:
	return ret;
}
1981
/* module load/unload entry points */
module_init(infinipath_init);
module_exit(infinipath_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
new file mode 100644
index 000000000000..f11a900e8cd7
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -0,0 +1,613 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/delay.h>
34#include <linux/pci.h>
35#include <linux/vmalloc.h>
36
37#include "ipath_kernel.h"
38
39/*
40 * InfiniPath I2C driver for a serial eeprom. This is not a generic
41 * I2C interface. For a start, the device we're using (Atmel AT24C11)
42 * doesn't work like a regular I2C device. It looks like one
43 * electrically, but not logically. Normal I2C devices have a single
44 * 7-bit or 10-bit I2C address that they respond to. Valid 7-bit
45 * addresses range from 0x03 to 0x77. Addresses 0x00 to 0x02 and 0x78
46 * to 0x7F are special reserved addresses (e.g. 0x00 is the "general
47 * call" address.) The Atmel device, on the other hand, responds to ALL
48 * 7-bit addresses. It's designed to be the only device on a given I2C
49 * bus. A 7-bit address corresponds to the memory address within the
50 * Atmel device itself.
51 *
52 * Also, the timing requirements mean more than simple software
53 * bitbanging, with readbacks from chip to ensure timing (simple udelay
54 * is not enough).
55 *
56 * This all means that accessing the device is specialized enough
57 * that using the standard kernel I2C bitbanging interface would be
58 * impossible. For example, the core I2C eeprom driver expects to find
59 * a device at one or more of a limited set of addresses only. It doesn't
60 * allow writing to an eeprom. It also doesn't provide any means of
61 * accessing eeprom contents from within the kernel, only via sysfs.
62 */
63
/* the two GPIO lines used to bit-bang the I2C bus */
enum i2c_type {
	i2c_line_scl = 0,	/* clock line */
	i2c_line_sda		/* data line */
};

/* logical line levels; "high" is achieved by tri-stating the pin */
enum i2c_state {
	i2c_line_low = 0,
	i2c_line_high
};

/* direction bit OR'd into the shifted eeprom offset byte */
#define READ_CMD 1
#define WRITE_CMD 0

/* set once eeprom_reset() has run; read/write call it lazily if clear */
static int eeprom_init;
78
79/*
80 * The gpioval manipulation really should be protected by spinlocks
81 * or be converted to use atomic operations.
82 */
83
/**
 * i2c_gpio_set - set a GPIO line
 * @dd: the infinipath device
 * @line: the line to set
 * @new_line_state: the state to set
 *
 * Returns 0 if the line was set to the new state successfully, non-zero
 * on error.  (As written it always returns 0.)
 */
static int i2c_gpio_set(struct ipath_devdata *dd,
			enum i2c_type line,
			enum i2c_state new_line_state)
{
	u64 read_val, write_val, mask, *gpioval;

	/* cached copy of the gpio_out register, updated below */
	gpioval = &dd->ipath_gpio_out;
	read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
	if (line == i2c_line_scl)
		mask = ipath_gpio_scl;
	else
		mask = ipath_gpio_sda;

	if (new_line_state == i2c_line_high)
		/* tri-state the output rather than force high */
		write_val = read_val & ~mask;
	else
		/* config line to be an output */
		write_val = read_val | mask;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);

	/* set high and verify */
	if (new_line_state == i2c_line_high)
		write_val = 0x1UL;
	else
		write_val = 0x0UL;

	/* update only this line's bit in the cached output value */
	if (line == i2c_line_scl) {
		write_val <<= ipath_gpio_scl_num;
		*gpioval = *gpioval & ~(1UL << ipath_gpio_scl_num);
		*gpioval |= write_val;
	} else {
		write_val <<= ipath_gpio_sda_num;
		*gpioval = *gpioval & ~(1UL << ipath_gpio_sda_num);
		*gpioval |= write_val;
	}
	ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);

	return 0;
}
133
134/**
135 * i2c_gpio_get - get a GPIO line state
136 * @dd: the infinipath device
137 * @line: the line to get
138 * @curr_statep: where to put the line state
139 *
140 * Returns 0 if the line was set to the new state successfully, non-zero
141 * on error. curr_state is not set on error.
142 */
143static int i2c_gpio_get(struct ipath_devdata *dd,
144 enum i2c_type line,
145 enum i2c_state *curr_statep)
146{
147 u64 read_val, write_val, mask;
148 int ret;
149
150 /* check args */
151 if (curr_statep == NULL) {
152 ret = 1;
153 goto bail;
154 }
155
156 read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
157 /* config line to be an input */
158 if (line == i2c_line_scl)
159 mask = ipath_gpio_scl;
160 else
161 mask = ipath_gpio_sda;
162 write_val = read_val & ~mask;
163 ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
164 read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
165
166 if (read_val & mask)
167 *curr_statep = i2c_line_high;
168 else
169 *curr_statep = i2c_line_low;
170
171 ret = 0;
172
173bail:
174 return ret;
175}
176
/**
 * i2c_wait_for_writes - wait for a write
 * @dd: the infinipath device
 *
 * We use this instead of udelay directly, so we can make sure
 * that previous register writes have been flushed all the way
 * to the chip. Since we are delaying anyway, the cost doesn't
 * hurt, and makes the bit twiddling more regular
 */
static void i2c_wait_for_writes(struct ipath_devdata *dd)
{
	/* a read from the scratch register forces prior writes to post */
	(void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
}
190
191static void scl_out(struct ipath_devdata *dd, u8 bit)
192{
193 i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low);
194
195 i2c_wait_for_writes(dd);
196}
197
198static void sda_out(struct ipath_devdata *dd, u8 bit)
199{
200 i2c_gpio_set(dd, i2c_line_sda, bit ? i2c_line_high : i2c_line_low);
201
202 i2c_wait_for_writes(dd);
203}
204
205static u8 sda_in(struct ipath_devdata *dd, int wait)
206{
207 enum i2c_state bit;
208
209 if (i2c_gpio_get(dd, i2c_line_sda, &bit))
210 ipath_dbg("get bit failed!\n");
211
212 if (wait)
213 i2c_wait_for_writes(dd);
214
215 return bit == i2c_line_high ? 1U : 0;
216}
217
/**
 * i2c_ackrcv - see if ack following write is true
 * @dd: the infinipath device
 *
 * Returns non-zero if the device pulled SDA low (ack) during the
 * ninth clock pulse.
 */
static int i2c_ackrcv(struct ipath_devdata *dd)
{
	u8 ack_received;

	/* AT ENTRY SCL = LOW */
	/* change direction, ignore data */
	ack_received = sda_in(dd, 1);
	/* sample SDA while SCL is high; low means the device acked */
	scl_out(dd, i2c_line_high);
	ack_received = sda_in(dd, 1) == 0;
	scl_out(dd, i2c_line_low);
	return ack_received;
}
234
235/**
236 * wr_byte - write a byte, one bit at a time
237 * @dd: the infinipath device
238 * @data: the byte to write
239 *
240 * Returns 0 if we got the following ack, otherwise 1
241 */
242static int wr_byte(struct ipath_devdata *dd, u8 data)
243{
244 int bit_cntr;
245 u8 bit;
246
247 for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) {
248 bit = (data >> bit_cntr) & 1;
249 sda_out(dd, bit);
250 scl_out(dd, i2c_line_high);
251 scl_out(dd, i2c_line_low);
252 }
253 return (!i2c_ackrcv(dd)) ? 1 : 0;
254}
255
/* master ack: hold SDA low across one SCL pulse, then release SDA */
static void send_ack(struct ipath_devdata *dd)
{
	sda_out(dd, i2c_line_low);
	scl_out(dd, i2c_line_high);
	scl_out(dd, i2c_line_low);
	sda_out(dd, i2c_line_high);
}
263
/**
 * i2c_startcmd - transmit the start condition, followed by address/cmd
 * @dd: the infinipath device
 * @offset_dir: direction byte
 *
 * (both clock/data high, clock high, data low while clock is high)
 *
 * Returns non-zero if the device did not ack the address/cmd byte.
 */
static int i2c_startcmd(struct ipath_devdata *dd, u8 offset_dir)
{
	int res;

	/* issue start sequence */
	sda_out(dd, i2c_line_high);
	scl_out(dd, i2c_line_high);
	sda_out(dd, i2c_line_low);
	scl_out(dd, i2c_line_low);

	/* issue length and direction byte */
	res = wr_byte(dd, offset_dir);

	if (res)
		ipath_cdbg(VERBOSE, "No ack to complete start\n");

	return res;
}
289
/**
 * stop_cmd - transmit the stop condition
 * @dd: the infinipath device
 *
 * (both clock/data low, clock high, data high while clock is high)
 */
static void stop_cmd(struct ipath_devdata *dd)
{
	scl_out(dd, i2c_line_low);
	sda_out(dd, i2c_line_low);
	scl_out(dd, i2c_line_high);
	sda_out(dd, i2c_line_high);
	/* let the bus settle before the next transaction */
	udelay(2);
}
304
/**
 * eeprom_reset - reset I2C communication
 * @dd: the infinipath device
 *
 * Clocks up to 9 SCL pulses until the device releases SDA (the
 * standard I2C bus-recovery sequence), then leaves both lines low.
 * Returns 0 once SDA reads high, 1 if it never does.
 */

static int eeprom_reset(struct ipath_devdata *dd)
{
	int clock_cycles_left = 9;
	u64 *gpioval = &dd->ipath_gpio_out;
	int ret;

	eeprom_init = 1;
	/* seed the cached gpio_out value from the hardware */
	*gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out);
	ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg "
		   "is %llx\n", (unsigned long long) *gpioval);

	/*
	 * This is to get the i2c into a known state, by first going low,
	 * then tristate sda (and then tristate scl as first thing
	 * in loop)
	 */
	scl_out(dd, i2c_line_low);
	sda_out(dd, i2c_line_high);

	while (clock_cycles_left--) {
		scl_out(dd, i2c_line_high);

		if (sda_in(dd, 0)) {
			/* device released the bus; drive both lines low */
			sda_out(dd, i2c_line_low);
			scl_out(dd, i2c_line_low);
			ret = 0;
			goto bail;
		}

		scl_out(dd, i2c_line_low);
	}

	ret = 1;

bail:
	return ret;
}
347
348/**
349 * ipath_eeprom_read - receives bytes from the eeprom via I2C
350 * @dd: the infinipath device
351 * @eeprom_offset: address to read from
352 * @buffer: where to store result
353 * @len: number of bytes to receive
354 */
355
356int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
357 void *buffer, int len)
358{
359 /* compiler complains unless initialized */
360 u8 single_byte = 0;
361 int bit_cntr;
362 int ret;
363
364 if (!eeprom_init)
365 eeprom_reset(dd);
366
367 eeprom_offset = (eeprom_offset << 1) | READ_CMD;
368
369 if (i2c_startcmd(dd, eeprom_offset)) {
370 ipath_dbg("Failed startcmd\n");
371 stop_cmd(dd);
372 ret = 1;
373 goto bail;
374 }
375
376 /*
377 * eeprom keeps clocking data out as long as we ack, automatically
378 * incrementing the address.
379 */
380 while (len-- > 0) {
381 /* get data */
382 single_byte = 0;
383 for (bit_cntr = 8; bit_cntr; bit_cntr--) {
384 u8 bit;
385 scl_out(dd, i2c_line_high);
386 bit = sda_in(dd, 0);
387 single_byte |= bit << (bit_cntr - 1);
388 scl_out(dd, i2c_line_low);
389 }
390
391 /* send ack if not the last byte */
392 if (len)
393 send_ack(dd);
394
395 *((u8 *) buffer) = single_byte;
396 buffer++;
397 }
398
399 stop_cmd(dd);
400
401 ret = 0;
402
403bail:
404 return ret;
405}
406
/**
 * ipath_eeprom_write - writes data to the eeprom via I2C
 * @dd: the infinipath device
 * @eeprom_offset: where to place data
 * @buffer: data to write
 * @len: number of bytes to write
 *
 * Writes in chunks of at most 4 bytes, polling for write completion
 * after each chunk.  Returns 0 on success, 1 on any failure.
 */
int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
		       const void *buffer, int len)
{
	u8 single_byte;
	int sub_len;
	const u8 *bp = buffer;
	int max_wait_time, i;
	int ret;

	if (!eeprom_init)
		eeprom_reset(dd);

	while (len > 0) {
		if (i2c_startcmd(dd, (eeprom_offset << 1) | WRITE_CMD)) {
			ipath_dbg("Failed to start cmd offset %u\n",
				  eeprom_offset);
			goto failed_write;
		}

		/* write at most 4 bytes per transaction */
		sub_len = min(len, 4);
		eeprom_offset += sub_len;
		len -= sub_len;

		for (i = 0; i < sub_len; i++) {
			if (wr_byte(dd, *bp++)) {
				ipath_dbg("no ack after byte %u/%u (%u "
					  "total remain)\n", i, sub_len,
					  len + sub_len - i);
				goto failed_write;
			}
		}

		stop_cmd(dd);

		/*
		 * wait for write complete by waiting for a successful
		 * read (the chip replies with a zero after the write
		 * cmd completes, and before it writes to the eeprom.
		 * The startcmd for the read will fail the ack until
		 * the writes have completed. We do this inline to avoid
		 * the debug prints that are in the real read routine
		 * if the startcmd fails.
		 */
		max_wait_time = 100;
		while (i2c_startcmd(dd, READ_CMD)) {
			stop_cmd(dd);
			if (!--max_wait_time) {
				ipath_dbg("Did not get successful read to "
					  "complete write\n");
				goto failed_write;
			}
		}
		/* now read the zero byte */
		/* (the value itself is discarded; only the clocking matters) */
		for (i = single_byte = 0; i < 8; i++) {
			u8 bit;
			scl_out(dd, i2c_line_high);
			bit = sda_in(dd, 0);
			scl_out(dd, i2c_line_low);
			single_byte <<= 1;
			single_byte |= bit;
		}
		stop_cmd(dd);
	}

	ret = 0;
	goto bail;

failed_write:
	stop_cmd(dd);
	ret = 1;

bail:
	return ret;
}
488
489static u8 flash_csum(struct ipath_flash *ifp, int adjust)
490{
491 u8 *ip = (u8 *) ifp;
492 u8 csum = 0, len;
493
494 for (len = 0; len < ifp->if_length; len++)
495 csum += *ip++;
496 csum -= ifp->if_csum;
497 csum = ~csum;
498 if (adjust)
499 ifp->if_csum = csum;
500
501 return csum;
502}
503
504/**
505 * ipath_get_guid - get the GUID from the i2c device
506 * @dd: the infinipath device
507 *
508 * When we add the multi-chip support, we will probably have to add
509 * the ability to use the number of guids field, and get the guid from
510 * the first chip's flash, to use for all of them.
511 */
512void ipath_get_guid(struct ipath_devdata *dd)
513{
514 void *buf;
515 struct ipath_flash *ifp;
516 __be64 guid;
517 int len;
518 u8 csum, *bguid;
519 int t = dd->ipath_unit;
520 struct ipath_devdata *dd0 = ipath_lookup(0);
521
522 if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) {
523 u8 *bguid, oguid;
524 dd->ipath_guid = dd0->ipath_guid;
525 bguid = (u8 *) & dd->ipath_guid;
526
527 oguid = bguid[7];
528 bguid[7] += t;
529 if (oguid > bguid[7]) {
530 if (bguid[6] == 0xff) {
531 if (bguid[5] == 0xff) {
532 ipath_dev_err(
533 dd,
534 "Can't set %s GUID from "
535 "base, wraps to OUI!\n",
536 ipath_get_unit_name(t));
537 dd->ipath_guid = 0;
538 goto bail;
539 }
540 bguid[5]++;
541 }
542 bguid[6]++;
543 }
544 dd->ipath_nguid = 1;
545
546 ipath_dbg("nguid %u, so adding %u to device 0 guid, "
547 "for %llx\n",
548 dd0->ipath_nguid, t,
549 (unsigned long long) be64_to_cpu(dd->ipath_guid));
550 goto bail;
551 }
552
553 len = offsetof(struct ipath_flash, if_future);
554 buf = vmalloc(len);
555 if (!buf) {
556 ipath_dev_err(dd, "Couldn't allocate memory to read %u "
557 "bytes from eeprom for GUID\n", len);
558 goto bail;
559 }
560
561 if (ipath_eeprom_read(dd, 0, buf, len)) {
562 ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
563 goto done;
564 }
565 ifp = (struct ipath_flash *)buf;
566
567 csum = flash_csum(ifp, 0);
568 if (csum != ifp->if_csum) {
569 dev_info(&dd->pcidev->dev, "Bad I2C flash checksum: "
570 "0x%x, not 0x%x\n", csum, ifp->if_csum);
571 goto done;
572 }
573 if (*(__be64 *) ifp->if_guid == 0ULL ||
574 *(__be64 *) ifp->if_guid == __constant_cpu_to_be64(-1LL)) {
575 ipath_dev_err(dd, "Invalid GUID %llx from flash; "
576 "ignoring\n",
577 *(unsigned long long *) ifp->if_guid);
578 /* don't allow GUID if all 0 or all 1's */
579 goto done;
580 }
581
582 /* complain, but allow it */
583 if (*(u64 *) ifp->if_guid == 0x100007511000000ULL)
584 dev_info(&dd->pcidev->dev, "Warning, GUID %llx is "
585 "default, probably not correct!\n",
586 *(unsigned long long *) ifp->if_guid);
587
588 bguid = ifp->if_guid;
589 if (!bguid[0] && !bguid[1] && !bguid[2]) {
590 /* original incorrect GUID format in flash; fix in
591 * core copy, by shifting up 2 octets; don't need to
592 * change top octet, since both it and shifted are
593 * 0.. */
594 bguid[1] = bguid[3];
595 bguid[2] = bguid[4];
596 bguid[3] = bguid[4] = 0;
597 guid = *(__be64 *) ifp->if_guid;
598 ipath_cdbg(VERBOSE, "Old GUID format in flash, top 3 zero, "
599 "shifting 2 octets\n");
600 } else
601 guid = *(__be64 *) ifp->if_guid;
602 dd->ipath_guid = guid;
603 dd->ipath_nguid = ifp->if_numguid;
604 memcpy(dd->ipath_serial, ifp->if_serial,
605 sizeof(ifp->if_serial));
606 ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
607 (unsigned long long) be64_to_cpu(dd->ipath_guid));
608
609done:
610 vfree(buf);
611
612bail:;
613}
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
new file mode 100644
index 000000000000..c347191f02bf
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -0,0 +1,1910 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/pci.h>
34#include <linux/poll.h>
35#include <linux/cdev.h>
36#include <linux/swap.h>
37#include <linux/vmalloc.h>
38#include <asm/pgtable.h>
39
40#include "ipath_kernel.h"
41#include "ips_common.h"
42#include "ipath_layer.h"
43
/* forward declarations for the file_operations entry points below */
static int ipath_open(struct inode *, struct file *);
static int ipath_close(struct inode *, struct file *);
static ssize_t ipath_write(struct file *, const char __user *, size_t,
			   loff_t *);
static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
static int ipath_mmap(struct file *, struct vm_area_struct *);

/* user-space interface to the driver: registered for the ipath device nodes */
static struct file_operations ipath_file_ops = {
	.owner = THIS_MODULE,
	.write = ipath_write,
	.open = ipath_open,
	.release = ipath_close,
	.poll = ipath_poll,
	.mmap = ipath_mmap
};
59
/*
 * ipath_get_base_info - copy per-port/per-chip parameters out to user space
 * @pd: the port
 * @ubase: user buffer to receive a struct ipath_base_info
 * @ubase_size: size of the user buffer; must hold the full struct
 *
 * Returns 0 on success; -EINVAL if the user buffer is too small (likely
 * a user/driver version mismatch), -ENOMEM on allocation failure,
 * -EFAULT if the copy out fails, or a negative value from the
 * chip-specific f_get_base_info hook.
 */
static int ipath_get_base_info(struct ipath_portdata *pd,
			       void __user *ubase, size_t ubase_size)
{
	int ret = 0;
	struct ipath_base_info *kinfo = NULL;
	struct ipath_devdata *dd = pd->port_dd;

	if (ubase_size < sizeof(*kinfo)) {
		ipath_cdbg(PROC,
			   "Base size %lu, need %lu (version mismatch?)\n",
			   (unsigned long) ubase_size,
			   (unsigned long) sizeof(*kinfo));
		ret = -EINVAL;
		goto bail;
	}

	kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL);
	if (kinfo == NULL) {
		ret = -ENOMEM;
		goto bail;
	}

	/* let the chip-specific code fill in its fields first */
	ret = dd->ipath_f_get_base_info(pd, kinfo);
	if (ret < 0)
		goto bail;

	kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt;
	kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize;
	kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt;
	kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize;
	/*
	 * have to mmap whole thing
	 */
	kinfo->spi_rcv_egrbuftotlen =
		pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
	kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
	kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
		pd->port_rcvegrbuf_chunks;
	kinfo->spi_tidcnt = dd->ipath_rcvtidcnt;
	/*
	 * for this use, may be ipath_cfgports summed over all chips that
	 * are configured and present
	 */
	kinfo->spi_nports = dd->ipath_cfgports;
	/* unit (chip/board) our port is on */
	kinfo->spi_unit = dd->ipath_unit;
	/* for now, only a single page */
	kinfo->spi_tid_maxsize = PAGE_SIZE;

	/*
	 * Doing this per port, and based on the skip value, etc.  This has
	 * to be the actual buffer size, since the protocol code treats it
	 * as an array.
	 *
	 * These have to be set to user addresses in the user code via mmap.
	 * These values are used on return to user code for the mmap target
	 * addresses only.  For 32 bit, same 44 bit address problem, so use
	 * the physical address, not virtual.  Before 2.6.11, using the
	 * page_address() macro worked, but in 2.6.11, even that returns the
	 * full 64 bit address (upper bits all 1's).  So far, using the
	 * physical addresses (or chip offsets, for chip mapping) works, but
	 * no doubt some future kernel release will change that, and we'll be
	 * on to yet another method of dealing with this.
	 */
	kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
	kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
	kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
	/* spi_status lives at a fixed offset within the pioavail page */
	kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
		(void *) dd->ipath_statusp -
		(void *) dd->ipath_pioavailregs_dma;
	kinfo->spi_piobufbase = (u64) pd->port_piobufs;
	kinfo->__spi_uregbase =
		dd->ipath_uregbase + dd->ipath_palign * pd->port_port;

	kinfo->spi_pioindex = dd->ipath_pbufsport * (pd->port_port - 1);
	kinfo->spi_piocnt = dd->ipath_pbufsport;
	kinfo->spi_pioalign = dd->ipath_palign;

	kinfo->spi_qpair = IPATH_KD_QP;
	kinfo->spi_piosize = dd->ipath_ibmaxlen;
	kinfo->spi_mtu = dd->ipath_ibmaxlen;	/* maxlen, not ibmtu */
	kinfo->spi_port = pd->port_port;
	kinfo->spi_sw_version = IPATH_USER_SWVERSION;
	kinfo->spi_hw_version = dd->ipath_revision;

	if (copy_to_user(ubase, kinfo, sizeof(*kinfo)))
		ret = -EFAULT;

bail:
	kfree(kinfo);
	return ret;
}
152
/**
 * ipath_tid_update - update a port TID
 * @pd: the port
 * @ti: the TID information
 *
 * The new implementation as of Oct 2004 is that the driver assigns
 * the tid and returns it to the caller.  To make it easier to
 * catch bugs, and to reduce search time, we keep a cursor for
 * each port, walking the shadow tid array to find one that's not
 * in use.
 *
 * For now, if we can't allocate the full list, we fail, although
 * in the long run, we'll allocate as many as we can, and the
 * caller will deal with that by trying the remaining pages later.
 * That means that when we fail, we have to mark the tids as not in
 * use again, in our shadow copy.
 *
 * It's up to the caller to free the tids when they are done.
 * We'll unlock the pages as they free them.
 *
 * Also, right now we are locking one page at a time, but since
 * the intended use of this routine is for a single group of
 * virtually contiguous pages, that should change to improve
 * performance.
 */
static int ipath_tid_update(struct ipath_portdata *pd,
			    const struct ipath_tid_info *ti)
{
	int ret = 0, ntids;
	u32 tid, porttid, cnt, i, tidcnt;
	u16 *tidlist;
	struct ipath_devdata *dd = pd->port_dd;
	u64 physaddr;
	unsigned long vaddr;
	u64 __iomem *tidbase;
	unsigned long tidmap[8];	/* bitmap of tids assigned this call */
	struct page **pagep = NULL;

	/* the page shadow array is allocated at init; without it we
	 * cannot track locked pages, so refuse the request */
	if (!dd->ipath_pageshadow) {
		ret = -ENOMEM;
		goto done;
	}

	cnt = ti->tidcnt;
	if (!cnt) {
		ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n",
			  (unsigned long long) ti->tidlist);
		/*
		 * Should we treat as success?  likely a bug
		 */
		ret = -EFAULT;
		goto done;
	}
	tidcnt = dd->ipath_rcvtidcnt;
	if (cnt >= tidcnt) {
		/* make sure it all fits in port_tid_pg_list */
		dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
			 "TIDs, only trying max (%u)\n", cnt, tidcnt);
		cnt = tidcnt;
	}
	/* port_tid_pg_list holds cnt page pointers, immediately followed
	 * by the u16 tid list returned to the user */
	pagep = (struct page **)pd->port_tid_pg_list;
	tidlist = (u16 *) (&pagep[cnt]);

	memset(tidmap, 0, sizeof(tidmap));
	tid = pd->port_tidcursor;
	/* before decrement; chip actual # */
	porttid = pd->port_port * tidcnt;
	ntids = tidcnt;
	tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
				   dd->ipath_rcvtidbase +
				   porttid * sizeof(*tidbase));

	ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n",
		   pd->port_port, cnt, tid, tidbase);

	/* virtual address of first page in transfer */
	vaddr = ti->tidvaddr;
	if (!access_ok(VERIFY_WRITE, (void __user *) vaddr,
		       cnt * PAGE_SIZE)) {
		ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n",
			  (void *)vaddr, cnt);
		ret = -EFAULT;
		goto done;
	}
	/* pin the user pages; they stay pinned until freed via tid_free */
	ret = ipath_get_user_pages(vaddr, cnt, pagep);
	if (ret) {
		if (ret == -EBUSY) {
			ipath_dbg("Failed to lock addr %p, %u pages "
				  "(already locked)\n",
				  (void *) vaddr, cnt);
			/*
			 * for now, continue, and see what happens but with
			 * the new implementation, this should never happen,
			 * unless perhaps the user has mpin'ed the pages
			 * themselves (something we need to test)
			 */
			ret = 0;
		} else {
			dev_info(&dd->pcidev->dev,
				 "Failed to lock addr %p, %u pages: "
				 "errno %d\n", (void *) vaddr, cnt, -ret);
			goto done;
		}
	}
	for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
		/* advance the cursor to the next free tid slot; ntids is
		 * the total budget across the whole call, so exhausting it
		 * means we wrapped without finding enough free entries */
		for (; ntids--; tid++) {
			if (tid == tidcnt)
				tid = 0;
			if (!dd->ipath_pageshadow[porttid + tid])
				break;
		}
		if (ntids < 0) {
			/*
			 * oops, wrapped all the way through their TIDs,
			 * and didn't have enough free; see comments at
			 * start of routine
			 */
			ipath_dbg("Not enough free TIDs for %u pages "
				  "(index %d), failing\n", cnt, i);
			i--;	/* last tidlist[i] not filled in */
			ret = -ENOMEM;
			break;
		}
		tidlist[i] = tid;
		ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
			   "vaddr %lx\n", i, tid, vaddr);
		/* we "know" system pages and TID pages are same size */
		dd->ipath_pageshadow[porttid + tid] = pagep[i];
		/*
		 * don't need atomic or it's overhead
		 */
		__set_bit(tid, tidmap);
		physaddr = page_to_phys(pagep[i]);
		ipath_stats.sps_pagelocks++;
		ipath_cdbg(VERBOSE,
			   "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
			   tid, vaddr, (unsigned long long) physaddr,
			   pagep[i]);
		/* program the physical address into the chip's TID entry */
		dd->ipath_f_put_tid(dd, &tidbase[tid], 1, physaddr);
		/*
		 * don't check this tid in ipath_portshadow, since we
		 * just filled it in; start with the next one.
		 */
		tid++;
	}

	if (ret) {
		u32 limit;
	cleanup:
		/* jump here if copy out of updated info failed... */
		ipath_dbg("After failure (ret=%d), undo %d of %d entries\n",
			  -ret, i, cnt);
		/* same code that's in ipath_free_tid() */
		limit = sizeof(tidmap) * BITS_PER_BYTE;
		if (limit > tidcnt)
			/* just in case size changes in future */
			limit = tidcnt;
		tid = find_first_bit((const unsigned long *)tidmap, limit);
		/* invalidate every chip TID we programmed this call */
		for (; tid < limit; tid++) {
			if (!test_bit(tid, tidmap))
				continue;
			if (dd->ipath_pageshadow[porttid + tid]) {
				ipath_cdbg(VERBOSE, "Freeing TID %u\n",
					   tid);
				dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
						    dd->ipath_tidinvalid);
				dd->ipath_pageshadow[porttid + tid] = NULL;
				ipath_stats.sps_pageunlocks++;
			}
		}
		ipath_release_user_pages(pagep, cnt);
	} else {
		/*
		 * Copy the updated array, with ipath_tid's filled in, back
		 * to user.  Since we did the copy in already, this "should
		 * never fail" If it does, we have to clean up...
		 */
		if (copy_to_user((void __user *)
				 (unsigned long) ti->tidlist,
				 tidlist, cnt * sizeof(*tidlist))) {
			ret = -EFAULT;
			goto cleanup;
		}
		if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
				 tidmap, sizeof tidmap)) {
			ret = -EFAULT;
			goto cleanup;
		}
		/* remember where to resume the search next call */
		if (tid == tidcnt)
			tid = 0;
		pd->port_tidcursor = tid;
	}

done:
	if (ret)
		ipath_dbg("Failed to map %u TID pages, failing with %d\n",
			  ti->tidcnt, -ret);
	return ret;
}
352
/**
 * ipath_tid_free - free a port TID
 * @pd: the port
 * @ti: the TID info
 *
 * right now we are unlocking one page at a time, but since
 * the intended use of this routine is for a single group of
 * virtually contiguous pages, that should change to improve
 * performance.  We check that the TID is in range for this port
 * but otherwise don't check validity; if user has an error and
 * frees the wrong tid, it's only their own data that can thereby
 * be corrupted.  We do check that the TID was in use, for sanity
 * We always use our idea of the saved address, not the address that
 * they pass in to us.
 *
 * Returns 0 on success, -ENOMEM if the page shadow array is missing,
 * -EFAULT if the user-supplied tidmap can't be copied in.
 */

static int ipath_tid_free(struct ipath_portdata *pd,
			  const struct ipath_tid_info *ti)
{
	int ret = 0;
	u32 tid, porttid, cnt, limit, tidcnt;
	struct ipath_devdata *dd = pd->port_dd;
	u64 __iomem *tidbase;
	unsigned long tidmap[8];

	if (!dd->ipath_pageshadow) {
		ret = -ENOMEM;
		goto done;
	}

	/* the user tells us which tids to free via a bitmap */
	if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
			   sizeof tidmap)) {
		ret = -EFAULT;
		goto done;
	}

	porttid = pd->port_port * dd->ipath_rcvtidcnt;
	tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
				   dd->ipath_rcvtidbase +
				   porttid * sizeof(*tidbase));

	tidcnt = dd->ipath_rcvtidcnt;
	limit = sizeof(tidmap) * BITS_PER_BYTE;
	if (limit > tidcnt)
		/* just in case size changes in future */
		limit = tidcnt;
	tid = find_first_bit(tidmap, limit);
	ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) "
		   "set is %d, porttid %u\n", pd->port_port, ti->tidcnt,
		   limit, tid, porttid);
	for (cnt = 0; tid < limit; tid++) {
		/*
		 * small optimization; if we detect a run of 3 or so without
		 * any set, use find_first_bit again.  That's mainly to
		 * accelerate the case where we wrapped, so we have some at
		 * the beginning, and some at the end, and a big gap
		 * in the middle.
		 */
		if (!test_bit(tid, tidmap))
			continue;
		cnt++;
		if (dd->ipath_pageshadow[porttid + tid]) {
			ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
				   pd->port_pid, tid);
			/* invalidate the chip entry before unpinning */
			dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
					    dd->ipath_tidinvalid);
			ipath_release_user_pages(
				&dd->ipath_pageshadow[porttid + tid], 1);
			dd->ipath_pageshadow[porttid + tid] = NULL;
			ipath_stats.sps_pageunlocks++;
		} else
			ipath_dbg("Unused tid %u, ignoring\n", tid);
	}
	if (cnt != ti->tidcnt)
		ipath_dbg("passed in tidcnt %d, only %d bits set in map\n",
			  ti->tidcnt, cnt);
done:
	if (ret)
		ipath_dbg("Failed to unmap %u TID pages, failing with %d\n",
			  ti->tidcnt, -ret);
	return ret;
}
435
/**
 * ipath_set_part_key - set a partition key
 * @pd: the port
 * @key: the key
 *
 * We can have up to 4 active at a time (other than the default, which is
 * always allowed).  This is somewhat tricky, since multiple ports may set
 * the same key, so we reference count them, and clean up at exit.  All 4
 * partition keys are packed into a single infinipath register.  It's an
 * error for a process to set the same pkey multiple times.  We provide no
 * mechanism to de-allocate a pkey at this time, we may eventually need to
 * do that.  I've used the atomic operations, and no locking, and only make
 * a single pass through what's available.  This should be more than
 * adequate for some time.  I'll think about spinlocks or the like if and as
 * it's necessary.
 *
 * Returns 0 on success, -EINVAL for key 0, -EEXIST if the key (or its
 * limited/full-membership twin) is already set, -EBUSY if no slot is free.
 */
static int ipath_set_part_key(struct ipath_portdata *pd, u16 key)
{
	struct ipath_devdata *dd = pd->port_dd;
	int i, any = 0, pidx = -1;
	u16 lkey = key & 0x7FFF;	/* key with the membership bit masked off */
	int ret;

	if (lkey == (IPS_DEFAULT_P_KEY & 0x7FFF)) {
		/* nothing to do; this key always valid */
		ret = 0;
		goto bail;
	}

	ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys "
		   "%hx:%x %hx:%x %hx:%x %hx:%x\n",
		   pd->port_port, key, dd->ipath_pkeys[0],
		   atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1],
		   atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2],
		   atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3],
		   atomic_read(&dd->ipath_pkeyrefs[3]));

	if (!lkey) {
		ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n",
			   pd->port_port);
		ret = -EINVAL;
		goto bail;
	}

	/*
	 * Set the full membership bit, because it has to be
	 * set in the register or the packet, and it seems
	 * cleaner to set in the register than to force all
	 * callers to set it. (see bug 4331)
	 */
	key |= 0x8000;

	/* find a free per-port slot, and reject duplicates for this port */
	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
		if (!pd->port_pkeys[i] && pidx == -1)
			pidx = i;
		if (pd->port_pkeys[i] == key) {
			ipath_cdbg(VERBOSE, "p%u tries to set same pkey "
				   "(%x) more than once\n",
				   pd->port_port, key);
			ret = -EEXIST;
			goto bail;
		}
	}
	if (pidx == -1) {
		ipath_dbg("All pkeys for port %u already in use, "
			  "can't set %x\n", pd->port_port, key);
		ret = -EBUSY;
		goto bail;
	}
	/*
	 * First pass over the device-wide table: take a reference if the
	 * key already exists, and count ("any") slots that may be free.
	 */
	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
		if (!dd->ipath_pkeys[i]) {
			any++;
			continue;
		}
		if (dd->ipath_pkeys[i] == key) {
			atomic_t *pkrefs = &dd->ipath_pkeyrefs[i];

			if (atomic_inc_return(pkrefs) > 1) {
				pd->port_pkeys[pidx] = key;
				ipath_cdbg(VERBOSE, "p%u set key %x "
					   "matches #%d, count now %d\n",
					   pd->port_port, key, i,
					   atomic_read(pkrefs));
				ret = 0;
				goto bail;
			} else {
				/*
				 * lost race, decrement count, catch below
				 */
				atomic_dec(pkrefs);
				ipath_cdbg(VERBOSE, "Lost race, count was "
					   "0, after dec, it's %d\n",
					   atomic_read(pkrefs));
				any++;
			}
		}
		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
			/*
			 * It makes no sense to have both the limited and
			 * full membership PKEY set at the same time since
			 * the unlimited one will disable the limited one.
			 */
			ret = -EEXIST;
			goto bail;
		}
	}
	if (!any) {
		ipath_dbg("port %u, all pkeys already in use, "
			  "can't set %x\n", pd->port_port, key);
		ret = -EBUSY;
		goto bail;
	}
	/*
	 * Second pass: claim an empty slot.  atomic_inc_return == 1 means
	 * we won the race for that slot; then program the packed register.
	 */
	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
		if (!dd->ipath_pkeys[i] &&
		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
			u64 pkey;

			/* for ipathstats, etc. */
			ipath_stats.sps_pkeys[i] = lkey;
			pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key;
			pkey =
				(u64) dd->ipath_pkeys[0] |
				((u64) dd->ipath_pkeys[1] << 16) |
				((u64) dd->ipath_pkeys[2] << 32) |
				((u64) dd->ipath_pkeys[3] << 48);
			ipath_cdbg(PROC, "p%u set key %x in #%d, "
				   "portidx %d, new pkey reg %llx\n",
				   pd->port_port, key, i, pidx,
				   (unsigned long long) pkey);
			ipath_write_kreg(
				dd, dd->ipath_kregs->kr_partitionkey, pkey);

			ret = 0;
			goto bail;
		}
	}
	ipath_dbg("port %u, all pkeys already in use 2nd pass, "
		  "can't set %x\n", pd->port_port, key);
	ret = -EBUSY;

bail:
	return ret;
}
579
580/**
581 * ipath_manage_rcvq - manage a port's receive queue
582 * @pd: the port
583 * @start_stop: action to carry out
584 *
585 * start_stop == 0 disables receive on the port, for use in queue
586 * overflow conditions. start_stop==1 re-enables, to be used to
587 * re-init the software copy of the head register
588 */
589static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
590{
591 struct ipath_devdata *dd = pd->port_dd;
592 u64 tval;
593
594 ipath_cdbg(PROC, "%sabling rcv for unit %u port %u\n",
595 start_stop ? "en" : "dis", dd->ipath_unit,
596 pd->port_port);
597 /* atomically clear receive enable port. */
598 if (start_stop) {
599 /*
600 * On enable, force in-memory copy of the tail register to
601 * 0, so that protocol code doesn't have to worry about
602 * whether or not the chip has yet updated the in-memory
603 * copy or not on return from the system call. The chip
604 * always resets it's tail register back to 0 on a
605 * transition from disabled to enabled. This could cause a
606 * problem if software was broken, and did the enable w/o
607 * the disable, but eventually the in-memory copy will be
608 * updated and correct itself, even in the face of software
609 * bugs.
610 */
611 *pd->port_rcvhdrtail_kvaddr = 0;
612 set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
613 &dd->ipath_rcvctrl);
614 } else
615 clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
616 &dd->ipath_rcvctrl);
617 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
618 dd->ipath_rcvctrl);
619 /* now be sure chip saw it before we return */
620 tval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
621 if (start_stop) {
622 /*
623 * And try to be sure that tail reg update has happened too.
624 * This should in theory interlock with the RXE changes to
625 * the tail register. Don't assign it to the tail register
626 * in memory copy, since we could overwrite an update by the
627 * chip if we did.
628 */
629 tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
630 }
631 /* always; new head should be equal to new tail; see above */
632 return 0;
633}
634
635static void ipath_clean_part_key(struct ipath_portdata *pd,
636 struct ipath_devdata *dd)
637{
638 int i, j, pchanged = 0;
639 u64 oldpkey;
640
641 /* for debugging only */
642 oldpkey = (u64) dd->ipath_pkeys[0] |
643 ((u64) dd->ipath_pkeys[1] << 16) |
644 ((u64) dd->ipath_pkeys[2] << 32) |
645 ((u64) dd->ipath_pkeys[3] << 48);
646
647 for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
648 if (!pd->port_pkeys[i])
649 continue;
650 ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i,
651 pd->port_pkeys[i]);
652 for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) {
653 /* check for match independent of the global bit */
654 if ((dd->ipath_pkeys[j] & 0x7fff) !=
655 (pd->port_pkeys[i] & 0x7fff))
656 continue;
657 if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) {
658 ipath_cdbg(VERBOSE, "p%u clear key "
659 "%x matches #%d\n",
660 pd->port_port,
661 pd->port_pkeys[i], j);
662 ipath_stats.sps_pkeys[j] =
663 dd->ipath_pkeys[j] = 0;
664 pchanged++;
665 }
666 else ipath_cdbg(
667 VERBOSE, "p%u key %x matches #%d, "
668 "but ref still %d\n", pd->port_port,
669 pd->port_pkeys[i], j,
670 atomic_read(&dd->ipath_pkeyrefs[j]));
671 break;
672 }
673 pd->port_pkeys[i] = 0;
674 }
675 if (pchanged) {
676 u64 pkey = (u64) dd->ipath_pkeys[0] |
677 ((u64) dd->ipath_pkeys[1] << 16) |
678 ((u64) dd->ipath_pkeys[2] << 32) |
679 ((u64) dd->ipath_pkeys[3] << 48);
680 ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, "
681 "new pkey reg %llx\n", pd->port_port,
682 (unsigned long long) oldpkey,
683 (unsigned long long) pkey);
684 ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
685 pkey);
686 }
687}
688
/**
 * ipath_create_user_egr - allocate eager TID buffers
 * @pd: the port to allocate TID buffers for
 *
 * This routine is now quite different for user and kernel, because
 * the kernel uses skb's, for the accelerated network performance
 * This is the user port version
 *
 * Allocate the eager TID buffers and program them into infinipath
 * They are no longer completely contiguous, we do multiple allocation
 * calls.
 *
 * Returns 0 on success, -ENOMEM on any allocation failure; partially
 * allocated DMA chunks are freed on the error path.
 */
static int ipath_create_user_egr(struct ipath_portdata *pd)
{
	struct ipath_devdata *dd = pd->port_dd;
	unsigned e, egrcnt, alloced, egrperchunk, chunk, egrsize, egroff;
	size_t size;
	int ret;

	egrcnt = dd->ipath_rcvegrcnt;
	/* TID number offset for this port */
	egroff = pd->port_port * egrcnt;
	egrsize = dd->ipath_rcvegrbufsize;
	ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
		   "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);

	/*
	 * to avoid wasting a lot of memory, we allocate 32KB chunks of
	 * physically contiguous memory, advance through it until used up
	 * and then allocate more.  Of course, we need memory to store those
	 * extra pointers, now.  Started out with 256KB, but under heavy
	 * memory pressure (creating large files and then copying them over
	 * NFS while doing lots of MPI jobs), we hit some allocation
	 * failures, even though we can sleep...  (2.6.10) Still get
	 * failures at 64K.  32K is the lowest we can go without waiting
	 * more memory again.  It seems likely that the coalescing in
	 * free_pages, etc. still has issues (as it has had previously
	 * during 2.6.x development).
	 */
	size = 0x8000;		/* 32KB per physically contiguous chunk */
	alloced = ALIGN(egrsize * egrcnt, size);
	egrperchunk = size / egrsize;
	chunk = (egrcnt + egrperchunk - 1) / egrperchunk;
	pd->port_rcvegrbuf_chunks = chunk;
	pd->port_rcvegrbufs_perchunk = egrperchunk;
	pd->port_rcvegrbuf_size = size;
	/* per-chunk kernel virtual and bus addresses */
	pd->port_rcvegrbuf = vmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]));
	if (!pd->port_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail;
	}
	pd->port_rcvegrbuf_phys =
		vmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]));
	if (!pd->port_rcvegrbuf_phys) {
		ret = -ENOMEM;
		goto bail_rcvegrbuf;
	}
	for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
		/*
		 * GFP_USER, but without GFP_FS, so buffer cache can be
		 * coalesced (we hope); otherwise, even at order 4,
		 * heavy filesystem activity makes these fail
		 */
		gfp_t gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;

		pd->port_rcvegrbuf[e] = dma_alloc_coherent(
			&dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e],
			gfp_flags);

		if (!pd->port_rcvegrbuf[e]) {
			ret = -ENOMEM;
			goto bail_rcvegrbuf_phys;
		}
	}

	pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0];

	/* program each buffer's bus address into the chip's eager TIDs */
	for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) {
		dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk];
		unsigned i;

		for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
			dd->ipath_f_put_tid(dd, e + egroff +
					    (u64 __iomem *)
					    ((char __iomem *)
					     dd->ipath_kregbase +
					     dd->ipath_rcvegrbase), 0, pa);
			pa += egrsize;
		}
		cond_resched();	/* don't hog the cpu */
	}

	ret = 0;
	goto bail;

bail_rcvegrbuf_phys:
	/* free only the chunks that were successfully allocated */
	for (e = 0; e < pd->port_rcvegrbuf_chunks &&
		     pd->port_rcvegrbuf[e]; e++)
		dma_free_coherent(&dd->pcidev->dev, size,
				  pd->port_rcvegrbuf[e],
				  pd->port_rcvegrbuf_phys[e]);

	vfree(pd->port_rcvegrbuf_phys);
	pd->port_rcvegrbuf_phys = NULL;
bail_rcvegrbuf:
	vfree(pd->port_rcvegrbuf);
	pd->port_rcvegrbuf = NULL;
bail:
	return ret;
}
799
/**
 * ipath_do_user_init - set up a port for a user process
 * @pd: the port being initialized
 * @uinfo: user info supplied with the IPATH_CMD_USER_INIT command
 *
 * Validates the user/driver software version, programs the rcvhdr
 * tail writeback address (a pinned user page), allocates the rcvhdr
 * queue and eager buffers, and finally enables receives on the port.
 *
 * Returns 0 on success, or a negative errno.
 */
static int ipath_do_user_init(struct ipath_portdata *pd,
			      const struct ipath_user_info *uinfo)
{
	int ret = 0;
	struct ipath_devdata *dd = pd->port_dd;
	u64 physaddr, uaddr, off, atmp;
	struct page *pagep;
	u32 head32;
	u64 head;

	/* for now, if major version is different, bail */
	if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
		dev_info(&dd->pcidev->dev,
			 "User major version %d not same as driver "
			 "major %d\n", uinfo->spu_userversion >> 16,
			 IPATH_USER_SWMAJOR);
		ret = -ENODEV;
		goto done;
	}

	/* a minor-version mismatch is tolerated; just log it in debug */
	if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
		ipath_dbg("User minor version %d not same as driver "
			  "minor %d\n", uinfo->spu_userversion & 0xffff,
			  IPATH_USER_SWMINOR);

	if (uinfo->spu_rcvhdrsize) {
		ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
		if (ret)
			goto done;
	}

	/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */

	/* set up for the rcvhdr Q tail register writeback to user memory */
	if (!uinfo->spu_rcvhdraddr ||
	    !access_ok(VERIFY_WRITE, (u64 __user *) (unsigned long)
		       uinfo->spu_rcvhdraddr, sizeof(u64))) {
		ipath_dbg("Port %d rcvhdrtail addr %llx not valid\n",
			  pd->port_port,
			  (unsigned long long) uinfo->spu_rcvhdraddr);
		ret = -EINVAL;
		goto done;
	}

	/* pin the user's page; both mappings are released at close time */
	off = offset_in_page(uinfo->spu_rcvhdraddr);
	uaddr = PAGE_MASK & (unsigned long) uinfo->spu_rcvhdraddr;
	ret = ipath_get_user_pages_nocopy(uaddr, &pagep);
	if (ret) {
		dev_info(&dd->pcidev->dev, "Failed to lookup and lock "
			 "address %llx for rcvhdrtail: errno %d\n",
			 (unsigned long long) uinfo->spu_rcvhdraddr, -ret);
		goto done;
	}
	ipath_stats.sps_pagelocks++;
	pd->port_rcvhdrtail_uaddr = uaddr;
	pd->port_rcvhdrtail_pagep = pagep;
	pd->port_rcvhdrtail_kvaddr =
		page_address(pagep);
	pd->port_rcvhdrtail_kvaddr += off;
	physaddr = page_to_phys(pagep) + off;
	ipath_cdbg(VERBOSE, "port %d user addr %llx hdrtailaddr, %llx "
		   "physical (off=%llx)\n",
		   pd->port_port,
		   (unsigned long long) uinfo->spu_rcvhdraddr,
		   (unsigned long long) physaddr, (unsigned long long) off);
	/* write the tail address to the chip, then read back to verify */
	ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
			      pd->port_port, physaddr);
	atmp = ipath_read_kreg64_port(dd,
				      dd->ipath_kregs->kr_rcvhdrtailaddr,
				      pd->port_port);
	if (physaddr != atmp) {
		ipath_dev_err(dd,
			      "Catastrophic software error, "
			      "RcvHdrTailAddr%u written as %llx, "
			      "read back as %llx\n", pd->port_port,
			      (unsigned long long) physaddr,
			      (unsigned long long) atmp);
		ret = -EINVAL;
		goto done;
	}

	/* for right now, kernel piobufs are at end, so port 1 is at 0 */
	pd->port_piobufs = dd->ipath_piobufbase +
		dd->ipath_pbufsport * (pd->port_port -
				       1) * dd->ipath_palign;
	ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
		   pd->port_port, pd->port_piobufs);

	/*
	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
	 * array for time being.  If pd->port_port > chip-supported,
	 * we need extra stuff here to handle overflow through port 0,
	 * someday.
	 */
	ret = ipath_create_rcvhdrq(dd, pd);
	if (!ret)
		ret = ipath_create_user_egr(pd);
	if (ret)
		goto done;
	/* enable receives now */
	/* atomically set enable bit for this port */
	set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
		&dd->ipath_rcvctrl);

	/*
	 * set the head registers for this port to the current values
	 * of the tail pointers, since we don't know if they were
	 * updated on last use of the port.
	 */
	head32 = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
	head = (u64) head32;
	ipath_write_ureg(dd, ur_rcvhdrhead, head, pd->port_port);
	head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
	ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
	dd->ipath_lastegrheads[pd->port_port] = -1;
	dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
	ipath_cdbg(VERBOSE, "Wrote port%d head %llx, egrhead %x from "
		   "tail regs\n", pd->port_port,
		   (unsigned long long) head, head32);
	pd->port_tidcursor = 0; /* start at beginning after open */
	/*
	 * now enable the port; the tail registers will be written to memory
	 * by the chip as soon as it sees the write to
	 * dd->ipath_kregs->kr_rcvctrl.  The update only happens on
	 * transition from 0 to 1, so clear it first, then set it as part of
	 * enabling the port.  This will (very briefly) affect any other
	 * open ports, but it shouldn't be long enough to be an issue.
	 */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl);

done:
	return ret;
}
936
937static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
938 u64 ureg)
939{
940 unsigned long phys;
941 int ret;
942
943 /* it's the real hardware, so io_remap works */
944
945 if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
946 dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
947 "%lx > PAGE\n", vma->vm_end - vma->vm_start);
948 ret = -EFAULT;
949 } else {
950 phys = dd->ipath_physaddr + ureg;
951 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
952
953 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
954 ret = io_remap_pfn_range(vma, vma->vm_start,
955 phys >> PAGE_SHIFT,
956 vma->vm_end - vma->vm_start,
957 vma->vm_page_prot);
958 }
959 return ret;
960}
961
962static int mmap_piobufs(struct vm_area_struct *vma,
963 struct ipath_devdata *dd,
964 struct ipath_portdata *pd)
965{
966 unsigned long phys;
967 int ret;
968
969 /*
970 * When we map the PIO buffers, we want to map them as writeonly, no
971 * read possible.
972 */
973
974 if ((vma->vm_end - vma->vm_start) >
975 (dd->ipath_pbufsport * dd->ipath_palign)) {
976 dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
977 "reqlen %lx > PAGE\n",
978 vma->vm_end - vma->vm_start);
979 ret = -EFAULT;
980 goto bail;
981 }
982
983 phys = dd->ipath_physaddr + pd->port_piobufs;
984 /*
985 * Do *NOT* mark this as non-cached (PWT bit), or we don't get the
986 * write combining behavior we want on the PIO buffers!
987 * vma->vm_page_prot =
988 * pgprot_noncached(vma->vm_page_prot);
989 */
990
991 if (vma->vm_flags & VM_READ) {
992 dev_info(&dd->pcidev->dev,
993 "Can't map piobufs as readable (flags=%lx)\n",
994 vma->vm_flags);
995 ret = -EPERM;
996 goto bail;
997 }
998
999 /* don't allow them to later change to readable with mprotect */
1000
1001 vma->vm_flags &= ~VM_MAYWRITE;
1002 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
1003
1004 ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
1005 vma->vm_end - vma->vm_start,
1006 vma->vm_page_prot);
1007bail:
1008 return ret;
1009}
1010
1011static int mmap_rcvegrbufs(struct vm_area_struct *vma,
1012 struct ipath_portdata *pd)
1013{
1014 struct ipath_devdata *dd = pd->port_dd;
1015 unsigned long start, size;
1016 size_t total_size, i;
1017 dma_addr_t *phys;
1018 int ret;
1019
1020 if (!pd->port_rcvegrbuf) {
1021 ret = -EFAULT;
1022 goto bail;
1023 }
1024
1025 size = pd->port_rcvegrbuf_size;
1026 total_size = pd->port_rcvegrbuf_chunks * size;
1027 if ((vma->vm_end - vma->vm_start) > total_size) {
1028 dev_info(&dd->pcidev->dev, "FAIL on egr bufs: "
1029 "reqlen %lx > actual %lx\n",
1030 vma->vm_end - vma->vm_start,
1031 (unsigned long) total_size);
1032 ret = -EFAULT;
1033 goto bail;
1034 }
1035
1036 if (vma->vm_flags & VM_WRITE) {
1037 dev_info(&dd->pcidev->dev, "Can't map eager buffers as "
1038 "writable (flags=%lx)\n", vma->vm_flags);
1039 ret = -EPERM;
1040 goto bail;
1041 }
1042
1043 start = vma->vm_start;
1044 phys = pd->port_rcvegrbuf_phys;
1045
1046 /* don't allow them to later change to writeable with mprotect */
1047 vma->vm_flags &= ~VM_MAYWRITE;
1048
1049 for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
1050 ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT,
1051 size, vma->vm_page_prot);
1052 if (ret < 0)
1053 goto bail;
1054 }
1055 ret = 0;
1056
1057bail:
1058 return ret;
1059}
1060
1061static int mmap_rcvhdrq(struct vm_area_struct *vma,
1062 struct ipath_portdata *pd)
1063{
1064 struct ipath_devdata *dd = pd->port_dd;
1065 size_t total_size;
1066 int ret;
1067
1068 /*
1069 * kmalloc'ed memory, physically contiguous; this is from
1070 * spi_rcvhdr_base; we allow user to map read-write so they can
1071 * write hdrq entries to allow protocol code to directly poll
1072 * whether a hdrq entry has been written.
1073 */
1074 total_size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
1075 sizeof(u32), PAGE_SIZE);
1076 if ((vma->vm_end - vma->vm_start) > total_size) {
1077 dev_info(&dd->pcidev->dev,
1078 "FAIL on rcvhdrq: reqlen %lx > actual %lx\n",
1079 vma->vm_end - vma->vm_start,
1080 (unsigned long) total_size);
1081 ret = -EFAULT;
1082 goto bail;
1083 }
1084
1085 ret = remap_pfn_range(vma, vma->vm_start,
1086 pd->port_rcvhdrq_phys >> PAGE_SHIFT,
1087 vma->vm_end - vma->vm_start,
1088 vma->vm_page_prot);
1089bail:
1090 return ret;
1091}
1092
1093static int mmap_pioavailregs(struct vm_area_struct *vma,
1094 struct ipath_portdata *pd)
1095{
1096 struct ipath_devdata *dd = pd->port_dd;
1097 int ret;
1098
1099 /*
1100 * when we map the PIO bufferavail registers, we want to map them as
1101 * readonly, no write possible.
1102 *
1103 * kmalloc'ed memory, physically contiguous, one page only, readonly
1104 */
1105
1106 if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
1107 dev_info(&dd->pcidev->dev, "FAIL on pioavailregs_dma: "
1108 "reqlen %lx > actual %lx\n",
1109 vma->vm_end - vma->vm_start,
1110 (unsigned long) PAGE_SIZE);
1111 ret = -EFAULT;
1112 goto bail;
1113 }
1114
1115 if (vma->vm_flags & VM_WRITE) {
1116 dev_info(&dd->pcidev->dev,
1117 "Can't map pioavailregs as writable (flags=%lx)\n",
1118 vma->vm_flags);
1119 ret = -EPERM;
1120 goto bail;
1121 }
1122
1123 /* don't allow them to later change with mprotect */
1124 vma->vm_flags &= ~VM_MAYWRITE;
1125
1126 ret = remap_pfn_range(vma, vma->vm_start,
1127 dd->ipath_pioavailregs_phys >> PAGE_SHIFT,
1128 PAGE_SIZE, vma->vm_page_prot);
1129bail:
1130 return ret;
1131}
1132
/**
 * ipath_mmap - mmap various structures into user space
 * @fp: the file pointer
 * @vma: the VM area
 *
 * We use this to have a shared buffer between the kernel and the user code
 * for the rcvhdr queue, egr buffers, and the per-port user regs and pio
 * buffers in the chip.  We have the open and close entries so we can bump
 * the ref count and keep the driver from being unloaded while still mapped.
 *
 * The object to map is identified by matching the mmap offset
 * (vm_pgoff << PAGE_SHIFT) against the known addresses of the port's
 * mappable objects; an offset matching none of them is rejected.
 */
static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
{
	struct ipath_portdata *pd;
	struct ipath_devdata *dd;
	u64 pgaddr, ureg;
	int ret;

	pd = port_fp(fp);
	dd = pd->port_dd;
	/*
	 * This is the ipath_do_user_init() code, mapping the shared buffers
	 * into the user process. The address referred to by vm_pgoff is the
	 * virtual, not physical, address; we only do one mmap for each
	 * space mapped.
	 */
	pgaddr = vma->vm_pgoff << PAGE_SHIFT;

	/*
	 * note that ureg does *NOT* have the kregvirt as part of it, to be
	 * sure that for 32 bit programs, we don't end up trying to map a >
	 * 44 address.  Has to match ipath_get_base_info() code that sets
	 * __spi_uregbase
	 */

	ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;

	ipath_cdbg(MM, "ushare: pgaddr %llx vm_start=%lx, vmlen %lx\n",
		   (unsigned long long) pgaddr, vma->vm_start,
		   vma->vm_end - vma->vm_start);

	/* dispatch by which known object the offset names */
	if (pgaddr == ureg)
		ret = mmap_ureg(vma, dd, ureg);
	else if (pgaddr == pd->port_piobufs)
		ret = mmap_piobufs(vma, dd, pd);
	else if (pgaddr == (u64) pd->port_rcvegr_phys)
		ret = mmap_rcvegrbufs(vma, pd);
	else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
		ret = mmap_rcvhdrq(vma, pd);
	else if (pgaddr == dd->ipath_pioavailregs_phys)
		ret = mmap_pioavailregs(vma, pd);
	else
		ret = -EINVAL;

	vma->vm_private_data = NULL;

	if (ret < 0)
		dev_info(&dd->pcidev->dev,
			 "Failure %d on addr %lx, off %lx\n",
			 -ret, vma->vm_start, vma->vm_pgoff);

	return ret;
}
1195
/*
 * ipath_poll - poll(2) entry: wait for a packet to arrive on the port
 * @fp: the file pointer
 * @pt: the poll table
 *
 * Enables the per-port receive-available interrupt, then re-reads the
 * head and tail pointers from the chip to close the race with packets
 * that arrived before the enable reached hardware, and only sleeps on
 * the port wait queue if the queue is still empty.
 *
 * NOTE(review): this always returns 0, so the caller never sees
 * POLLIN from here; readiness is signalled only via the wait queue —
 * confirm this is the intended contract with the interrupt handler.
 */
static unsigned int ipath_poll(struct file *fp,
			       struct poll_table_struct *pt)
{
	struct ipath_portdata *pd;
	u32 head, tail;
	int bit;
	struct ipath_devdata *dd;

	pd = port_fp(fp);
	dd = pd->port_dd;

	/* arm the "interrupt on receive available" bit for this port */
	bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT;
	set_bit(bit, &dd->ipath_rcvctrl);

	/*
	 * Before blocking, make sure that head is still == tail,
	 * reading from the chip, so we can be sure the interrupt
	 * enable has made it to the chip.  If not equal, disable
	 * interrupt again and return immediately.  This avoids races,
	 * and the overhead of the chip read doesn't matter much at
	 * this point, since we are waiting for something anyway.
	 */

	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl);

	head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
	tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);

	if (tail == head) {
		set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
		poll_wait(fp, &pd->port_wait, pt);

		if (test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
			/* timed out, no packets received */
			clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
			pd->port_rcvwait_to++;
		}
	}
	else {
		/* it's already happened; don't do wait_event overhead */
		pd->port_rcvnowait++;
	}

	/* disable the receive-available interrupt again on the way out */
	clear_bit(bit, &dd->ipath_rcvctrl);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl);

	return 0;
}
1246
/*
 * Try to claim user port @port on unit @dd for the process behind @fp.
 *
 * On first use of the port this also allocates its portdata and the
 * per-open TID scratch buffer used by ipath_tid_update().  Caller must
 * hold ipath_mutex.
 *
 * Returns 0 on success, -EBUSY if the port is already in use, or
 * -ENOMEM on allocation failure.
 */
static int try_alloc_port(struct ipath_devdata *dd, int port,
			  struct file *fp)
{
	int ret;

	if (!dd->ipath_pd[port]) {
		void *p, *ptmp;

		p = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);

		/*
		 * Allocate memory for use in ipath_tid_update() just once
		 * at open, not per call.  Reduces cost of expected send
		 * setup.
		 */
		ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
			       dd->ipath_rcvtidcnt * sizeof(struct page **),
			       GFP_KERNEL);
		if (!p || !ptmp) {
			ipath_dev_err(dd, "Unable to allocate portdata "
				      "memory, failing open\n");
			ret = -ENOMEM;
			kfree(p);
			kfree(ptmp);
			goto bail;
		}
		dd->ipath_pd[port] = p;
		dd->ipath_pd[port]->port_port = port;
		dd->ipath_pd[port]->port_dd = dd;
		dd->ipath_pd[port]->port_tid_pg_list = ptmp;
		init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
	}
	if (!dd->ipath_pd[port]->port_cnt) {
		/* port is free: claim it and record the owner for logs */
		dd->ipath_pd[port]->port_cnt = 1;
		fp->private_data = (void *) dd->ipath_pd[port];
		ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
			   current->comm, current->pid, dd->ipath_unit,
			   port);
		dd->ipath_pd[port]->port_pid = current->pid;
		/* NOTE(review): assumes port_comm is at least
		 * TASK_COMM_LEN bytes so strncpy copies the NUL --
		 * confirm against the struct definition */
		strncpy(dd->ipath_pd[port]->port_comm, current->comm,
			sizeof(dd->ipath_pd[port]->port_comm));
		ipath_stats.sps_ports++;
		ret = 0;
		goto bail;
	}
	ret = -EBUSY;

bail:
	return ret;
}
1297
1298static inline int usable(struct ipath_devdata *dd)
1299{
1300 return dd &&
1301 (dd->ipath_flags & IPATH_PRESENT) &&
1302 dd->ipath_kregbase &&
1303 dd->ipath_lid &&
1304 !(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED
1305 | IPATH_LINKUNK));
1306}
1307
1308static int find_free_port(int unit, struct file *fp)
1309{
1310 struct ipath_devdata *dd = ipath_lookup(unit);
1311 int ret, i;
1312
1313 if (!dd) {
1314 ret = -ENODEV;
1315 goto bail;
1316 }
1317
1318 if (!usable(dd)) {
1319 ret = -ENETDOWN;
1320 goto bail;
1321 }
1322
1323 for (i = 0; i < dd->ipath_cfgports; i++) {
1324 ret = try_alloc_port(dd, i, fp);
1325 if (ret != -EBUSY)
1326 goto bail;
1327 }
1328 ret = -EBUSY;
1329
1330bail:
1331 return ret;
1332}
1333
/*
 * Pick a unit and free port for a wildcard open (minor 0).
 *
 * If the process has CPU affinity restricted to a single CPU, prefer
 * the unit "closest" to that CPU; otherwise round-robin across all
 * units, lowest port number first.  Returns 0 with a port claimed via
 * try_alloc_port(), or a negative errno (-ENETDOWN, -EBUSY, -ENXIO).
 */
static int find_best_unit(struct file *fp)
{
	int ret = 0, i, prefunit = -1, devmax;
	int maxofallports, npresent, nup;
	int ndev;

	(void) ipath_count_units(&npresent, &nup, &maxofallports);

	/*
	 * This code is present to allow a knowledgeable person to
	 * specify the layout of processes to processors before opening
	 * this driver, and then we'll assign the process to the "closest"
	 * HT-400 to that processor (we assume reasonable connectivity,
	 * for now).  This code assumes that if affinity has been set
	 * before this point, that at most one cpu is set; for now this
	 * is reasonable.  I check for both cpus_empty() and cpus_full(),
	 * in case some kernel variant sets none of the bits when no
	 * affinity is set.  2.6.11 and 12 kernels have all present
	 * cpus set.  Some day we'll have to fix it up further to handle
	 * a cpu subset.  This algorithm fails for two HT-400's connected
	 * in tunnel fashion.  Eventually this needs real topology
	 * information.  There may be some issues with dual core numbering
	 * as well.  This needs more work prior to release.
	 */
	if (!cpus_empty(current->cpus_allowed) &&
	    !cpus_full(current->cpus_allowed)) {
		int ncpus = num_online_cpus(), curcpu = -1;
		/* find the (single, by assumption above) allowed cpu */
		for (i = 0; i < ncpus; i++)
			if (cpu_isset(i, current->cpus_allowed)) {
				ipath_cdbg(PROC, "%s[%u] affinity set for "
					   "cpu %d\n", current->comm,
					   current->pid, i);
				curcpu = i;
			}
		if (curcpu != -1) {
			if (npresent) {
				/* map cpu number onto a preferred unit */
				prefunit = curcpu / (ncpus / npresent);
				ipath_dbg("%s[%u] %d chips, %d cpus, "
					  "%d cpus/chip, select unit %d\n",
					  current->comm, current->pid,
					  npresent, ncpus, ncpus / npresent,
					  prefunit);
			}
		}
	}

	/*
	 * user ports start at 1, kernel port is 0
	 * For now, we do round-robin access across all chips
	 */

	if (prefunit != -1)
		devmax = prefunit + 1;
	else
		devmax = ipath_count_units(NULL, NULL, NULL);
recheck:
	/* outer loop: port number, so low ports fill evenly across units */
	for (i = 1; i < maxofallports; i++) {
		for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
		     ndev++) {
			struct ipath_devdata *dd = ipath_lookup(ndev);

			if (!usable(dd))
				continue; /* can't use this unit */
			if (i >= dd->ipath_cfgports)
				/*
				 * Maxed out on users of this unit. Try
				 * next.
				 */
				continue;
			ret = try_alloc_port(dd, i, fp);
			if (!ret)
				goto done;
		}
	}

	/* nothing claimed: decide which errno best describes why */
	if (npresent) {
		if (nup == 0) {
			ret = -ENETDOWN;
			ipath_dbg("No ports available (none initialized "
				  "and ready)\n");
		} else {
			if (prefunit > 0) {
				/* if started above 0, retry from 0 */
				ipath_cdbg(PROC,
					   "%s[%u] no ports on prefunit "
					   "%d, clear and re-check\n",
					   current->comm, current->pid,
					   prefunit);
				devmax = ipath_count_units(NULL, NULL,
							   NULL);
				prefunit = -1;
				goto recheck;
			}
			ret = -EBUSY;
			ipath_dbg("No ports available\n");
		}
	} else {
		ret = -ENXIO;
		ipath_dbg("No boards found\n");
	}

done:
	return ret;
}
1438
1439static int ipath_open(struct inode *in, struct file *fp)
1440{
1441 int ret, minor;
1442
1443 mutex_lock(&ipath_mutex);
1444
1445 minor = iminor(in);
1446 ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
1447 (long)in->i_rdev, minor);
1448
1449 if (minor)
1450 ret = find_free_port(minor - 1, fp);
1451 else
1452 ret = find_best_unit(fp);
1453
1454 mutex_unlock(&ipath_mutex);
1455 return ret;
1456}
1457
/**
 * unlock_expected_tids - unlock any expected TID entries port still had in use
 * @pd: port
 *
 * We don't actually update the chip here, because we do a bulk update
 * below, using ipath_f_clear_tids.
 */
static void unlock_expected_tids(struct ipath_portdata *pd)
{
	struct ipath_devdata *dd = pd->port_dd;
	int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt;
	int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt;

	ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n",
		   pd->port_port);
	for (i = port_tidbase; i < maxtid; i++) {
		if (!dd->ipath_pageshadow[i])
			continue;

		/* unpin the user page this TID entry still pointed at */
		ipath_release_user_pages_on_close(&dd->ipath_pageshadow[i],
						  1);
		dd->ipath_pageshadow[i] = NULL;
		cnt++;
		ipath_stats.sps_pageunlocks++;
	}
	/* cnt is how many entries were still held (and just released);
	 * the message wording "locked" refers to that held count */
	if (cnt)
		ipath_cdbg(VERBOSE, "Port %u locked %u expTID entries\n",
			   pd->port_port, cnt);

	if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks)
		ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n",
			   (unsigned long long) ipath_stats.sps_pagelocks,
			   (unsigned long long)
			   ipath_stats.sps_pageunlocks);
}
1493
/*
 * ipath_close - release a user port when its fd is closed
 *
 * Reports and resets per-run diagnostic counters, unpins the rcvhdr
 * tail writeback page, zeroes the port's chip queue addresses,
 * disables receives, releases any expected-TID pages still held, and
 * frees the port back to the pool.  Always returns 0.
 */
static int ipath_close(struct inode *in, struct file *fp)
{
	int ret = 0;
	struct ipath_portdata *pd;
	struct ipath_devdata *dd;
	unsigned port;

	ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
		   (long)in->i_rdev, fp->private_data);

	mutex_lock(&ipath_mutex);

	pd = port_fp(fp);
	port = pd->port_port;
	fp->private_data = NULL;
	dd = pd->port_dd;

	/* report, then reset, the per-run counters */
	if (pd->port_hdrqfull) {
		ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
			   "during run\n", pd->port_comm, pd->port_pid,
			   pd->port_hdrqfull);
		pd->port_hdrqfull = 0;
	}

	if (pd->port_rcvwait_to || pd->port_piowait_to
	    || pd->port_rcvnowait || pd->port_pionowait) {
		ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeo; "
			   "%u rcv %u, pio already\n",
			   pd->port_port, pd->port_rcvwait_to,
			   pd->port_piowait_to, pd->port_rcvnowait,
			   pd->port_pionowait);
		pd->port_rcvwait_to = pd->port_piowait_to =
			pd->port_rcvnowait = pd->port_pionowait = 0;
	}
	if (pd->port_flag) {
		ipath_dbg("port %u port_flag still set to 0x%lx\n",
			  pd->port_port, pd->port_flag);
		pd->port_flag = 0;
	}

	if (dd->ipath_kregbase) {
		/* release the pinned user page used for tail writeback */
		if (pd->port_rcvhdrtail_uaddr) {
			pd->port_rcvhdrtail_uaddr = 0;
			pd->port_rcvhdrtail_kvaddr = NULL;
			ipath_release_user_pages_on_close(
				&pd->port_rcvhdrtail_pagep, 1);
			pd->port_rcvhdrtail_pagep = NULL;
			ipath_stats.sps_pageunlocks++;
		}
		/* zero the port's queue addresses on the chip */
		ipath_write_kreg_port(
			dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
			port, 0ULL);
		ipath_write_kreg_port(
			dd, dd->ipath_kregs->kr_rcvhdraddr,
			pd->port_port, 0);

		/* clean up the pkeys for this port user */
		ipath_clean_part_key(pd, dd);

		if (port < dd->ipath_cfgports) {
			int i = dd->ipath_pbufsport * (port - 1);
			ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);

			/* atomically clear receive enable port. */
			clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + port,
				  &dd->ipath_rcvctrl);
			ipath_write_kreg(
				dd,
				dd->ipath_kregs->kr_rcvctrl,
				dd->ipath_rcvctrl);

			if (dd->ipath_pageshadow)
				unlock_expected_tids(pd);
			ipath_stats.sps_ports--;
			ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
				   pd->port_comm, pd->port_pid,
				   dd->ipath_unit, port);
		}
	}

	pd->port_cnt = 0;
	pd->port_pid = 0;

	/* bulk-clear the port's expected TIDs on the chip (see
	 * unlock_expected_tids) */
	dd->ipath_f_clear_tids(dd, pd->port_port);

	ipath_free_pddata(dd, pd->port_port, 0);

	mutex_unlock(&ipath_mutex);

	return ret;
}
1585
1586static int ipath_port_info(struct ipath_portdata *pd,
1587 struct ipath_port_info __user *uinfo)
1588{
1589 struct ipath_port_info info;
1590 int nup;
1591 int ret;
1592
1593 (void) ipath_count_units(NULL, &nup, NULL);
1594 info.num_active = nup;
1595 info.unit = pd->port_dd->ipath_unit;
1596 info.port = pd->port_port;
1597
1598 if (copy_to_user(uinfo, &info, sizeof(info))) {
1599 ret = -EFAULT;
1600 goto bail;
1601 }
1602 ret = 0;
1603
1604bail:
1605 return ret;
1606}
1607
/*
 * ipath_write - the driver's user command entry point
 * @fp: the open device file
 * @data: a struct ipath_cmd in user space
 * @count: size of the user buffer
 * @off: unused
 *
 * Userspace drives the port by write(2)ing a struct ipath_cmd.  The
 * type word is copied in first, then only the payload bytes that
 * particular type needs, so short writes of small commands are legal.
 *
 * Returns the number of bytes consumed on success, else a negative
 * errno.
 */
static ssize_t ipath_write(struct file *fp, const char __user *data,
			   size_t count, loff_t *off)
{
	const struct ipath_cmd __user *ucmd;
	struct ipath_portdata *pd;
	const void __user *src;
	size_t consumed, copy;
	struct ipath_cmd cmd;
	ssize_t ret = 0;
	void *dest;

	if (count < sizeof(cmd.type)) {
		ret = -EINVAL;
		goto bail;
	}

	ucmd = (const struct ipath_cmd __user *) data;

	if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) {
		ret = -EFAULT;
		goto bail;
	}

	consumed = sizeof(cmd.type);

	/* select payload size and location for this command type */
	switch (cmd.type) {
	case IPATH_CMD_USER_INIT:
		copy = sizeof(cmd.cmd.user_info);
		dest = &cmd.cmd.user_info;
		src = &ucmd->cmd.user_info;
		break;
	case IPATH_CMD_RECV_CTRL:
		copy = sizeof(cmd.cmd.recv_ctrl);
		dest = &cmd.cmd.recv_ctrl;
		src = &ucmd->cmd.recv_ctrl;
		break;
	case IPATH_CMD_PORT_INFO:
		copy = sizeof(cmd.cmd.port_info);
		dest = &cmd.cmd.port_info;
		src = &ucmd->cmd.port_info;
		break;
	case IPATH_CMD_TID_UPDATE:
	case IPATH_CMD_TID_FREE:
		copy = sizeof(cmd.cmd.tid_info);
		dest = &cmd.cmd.tid_info;
		src = &ucmd->cmd.tid_info;
		break;
	case IPATH_CMD_SET_PART_KEY:
		copy = sizeof(cmd.cmd.part_key);
		dest = &cmd.cmd.part_key;
		src = &ucmd->cmd.part_key;
		break;
	default:
		ret = -EINVAL;
		goto bail;
	}

	/* the user buffer must contain the whole payload */
	if ((count - consumed) < copy) {
		ret = -EINVAL;
		goto bail;
	}

	if (copy_from_user(dest, src, copy)) {
		ret = -EFAULT;
		goto bail;
	}

	consumed += copy;
	pd = port_fp(fp);

	/* dispatch the now fully copied-in command */
	switch (cmd.type) {
	case IPATH_CMD_USER_INIT:
		ret = ipath_do_user_init(pd, &cmd.cmd.user_info);
		if (ret < 0)
			goto bail;
		ret = ipath_get_base_info(
			pd, (void __user *) (unsigned long)
			cmd.cmd.user_info.spu_base_info,
			cmd.cmd.user_info.spu_base_info_size);
		break;
	case IPATH_CMD_RECV_CTRL:
		ret = ipath_manage_rcvq(pd, cmd.cmd.recv_ctrl);
		break;
	case IPATH_CMD_PORT_INFO:
		ret = ipath_port_info(pd,
				      (struct ipath_port_info __user *)
				      (unsigned long) cmd.cmd.port_info);
		break;
	case IPATH_CMD_TID_UPDATE:
		ret = ipath_tid_update(pd, &cmd.cmd.tid_info);
		break;
	case IPATH_CMD_TID_FREE:
		ret = ipath_tid_free(pd, &cmd.cmd.tid_info);
		break;
	case IPATH_CMD_SET_PART_KEY:
		ret = ipath_set_part_key(pd, cmd.cmd.part_key);
		break;
	}

	if (ret >= 0)
		ret = consumed;

bail:
	return ret;
}
1713
1714static struct class *ipath_class;
1715
/*
 * init_cdev - create one char device and its sysfs class device
 * @minor: minor number under IPATH_MAJOR
 * @name: device name (also used as the kobject/class device name)
 * @fops: file operations for the device
 * @cdevp: out: the new cdev, or NULL on failure
 * @class_devp: out: the new class device, or NULL on failure
 *
 * On any failure the partially constructed cdev is deleted and both
 * out pointers are set to NULL.  Returns 0 or a negative errno.
 */
static int init_cdev(int minor, char *name, struct file_operations *fops,
		     struct cdev **cdevp, struct class_device **class_devp)
{
	const dev_t dev = MKDEV(IPATH_MAJOR, minor);
	struct cdev *cdev = NULL;
	struct class_device *class_dev = NULL;
	int ret;

	cdev = cdev_alloc();
	if (!cdev) {
		printk(KERN_ERR IPATH_DRV_NAME
		       ": Could not allocate cdev for minor %d, %s\n",
		       minor, name);
		ret = -ENOMEM;
		goto done;
	}

	cdev->owner = THIS_MODULE;
	cdev->ops = fops;
	kobject_set_name(&cdev->kobj, name);

	ret = cdev_add(cdev, dev, 1);
	if (ret < 0) {
		printk(KERN_ERR IPATH_DRV_NAME
		       ": Could not add cdev for minor %d, %s (err %d)\n",
		       minor, name, -ret);
		goto err_cdev;
	}

	class_dev = class_device_create(ipath_class, NULL, dev, NULL, name);

	if (IS_ERR(class_dev)) {
		ret = PTR_ERR(class_dev);
		printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
		       "class_dev for minor %d, %s (err %d)\n",
		       minor, name, -ret);
		goto err_cdev;
	}

	goto done;

err_cdev:
	cdev_del(cdev);
	cdev = NULL;

done:
	/* publish results only on success; clear both outs on failure */
	if (ret >= 0) {
		*cdevp = cdev;
		*class_devp = class_dev;
	} else {
		*cdevp = NULL;
		*class_devp = NULL;
	}

	return ret;
}
1772
/*
 * Public wrapper around init_cdev(): create a char device plus its
 * sysfs class device at the given minor.  On failure both *cdevp and
 * *class_devp are NULL; see init_cdev() for details.
 */
int ipath_cdev_init(int minor, char *name, struct file_operations *fops,
		    struct cdev **cdevp, struct class_device **class_devp)
{
	return init_cdev(minor, name, fops, cdevp, class_devp);
}
1778
1779static void cleanup_cdev(struct cdev **cdevp,
1780 struct class_device **class_devp)
1781{
1782 struct class_device *class_dev = *class_devp;
1783
1784 if (class_dev) {
1785 class_device_unregister(class_dev);
1786 *class_devp = NULL;
1787 }
1788
1789 if (*cdevp) {
1790 cdev_del(*cdevp);
1791 *cdevp = NULL;
1792 }
1793}
1794
/*
 * Public wrapper around cleanup_cdev(): unregister the class device
 * and delete the cdev, NULLing both pointers.
 */
void ipath_cdev_cleanup(struct cdev **cdevp,
			struct class_device **class_devp)
{
	cleanup_cdev(cdevp, class_devp);
}
1800
1801static struct cdev *wildcard_cdev;
1802static struct class_device *wildcard_class_dev;
1803
1804static const dev_t dev = MKDEV(IPATH_MAJOR, 0);
1805
1806static int user_init(void)
1807{
1808 int ret;
1809
1810 ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME);
1811 if (ret < 0) {
1812 printk(KERN_ERR IPATH_DRV_NAME ": Could not register "
1813 "chrdev region (err %d)\n", -ret);
1814 goto done;
1815 }
1816
1817 ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME);
1818
1819 if (IS_ERR(ipath_class)) {
1820 ret = PTR_ERR(ipath_class);
1821 printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
1822 "device class (err %d)\n", -ret);
1823 goto bail;
1824 }
1825
1826 goto done;
1827bail:
1828 unregister_chrdev_region(dev, IPATH_NMINORS);
1829done:
1830 return ret;
1831}
1832
1833static void user_cleanup(void)
1834{
1835 if (ipath_class) {
1836 class_destroy(ipath_class);
1837 ipath_class = NULL;
1838 }
1839
1840 unregister_chrdev_region(dev, IPATH_NMINORS);
1841}
1842
1843static atomic_t user_count = ATOMIC_INIT(0);
1844static atomic_t user_setup = ATOMIC_INIT(0);
1845
/*
 * ipath_user_add - create the user char device(s) for one unit
 * @dd: the unit
 *
 * The first call also performs the one-time global setup -- chrdev
 * region and device class (user_init), diag support, and the minor-0
 * "ipath" wildcard device -- unwinding it in reverse order if any
 * step fails.  Every call then creates the per-unit "ipathN" device
 * at minor N+1.  Returns 0 or a negative errno.
 */
int ipath_user_add(struct ipath_devdata *dd)
{
	char name[10];
	int ret;

	if (atomic_inc_return(&user_count) == 1) {
		/* first unit: do the global setup */
		ret = user_init();
		if (ret < 0) {
			ipath_dev_err(dd, "Unable to set up user support: "
				      "error %d\n", -ret);
			goto bail;
		}
		ret = ipath_diag_init();
		if (ret < 0) {
			ipath_dev_err(dd, "Unable to set up diag support: "
				      "error %d\n", -ret);
			goto bail_sma;
		}

		ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev,
				&wildcard_class_dev);
		if (ret < 0) {
			ipath_dev_err(dd, "Could not create wildcard "
				      "minor: error %d\n", -ret);
			goto bail_diag;
		}

		/* tells ipath_user_del() that global teardown is needed */
		atomic_set(&user_setup, 1);
	}

	snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit);

	ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops,
			&dd->cdev, &dd->class_dev);
	if (ret < 0)
		ipath_dev_err(dd, "Could not create user minor %d, %s\n",
			      dd->ipath_unit + 1, name);

	goto bail;

bail_diag:
	ipath_diag_cleanup();
bail_sma:
	user_cleanup();
bail:
	return ret;
}
1893
/*
 * ipath_user_del - remove one unit's user char device
 * @dd: the unit
 *
 * The last call also tears down the global state (wildcard device,
 * diag support, class and chrdev region) -- but only if the first
 * ipath_user_add() actually completed it (user_setup flag).
 */
void ipath_user_del(struct ipath_devdata *dd)
{
	cleanup_cdev(&dd->cdev, &dd->class_dev);

	if (atomic_dec_return(&user_count) == 0) {
		/* skip global teardown if setup never finished */
		if (atomic_read(&user_setup) == 0)
			goto bail;

		cleanup_cdev(&wildcard_cdev, &wildcard_class_dev);
		ipath_diag_cleanup();
		user_cleanup();

		atomic_set(&user_setup, 0);
	}
bail:
	return;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
new file mode 100644
index 000000000000..e274120567e1
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -0,0 +1,605 @@
1/*
2 * Copyright (c) 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/version.h>
34#include <linux/config.h>
35#include <linux/module.h>
36#include <linux/fs.h>
37#include <linux/mount.h>
38#include <linux/pagemap.h>
39#include <linux/init.h>
40#include <linux/namei.h>
41#include <linux/pci.h>
42
43#include "ipath_kernel.h"
44
45#define IPATHFS_MAGIC 0x726a77
46
47static struct super_block *ipath_super;
48
/*
 * ipathfs_mknod - allocate an inode and attach it to @dentry
 * @dir: parent directory inode (supplies the superblock)
 * @dentry: dentry to instantiate with the new inode
 * @mode: file type (S_IFREG/S_IFDIR) plus permission bits
 * @fops: file operations installed on the new inode
 * @data: opaque pointer stashed in u.generic_ip so the fops can
 *        recover the per-device data later
 *
 * Returns 0 on success, -EPERM if the inode cannot be allocated.
 */
49static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
50			 int mode, struct file_operations *fops,
51			 void *data)
52{
53	int error;
54	struct inode *inode = new_inode(dir->i_sb);
55
56	if (!inode) {
57		error = -EPERM;
58		goto bail;
59	}
60
61	inode->i_mode = mode;
62	inode->i_uid = 0;
63	inode->i_gid = 0;
64	inode->i_blksize = PAGE_CACHE_SIZE;
65	inode->i_blocks = 0;
66	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
67	inode->u.generic_ip = data;
	/* directories get an extra link ('.') and bump the parent ('..') */
68	if ((mode & S_IFMT) == S_IFDIR) {
69		inode->i_op = &simple_dir_inode_operations;
70		inode->i_nlink++;
71		dir->i_nlink++;
72	}
73
74	inode->i_fop = fops;
75
76	d_instantiate(dentry, inode);
77	error = 0;
78
79bail:
80	return error;
81}
82
/*
 * create_file - look up @name under @parent and create it via mknod
 * @name: name of the new entry
 * @mode: file type and permission bits
 * @parent: parent directory dentry
 * @dentry: on success, points at the new (held) dentry
 * @fops: file operations for the new inode
 * @data: stashed in the inode for the fops to find
 *
 * Returns 0 on success or a negative errno.
 *
 * Bug fix: the original tested IS_ERR(dentry)/PTR_ERR(dentry) on the
 * double pointer itself, which is never an ERR_PTR, so a failed
 * lookup_one_len() result was passed straight into ipathfs_mknod()
 * and d_instantiate().  Test *dentry instead.
 */
83static int create_file(const char *name, mode_t mode,
84		       struct dentry *parent, struct dentry **dentry,
85		       struct file_operations *fops, void *data)
86{
87	int error;
88
89	*dentry = NULL;
90	mutex_lock(&parent->d_inode->i_mutex);
91	*dentry = lookup_one_len(name, parent, strlen(name));
92	if (!IS_ERR(*dentry))
93		error = ipathfs_mknod(parent->d_inode, *dentry,
94				      mode, fops, data);
95	else
96		error = PTR_ERR(*dentry);
97	mutex_unlock(&parent->d_inode->i_mutex);
98
99	return error;
100}
101
/*
 * atomic_stats_read - dump the driver-global ipath_stats struct
 *
 * Simple binary read of the whole structure; userspace is expected to
 * know its layout.  "atomic" here means a single consistent snapshot
 * per read() call, not atomic ops.
 */
102static ssize_t atomic_stats_read(struct file *file, char __user *buf,
103				 size_t count, loff_t *ppos)
104{
105	return simple_read_from_buffer(buf, count, ppos, &ipath_stats,
106				       sizeof ipath_stats);
107}
108
109static struct file_operations atomic_stats_ops = {
110	.read = atomic_stats_read,
111};
112
/*
 * Number of u64 counters in the chip's counter register block.
 * Parenthesize the whole expansion so the macro stays correct inside
 * any surrounding expression (the original unparenthesized form would
 * mis-associate under e.g. multiplication or division).
 */
113#define NUM_COUNTERS (sizeof(struct infinipath_counters) / sizeof(u64))
114
/*
 * atomic_counters_read - snapshot every chip counter in one read
 *
 * Reads each counter through the chip-specific ipath_snap_cntr()
 * routine into a local array, then copies the whole array out as
 * binary data.  The per-device data was stashed in the inode by
 * create_device_files().
 */
115static ssize_t atomic_counters_read(struct file *file, char __user *buf,
116				    size_t count, loff_t *ppos)
117{
118	u64 counters[NUM_COUNTERS];
119	u16 i;
120	struct ipath_devdata *dd;
121
122	dd = file->f_dentry->d_inode->u.generic_ip;
123
124	for (i = 0; i < NUM_COUNTERS; i++)
125		counters[i] = ipath_snap_cntr(dd, i);
126
127	return simple_read_from_buffer(buf, count, ppos, counters,
128				       sizeof counters);
129}
130
131static struct file_operations atomic_counters_ops = {
132	.read = atomic_counters_read,
133};
134
/*
 * atomic_node_info_read - synthesize an IB NodeInfo-like record
 *
 * Builds a 10-word binary blob from the device GUID and chip identity
 * fields and copies it out.  Fields marked "is SMA" are left for the
 * subnet management agent to fill in.
 */
135static ssize_t atomic_node_info_read(struct file *file, char __user *buf,
136				     size_t count, loff_t *ppos)
137{
138	u32 nodeinfo[10];
139	struct ipath_devdata *dd;
140	u64 guid;
141
142	dd = file->f_dentry->d_inode->u.generic_ip;
143
144	guid = be64_to_cpu(dd->ipath_guid);
145
146	nodeinfo[0] = /* BaseVersion is SMA */
147		/* ClassVersion is SMA */
148		(1 << 8) /* NodeType */
149		| (1 << 0); /* NumPorts */
150	nodeinfo[1] = (u32) (guid >> 32);
151	nodeinfo[2] = (u32) (guid & 0xffffffff);
152	/* PortGUID == SystemImageGUID for us */
153	nodeinfo[3] = nodeinfo[1];
154	/* PortGUID == SystemImageGUID for us */
155	nodeinfo[4] = nodeinfo[2];
	/* NOTE(review): [5]/[6] copy [3]/[4], which already equal [1]/[2];
	 * the comments and the copies look redundant — confirm the intended
	 * NodeInfo field layout against the SMA consumer. */
156	/* PortGUID == NodeGUID for us */
157	nodeinfo[5] = nodeinfo[3];
158	/* PortGUID == NodeGUID for us */
159	nodeinfo[6] = nodeinfo[4];
160	nodeinfo[7] = (4 << 16) /* we support 4 pkeys */
161		| (dd->ipath_deviceid << 0);
162	/* our chip version as 16 bits major, 16 bits minor */
163	nodeinfo[8] = dd->ipath_minrev | (dd->ipath_majrev << 16);
164	nodeinfo[9] = (dd->ipath_unit << 24) | (dd->ipath_vendorid << 0);
165
166	return simple_read_from_buffer(buf, count, ppos, nodeinfo,
167				       sizeof nodeinfo);
168}
169
170static struct file_operations atomic_node_info_ops = {
171	.read = atomic_node_info_read,
172};
173
/*
 * atomic_port_info_read - synthesize an IB PortInfo-like record
 *
 * Packs link state, width, speed, MTU and violation counters into a
 * 13-word binary blob.  The bit positions mirror the IB PortInfo MAD
 * layout; fields the SMA owns are left zero.
 */
174static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
175				     size_t count, loff_t *ppos)
176{
177	u32 portinfo[13];
178	u32 tmp, tmp2;
179	struct ipath_devdata *dd;
180
181	dd = file->f_dentry->d_inode->u.generic_ip;
182
183	/* so we only initialize non-zero fields. */
184	memset(portinfo, 0, sizeof portinfo);
185
186	/*
187	 * Notimpl yet M_Key (64)
188	 * Notimpl yet GID (64)
189	 */
190
191	portinfo[4] = (dd->ipath_lid << 16);
192
193	/*
194	 * Notimpl yet SMLID (should we store this in the driver, in case
195	 * SMA dies?) CapabilityMask is 0, we don't support any of these
196	 * DiagCode is 0; we don't store any diag info for now Notimpl yet
197	 * M_KeyLeasePeriod (we don't support M_Key)
198	 */
199
200	/* LocalPortNum is whichever port number they ask for */
201	portinfo[7] = (dd->ipath_unit << 24)
202		/* LinkWidthEnabled */
203		| (2 << 16)
204		/* LinkWidthSupported (really 2, but not IB valid) */
205		| (3 << 8)
206		/* LinkWidthActive */
207		| (2 << 0);
	/* map hardware IB link state to PortState / PortPhysicalState */
208	tmp = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
209	tmp2 = 5;
210	if (tmp == IPATH_IBSTATE_INIT)
211		tmp = 2;
212	else if (tmp == IPATH_IBSTATE_ARM)
213		tmp = 3;
214	else if (tmp == IPATH_IBSTATE_ACTIVE)
215		tmp = 4;
216	else {
217		tmp = 0;	/* down */
		/* NOTE(review): tmp is 0 here, so tmp2 is always 0 for the
		 * down case; the "& 0xf" is a no-op — confirm the intended
		 * PortPhysicalState encoding for a down link. */
218		tmp2 = tmp & 0xf;
219	}
220
221	portinfo[8] = (1 << 28)	/* LinkSpeedSupported */
222		| (tmp << 24)	/* PortState */
223		| (tmp2 << 20)	/* PortPhysicalState */
224		| (2 << 16)
225
226		/* LinkDownDefaultState */
227		/* M_KeyProtectBits == 0 */
228		/* NotImpl yet LMC == 0 (we can support all values) */
229		| (1 << 4)	/* LinkSpeedActive */
230		| (1 << 0);	/* LinkSpeedEnabled */
	/* encode the configured IB MTU per the IB enumeration (1..5) */
231	switch (dd->ipath_ibmtu) {
232	case 4096:
233		tmp = 5;
234		break;
235	case 2048:
236		tmp = 4;
237		break;
238	case 1024:
239		tmp = 3;
240		break;
241	case 512:
242		tmp = 2;
243		break;
244	case 256:
245		tmp = 1;
246		break;
247	default:		/* oops, something is wrong */
248		ipath_dbg("Problem, ipath_ibmtu 0x%x not a valid IB MTU, "
249			  "treat as 2048\n", dd->ipath_ibmtu);
250		tmp = 4;
251		break;
252	}
253	portinfo[9] = (tmp << 28)
254		/* NeighborMTU */
255		/* Notimpl MasterSMSL */
256		| (1 << 20)
257
258		/* VLCap */
259		/* Notimpl InitType (actually, an SMA decision) */
260		/* VLHighLimit is 0 (only one VL) */
261		; /* VLArbitrationHighCap is 0 (only one VL) */
262	portinfo[10] = 	/* VLArbitrationLowCap is 0 (only one VL) */
263		/* InitTypeReply is SMA decision */
264		(5 << 16) /* MTUCap 4096 */
265		| (7 << 13) /* VLStallCount */
266		| (0x1f << 8) /* HOQLife */
267		| (1 << 4)
268
269		/* OperationalVLs 0 */
270		/* PartitionEnforcementInbound */
271		/* PartitionEnforcementOutbound not enforced */
272		/* FilterRawinbound not enforced */
273		; /* FilterRawOutbound not enforced */
274	/* M_KeyViolations are not counted by hardware, SMA can count */
275	tmp = ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
276	/* P_KeyViolations are counted by hardware. */
277	portinfo[11] = ((tmp & 0xffff) << 0);
278	portinfo[12] =
279		/* Q_KeyViolations are not counted by hardware */
280		(1 << 8)
281
282		/* GUIDCap */
283		/* SubnetTimeOut handled by SMA */
284		/* RespTimeValue handled by SMA */
285		;
286	/* LocalPhyErrors are programmed to max */
287	portinfo[12] |= (0xf << 20)
288		| (0xf << 16)   /* OverRunErrors are programmed to max */
289		;
290
291	return simple_read_from_buffer(buf, count, ppos, portinfo,
292				       sizeof portinfo);
293}
294
295static struct file_operations atomic_port_info_ops = {
296	.read = atomic_port_info_read,
297};
298
/*
 * flash_read - read a byte range from the unit's EEPROM ("flash")
 *
 * Standard bounded read: the offset is clamped to the ipath_flash
 * layout, the data is staged through a kernel buffer (the EEPROM
 * accessor can't write to userspace directly), then copied out.
 * Returns the number of bytes read, 0 at/after end, or -errno.
 */
299static ssize_t flash_read(struct file *file, char __user *buf,
300			  size_t count, loff_t *ppos)
301{
302	struct ipath_devdata *dd;
303	ssize_t ret;
304	loff_t pos;
305	char *tmp;
306
307	pos = *ppos;
308
309	if ( pos < 0) {
310		ret = -EINVAL;
311		goto bail;
312	}
313
	/* reads at or past the end of the flash layout return EOF */
314	if (pos >= sizeof(struct ipath_flash)) {
315		ret = 0;
316		goto bail;
317	}
318
319	if (count > sizeof(struct ipath_flash) - pos)
320		count = sizeof(struct ipath_flash) - pos;
321
322	tmp = kmalloc(count, GFP_KERNEL);
323	if (!tmp) {
324		ret = -ENOMEM;
325		goto bail;
326	}
327
328	dd = file->f_dentry->d_inode->u.generic_ip;
329	if (ipath_eeprom_read(dd, pos, tmp, count)) {
330		ipath_dev_err(dd, "failed to read from flash\n");
331		ret = -ENXIO;
332		goto bail_tmp;
333	}
334
335	if (copy_to_user(buf, tmp, count)) {
336		ret = -EFAULT;
337		goto bail_tmp;
338	}
339
340	*ppos = pos + count;
341	ret = count;
342
343bail_tmp:
344	kfree(tmp);
345
346bail:
347	return ret;
348}
349
/*
 * flash_write - write a byte range to the unit's EEPROM ("flash")
 *
 * Mirror image of flash_read(): clamp the range to the ipath_flash
 * layout, stage the user data through a kernel buffer, then hand it
 * to the EEPROM accessor.  Returns bytes written or -errno.
 */
350static ssize_t flash_write(struct file *file, const char __user *buf,
351			   size_t count, loff_t *ppos)
352{
353	struct ipath_devdata *dd;
354	ssize_t ret;
355	loff_t pos;
356	char *tmp;
357
358	pos = *ppos;
359
360	if ( pos < 0) {
361		ret = -EINVAL;
362		goto bail;
363	}
364
	/* writes at or past the end of the flash layout are silently 0 */
365	if (pos >= sizeof(struct ipath_flash)) {
366		ret = 0;
367		goto bail;
368	}
369
370	if (count > sizeof(struct ipath_flash) - pos)
371		count = sizeof(struct ipath_flash) - pos;
372
373	tmp = kmalloc(count, GFP_KERNEL);
374	if (!tmp) {
375		ret = -ENOMEM;
376		goto bail;
377	}
378
379	if (copy_from_user(tmp, buf, count)) {
380		ret = -EFAULT;
381		goto bail_tmp;
382	}
383
384	dd = file->f_dentry->d_inode->u.generic_ip;
385	if (ipath_eeprom_write(dd, pos, tmp, count)) {
386		ret = -ENXIO;
387		ipath_dev_err(dd, "failed to write to flash\n");
388		goto bail_tmp;
389	}
390
391	*ppos = pos + count;
392	ret = count;
393
394bail_tmp:
395	kfree(tmp);
396
397bail:
398	return ret;
399}
400
401static struct file_operations flash_ops = {
402	.read = flash_read,
403	.write = flash_write,
404};
405
/*
 * create_device_files - populate ipathfs for one device
 * @sb: the mounted ipathfs superblock
 * @dd: the infinipath device
 *
 * Creates a per-unit directory ("00", "01", ...) under the fs root and
 * the files inside it (atomic_counters, node_info, port_info, flash),
 * each with @dd stashed in its inode.  On partial failure earlier
 * entries are deliberately left in place; remove_device_files()
 * unlinks whatever exists.  Returns 0 or a negative errno.
 */
406static int create_device_files(struct super_block *sb,
407			       struct ipath_devdata *dd)
408{
409	struct dentry *dir, *tmp;
410	char unit[10];
411	int ret;
412
413	snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
414	ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir,
415			  (struct file_operations *) &simple_dir_operations,
416			  dd);
417	if (ret) {
418		printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret);
419		goto bail;
420	}
421
422	ret = create_file("atomic_counters", S_IFREG|S_IRUGO, dir, &tmp,
423			  &atomic_counters_ops, dd);
424	if (ret) {
425		printk(KERN_ERR "create_file(%s/atomic_counters) "
426		       "failed: %d\n", unit, ret);
427		goto bail;
428	}
429
430	ret = create_file("node_info", S_IFREG|S_IRUGO, dir, &tmp,
431			  &atomic_node_info_ops, dd);
432	if (ret) {
433		printk(KERN_ERR "create_file(%s/node_info) "
434		       "failed: %d\n", unit, ret);
435		goto bail;
436	}
437
438	ret = create_file("port_info", S_IFREG|S_IRUGO, dir, &tmp,
439			  &atomic_port_info_ops, dd);
440	if (ret) {
441		printk(KERN_ERR "create_file(%s/port_info) "
442		       "failed: %d\n", unit, ret);
443		goto bail;
444	}
445
	/* flash is the only writable entry, and only for root */
446	ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
447			  &flash_ops, dd);
448	if (ret) {
449		printk(KERN_ERR "create_file(%s/flash) "
450		       "failed: %d\n", unit, ret);
451		goto bail;
452	}
453
454bail:
455	return ret;
456}
457
/*
 * remove_file - drop and unlink @name beneath @parent, if it exists
 *
 * Two fixes over the original:
 *  - lookup_one_len() can return an ERR_PTR, which was dereferenced
 *    unconditionally; bail out instead of oopsing.
 *  - the positivity test was !(d_unhashed && d_inode), which also
 *    matched hashed *negative* dentries and handed a NULL inode to
 *    simple_unlink(); only unlink a dentry that is hashed AND has an
 *    inode (the "simple_positive" condition).
 */
458static void remove_file(struct dentry *parent, char *name)
459{
460	struct dentry *tmp;
461
462	tmp = lookup_one_len(name, parent, strlen(name));
	if (IS_ERR(tmp))
		return;
463
464	spin_lock(&dcache_lock);
465	spin_lock(&tmp->d_lock);
466	if (!d_unhashed(tmp) && tmp->d_inode) {
467		dget_locked(tmp);
468		__d_drop(tmp);
469		spin_unlock(&tmp->d_lock);
470		spin_unlock(&dcache_lock);
471		simple_unlink(parent->d_inode, tmp);
472	} else {
473		spin_unlock(&tmp->d_lock);
474		spin_unlock(&dcache_lock);
475	}
476}
477
/*
 * remove_device_files - tear down one device's ipathfs directory
 * @sb: the mounted ipathfs superblock
 * @dd: the infinipath device
 *
 * Looks up the per-unit directory created by create_device_files(),
 * unlinks each file in it, then removes the directory itself.  The
 * root i_mutex is held across the whole teardown.  Returns 0 or a
 * negative errno from the lookup/rmdir.
 */
478static int remove_device_files(struct super_block *sb,
479			       struct ipath_devdata *dd)
480{
481	struct dentry *dir, *root;
482	char unit[10];
483	int ret;
484
485	root = dget(sb->s_root);
486	mutex_lock(&root->d_inode->i_mutex);
487	snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
488	dir = lookup_one_len(unit, root, strlen(unit));
489
490	if (IS_ERR(dir)) {
491		ret = PTR_ERR(dir);
492		printk(KERN_ERR "Lookup of %s failed\n", unit);
493		goto bail;
494	}
495
	/* unlink children before removing the now-empty directory */
496	remove_file(dir, "flash");
497	remove_file(dir, "port_info");
498	remove_file(dir, "node_info");
499	remove_file(dir, "atomic_counters");
500	d_delete(dir);
501	ret = simple_rmdir(root->d_inode, dir);
502
503bail:
504	mutex_unlock(&root->d_inode->i_mutex);
505	dput(root);
506	return ret;
507}
508
/*
 * ipathfs_fill_super - build the ipathfs tree at mount time
 *
 * Creates the fs skeleton (with the global atomic_stats file at the
 * root), then a per-device directory for every device currently on
 * ipath_dev_list.
 *
 * NOTE(review): the devs lock is dropped around create_device_files()
 * (which sleeps), so a device added or removed mid-walk could be
 * missed or the list_for_each_entry_safe cursor invalidated; also
 * deactivate_super() here runs in addition to the caller's error
 * unwinding — confirm both against the 2.6.16 get_sb_single contract.
 */
509static int ipathfs_fill_super(struct super_block *sb, void *data,
510			      int silent)
511{
512	struct ipath_devdata *dd, *tmp;
513	unsigned long flags;
514	int ret;
515
516	static struct tree_descr files[] = {
517		[1] = {"atomic_stats", &atomic_stats_ops, S_IRUGO},
518		{""},
519	};
520
521	ret = simple_fill_super(sb, IPATHFS_MAGIC, files);
522	if (ret) {
523		printk(KERN_ERR "simple_fill_super failed: %d\n", ret);
524		goto bail;
525	}
526
527	spin_lock_irqsave(&ipath_devs_lock, flags);
528
529	list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
530		spin_unlock_irqrestore(&ipath_devs_lock, flags);
531		ret = create_device_files(sb, dd);
532		if (ret) {
533			deactivate_super(sb);
534			goto bail;
535		}
536		spin_lock_irqsave(&ipath_devs_lock, flags);
537	}
538
539	spin_unlock_irqrestore(&ipath_devs_lock, flags);
540
541bail:
542	return ret;
543}
544
/*
 * ipathfs_get_sb - mount entry point; caches the live superblock
 *
 * Bug fix: the original stored the raw return of get_sb_single() in
 * ipath_super, so a failed mount left an ERR_PTR cached there, which
 * ipathfs_add_device()/ipathfs_remove_device() (they only check for
 * NULL) would later dereference.  Only cache on success.
 */
545static struct super_block *ipathfs_get_sb(struct file_system_type *fs_type,
546					  int flags, const char *dev_name,
547					  void *data)
548{
549	struct super_block *sb = get_sb_single(fs_type, flags, data,
550					       ipathfs_fill_super);
	if (!IS_ERR(sb))
		ipath_super = sb;
551	return sb;
552}
553
/*
 * ipathfs_kill_super - unmount hook; forget the cached superblock so
 * the add/remove hooks become no-ops again.
 */
554static void ipathfs_kill_super(struct super_block *s)
555{
556	kill_litter_super(s);
557	ipath_super = NULL;
558}
559
/*
 * ipathfs_add_device - hotplug hook: expose a new device in ipathfs
 * @dd: the infinipath device
 *
 * If ipathfs is not currently mounted there is nothing to create and
 * this succeeds trivially; otherwise build the per-device directory.
 * Returns 0 or a negative errno.
 */
560int ipathfs_add_device(struct ipath_devdata *dd)
561{
562	if (ipath_super == NULL)
563		return 0;
564
565	return create_device_files(ipath_super, dd);
566}
574
/*
 * ipathfs_remove_device - hotplug hook: drop a device from ipathfs
 * @dd: the infinipath device
 *
 * No mount means no files to tear down; otherwise remove the device's
 * directory and its contents.  Returns 0 or a negative errno.
 */
575int ipathfs_remove_device(struct ipath_devdata *dd)
576{
577	if (ipath_super == NULL)
578		return 0;
579
580	return remove_device_files(ipath_super, dd);
581}
589
/* single-instance pseudo-filesystem, mounted as type "ipathfs" */
590static struct file_system_type ipathfs_fs_type = {
591	.owner =	THIS_MODULE,
592	.name =		"ipathfs",
593	.get_sb =	ipathfs_get_sb,
594	.kill_sb =	ipathfs_kill_super,
595};
596
/* register the ipathfs filesystem type at module load */
597int __init ipath_init_ipathfs(void)
598{
599	return register_filesystem(&ipathfs_fs_type);
600}
601
/* unregister the ipathfs filesystem type at module unload */
602void __exit ipath_exit_ipathfs(void)
603{
604	unregister_filesystem(&ipathfs_fs_type);
605}
diff --git a/drivers/infiniband/hw/ipath/ipath_ht400.c b/drivers/infiniband/hw/ipath/ipath_ht400.c
new file mode 100644
index 000000000000..4652435998f3
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_ht400.c
@@ -0,0 +1,1586 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33/*
34 * This file contains all of the code that is specific to the InfiniPath
35 * HT-400 chip.
36 */
37
38#include <linux/pci.h>
39#include <linux/delay.h>
40
41#include "ipath_kernel.h"
42#include "ipath_registers.h"
43
44/*
45 * This lists the InfiniPath HT400 registers, in the actual chip layout.
46 * This structure should never be directly accessed.
47 *
48 * The names are in InterCap form because they're taken straight from
49 * the chip specification. Since they're only used in this file, they
50 * don't pollute the rest of the source.
51*/
52
53struct _infinipath_do_not_use_kernel_regs {
54 unsigned long long Revision;
55 unsigned long long Control;
56 unsigned long long PageAlign;
57 unsigned long long PortCnt;
58 unsigned long long DebugPortSelect;
59 unsigned long long DebugPort;
60 unsigned long long SendRegBase;
61 unsigned long long UserRegBase;
62 unsigned long long CounterRegBase;
63 unsigned long long Scratch;
64 unsigned long long ReservedMisc1;
65 unsigned long long InterruptConfig;
66 unsigned long long IntBlocked;
67 unsigned long long IntMask;
68 unsigned long long IntStatus;
69 unsigned long long IntClear;
70 unsigned long long ErrorMask;
71 unsigned long long ErrorStatus;
72 unsigned long long ErrorClear;
73 unsigned long long HwErrMask;
74 unsigned long long HwErrStatus;
75 unsigned long long HwErrClear;
76 unsigned long long HwDiagCtrl;
77 unsigned long long MDIO;
78 unsigned long long IBCStatus;
79 unsigned long long IBCCtrl;
80 unsigned long long ExtStatus;
81 unsigned long long ExtCtrl;
82 unsigned long long GPIOOut;
83 unsigned long long GPIOMask;
84 unsigned long long GPIOStatus;
85 unsigned long long GPIOClear;
86 unsigned long long RcvCtrl;
87 unsigned long long RcvBTHQP;
88 unsigned long long RcvHdrSize;
89 unsigned long long RcvHdrCnt;
90 unsigned long long RcvHdrEntSize;
91 unsigned long long RcvTIDBase;
92 unsigned long long RcvTIDCnt;
93 unsigned long long RcvEgrBase;
94 unsigned long long RcvEgrCnt;
95 unsigned long long RcvBufBase;
96 unsigned long long RcvBufSize;
97 unsigned long long RxIntMemBase;
98 unsigned long long RxIntMemSize;
99 unsigned long long RcvPartitionKey;
100 unsigned long long ReservedRcv[10];
101 unsigned long long SendCtrl;
102 unsigned long long SendPIOBufBase;
103 unsigned long long SendPIOSize;
104 unsigned long long SendPIOBufCnt;
105 unsigned long long SendPIOAvailAddr;
106 unsigned long long TxIntMemBase;
107 unsigned long long TxIntMemSize;
108 unsigned long long ReservedSend[9];
109 unsigned long long SendBufferError;
110 unsigned long long SendBufferErrorCONT1;
111 unsigned long long SendBufferErrorCONT2;
112 unsigned long long SendBufferErrorCONT3;
113 unsigned long long ReservedSBE[4];
114 unsigned long long RcvHdrAddr0;
115 unsigned long long RcvHdrAddr1;
116 unsigned long long RcvHdrAddr2;
117 unsigned long long RcvHdrAddr3;
118 unsigned long long RcvHdrAddr4;
119 unsigned long long RcvHdrAddr5;
120 unsigned long long RcvHdrAddr6;
121 unsigned long long RcvHdrAddr7;
122 unsigned long long RcvHdrAddr8;
123 unsigned long long ReservedRHA[7];
124 unsigned long long RcvHdrTailAddr0;
125 unsigned long long RcvHdrTailAddr1;
126 unsigned long long RcvHdrTailAddr2;
127 unsigned long long RcvHdrTailAddr3;
128 unsigned long long RcvHdrTailAddr4;
129 unsigned long long RcvHdrTailAddr5;
130 unsigned long long RcvHdrTailAddr6;
131 unsigned long long RcvHdrTailAddr7;
132 unsigned long long RcvHdrTailAddr8;
133 unsigned long long ReservedRHTA[7];
134 unsigned long long Sync; /* Software only */
135 unsigned long long Dump; /* Software only */
136 unsigned long long SimVer; /* Software only */
137 unsigned long long ReservedSW[5];
138 unsigned long long SerdesConfig0;
139 unsigned long long SerdesConfig1;
140 unsigned long long SerdesStatus;
141 unsigned long long XGXSConfig;
142 unsigned long long ReservedSW2[4];
143};
144
145#define IPATH_KREG_OFFSET(field) (offsetof(struct \
146 _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
147#define IPATH_CREG_OFFSET(field) (offsetof( \
148 struct infinipath_counters, field) / sizeof(u64))
149
150static const struct ipath_kregs ipath_ht_kregs = {
151 .kr_control = IPATH_KREG_OFFSET(Control),
152 .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
153 .kr_debugport = IPATH_KREG_OFFSET(DebugPort),
154 .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
155 .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
156 .kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
157 .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
158 .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
159 .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
160 .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
161 .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
162 .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
163 .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
164 .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
165 .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
166 .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
167 .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
168 .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
169 .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
170 .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
171 .kr_intclear = IPATH_KREG_OFFSET(IntClear),
172 .kr_interruptconfig = IPATH_KREG_OFFSET(InterruptConfig),
173 .kr_intmask = IPATH_KREG_OFFSET(IntMask),
174 .kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
175 .kr_mdio = IPATH_KREG_OFFSET(MDIO),
176 .kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
177 .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
178 .kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
179 .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
180 .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
181 .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
182 .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
183 .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
184 .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
185 .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
186 .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
187 .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
188 .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
189 .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
190 .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
191 .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
192 .kr_revision = IPATH_KREG_OFFSET(Revision),
193 .kr_scratch = IPATH_KREG_OFFSET(Scratch),
194 .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
195 .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
196 .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr),
197 .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase),
198 .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt),
199 .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize),
200 .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
201 .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
202 .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
203 .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
204 .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0),
205 .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1),
206 .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
207 .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
208 /*
209 * These should not be used directly via ipath_read_kreg64(),
210 * use them with ipath_read_kreg64_port(),
211 */
212 .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
213 .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
214};
215
216static const struct ipath_cregs ipath_ht_cregs = {
217 .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
218 .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
219 .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
220 .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
221 .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
222 .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
223 .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
224 .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
225 .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
226 .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
227 .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
228 .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
229 /* calc from Reg_CounterRegBase + offset */
230 .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
231 .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
232 .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
233 .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
234 .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
235 .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
236 .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
237 .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
238 .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
239 .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
240 .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
241 .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
242 .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
243 .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
244 .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
245 .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
246 .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
247 .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
248 .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
249 .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
250 .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
251};
252
253/* kr_intstatus, kr_intclear, kr_intmask bits */
254#define INFINIPATH_I_RCVURG_MASK 0x1FF
255#define INFINIPATH_I_RCVAVAIL_MASK 0x1FF
256
257/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
258#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
259#define INFINIPATH_HWE_HTCMEMPARITYERR_MASK 0x3FFFFFULL
260#define INFINIPATH_HWE_HTCLNKABYTE0CRCERR 0x0000000000800000ULL
261#define INFINIPATH_HWE_HTCLNKABYTE1CRCERR 0x0000000001000000ULL
262#define INFINIPATH_HWE_HTCLNKBBYTE0CRCERR 0x0000000002000000ULL
263#define INFINIPATH_HWE_HTCLNKBBYTE1CRCERR 0x0000000004000000ULL
264#define INFINIPATH_HWE_HTCMISCERR4 0x0000000008000000ULL
265#define INFINIPATH_HWE_HTCMISCERR5 0x0000000010000000ULL
266#define INFINIPATH_HWE_HTCMISCERR6 0x0000000020000000ULL
267#define INFINIPATH_HWE_HTCMISCERR7 0x0000000040000000ULL
268#define INFINIPATH_HWE_HTCBUSTREQPARITYERR 0x0000000080000000ULL
269#define INFINIPATH_HWE_HTCBUSTRESPPARITYERR 0x0000000100000000ULL
270#define INFINIPATH_HWE_HTCBUSIREQPARITYERR 0x0000000200000000ULL
271#define INFINIPATH_HWE_COREPLL_FBSLIP 0x0080000000000000ULL
272#define INFINIPATH_HWE_COREPLL_RFSLIP 0x0100000000000000ULL
273#define INFINIPATH_HWE_HTBPLL_FBSLIP 0x0200000000000000ULL
274#define INFINIPATH_HWE_HTBPLL_RFSLIP 0x0400000000000000ULL
275#define INFINIPATH_HWE_HTAPLL_FBSLIP 0x0800000000000000ULL
276#define INFINIPATH_HWE_HTAPLL_RFSLIP 0x1000000000000000ULL
277#define INFINIPATH_HWE_SERDESPLLFAILED 0x2000000000000000ULL
278
279/* kr_extstatus bits */
280#define INFINIPATH_EXTS_FREQSEL 0x2
281#define INFINIPATH_EXTS_SERDESSEL 0x4
282#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
283#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000
284
285/*
286 * masks and bits that are different in different chips, or present only
287 * in one
288 */
289static const ipath_err_t infinipath_hwe_htcmemparityerr_mask =
290 INFINIPATH_HWE_HTCMEMPARITYERR_MASK;
291static const ipath_err_t infinipath_hwe_htcmemparityerr_shift =
292 INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT;
293
294static const ipath_err_t infinipath_hwe_htclnkabyte0crcerr =
295 INFINIPATH_HWE_HTCLNKABYTE0CRCERR;
296static const ipath_err_t infinipath_hwe_htclnkabyte1crcerr =
297 INFINIPATH_HWE_HTCLNKABYTE1CRCERR;
298static const ipath_err_t infinipath_hwe_htclnkbbyte0crcerr =
299 INFINIPATH_HWE_HTCLNKBBYTE0CRCERR;
300static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr =
301 INFINIPATH_HWE_HTCLNKBBYTE1CRCERR;
302
303#define _IPATH_GPIO_SDA_NUM 1
304#define _IPATH_GPIO_SCL_NUM 0
305
306#define IPATH_GPIO_SDA \
307 (1ULL << (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
308#define IPATH_GPIO_SCL \
309 (1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
310
311/* keep the code below somewhat more readable; not used elsewhere */
312#define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \
313 infinipath_hwe_htclnkabyte1crcerr)
314#define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr | \
315 infinipath_hwe_htclnkbbyte1crcerr)
316#define _IPATH_HTLANE0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \
317 infinipath_hwe_htclnkbbyte0crcerr)
318#define _IPATH_HTLANE1_CRCBITS (infinipath_hwe_htclnkabyte1crcerr | \
319 infinipath_hwe_htclnkbbyte1crcerr)
320
/*
 * hwerr_crcbits - decode and report HT link CRC hardware errors
 * @dd: the infinipath device
 * @hwerrs: raw hardware error bits from kr_hwerrstatus
 * @msg: caller's message buffer; the decoded description is appended
 * @msgl: size of @msg
 *
 * Works out which HT link (A/B) and byte lane (0/1) saw CRC errors,
 * appends a human-readable summary to @msg, dumps the HT link control
 * words for debugging, and then masks those CRC bits off in the
 * hardware error mask so they don't fire again until driver reload.
 */
321static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
322			  char *msg, size_t msgl)
323{
324	char bitsmsg[64];
325	ipath_err_t crcbits = hwerrs &
326		(_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS);
327	/* don't check if 8bit HT */
328	if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
329		crcbits &= ~infinipath_hwe_htclnkabyte1crcerr;
330	/* don't check if 8bit HT */
331	if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
332		crcbits &= ~infinipath_hwe_htclnkbbyte1crcerr;
333	/*
334	 * we'll want to ignore link errors on link that is
335	 * not in use, if any. For now, complain about both
336	 */
337	if (crcbits) {
338		u16 ctrl0, ctrl1;
		/* name the affected link(s) and lane(s) for the log */
339		snprintf(bitsmsg, sizeof bitsmsg,
340			 "[HT%s lane %s CRC (%llx); ignore till reload]",
341			 !(crcbits & _IPATH_HTLINK1_CRCBITS) ?
342			 "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
343				    ? "1 (B)" : "0+1 (A+B)"),
344			 !(crcbits & _IPATH_HTLANE1_CRCBITS) ? "0"
345			 : (!(crcbits & _IPATH_HTLANE0_CRCBITS) ? "1" :
346			    "0+1"), (unsigned long long) crcbits);
347		strlcat(msg, bitsmsg, msgl);
348
349		/*
350		 * print extra info for debugging. slave/primary
351		 * config word 4, 8 (link control 0, 1)
352		 */
353
354		if (pci_read_config_word(dd->pcidev,
355					 dd->ipath_ht_slave_off + 0x4,
356					 &ctrl0))
357			dev_info(&dd->pcidev->dev, "Couldn't read "
358				 "linkctrl0 of slave/primary "
359				 "config block\n");
360		else if (!(ctrl0 & 1 << 6))
361			/* not if EOC bit set */
362			ipath_dbg("HT linkctrl0 0x%x%s%s\n", ctrl0,
363				  ((ctrl0 >> 8) & 7) ? " CRC" : "",
364				  ((ctrl0 >> 4) & 1) ? "linkfail" :
365				  "");
366		if (pci_read_config_word(dd->pcidev,
367					 dd->ipath_ht_slave_off + 0x8,
368					 &ctrl1))
369			dev_info(&dd->pcidev->dev, "Couldn't read "
370				 "linkctrl1 of slave/primary "
371				 "config block\n");
372		else if (!(ctrl1 & 1 << 6))
373			/* not if EOC bit set */
374			ipath_dbg("HT linkctrl1 0x%x%s%s\n", ctrl1,
375				  ((ctrl1 >> 8) & 7) ? " CRC" : "",
376				  ((ctrl1 >> 4) & 1) ? "linkfail" :
377				  "");
378
379		/* disable until driver reloaded */
380		dd->ipath_hwerrmask &= ~crcbits;
381		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
382				 dd->ipath_hwerrmask);
383		ipath_dbg("HT crc errs: %s\n", msg);
384	} else
385		ipath_dbg("ignoring HT crc errors 0x%llx, "
386			  "not in use\n", (unsigned long long)
387			  (hwerrs & (_IPATH_HTLINK0_CRCBITS |
388				     _IPATH_HTLINK1_CRCBITS)));
389}
390
/**
 * ipath_ht_handle_hwerrors - display hardware errors
 * @dd: the infinipath device
 * @msg: the output buffer
 * @msgl: the size of the output buffer
 *
 * We reuse the same message buffer as ipath_handle_errors() to avoid
 * excessive stack usage.  Most hardware errors are catastrophic, but
 * for right now we print them and continue.  If the chip entered
 * freeze mode on a real (non-ignored) error, the device is marked
 * unusable until the driver is reloaded.
 */
static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
				     size_t msgl)
{
	ipath_err_t hwerrs;
	u32 bits, ctrl;
	int isfatal = 0;
	char bitsmsg[64];

	hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);

	if (!hwerrs) {
		ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
		/*
		 * better than printing confusing messages
		 * This seems to be related to clearing the crc error, or
		 * the pll error during init.
		 */
		goto bail;
	} else if (hwerrs == -1LL) {
		/* all-ones read: the register access itself failed */
		ipath_dev_err(dd, "Read of hardware error status failed "
			      "(all bits set); ignoring\n");
		goto bail;
	}
	ipath_stats.sps_hwerrs++;

	/* Always clear the error status register, except MEMBISTFAIL,
	 * regardless of whether we continue or stop using the chip.
	 * We want that set so we know it failed, even across driver reload.
	 * We'll still ignore it in the hwerrmask. We do this partly for
	 * diagnostics, but also for support */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
			 hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);

	/* only report/act on errors we haven't masked off */
	hwerrs &= dd->ipath_hwerrmask;

	/*
	 * make sure we get this much out, unless told to be quiet,
	 * or it's occurred within the last 5 seconds
	 */
	if ((hwerrs & ~dd->ipath_lasthwerror) ||
	    (ipath_debug & __IPATH_VERBDBG))
		dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
			 "(cleared)\n", (unsigned long long) hwerrs);
	dd->ipath_lasthwerror |= hwerrs;

	if (hwerrs & ~infinipath_hwe_bitsextant)
		ipath_dev_err(dd, "hwerror interrupt with unknown errors "
			      "%llx set\n", (unsigned long long)
			      (hwerrs & ~infinipath_hwe_bitsextant));

	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
	if (ctrl & INFINIPATH_C_FREEZEMODE) {
		if (hwerrs) {
			/*
			 * if any set that we aren't ignoring; only
			 * make the complaint once, in case it's stuck
			 * or recurring, and we get here multiple
			 * times.
			 */
			if (dd->ipath_flags & IPATH_INITTED) {
				ipath_dev_err(dd, "Fatal Error (freeze "
					      "mode), no longer usable\n");
				isfatal = 1;
			}
			*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
			/* mark as having had error */
			*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
			/*
			 * mark as not usable, at a minimum until driver
			 * is reloaded, probably until reboot, since no
			 * other reset is possible.
			 */
			dd->ipath_flags &= ~IPATH_INITTED;
		} else {
			/* freeze set but every error is masked: unfreeze */
			ipath_dbg("Clearing freezemode on ignored hardware "
				  "error\n");
			ctrl &= ~INFINIPATH_C_FREEZEMODE;
			ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
					 ctrl);
		}
	}

	*msg = '\0';

	/*
	 * may someday want to decode into which bits are which
	 * functional area for parity errors, etc.
	 */
	if (hwerrs & (infinipath_hwe_htcmemparityerr_mask
		      << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT)) {
		bits = (u32) ((hwerrs >>
			       INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) &
			      INFINIPATH_HWE_HTCMEMPARITYERR_MASK);
		snprintf(bitsmsg, sizeof bitsmsg, "[HTC Parity Errs %x] ",
			 bits);
		strlcat(msg, bitsmsg, msgl);
	}
	if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
		      << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
		bits = (u32) ((hwerrs >>
			       INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
			      INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
		snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
			 bits);
		strlcat(msg, bitsmsg, msgl);
	}
	if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
		      << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
		bits = (u32) ((hwerrs >>
			       INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
			      INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
		snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
			 bits);
		strlcat(msg, bitsmsg, msgl);
	}
	if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
		strlcat(msg, "[IB2IPATH Parity]", msgl);
	if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
		strlcat(msg, "[IPATH2IB Parity]", msgl);
	if (hwerrs & INFINIPATH_HWE_HTCBUSIREQPARITYERR)
		strlcat(msg, "[HTC Ireq Parity]", msgl);
	if (hwerrs & INFINIPATH_HWE_HTCBUSTREQPARITYERR)
		strlcat(msg, "[HTC Treq Parity]", msgl);
	if (hwerrs & INFINIPATH_HWE_HTCBUSTRESPPARITYERR)
		strlcat(msg, "[HTC Tresp Parity]", msgl);

	/* HT link/lane CRC errors get their own detailed decode */
	if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
		hwerr_crcbits(dd, hwerrs, msg, msgl);

	if (hwerrs & INFINIPATH_HWE_HTCMISCERR5)
		strlcat(msg, "[HT core Misc5]", msgl);
	if (hwerrs & INFINIPATH_HWE_HTCMISCERR6)
		strlcat(msg, "[HT core Misc6]", msgl);
	if (hwerrs & INFINIPATH_HWE_HTCMISCERR7)
		strlcat(msg, "[HT core Misc7]", msgl);
	if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
		strlcat(msg, "[Memory BIST test failed, HT-400 unusable]",
			msgl);
		/* ignore from now on, so disable until driver reloaded */
		dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
				 dd->ipath_hwerrmask);
	}
#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP |	\
			 INFINIPATH_HWE_COREPLL_RFSLIP |	\
			 INFINIPATH_HWE_HTBPLL_FBSLIP |		\
			 INFINIPATH_HWE_HTBPLL_RFSLIP |		\
			 INFINIPATH_HWE_HTAPLL_FBSLIP |		\
			 INFINIPATH_HWE_HTAPLL_RFSLIP)

	if (hwerrs & _IPATH_PLL_FAIL) {
		snprintf(bitsmsg, sizeof bitsmsg,
			 "[PLL failed (%llx), HT-400 unusable]",
			 (unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
		strlcat(msg, bitsmsg, msgl);
		/* ignore from now on, so disable until driver reloaded */
		dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
				 dd->ipath_hwerrmask);
	}

	if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
		/*
		 * If it occurs, it is left masked since the external
		 * interface is unused
		 */
		dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
				 dd->ipath_hwerrmask);
	}

	if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
		strlcat(msg, "[Rx Dsync]", msgl);
	if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
		strlcat(msg, "[SerDes PLL]", msgl);

	ipath_dev_err(dd, "%s hardware error\n", msg);
	if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
		/*
		 * for status file; if no trailing brace is copied,
		 * we'll know it was truncated.
		 */
		snprintf(dd->ipath_freezemsg,
			 dd->ipath_freezelen, "{%s}", msg);

bail:;
}
590
591/**
592 * ipath_ht_boardname - fill in the board name
593 * @dd: the infinipath device
594 * @name: the output buffer
595 * @namelen: the size of the output buffer
596 *
597 * fill in the board name, based on the board revision register
598 */
599static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
600 size_t namelen)
601{
602 char *n = NULL;
603 u8 boardrev = dd->ipath_boardrev;
604 int ret;
605
606 switch (boardrev) {
607 case 4: /* Ponderosa is one of the bringup boards */
608 n = "Ponderosa";
609 break;
610 case 5: /* HT-460 original production board */
611 n = "InfiniPath_HT-460";
612 break;
613 case 6:
614 n = "OEM_Board_3";
615 break;
616 case 7:
617 /* HT-460 small form factor production board */
618 n = "InfiniPath_HT-465";
619 break;
620 case 8:
621 n = "LS/X-1";
622 break;
623 case 9: /* Comstock bringup test board */
624 n = "Comstock";
625 break;
626 case 10:
627 n = "OEM_Board_2";
628 break;
629 case 11:
630 n = "InfiniPath_HT-470";
631 break;
632 case 12:
633 n = "OEM_Board_4";
634 break;
635 default: /* don't know, just print the number */
636 ipath_dev_err(dd, "Don't yet know about board "
637 "with ID %u\n", boardrev);
638 snprintf(name, namelen, "Unknown_InfiniPath_HT-4xx_%u",
639 boardrev);
640 break;
641 }
642 if (n)
643 snprintf(name, namelen, "%s", n);
644
645 if (dd->ipath_majrev != 3 || dd->ipath_minrev != 2) {
646 /*
647 * This version of the driver only supports the HT-400
648 * Rev 3.2
649 */
650 ipath_dev_err(dd,
651 "Unsupported HT-400 revision %u.%u!\n",
652 dd->ipath_majrev, dd->ipath_minrev);
653 ret = 1;
654 goto bail;
655 }
656 /*
657 * pkt/word counters are 32 bit, and therefore wrap fast enough
658 * that we snapshot them from a timer, and maintain 64 bit shadow
659 * copies
660 */
661 dd->ipath_flags |= IPATH_32BITCOUNTERS;
662 if (dd->ipath_htspeed != 800)
663 ipath_dev_err(dd,
664 "Incorrectly configured for HT @ %uMHz\n",
665 dd->ipath_htspeed);
666 if (dd->ipath_boardrev == 7 || dd->ipath_boardrev == 11 ||
667 dd->ipath_boardrev == 6)
668 dd->ipath_flags |= IPATH_GPIO_INTR;
669 else
670 dd->ipath_flags |= IPATH_POLL_RX_INTR;
671 if (dd->ipath_boardrev == 8) { /* LS/X-1 */
672 u64 val;
673 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
674 if (val & INFINIPATH_EXTS_SERDESSEL) {
675 /*
676 * hardware disabled
677 *
678 * This means that the chip is hardware disabled,
679 * and will not be able to bring up the link,
680 * in any case. We special case this and abort
681 * early, to avoid later messages. We also set
682 * the DISABLED status bit
683 */
684 ipath_dbg("Unit %u is hardware-disabled\n",
685 dd->ipath_unit);
686 *dd->ipath_statusp |= IPATH_STATUS_DISABLED;
687 /* this value is handled differently */
688 ret = 2;
689 goto bail;
690 }
691 }
692 ret = 0;
693
694bail:
695 return ret;
696}
697
698static void ipath_check_htlink(struct ipath_devdata *dd)
699{
700 u8 linkerr, link_off, i;
701
702 for (i = 0; i < 2; i++) {
703 link_off = dd->ipath_ht_slave_off + i * 4 + 0xd;
704 if (pci_read_config_byte(dd->pcidev, link_off, &linkerr))
705 dev_info(&dd->pcidev->dev, "Couldn't read "
706 "linkerror%d of HT slave/primary block\n",
707 i);
708 else if (linkerr & 0xf0) {
709 ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
710 "clearing\n", linkerr >> 4, i);
711 /*
712 * writing the linkerr bits that are set should
713 * clear them
714 */
715 if (pci_write_config_byte(dd->pcidev, link_off,
716 linkerr))
717 ipath_dbg("Failed write to clear HT "
718 "linkerror%d\n", i);
719 if (pci_read_config_byte(dd->pcidev, link_off,
720 &linkerr))
721 dev_info(&dd->pcidev->dev,
722 "Couldn't reread linkerror%d of "
723 "HT slave/primary block\n", i);
724 else if (linkerr & 0xf0)
725 dev_info(&dd->pcidev->dev,
726 "HT linkerror%d bits 0x%x "
727 "couldn't be cleared\n",
728 i, linkerr >> 4);
729 }
730 }
731}
732
/*
 * The HT-400 has no mechanism for a software-initiated chip reset;
 * just log that and report success so the caller continues.
 */
static int ipath_setup_ht_reset(struct ipath_devdata *dd)
{
	ipath_dbg("No reset possible for HT-400\n");
	return 0;
}
738
739#define HT_CAPABILITY_ID 0x08 /* HT capabilities not defined in kernel */
740#define HT_INTR_DISC_CONFIG 0x80 /* HT interrupt and discovery cap */
741#define HT_INTR_REG_INDEX 2 /* intconfig requires indirect accesses */
742
743/*
744 * Bits 13-15 of command==0 is slave/primary block. Clear any HT CRC
745 * errors. We only bother to do this at load time, because it's OK if
746 * it happened before we were loaded (first time after boot/reset),
747 * but any time after that, it's fatal anyway. Also need to not check
748 * for for upper byte errors if we are in 8 bit mode, so figure out
749 * our width. For now, at least, also complain if it's 8 bit.
750 */
751static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
752 int pos, u8 cap_type)
753{
754 u8 linkwidth = 0, linkerr, link_a_b_off, link_off;
755 u16 linkctrl = 0;
756 int i;
757
758 dd->ipath_ht_slave_off = pos;
759 /* command word, master_host bit */
760 /* master host || slave */
761 if ((cap_type >> 2) & 1)
762 link_a_b_off = 4;
763 else
764 link_a_b_off = 0;
765 ipath_cdbg(VERBOSE, "HT%u (Link %c) connected to processor\n",
766 link_a_b_off ? 1 : 0,
767 link_a_b_off ? 'B' : 'A');
768
769 link_a_b_off += pos;
770
771 /*
772 * check both link control registers; clear both HT CRC sets if
773 * necessary.
774 */
775 for (i = 0; i < 2; i++) {
776 link_off = pos + i * 4 + 0x4;
777 if (pci_read_config_word(pdev, link_off, &linkctrl))
778 ipath_dev_err(dd, "Couldn't read HT link control%d "
779 "register\n", i);
780 else if (linkctrl & (0xf << 8)) {
781 ipath_cdbg(VERBOSE, "Clear linkctrl%d CRC Error "
782 "bits %x\n", i, linkctrl & (0xf << 8));
783 /*
784 * now write them back to clear the error.
785 */
786 pci_write_config_byte(pdev, link_off,
787 linkctrl & (0xf << 8));
788 }
789 }
790
791 /*
792 * As with HT CRC bits, same for protocol errors that might occur
793 * during boot.
794 */
795 for (i = 0; i < 2; i++) {
796 link_off = pos + i * 4 + 0xd;
797 if (pci_read_config_byte(pdev, link_off, &linkerr))
798 dev_info(&pdev->dev, "Couldn't read linkerror%d "
799 "of HT slave/primary block\n", i);
800 else if (linkerr & 0xf0) {
801 ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
802 "clearing\n", linkerr >> 4, i);
803 /*
804 * writing the linkerr bits that are set will clear
805 * them
806 */
807 if (pci_write_config_byte
808 (pdev, link_off, linkerr))
809 ipath_dbg("Failed write to clear HT "
810 "linkerror%d\n", i);
811 if (pci_read_config_byte(pdev, link_off, &linkerr))
812 dev_info(&pdev->dev, "Couldn't reread "
813 "linkerror%d of HT slave/primary "
814 "block\n", i);
815 else if (linkerr & 0xf0)
816 dev_info(&pdev->dev, "HT linkerror%d bits "
817 "0x%x couldn't be cleared\n",
818 i, linkerr >> 4);
819 }
820 }
821
822 /*
823 * this is just for our link to the host, not devices connected
824 * through tunnel.
825 */
826
827 if (pci_read_config_byte(pdev, link_a_b_off + 7, &linkwidth))
828 ipath_dev_err(dd, "Couldn't read HT link width "
829 "config register\n");
830 else {
831 u32 width;
832 switch (linkwidth & 7) {
833 case 5:
834 width = 4;
835 break;
836 case 4:
837 width = 2;
838 break;
839 case 3:
840 width = 32;
841 break;
842 case 1:
843 width = 16;
844 break;
845 case 0:
846 default: /* if wrong, assume 8 bit */
847 width = 8;
848 break;
849 }
850
851 dd->ipath_htwidth = width;
852
853 if (linkwidth != 0x11) {
854 ipath_dev_err(dd, "Not configured for 16 bit HT "
855 "(%x)\n", linkwidth);
856 if (!(linkwidth & 0xf)) {
857 ipath_dbg("Will ignore HT lane1 errors\n");
858 dd->ipath_flags |= IPATH_8BIT_IN_HT0;
859 }
860 }
861 }
862
863 /*
864 * this is just for our link to the host, not devices connected
865 * through tunnel.
866 */
867 if (pci_read_config_byte(pdev, link_a_b_off + 0xd, &linkwidth))
868 ipath_dev_err(dd, "Couldn't read HT link frequency "
869 "config register\n");
870 else {
871 u32 speed;
872 switch (linkwidth & 0xf) {
873 case 6:
874 speed = 1000;
875 break;
876 case 5:
877 speed = 800;
878 break;
879 case 4:
880 speed = 600;
881 break;
882 case 3:
883 speed = 500;
884 break;
885 case 2:
886 speed = 400;
887 break;
888 case 1:
889 speed = 300;
890 break;
891 default:
892 /*
893 * assume reserved and vendor-specific are 200...
894 */
895 case 0:
896 speed = 200;
897 break;
898 }
899 dd->ipath_htspeed = speed;
900 }
901}
902
/*
 * Read the MSI-style interrupt handler address from the HT interrupt
 * discovery capability (via its indirection register), merge in the
 * x86 vector from pdev->irq, and save the result for later programming
 * of the chip's internal interrupt register.  Returns nonzero if a
 * handler address was found.
 */
static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev,
			   int pos)
{
	u32 int_handler_addr_lower;
	u32 int_handler_addr_upper;
	u64 ihandler;
	u32 intvec;

	/* use indirection register to get the intr handler */
	pci_write_config_byte(pdev, pos + HT_INTR_REG_INDEX, 0x10);
	pci_read_config_dword(pdev, pos + 4, &int_handler_addr_lower);
	pci_write_config_byte(pdev, pos + HT_INTR_REG_INDEX, 0x11);
	pci_read_config_dword(pdev, pos + 4, &int_handler_addr_upper);

	ihandler = (u64) int_handler_addr_lower |
		((u64) int_handler_addr_upper << 32);

	/*
	 * kernels with CONFIG_PCI_MSI set the vector in the irq field of
	 * struct pci_device, so we use that to program the HT-400 internal
	 * interrupt register (not config space) with that value. The BIOS
	 * must still have done the basic MSI setup.
	 */
	intvec = pdev->irq;
	/*
	 * clear any vector bits there; normally not set but we'll overload
	 * this for some debug purposes (setting the HTC debug register
	 * value from software, rather than GPIOs), so it might be set on a
	 * driver reload.
	 */
	ihandler &= ~0xff0000;
	/* x86 vector goes in intrinfo[23:16] */
	ihandler |= intvec << 16;
	ipath_cdbg(VERBOSE, "ihandler lower %x, upper %x, intvec %x, "
		   "interruptconfig %llx\n", int_handler_addr_lower,
		   int_handler_addr_upper, intvec,
		   (unsigned long long) ihandler);

	/* can't program yet, so save for interrupt setup */
	dd->ipath_intconfig = ihandler;
	/* keep going, so we find link control stuff also */

	return ihandler != 0;
}
947
/**
 * ipath_setup_ht_config - setup the interruptconfig register
 * @dd: the infinipath device
 * @pdev: the PCI device
 *
 * setup the interruptconfig register from the HT config info.
 * Also clear CRC errors in HT linkcontrol, if necessary.
 * This is done only for the real hardware. It is done before
 * chip address space is initted, so can't touch infinipath registers.
 * Returns 0 on success, -ENODEV if no HT capability or no interrupt
 * handler address was found.
 */
static int ipath_setup_ht_config(struct ipath_devdata *dd,
				 struct pci_dev *pdev)
{
	int pos, ret = 0;
	int ihandler = 0;

	/*
	 * Read the capability info to find the interrupt info, and also
	 * handle clearing CRC errors in linkctrl register if necessary. We
	 * do this early, before we ever enable errors or hardware errors,
	 * mostly to avoid causing the chip to enter freeze mode.
	 */
	pos = pci_find_capability(pdev, HT_CAPABILITY_ID);
	if (!pos) {
		ipath_dev_err(dd, "Couldn't find HyperTransport "
			      "capability; no interrupts\n");
		ret = -ENODEV;
		goto bail;
	}
	do {
		u8 cap_type;

		/* the HT capability type byte is 3 bytes after the
		 * capability byte.
		 */
		if (pci_read_config_byte(pdev, pos + 3, &cap_type)) {
			/* continue jumps to the while condition below */
			dev_info(&pdev->dev, "Couldn't read config "
				 "command @ %d\n", pos);
			continue;
		}
		/* type bits 7:5 == 0 means slave/primary block */
		if (!(cap_type & 0xE0))
			slave_or_pri_blk(dd, pdev, pos, cap_type);
		else if (cap_type == HT_INTR_DISC_CONFIG)
			ihandler = set_int_handler(dd, pdev, pos);
	} while ((pos = pci_find_next_capability(pdev, pos,
						 HT_CAPABILITY_ID)));

	if (!ihandler) {
		ipath_dev_err(dd, "Couldn't find interrupt handler in "
			      "config space\n");
		ret = -ENODEV;
	}

bail:
	return ret;
}
1004
/**
 * ipath_setup_ht_cleanup - clean up any per-chip chip-specific stuff
 * @dd: the infinipath device
 *
 * Called during driver unload.
 * This is currently a nop for the HT-400, not for all chips
 */
static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
{
	/* nothing chip-specific to release for the HT-400 */
}
1015
1016/**
1017 * ipath_setup_ht_setextled - set the state of the two external LEDs
1018 * @dd: the infinipath device
1019 * @lst: the L state
1020 * @ltst: the LT state
1021 *
1022 * Set the state of the two external LEDs, to indicate physical and
1023 * logical state of IB link. For this chip (at least with recommended
1024 * board pinouts), LED1 is Green (physical state), and LED2 is Yellow
1025 * (logical state)
1026 *
1027 * Note: We try to match the Mellanox HCA LED behavior as best
1028 * we can. Green indicates physical link state is OK (something is
1029 * plugged in, and we can train).
1030 * Amber indicates the link is logically up (ACTIVE).
1031 * Mellanox further blinks the amber LED to indicate data packet
1032 * activity, but we have no hardware support for that, so it would
1033 * require waking up every 10-20 msecs and checking the counters
1034 * on the chip, and then turning the LED off if appropriate. That's
1035 * visible overhead, so not something we will do.
1036 *
1037 */
1038static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
1039 u64 lst, u64 ltst)
1040{
1041 u64 extctl;
1042
1043 /* the diags use the LED to indicate diag info, so we leave
1044 * the external LED alone when the diags are running */
1045 if (ipath_diag_inuse)
1046 return;
1047
1048 /*
1049 * start by setting both LED control bits to off, then turn
1050 * on the appropriate bit(s).
1051 */
1052 if (dd->ipath_boardrev == 8) { /* LS/X-1 uses different pins */
1053 /*
1054 * major difference is that INFINIPATH_EXTC_LEDGBLERR_OFF
1055 * is inverted, because it is normally used to indicate
1056 * a hardware fault at reset, if there were errors
1057 */
1058 extctl = (dd->ipath_extctrl & ~INFINIPATH_EXTC_LEDGBLOK_ON)
1059 | INFINIPATH_EXTC_LEDGBLERR_OFF;
1060 if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
1061 extctl &= ~INFINIPATH_EXTC_LEDGBLERR_OFF;
1062 if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
1063 extctl |= INFINIPATH_EXTC_LEDGBLOK_ON;
1064 }
1065 else {
1066 extctl = dd->ipath_extctrl &
1067 ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
1068 INFINIPATH_EXTC_LED2PRIPORT_ON);
1069 if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
1070 extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
1071 if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
1072 extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
1073 }
1074 dd->ipath_extctrl = extctl;
1075 ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
1076}
1077
/*
 * Set the HT-400-specific values of the module-global GPIO numbers
 * and the interrupt/error/hwerror "bits extant" masks used by the
 * chip-independent driver code.
 */
static void ipath_init_ht_variables(void)
{
	/* GPIO pins used for the EEPROM I2C interface */
	ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
	ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
	ipath_gpio_sda = IPATH_GPIO_SDA;
	ipath_gpio_scl = IPATH_GPIO_SCL;

	/* all interrupt-status bits this chip can set */
	infinipath_i_bitsextant =
		(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
		(INFINIPATH_I_RCVAVAIL_MASK <<
		 INFINIPATH_I_RCVAVAIL_SHIFT) |
		INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
		INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;

	/* all error-status bits this chip can set */
	infinipath_e_bitsextant =
		INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
		INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
		INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
		INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
		INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
		INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
		INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
		INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
		INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
		INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
		INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
		INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
		INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
		INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
		INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
		INFINIPATH_E_HARDWARE;

	/* all hardware-error-status bits this chip can set */
	infinipath_hwe_bitsextant =
		(INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
		 INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
		(INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
		 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
		(INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
		 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
		INFINIPATH_HWE_HTCLNKABYTE0CRCERR |
		INFINIPATH_HWE_HTCLNKABYTE1CRCERR |
		INFINIPATH_HWE_HTCLNKBBYTE0CRCERR |
		INFINIPATH_HWE_HTCLNKBBYTE1CRCERR |
		INFINIPATH_HWE_HTCMISCERR4 |
		INFINIPATH_HWE_HTCMISCERR5 | INFINIPATH_HWE_HTCMISCERR6 |
		INFINIPATH_HWE_HTCMISCERR7 |
		INFINIPATH_HWE_HTCBUSTREQPARITYERR |
		INFINIPATH_HWE_HTCBUSTRESPPARITYERR |
		INFINIPATH_HWE_HTCBUSIREQPARITYERR |
		INFINIPATH_HWE_RXDSYNCMEMPARITYERR |
		INFINIPATH_HWE_MEMBISTFAILED |
		INFINIPATH_HWE_COREPLL_FBSLIP |
		INFINIPATH_HWE_COREPLL_RFSLIP |
		INFINIPATH_HWE_HTBPLL_FBSLIP |
		INFINIPATH_HWE_HTBPLL_RFSLIP |
		INFINIPATH_HWE_HTAPLL_FBSLIP |
		INFINIPATH_HWE_HTAPLL_RFSLIP |
		INFINIPATH_HWE_SERDESPLLFAILED |
		INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
		INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;

	infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
	infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
}
1142
/**
 * ipath_ht_init_hwerrors - enable hardware errors
 * @dd: the infinipath device
 *
 * now that we have finished initializing everything that might reasonably
 * cause a hardware error, and cleared those errors bits as they occur,
 * we can enable hardware errors in the mask (potentially enabling
 * freeze mode), and enable hardware errors as errors (along with
 * everything else) in errormask
 */
static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
{
	ipath_err_t val;
	u64 extsval;

	extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);

	if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
		ipath_dev_err(dd, "MemBIST did not complete!\n");

	/* clear any stale HT link errors before enabling error reporting */
	ipath_check_htlink(dd);

	/* barring bugs, all hwerrors become interrupts, which can */
	val = -1LL;
	/* don't look at crc lane1 if 8 bit */
	if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
		val &= ~infinipath_hwe_htclnkabyte1crcerr;
	/* don't look at crc lane1 if 8 bit */
	if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
		val &= ~infinipath_hwe_htclnkbbyte1crcerr;

	/*
	 * disable RXDSYNCMEMPARITY because external serdes is unused,
	 * and therefore the logic will never be used or initialized,
	 * and uninitialized state will normally result in this error
	 * being asserted. Similarly for the external serdes pll
	 * lock signal.
	 */
	val &= ~(INFINIPATH_HWE_SERDESPLLFAILED |
		 INFINIPATH_HWE_RXDSYNCMEMPARITYERR);

	/*
	 * Disable MISCERR4 because of an inversion in the HT core
	 * logic checking for errors that cause this bit to be set.
	 * The errata can also cause the protocol error bit to be set
	 * in the HT config space linkerror register(s).
	 */
	val &= ~INFINIPATH_HWE_HTCMISCERR4;

	/*
	 * PLL ignored because MDIO interface has a logic problem
	 * for reads, on Comstock and Ponderosa. BRINGUP
	 */
	if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9)
		val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
	dd->ipath_hwerrmask = val;
}
1200
/**
 * ipath_ht_bringup_serdes - bring up the serdes
 * @dd: the infinipath device
 *
 * Sequences the serdes out of reset: PLL reset first (held for at
 * least 15us), then lane resets and tx idle are released, the XGXS
 * MDIO address and reset are fixed up, and the drive current /
 * de-emphasis values are programmed.  Always returns 0.
 */
static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
{
	u64 val, config1;
	int ret = 0, change = 0;

	ipath_dbg("Trying to bringup serdes\n");

	if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
	    INFINIPATH_HWE_SERDESPLLFAILED)
	{
		ipath_dbg("At start, serdes PLL failed bit set in "
			  "hwerrstatus, clearing and continuing\n");
		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
				 INFINIPATH_HWE_SERDESPLLFAILED);
	}

	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
	config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);

	ipath_cdbg(VERBOSE, "Initial serdes status is config0=%llx "
		   "config1=%llx, sstatus=%llx xgxs %llx\n",
		   (unsigned long long) val, (unsigned long long) config1,
		   (unsigned long long)
		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
		   (unsigned long long)
		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));

	/* force reset on */
	val |= INFINIPATH_SERDC0_RESET_PLL
		/* | INFINIPATH_SERDC0_RESET_MASK */
		;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
	udelay(15);		/* need pll reset set at least for a bit */

	if (val & INFINIPATH_SERDC0_RESET_PLL) {
		/* note: the &= also clears the bit in val itself */
		u64 val2 = val &= ~INFINIPATH_SERDC0_RESET_PLL;
		/* set lane resets, and tx idle, during pll reset */
		val2 |= INFINIPATH_SERDC0_RESET_MASK |
			INFINIPATH_SERDC0_TXIDLE;
		ipath_cdbg(VERBOSE, "Clearing serdes PLL reset (writing "
			   "%llx)\n", (unsigned long long) val2);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
				 val2);
		/*
		 * be sure chip saw it (read flushes the write)
		 */
		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
		/*
		 * need pll reset clear at least 11 usec before lane
		 * resets cleared; give it a few more
		 */
		udelay(15);
		val = val2;	/* for check below */
	}

	if (val & (INFINIPATH_SERDC0_RESET_PLL |
		   INFINIPATH_SERDC0_RESET_MASK |
		   INFINIPATH_SERDC0_TXIDLE)) {
		val &= ~(INFINIPATH_SERDC0_RESET_PLL |
			 INFINIPATH_SERDC0_RESET_MASK |
			 INFINIPATH_SERDC0_TXIDLE);
		/* clear them */
		ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
				 val);
	}

	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
	if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) &
	     INFINIPATH_XGXS_MDIOADDR_MASK) != 3) {
		val &= ~(INFINIPATH_XGXS_MDIOADDR_MASK <<
			 INFINIPATH_XGXS_MDIOADDR_SHIFT);
		/*
		 * we use address 3
		 */
		val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT;
		change = 1;
	}
	if (val & INFINIPATH_XGXS_RESET) {
		/* normally true after boot */
		val &= ~INFINIPATH_XGXS_RESET;
		change = 1;
	}
	if (change)
		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);

	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);

	/* clear current and de-emphasis bits */
	config1 &= ~0x0ffffffff00ULL;
	/* set current to 20ma */
	config1 |= 0x00000000000ULL;
	/* set de-emphasis to -5.68dB */
	config1 |= 0x0cccc000000ULL;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1);

	ipath_cdbg(VERBOSE, "After setup: serdes status is config0=%llx "
		   "config1=%llx, sstatus=%llx xgxs %llx\n",
		   (unsigned long long) val, (unsigned long long) config1,
		   (unsigned long long)
		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
		   (unsigned long long)
		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));

	/* read back XGXS status via MDIO, purely for debug visibility */
	if (!ipath_waitfor_mdio_cmdready(dd)) {
		ipath_write_kreg(dd, dd->ipath_kregs->kr_mdio,
				 ipath_mdio_req(IPATH_MDIO_CMD_READ, 31,
						IPATH_MDIO_CTRL_XGXS_REG_8,
						0));
		if (ipath_waitfor_complete(dd, dd->ipath_kregs->kr_mdio,
					   IPATH_MDIO_DATAVALID, &val))
			ipath_dbg("Never got MDIO data for XGXS status "
				  "read\n");
		else
			ipath_cdbg(VERBOSE, "MDIO Read reg8, "
				   "'bank' 31 %x\n", (u32) val);
	} else
		ipath_dbg("Never got MDIO cmdready for XGXS status read\n");

	return ret;		/* for now, say we always succeeded */
}
1325
1326/**
1327 * ipath_ht_quiet_serdes - set serdes to txidle
1328 * @dd: the infinipath device
1329 * driver is being unloaded
1330 */
1331static void ipath_ht_quiet_serdes(struct ipath_devdata *dd)
1332{
1333 u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
1334
1335 val |= INFINIPATH_SERDC0_TXIDLE;
1336 ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
1337 (unsigned long long) val);
1338 ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
1339}
1340
1341static int ipath_ht_intconfig(struct ipath_devdata *dd)
1342{
1343 int ret;
1344
1345 if (!dd->ipath_intconfig) {
1346 ipath_dev_err(dd, "No interrupts enabled, couldn't setup "
1347 "interrupt address\n");
1348 ret = 1;
1349 goto bail;
1350 }
1351
1352 ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig,
1353 dd->ipath_intconfig); /* interrupt address */
1354 ret = 0;
1355
1356bail:
1357 return ret;
1358}
1359
/**
 * ipath_ht_put_tid - write a TID in chip
 * @dd: the infinipath device
 * @tidptr: pointer to the expected TID (in chip) to update
 * @type: 0 for eager, 1 for expected
 * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
 *
 * This exists as a separate routine to allow for special locking etc.
 * It's used for both the full cleanup on exit, as well as the normal
 * setup and teardown.
 */
static void ipath_ht_put_tid(struct ipath_devdata *dd,
			     u64 __iomem *tidptr, u32 type,
			     unsigned long pa)
{
	if (pa != dd->ipath_tidinvalid) {
		/* chip decodes only 40 address bits; warn and truncate */
		if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
			dev_info(&dd->pcidev->dev,
				 "physaddr %lx has more than "
				 "40 bits, using only 40!!!\n", pa);
			pa &= INFINIPATH_RT_ADDR_MASK;
		}
		if (type == 0)
			pa |= dd->ipath_tidtemplate;
		else {
			/* in words (fixed, full page). */
			u64 lenvalid = PAGE_SIZE >> 2;
			lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT;
			pa |= lenvalid | INFINIPATH_RT_VALID;
		}
	}
	/* skip the MMIO write if the chip is no longer mapped */
	if (dd->ipath_kregbase)
		writeq(pa, tidptr);
}
1394
1395/**
1396 * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager
1397 * @dd: the infinipath device
1398 * @port: the port
1399 *
1400 * Used from ipath_close(), and at chip initialization.
1401 */
1402static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
1403{
1404 u64 __iomem *tidbase;
1405 int i;
1406
1407 if (!dd->ipath_kregbase)
1408 return;
1409
1410 ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
1411
1412 /*
1413 * need to invalidate all of the expected TID entries for this
1414 * port, so we don't have valid entries that might somehow get
1415 * used (early in next use of this port, or through some bug)
1416 */
1417 tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
1418 dd->ipath_rcvtidbase +
1419 port * dd->ipath_rcvtidcnt *
1420 sizeof(*tidbase));
1421 for (i = 0; i < dd->ipath_rcvtidcnt; i++)
1422 ipath_ht_put_tid(dd, &tidbase[i], 1, dd->ipath_tidinvalid);
1423
1424 tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
1425 dd->ipath_rcvegrbase +
1426 port * dd->ipath_rcvegrcnt *
1427 sizeof(*tidbase));
1428
1429 for (i = 0; i < dd->ipath_rcvegrcnt; i++)
1430 ipath_ht_put_tid(dd, &tidbase[i], 0, dd->ipath_tidinvalid);
1431}
1432
/**
 * ipath_ht_tidtemplate - setup constants for TID updates
 * @dd: the infinipath device
 *
 * We setup stuff that we use a lot, to avoid calculating each time
 */
static void ipath_ht_tidtemplate(struct ipath_devdata *dd)
{
	/* buffer-size field of the TID is in words, hence the >> 2 */
	dd->ipath_tidtemplate = dd->ipath_ibmaxlen >> 2;
	dd->ipath_tidtemplate <<= INFINIPATH_RT_BUFSIZE_SHIFT;
	dd->ipath_tidtemplate |= INFINIPATH_RT_VALID;

	/*
	 * work around chip errata bug 7358, by marking invalid tids
	 * as having max length
	 */
	dd->ipath_tidinvalid = (-1LL & INFINIPATH_RT_BUFSIZE_MASK) <<
		INFINIPATH_RT_BUFSIZE_SHIFT;
}
1452
1453static int ipath_ht_early_init(struct ipath_devdata *dd)
1454{
1455 u32 __iomem *piobuf;
1456 u32 pioincr, val32, egrsize;
1457 int i;
1458
1459 /*
1460 * one cache line; long IB headers will spill over into received
1461 * buffer
1462 */
1463 dd->ipath_rcvhdrentsize = 16;
1464 dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
1465
1466 /*
1467 * For HT-400, we allocate a somewhat overly large eager buffer,
1468 * such that we can guarantee that we can receive the largest
1469 * packet that we can send out. To truly support a 4KB MTU,
1470 * we need to bump this to a large value. To date, other than
1471 * testing, we have never encountered an HCA that can really
1472 * send 4KB MTU packets, so we do not handle that (we'll get
1473 * errors interrupts if we ever see one).
1474 */
1475 dd->ipath_rcvegrbufsize = dd->ipath_piosize2k;
1476 egrsize = dd->ipath_rcvegrbufsize;
1477
1478 /*
1479 * the min() check here is currently a nop, but it may not
1480 * always be, depending on just how we do ipath_rcvegrbufsize
1481 */
1482 dd->ipath_ibmaxlen = min(dd->ipath_piosize2k,
1483 dd->ipath_rcvegrbufsize);
1484 dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
1485 ipath_ht_tidtemplate(dd);
1486
1487 /*
1488 * zero all the TID entries at startup. We do this for sanity,
1489 * in case of a previous driver crash of some kind, and also
1490 * because the chip powers up with these memories in an unknown
1491 * state. Use portcnt, not cfgports, since this is for the
1492 * full chip, not for current (possibly different) configuration
1493 * value.
1494 * Chip Errata bug 6447
1495 */
1496 for (val32 = 0; val32 < dd->ipath_portcnt; val32++)
1497 ipath_ht_clear_tids(dd, val32);
1498
1499 /*
1500 * write the pbc of each buffer, to be sure it's initialized, then
1501 * cancel all the buffers, and also abort any packets that might
1502 * have been in flight for some reason (the latter is for driver
1503 * unload/reload, but isn't a bad idea at first init). PIO send
1504 * isn't enabled at this point, so there is no danger of sending
1505 * these out on the wire.
1506 * Chip Errata bug 6610
1507 */
1508 piobuf = (u32 __iomem *) (((char __iomem *)(dd->ipath_kregbase)) +
1509 dd->ipath_piobufbase);
1510 pioincr = dd->ipath_palign / sizeof(*piobuf);
1511 for (i = 0; i < dd->ipath_piobcnt2k; i++) {
1512 /*
1513 * reasonable word count, just to init pbc
1514 */
1515 writel(16, piobuf);
1516 piobuf += pioincr;
1517 }
1518 /*
1519 * self-clearing
1520 */
1521 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1522 INFINIPATH_S_ABORT);
1523 return 0;
1524}
1525
/**
 * ipath_ht_get_base_info - set chip-specific flags for user code
 * @pd: the infinipath port
 * @kbase: ipath_base_info pointer
 *
 * We set the HT runtime flag (this is the HyperTransport chip, not
 * PCIe), plus the rcvhdr-copy flag, so user code can adapt its packet
 * handling.  Always returns 0.
 */
static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
{
	struct ipath_base_info *kinfo = kbase;

	kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT |
		IPATH_RUNTIME_RCVHDR_COPY;

	return 0;
}
1543
1544/**
1545 * ipath_init_ht400_funcs - set up the chip-specific function pointers
1546 * @dd: the infinipath device
1547 *
1548 * This is global, and is called directly at init to set up the
1549 * chip-specific function pointers for later use.
1550 */
1551void ipath_init_ht400_funcs(struct ipath_devdata *dd)
1552{
1553 dd->ipath_f_intrsetup = ipath_ht_intconfig;
1554 dd->ipath_f_bus = ipath_setup_ht_config;
1555 dd->ipath_f_reset = ipath_setup_ht_reset;
1556 dd->ipath_f_get_boardname = ipath_ht_boardname;
1557 dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors;
1558 dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors;
1559 dd->ipath_f_early_init = ipath_ht_early_init;
1560 dd->ipath_f_handle_hwerrors = ipath_ht_handle_hwerrors;
1561 dd->ipath_f_quiet_serdes = ipath_ht_quiet_serdes;
1562 dd->ipath_f_bringup_serdes = ipath_ht_bringup_serdes;
1563 dd->ipath_f_clear_tids = ipath_ht_clear_tids;
1564 dd->ipath_f_put_tid = ipath_ht_put_tid;
1565 dd->ipath_f_cleanup = ipath_setup_ht_cleanup;
1566 dd->ipath_f_setextled = ipath_setup_ht_setextled;
1567 dd->ipath_f_get_base_info = ipath_ht_get_base_info;
1568
1569 /*
1570 * initialize chip-specific variables
1571 */
1572 dd->ipath_f_tidtemplate = ipath_ht_tidtemplate;
1573
1574 /*
1575 * setup the register offsets, since they are different for each
1576 * chip
1577 */
1578 dd->ipath_kregs = &ipath_ht_kregs;
1579 dd->ipath_cregs = &ipath_ht_cregs;
1580
1581 /*
1582 * do very early init that is needed before ipath_f_bus is
1583 * called
1584 */
1585 ipath_init_ht_variables();
1586}
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
new file mode 100644
index 000000000000..2823ff9c0c62
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -0,0 +1,951 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/pci.h>
34#include <linux/netdevice.h>
35#include <linux/vmalloc.h>
36
37#include "ipath_kernel.h"
38#include "ips_common.h"
39
/*
 * min buffers we want to have per port, after driver
 */
#define IPATH_MIN_USER_PORT_BUFCNT 8

/*
 * Number of ports we are configured to use (to allow for more pio
 * buffers per port, etc.)  Zero means use chip value.
 */
static ushort ipath_cfgports;

module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO);
MODULE_PARM_DESC(cfgports, "Set max number of ports to use");

/*
 * Number of buffers reserved for driver (layered drivers and SMA
 * send).  Reserved at end of buffer list.
 */
static ushort ipath_kpiobufs = 32;

/* custom "set" hook for the writable kpiobufs parameter, defined later */
static int ipath_set_kpiobufs(const char *val, struct kernel_param *kp);

module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_uint,
		  &ipath_kpiobufs, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
65
66/**
67 * create_port0_egr - allocate the eager TID buffers
68 * @dd: the infinipath device
69 *
70 * This code is now quite different for user and kernel, because
71 * the kernel uses skb's, for the accelerated network performance.
72 * This is the kernel (port0) version.
73 *
74 * Allocate the eager TID buffers and program them into infinipath.
75 * We use the network layer alloc_skb() allocator to allocate the
76 * memory, and either use the buffers as is for things like SMA
77 * packets, or pass the buffers up to the ipath layered driver and
78 * thence the network layer, replacing them as we do so (see
79 * ipath_rcv_layer()).
80 */
81static int create_port0_egr(struct ipath_devdata *dd)
82{
83 unsigned e, egrcnt;
84 struct sk_buff **skbs;
85 int ret;
86
87 egrcnt = dd->ipath_rcvegrcnt;
88
89 skbs = vmalloc(sizeof(*dd->ipath_port0_skbs) * egrcnt);
90 if (skbs == NULL) {
91 ipath_dev_err(dd, "allocation error for eager TID "
92 "skb array\n");
93 ret = -ENOMEM;
94 goto bail;
95 }
96 for (e = 0; e < egrcnt; e++) {
97 /*
98 * This is a bit tricky in that we allocate extra
99 * space for 2 bytes of the 14 byte ethernet header.
100 * These two bytes are passed in the ipath header so
101 * the rest of the data is word aligned. We allocate
102 * 4 bytes so that the data buffer stays word aligned.
103 * See ipath_kreceive() for more details.
104 */
105 skbs[e] = ipath_alloc_skb(dd, GFP_KERNEL);
106 if (!skbs[e]) {
107 ipath_dev_err(dd, "SKB allocation error for "
108 "eager TID %u\n", e);
109 while (e != 0)
110 dev_kfree_skb(skbs[--e]);
111 ret = -ENOMEM;
112 goto bail;
113 }
114 }
115 /*
116 * After loop above, so we can test non-NULL to see if ready
117 * to use at receive, etc.
118 */
119 dd->ipath_port0_skbs = skbs;
120
121 for (e = 0; e < egrcnt; e++) {
122 unsigned long phys =
123 virt_to_phys(dd->ipath_port0_skbs[e]->data);
124 dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
125 ((char __iomem *) dd->ipath_kregbase +
126 dd->ipath_rcvegrbase), 0, phys);
127 }
128
129 ret = 0;
130
131bail:
132 return ret;
133}
134
/*
 * bringup_link - configure the IB link-control register and bring up
 * the SerDes, then enable the IB controller if SerDes init succeeded.
 *
 * Returns 0 on success, or the SerDes bringup error code.
 */
static int bringup_link(struct ipath_devdata *dd)
{
	u64 val, ibc;
	int ret = 0;

	/* hold IBC in reset */
	dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
			 dd->ipath_control);

	/*
	 * Note that prior to try 14 or 15 of IB, the credit scaling
	 * wasn't working, because it was swapped for writes with the
	 * 1 bit default linkstate field
	 */

	/* ignore pbc and align word */
	val = dd->ipath_piosize2k - 2 * sizeof(u32);
	/*
	 * for ICRC, which we only send in diag test pkt mode, and we
	 * don't need to worry about that for mtu
	 */
	val += 1;
	/*
	 * Set the IBC maxpktlength to the size of our pio buffers the
	 * maxpktlength is in words.  This is *not* the IB data MTU.
	 */
	ibc = (val / sizeof(u32)) << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
	/* flow control watermark, in KB */
	ibc |= 0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT;
	/*
	 * How often flowctrl sent.  More or less in usecs; balance against
	 * watermark value, so that in theory senders always get a flow
	 * control update in time to not let the IB link go idle.
	 */
	ibc |= 0x3ULL << INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT;
	/* max error tolerance */
	ibc |= 0xfULL << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
	/* use "real" buffer space for IB credit flow control */
	ibc |= 4ULL << INFINIPATH_IBCC_CREDITSCALE_SHIFT;
	/* overrun error threshold */
	ibc |= 0xfULL << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
	/* initially come up waiting for TS1, without sending anything. */
	dd->ipath_ibcctrl = ibc;
	/*
	 * Want to start out with both LINKCMD and LINKINITCMD in NOP
	 * (0 and 0).  Don't put linkinitcmd in ipath_ibcctrl, want that
	 * to stay a NOP
	 */
	ibc |= INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
		INFINIPATH_IBCC_LINKINITCMD_SHIFT;
	ipath_cdbg(VERBOSE, "Writing 0x%llx to ibcctrl\n",
		   (unsigned long long) ibc);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, ibc);

	/* be sure chip saw it */
	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);

	ret = dd->ipath_f_bringup_serdes(dd);

	if (ret)
		dev_info(&dd->pcidev->dev, "Could not initialize SerDes, "
			 "not usable\n");
	else {
		/* enable IBC */
		dd->ipath_control |= INFINIPATH_C_LINKENABLE;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
				 dd->ipath_control);
	}

	return ret;
}
207
/*
 * init_chip_first - one-time chip setup: port config and geometry
 * @dd: the infinipath device
 * @pdp: output for the port 0 port data
 *
 * Decides how many ports to configure, allocates the per-port arrays
 * and the port 0 portdata, then reads the chip's TID, eager-buffer,
 * and PIO-buffer geometry into the device data.
 * Returns 0, or -ENOMEM on allocation failure.
 */
static int init_chip_first(struct ipath_devdata *dd,
			   struct ipath_portdata **pdp)
{
	struct ipath_portdata *pd = NULL;
	int ret = 0;
	u64 val;

	/*
	 * Port count comes from the chip; the cfgports module parameter
	 * may lower it (never raise it above the chip value).
	 */
	dd->ipath_portcnt =
		ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
	if (!ipath_cfgports)
		dd->ipath_cfgports = dd->ipath_portcnt;
	else if (ipath_cfgports <= dd->ipath_portcnt) {
		dd->ipath_cfgports = ipath_cfgports;
		ipath_dbg("Configured to use %u ports out of %u in chip\n",
			  dd->ipath_cfgports, dd->ipath_portcnt);
	} else {
		dd->ipath_cfgports = dd->ipath_portcnt;
		ipath_dbg("Tried to configured to use %u ports; chip "
			  "only supports %u\n", ipath_cfgports,
			  dd->ipath_portcnt);
	}
	dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_cfgports,
			       GFP_KERNEL);

	if (!dd->ipath_pd) {
		ipath_dev_err(dd, "Unable to allocate portdata array, "
			      "failing\n");
		ret = -ENOMEM;
		goto done;
	}

	/*
	 * NOTE(review): on the error paths below, earlier allocations are
	 * not freed here — presumably the caller's teardown handles them;
	 * verify against the unwind code elsewhere in the driver.
	 */
	dd->ipath_lastegrheads = kzalloc(sizeof(*dd->ipath_lastegrheads)
					 * dd->ipath_cfgports,
					 GFP_KERNEL);
	dd->ipath_lastrcvhdrqtails =
		kzalloc(sizeof(*dd->ipath_lastrcvhdrqtails)
			* dd->ipath_cfgports, GFP_KERNEL);

	if (!dd->ipath_lastegrheads || !dd->ipath_lastrcvhdrqtails) {
		ipath_dev_err(dd, "Unable to allocate head arrays, "
			      "failing\n");
		ret = -ENOMEM;
		goto done;
	}

	dd->ipath_pd[0] = kzalloc(sizeof(*pd), GFP_KERNEL);

	if (!dd->ipath_pd[0]) {
		ipath_dev_err(dd, "Unable to allocate portdata for port "
			      "0, failing\n");
		ret = -ENOMEM;
		goto done;
	}
	pd = dd->ipath_pd[0];
	pd->port_dd = dd;
	pd->port_port = 0;
	pd->port_cnt = 1;
	/* The port 0 pkey table is used by the layer interface. */
	pd->port_pkeys[0] = IPS_DEFAULT_P_KEY;
	/* read the chip's receive TID / eager buffer geometry */
	dd->ipath_rcvtidcnt =
		ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
	dd->ipath_rcvtidbase =
		ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
	dd->ipath_rcvegrcnt =
		ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
	dd->ipath_rcvegrbase =
		ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
	dd->ipath_palign =
		ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
	dd->ipath_piobufbase =
		ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufbase);
	/* low 32 bits hold the 2k values, high 32 bits the 4k values */
	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize);
	dd->ipath_piosize2k = val & ~0U;
	dd->ipath_piosize4k = val >> 32;
	dd->ipath_ibmtu = 4096;	/* default to largest legal MTU */
	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt);
	dd->ipath_piobcnt2k = val & ~0U;
	dd->ipath_piobcnt4k = val >> 32;
	dd->ipath_pio2kbase =
		(u32 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
				 (dd->ipath_piobufbase & 0xffffffff));
	if (dd->ipath_piobcnt4k) {
		dd->ipath_pio4kbase = (u32 __iomem *)
			(((char __iomem *) dd->ipath_kregbase) +
			 (dd->ipath_piobufbase >> 32));
		/*
		 * 4K buffers take 2 pages; we use roundup just to be
		 * paranoid; we calculate it once here, rather than on
		 * every buf allocate
		 */
		dd->ipath_4kalign = ALIGN(dd->ipath_piosize4k,
					  dd->ipath_palign);
		ipath_dbg("%u 2k(%x) piobufs @ %p, %u 4k(%x) @ %p "
			  "(%x aligned)\n",
			  dd->ipath_piobcnt2k, dd->ipath_piosize2k,
			  dd->ipath_pio2kbase, dd->ipath_piobcnt4k,
			  dd->ipath_piosize4k, dd->ipath_pio4kbase,
			  dd->ipath_4kalign);
	}
	else ipath_dbg("%u 2k piobufs @ %p\n",
		       dd->ipath_piobcnt2k, dd->ipath_pio2kbase);

	spin_lock_init(&dd->ipath_tid_lock);

done:
	*pdp = pd;
	return ret;
}
322
/**
 * init_chip_reset - re-initialize after a reset, or enable
 * @dd: the infinipath device
 * @pdp: output for port data
 *
 * Sanity check at least some of the values after reset, and
 * ensure no receive or transmit (explicitly, in case reset
 * failed).  Unlike init_chip_first(), does no allocation; if the
 * chip reports different geometry after the reset, the original
 * values are kept (and the difference reported).  Always returns 0.
 */
static int init_chip_reset(struct ipath_devdata *dd,
			   struct ipath_portdata **pdp)
{
	struct ipath_portdata *pd;
	u32 rtmp;

	*pdp = pd = dd->ipath_pd[0];
	/* ensure chip does no sends or receives while we re-initialize */
	dd->ipath_control = dd->ipath_sendctrl = dd->ipath_rcvctrl = 0U;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 0);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0);

	/* report (but ignore) any geometry changes across the reset */
	rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
	if (dd->ipath_portcnt != rtmp)
		dev_info(&dd->pcidev->dev, "portcnt was %u before "
			 "reset, now %u, using original\n",
			 dd->ipath_portcnt, rtmp);
	rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
	if (rtmp != dd->ipath_rcvtidcnt)
		dev_info(&dd->pcidev->dev, "tidcnt was %u before "
			 "reset, now %u, using original\n",
			 dd->ipath_rcvtidcnt, rtmp);
	rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
	if (rtmp != dd->ipath_rcvtidbase)
		dev_info(&dd->pcidev->dev, "tidbase was %u before "
			 "reset, now %u, using original\n",
			 dd->ipath_rcvtidbase, rtmp);
	rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
	if (rtmp != dd->ipath_rcvegrcnt)
		dev_info(&dd->pcidev->dev, "egrcnt was %u before "
			 "reset, now %u, using original\n",
			 dd->ipath_rcvegrcnt, rtmp);
	rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
	if (rtmp != dd->ipath_rcvegrbase)
		dev_info(&dd->pcidev->dev, "egrbase was %u before "
			 "reset, now %u, using original\n",
			 dd->ipath_rcvegrbase, rtmp);

	return 0;
}
373
/*
 * init_pioavailregs - allocate the DMA area the chip updates with
 * PIO-buffer-available bits, and carve out the status/freeze-message
 * area that follows it.
 *
 * Returns 0, -ENOMEM on allocation failure, or -ENODEV if this unit
 * number doesn't fit in the shared port 0 rcvhdrtail buffer.
 */
static int init_pioavailregs(struct ipath_devdata *dd)
{
	int ret;

	dd->ipath_pioavailregs_dma = dma_alloc_coherent(
		&dd->pcidev->dev, PAGE_SIZE, &dd->ipath_pioavailregs_phys,
		GFP_KERNEL);
	if (!dd->ipath_pioavailregs_dma) {
		ipath_dev_err(dd, "failed to allocate PIOavail reg area "
			      "in memory\n");
		ret = -ENOMEM;
		goto done;
	}

	/*
	 * we really want L2 cache aligned, but for current CPUs of
	 * interest, they are the same.
	 * NOTE(review): "& ~L1_CACHE_BYTES" clears only a single bit;
	 * an alignment round-down would normally be
	 * "& ~(L1_CACHE_BYTES - 1)".  Confirm the intended mask.
	 */
	dd->ipath_statusp = (u64 *)
		((char *)dd->ipath_pioavailregs_dma +
		 ((2 * L1_CACHE_BYTES +
		   dd->ipath_pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES));
	/* copy the current value now that it's really allocated */
	*dd->ipath_statusp = dd->_ipath_status;
	/*
	 * setup buffer to hold freeze msg, accessible to apps,
	 * following statusp
	 */
	dd->ipath_freezemsg = (char *)&dd->ipath_statusp[1];
	/* and its length */
	dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]);

	/* each unit gets a 64-byte slice of the shared rcvhdrtail page */
	if (dd->ipath_unit * 64 > (IPATH_PORT0_RCVHDRTAIL_SIZE - 64)) {
		ipath_dev_err(dd, "unit %u too large for port 0 "
			      "rcvhdrtail buffer size\n", dd->ipath_unit);
		ret = -ENODEV;
	}
	else
		ret = 0;

	/* so we can get current tail in ipath_kreceive(), per chip */
	dd->ipath_hdrqtailptr = &ipath_port0_rcvhdrtail[
		dd->ipath_unit * (64 / sizeof(*ipath_port0_rcvhdrtail))];
done:
	return ret;
}
420
421/**
422 * init_shadow_tids - allocate the shadow TID array
423 * @dd: the infinipath device
424 *
425 * allocate the shadow TID array, so we can ipath_munlock previous
426 * entries. It may make more sense to move the pageshadow to the
427 * port data structure, so we only allocate memory for ports actually
428 * in use, since we at 8k per port, now.
429 */
430static void init_shadow_tids(struct ipath_devdata *dd)
431{
432 dd->ipath_pageshadow = (struct page **)
433 vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
434 sizeof(struct page *));
435 if (!dd->ipath_pageshadow)
436 ipath_dev_err(dd, "failed to allocate shadow page * "
437 "array, no expected sends!\n");
438 else
439 memset(dd->ipath_pageshadow, 0,
440 dd->ipath_cfgports * dd->ipath_rcvtidcnt *
441 sizeof(struct page *));
442}
443
444static void enable_chip(struct ipath_devdata *dd,
445 struct ipath_portdata *pd, int reinit)
446{
447 u32 val;
448 int i;
449
450 if (!reinit) {
451 init_waitqueue_head(&ipath_sma_state_wait);
452 }
453 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
454 dd->ipath_rcvctrl);
455
456 /* Enable PIO send, and update of PIOavail regs to memory. */
457 dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE |
458 INFINIPATH_S_PIOBUFAVAILUPD;
459 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
460 dd->ipath_sendctrl);
461
462 /*
463 * enable port 0 receive, and receive interrupt. other ports
464 * done as user opens and inits them.
465 */
466 dd->ipath_rcvctrl = INFINIPATH_R_TAILUPD |
467 (1ULL << INFINIPATH_R_PORTENABLE_SHIFT) |
468 (1ULL << INFINIPATH_R_INTRAVAIL_SHIFT);
469 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
470 dd->ipath_rcvctrl);
471
472 /*
473 * now ready for use. this should be cleared whenever we
474 * detect a reset, or initiate one.
475 */
476 dd->ipath_flags |= IPATH_INITTED;
477
478 /*
479 * init our shadow copies of head from tail values, and write
480 * head values to match.
481 */
482 val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0);
483 (void)ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0);
484 dd->ipath_port0head = ipath_read_ureg32(dd, ur_rcvhdrtail, 0);
485
486 /* Initialize so we interrupt on next packet received */
487 (void)ipath_write_ureg(dd, ur_rcvhdrhead,
488 dd->ipath_rhdrhead_intr_off |
489 dd->ipath_port0head, 0);
490
491 /*
492 * by now pioavail updates to memory should have occurred, so
493 * copy them into our working/shadow registers; this is in
494 * case something went wrong with abort, but mostly to get the
495 * initial values of the generation bit correct.
496 */
497 for (i = 0; i < dd->ipath_pioavregs; i++) {
498 __le64 val;
499
500 /*
501 * Chip Errata bug 6641; even and odd qwords>3 are swapped.
502 */
503 if (i > 3) {
504 if (i & 1)
505 val = dd->ipath_pioavailregs_dma[i - 1];
506 else
507 val = dd->ipath_pioavailregs_dma[i + 1];
508 }
509 else
510 val = dd->ipath_pioavailregs_dma[i];
511 dd->ipath_pioavailshadow[i] = le64_to_cpu(val);
512 }
513 /* can get counters, stats, etc. */
514 dd->ipath_flags |= IPATH_PRESENT;
515}
516
517static int init_housekeeping(struct ipath_devdata *dd,
518 struct ipath_portdata **pdp, int reinit)
519{
520 char boardn[32];
521 int ret = 0;
522
523 /*
524 * have to clear shadow copies of registers at init that are
525 * not otherwise set here, or all kinds of bizarre things
526 * happen with driver on chip reset
527 */
528 dd->ipath_rcvhdrsize = 0;
529
530 /*
531 * Don't clear ipath_flags as 8bit mode was set before
532 * entering this func. However, we do set the linkstate to
533 * unknown, so we can watch for a transition.
534 */
535 dd->ipath_flags |= IPATH_LINKUNK;
536 dd->ipath_flags &= ~(IPATH_LINKACTIVE | IPATH_LINKARMED |
537 IPATH_LINKDOWN | IPATH_LINKINIT);
538
539 ipath_cdbg(VERBOSE, "Try to read spc chip revision\n");
540 dd->ipath_revision =
541 ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
542
543 /*
544 * set up fundamental info we need to use the chip; we assume
545 * if the revision reg and these regs are OK, we don't need to
546 * special case the rest
547 */
548 dd->ipath_sregbase =
549 ipath_read_kreg32(dd, dd->ipath_kregs->kr_sendregbase);
550 dd->ipath_cregbase =
551 ipath_read_kreg32(dd, dd->ipath_kregs->kr_counterregbase);
552 dd->ipath_uregbase =
553 ipath_read_kreg32(dd, dd->ipath_kregs->kr_userregbase);
554 ipath_cdbg(VERBOSE, "ipath_kregbase %p, sendbase %x usrbase %x, "
555 "cntrbase %x\n", dd->ipath_kregbase, dd->ipath_sregbase,
556 dd->ipath_uregbase, dd->ipath_cregbase);
557 if ((dd->ipath_revision & 0xffffffff) == 0xffffffff
558 || (dd->ipath_sregbase & 0xffffffff) == 0xffffffff
559 || (dd->ipath_cregbase & 0xffffffff) == 0xffffffff
560 || (dd->ipath_uregbase & 0xffffffff) == 0xffffffff) {
561 ipath_dev_err(dd, "Register read failures from chip, "
562 "giving up initialization\n");
563 ret = -ENODEV;
564 goto done;
565 }
566
567 /* clear the initial reset flag, in case first driver load */
568 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
569 INFINIPATH_E_RESET);
570
571 if (reinit)
572 ret = init_chip_reset(dd, pdp);
573 else
574 ret = init_chip_first(dd, pdp);
575
576 if (ret)
577 goto done;
578
579 ipath_cdbg(VERBOSE, "Revision %llx (PCI %x), %u ports, %u tids, "
580 "%u egrtids\n", (unsigned long long) dd->ipath_revision,
581 dd->ipath_pcirev, dd->ipath_portcnt, dd->ipath_rcvtidcnt,
582 dd->ipath_rcvegrcnt);
583
584 if (((dd->ipath_revision >> INFINIPATH_R_SOFTWARE_SHIFT) &
585 INFINIPATH_R_SOFTWARE_MASK) != IPATH_CHIP_SWVERSION) {
586 ipath_dev_err(dd, "Driver only handles version %d, "
587 "chip swversion is %d (%llx), failng\n",
588 IPATH_CHIP_SWVERSION,
589 (int)(dd->ipath_revision >>
590 INFINIPATH_R_SOFTWARE_SHIFT) &
591 INFINIPATH_R_SOFTWARE_MASK,
592 (unsigned long long) dd->ipath_revision);
593 ret = -ENOSYS;
594 goto done;
595 }
596 dd->ipath_majrev = (u8) ((dd->ipath_revision >>
597 INFINIPATH_R_CHIPREVMAJOR_SHIFT) &
598 INFINIPATH_R_CHIPREVMAJOR_MASK);
599 dd->ipath_minrev = (u8) ((dd->ipath_revision >>
600 INFINIPATH_R_CHIPREVMINOR_SHIFT) &
601 INFINIPATH_R_CHIPREVMINOR_MASK);
602 dd->ipath_boardrev = (u8) ((dd->ipath_revision >>
603 INFINIPATH_R_BOARDID_SHIFT) &
604 INFINIPATH_R_BOARDID_MASK);
605
606 ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn);
607
608 snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion),
609 "Driver %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
610 "SW Compat %u\n",
611 IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn,
612 (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) &
613 INFINIPATH_R_ARCH_MASK,
614 dd->ipath_majrev, dd->ipath_minrev, dd->ipath_pcirev,
615 (unsigned)(dd->ipath_revision >>
616 INFINIPATH_R_SOFTWARE_SHIFT) &
617 INFINIPATH_R_SOFTWARE_MASK);
618
619 ipath_dbg("%s", dd->ipath_boardversion);
620
621done:
622 return ret;
623}
624
625
626/**
627 * ipath_init_chip - do the actual initialization sequence on the chip
628 * @dd: the infinipath device
629 * @reinit: reinitializing, so don't allocate new memory
630 *
631 * Do the actual initialization sequence on the chip. This is done
632 * both from the init routine called from the PCI infrastructure, and
633 * when we reset the chip, or detect that it was reset internally,
634 * or it's administratively re-enabled.
635 *
636 * Memory allocation here and in called routines is only done in
637 * the first case (reinit == 0). We have to be careful, because even
638 * without memory allocation, we need to re-write all the chip registers
639 * TIDs, etc. after the reset or enable has completed.
640 */
641int ipath_init_chip(struct ipath_devdata *dd, int reinit)
642{
643 int ret = 0, i;
644 u32 val32, kpiobufs;
645 u64 val, atmp;
646 struct ipath_portdata *pd = NULL; /* keep gcc4 happy */
647
648 ret = init_housekeeping(dd, &pd, reinit);
649 if (ret)
650 goto done;
651
652 /*
653 * we ignore most issues after reporting them, but have to specially
654 * handle hardware-disabled chips.
655 */
656 if (ret == 2) {
657 /* unique error, known to ipath_init_one */
658 ret = -EPERM;
659 goto done;
660 }
661
662 /*
663 * We could bump this to allow for full rcvegrcnt + rcvtidcnt,
664 * but then it no longer nicely fits power of two, and since
665 * we now use routines that backend onto __get_free_pages, the
666 * rest would be wasted.
667 */
668 dd->ipath_rcvhdrcnt = dd->ipath_rcvegrcnt;
669 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrcnt,
670 dd->ipath_rcvhdrcnt);
671
672 /*
673 * Set up the shadow copies of the piobufavail registers,
674 * which we compare against the chip registers for now, and
675 * the in memory DMA'ed copies of the registers. This has to
676 * be done early, before we calculate lastport, etc.
677 */
678 val = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
679 /*
680 * calc number of pioavail registers, and save it; we have 2
681 * bits per buffer.
682 */
683 dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2)
684 / (sizeof(u64) * BITS_PER_BYTE / 2);
685 if (!ipath_kpiobufs) /* have to have at least 1, for SMA */
686 kpiobufs = ipath_kpiobufs = 1;
687 else if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) <
688 (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT)) {
689 dev_info(&dd->pcidev->dev, "Too few PIO buffers (%u) "
690 "for %u ports to have %u each!\n",
691 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k,
692 dd->ipath_cfgports, IPATH_MIN_USER_PORT_BUFCNT);
693 kpiobufs = 1; /* reserve just the minimum for SMA/ether */
694 } else
695 kpiobufs = ipath_kpiobufs;
696
697 if (kpiobufs >
698 (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
699 (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT))) {
700 i = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
701 (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT);
702 if (i < 0)
703 i = 0;
704 dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs for "
705 "kernel leaves too few for %d user ports "
706 "(%d each); using %u\n", kpiobufs,
707 dd->ipath_cfgports - 1,
708 IPATH_MIN_USER_PORT_BUFCNT, i);
709 /*
710 * shouldn't change ipath_kpiobufs, because could be
711 * different for different devices...
712 */
713 kpiobufs = i;
714 }
715 dd->ipath_lastport_piobuf =
716 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - kpiobufs;
717 dd->ipath_pbufsport = dd->ipath_cfgports > 1
718 ? dd->ipath_lastport_piobuf / (dd->ipath_cfgports - 1)
719 : 0;
720 val32 = dd->ipath_lastport_piobuf -
721 (dd->ipath_pbufsport * (dd->ipath_cfgports - 1));
722 if (val32 > 0) {
723 ipath_dbg("allocating %u pbufs/port leaves %u unused, "
724 "add to kernel\n", dd->ipath_pbufsport, val32);
725 dd->ipath_lastport_piobuf -= val32;
726 ipath_dbg("%u pbufs/port leaves %u unused, add to kernel\n",
727 dd->ipath_pbufsport, val32);
728 }
729 dd->ipath_lastpioindex = dd->ipath_lastport_piobuf;
730 ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
731 "each for %u user ports\n", kpiobufs,
732 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k,
733 dd->ipath_pbufsport, dd->ipath_cfgports - 1);
734
735 dd->ipath_f_early_init(dd);
736
737 /* early_init sets rcvhdrentsize and rcvhdrsize, so this must be
738 * done after early_init */
739 dd->ipath_hdrqlast =
740 dd->ipath_rcvhdrentsize * (dd->ipath_rcvhdrcnt - 1);
741 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrentsize,
742 dd->ipath_rcvhdrentsize);
743 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
744 dd->ipath_rcvhdrsize);
745
746 if (!reinit) {
747 ret = init_pioavailregs(dd);
748 init_shadow_tids(dd);
749 if (ret)
750 goto done;
751 }
752
753 (void)ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
754 dd->ipath_pioavailregs_phys);
755 /*
756 * this is to detect s/w errors, which the h/w works around by
757 * ignoring the low 6 bits of address, if it wasn't aligned.
758 */
759 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpioavailaddr);
760 if (val != dd->ipath_pioavailregs_phys) {
761 ipath_dev_err(dd, "Catastrophic software error, "
762 "SendPIOAvailAddr written as %lx, "
763 "read back as %llx\n",
764 (unsigned long) dd->ipath_pioavailregs_phys,
765 (unsigned long long) val);
766 ret = -EINVAL;
767 goto done;
768 }
769
770 val = ipath_port0_rcvhdrtail_dma + dd->ipath_unit * 64;
771
772 /* verify that the alignment requirement was met */
773 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
774 0, val);
775 atmp = ipath_read_kreg64_port(
776 dd, dd->ipath_kregs->kr_rcvhdrtailaddr, 0);
777 if (val != atmp) {
778 ipath_dev_err(dd, "Catastrophic software error, "
779 "RcvHdrTailAddr0 written as %llx, "
780 "read back as %llx from %x\n",
781 (unsigned long long) val,
782 (unsigned long long) atmp,
783 dd->ipath_kregs->kr_rcvhdrtailaddr);
784 ret = -EINVAL;
785 goto done;
786 }
787
788 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP);
789
790 /*
791 * make sure we are not in freeze, and PIO send enabled, so
792 * writes to pbc happen
793 */
794 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, 0ULL);
795 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
796 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
797 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
798 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
799 INFINIPATH_S_PIOENABLE);
800
801 /*
802 * before error clears, since we expect serdes pll errors during
803 * this, the first time after reset
804 */
805 if (bringup_link(dd)) {
806 dev_info(&dd->pcidev->dev, "Failed to bringup IB link\n");
807 ret = -ENETDOWN;
808 goto done;
809 }
810
811 /*
812 * clear any "expected" hwerrs from reset and/or initialization
813 * clear any that aren't enabled (at least this once), and then
814 * set the enable mask
815 */
816 dd->ipath_f_init_hwerrors(dd);
817 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
818 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
819 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
820 dd->ipath_hwerrmask);
821
822 dd->ipath_maskederrs = dd->ipath_ignorederrs;
823 /* clear all */
824 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
825 /* enable errors that are masked, at least this first time. */
826 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
827 ~dd->ipath_maskederrs);
828 /* clear any interrups up to this point (ints still not enabled) */
829 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
830
831 ipath_stats.sps_lid[dd->ipath_unit] = dd->ipath_lid;
832
833 /*
834 * Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing
835 * re-init, the simplest way to handle this is to free
836 * existing, and re-allocate.
837 */
838 if (reinit)
839 ipath_free_pddata(dd, 0, 0);
840 dd->ipath_f_tidtemplate(dd);
841 ret = ipath_create_rcvhdrq(dd, pd);
842 if (!ret)
843 ret = create_port0_egr(dd);
844 if (ret)
845 ipath_dev_err(dd, "failed to allocate port 0 (kernel) "
846 "rcvhdrq and/or egr bufs\n");
847 else
848 enable_chip(dd, pd, reinit);
849
850 /*
851 * cause retrigger of pending interrupts ignored during init,
852 * even if we had errors
853 */
854 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
855
856 if(!dd->ipath_stats_timer_active) {
857 /*
858 * first init, or after an admin disable/enable
859 * set up stats retrieval timer, even if we had errors
860 * in last portion of setup
861 */
862 init_timer(&dd->ipath_stats_timer);
863 dd->ipath_stats_timer.function = ipath_get_faststats;
864 dd->ipath_stats_timer.data = (unsigned long) dd;
865 /* every 5 seconds; */
866 dd->ipath_stats_timer.expires = jiffies + 5 * HZ;
867 /* takes ~16 seconds to overflow at full IB 4x bandwdith */
868 add_timer(&dd->ipath_stats_timer);
869 dd->ipath_stats_timer_active = 1;
870 }
871
872done:
873 if (!ret) {
874 ipath_get_guid(dd);
875 *dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT;
876 if (!dd->ipath_f_intrsetup(dd)) {
877 /* now we can enable all interrupts from the chip */
878 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
879 -1LL);
880 /* force re-interrupt of any pending interrupts. */
881 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear,
882 0ULL);
883 /* chip is usable; mark it as initialized */
884 *dd->ipath_statusp |= IPATH_STATUS_INITTED;
885 } else
886 ipath_dev_err(dd, "No interrupts enabled, couldn't "
887 "setup interrupt address\n");
888
889 if (dd->ipath_cfgports > ipath_stats.sps_nports)
890 /*
891 * sps_nports is a global, so, we set it to
892 * the highest number of ports of any of the
893 * chips we find; we never decrement it, at
894 * least for now. Since this might have changed
895 * over disable/enable or prior to reset, always
896 * do the check and potentially adjust.
897 */
898 ipath_stats.sps_nports = dd->ipath_cfgports;
899 } else
900 ipath_dbg("Failed (%d) to initialize chip\n", ret);
901
902 /* if ret is non-zero, we probably should do some cleanup
903 here... */
904 return ret;
905}
906
/*
 * ipath_set_kpiobufs - module-parameter setter for kernel PIO buffers
 * @str: the parameter value as text
 * @kp: kernel_param descriptor (unused)
 *
 * Parses the requested count of PIO buffers reserved for kernel use
 * (SMA/ether) and applies it to devices on ipath_dev_list.  Rejects 0
 * and any value that would leave fewer than IPATH_MIN_USER_PORT_BUFCNT
 * buffers per configured user port on some device.
 *
 * Return: 0 on success, negative errno on parse failure or bad value.
 */
static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
{
	struct ipath_devdata *dd;
	unsigned long flags;
	unsigned short val;
	int ret;

	ret = ipath_parse_ushort(str, &val);

	/* take the lock before the error checks so 'bail' can always unlock */
	spin_lock_irqsave(&ipath_devs_lock, flags);

	if (ret < 0)
		goto bail;

	/* zero kernel buffers is never valid; at least one is needed for SMA */
	if (val == 0) {
		ret = -EINVAL;
		goto bail;
	}

	list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
		/*
		 * NOTE(review): this skips devices whose registers ARE
		 * mapped (ipath_kregbase set) and only updates unmapped
		 * ones, whose piobcnt fields are likely still zero.  The
		 * intent looks inverted — confirm whether the test should
		 * be !dd->ipath_kregbase.
		 */
		if (dd->ipath_kregbase)
			continue;
		if (val > (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
			   (dd->ipath_cfgports *
			    IPATH_MIN_USER_PORT_BUFCNT)))
		{
			ipath_dev_err(
				dd,
				"Allocating %d PIO bufs for kernel leaves "
				"too few for %d user ports (%d each)\n",
				val, dd->ipath_cfgports - 1,
				IPATH_MIN_USER_PORT_BUFCNT);
			ret = -EINVAL;
			goto bail;
		}
		/* everything past this index belongs to the kernel */
		dd->ipath_lastport_piobuf =
			dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
	}

	ret = 0;
bail:
	spin_unlock_irqrestore(&ipath_devs_lock, flags);

	return ret;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
new file mode 100644
index 000000000000..0bcb428041f3
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -0,0 +1,841 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/pci.h>
34
35#include "ipath_kernel.h"
36#include "ips_common.h"
37#include "ipath_layer.h"
38
/*
 * Summary mask of receive-side packet errors: header, TID, version,
 * length, format, VL, and end-of-bad-packet conditions detected on
 * inbound packets.  Used in handle_errors() to bump sps_pkterrs and
 * trigger a receive-queue check for error packets.
 */
#define E_SUM_PKTERRS \
	(INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
	 INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
	 INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \
	 INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \
	 INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
	 INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)

/*
 * Summary mask of send-side errors (PIO launch, dropped/short/long
 * send packets, bad addresses).  When any of these is set,
 * handle_e_sum_errs() is called to disarm bad send buffers.
 */
#define E_SUM_ERRS \
	(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
	 INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
	 INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \
	 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
	 INFINIPATH_E_INVALIDADDR)
53
/*
 * handle_e_sum_errs - process send-summary errors (E_SUM_ERRS)
 * @dd: the infinipath device
 * @errs: the error status bits that triggered this call
 *
 * Reads the sendbuffererror shadow registers, disarms any PIO buffers
 * the chip flagged, and returns a mask of "dropped packet" errors to
 * ignore this time when the link is not yet ACTIVE (expected noise
 * while SMA brings the link up).
 */
static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
{
	unsigned long sbuf[4];
	u64 ignore_this_time = 0;
	u32 piobcnt;

	/* if possible that sendbuffererror could be valid */
	piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
	/* read these before writing errorclear */
	sbuf[0] = ipath_read_kreg64(
		dd, dd->ipath_kregs->kr_sendbuffererror);
	sbuf[1] = ipath_read_kreg64(
		dd, dd->ipath_kregs->kr_sendbuffererror + 1);
	/* more than 128 buffers need the third and fourth 64-bit words */
	if (piobcnt > 128) {
		sbuf[2] = ipath_read_kreg64(
			dd, dd->ipath_kregs->kr_sendbuffererror + 2);
		sbuf[3] = ipath_read_kreg64(
			dd, dd->ipath_kregs->kr_sendbuffererror + 3);
	}

	if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
		int i;

		ipath_cdbg(PKT, "SendbufErrs %lx %lx ", sbuf[0], sbuf[1]);
		if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
			printk("%lx %lx ", sbuf[2], sbuf[3]);
		/* disarm each buffer whose bit is set in the shadow copy */
		for (i = 0; i < piobcnt; i++) {
			if (test_bit(i, sbuf)) {
				u32 __iomem *piobuf;
				/* 2k buffers come first, 4k buffers after */
				if (i < dd->ipath_piobcnt2k)
					piobuf = (u32 __iomem *)
						(dd->ipath_pio2kbase +
						 i * dd->ipath_palign);
				else
					piobuf = (u32 __iomem *)
						(dd->ipath_pio4kbase +
						 (i - dd->ipath_piobcnt2k) *
						 dd->ipath_4kalign);

				ipath_cdbg(PKT,
					   "PIObuf[%u] @%p pbc is %x; ",
					   i, piobuf, readl(piobuf));

				ipath_disarm_piobufs(dd, i, 1);
			}
		}
		if (ipath_debug & __IPATH_PKTDBG)
			printk("\n");
	}
	if ((errs & (INFINIPATH_E_SDROPPEDDATAPKT |
		     INFINIPATH_E_SDROPPEDSMPPKT |
		     INFINIPATH_E_SMINPKTLEN)) &&
	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
		/*
		 * This can happen when SMA is trying to bring the link
		 * up, but the IB link changes state at the "wrong" time.
		 * The IB logic then complains that the packet isn't
		 * valid.  We don't want to confuse people, so we just
		 * don't print them, except at debug
		 */
		ipath_dbg("Ignoring pktsend errors %llx, because not "
			  "yet active\n", (unsigned long long) errs);
		ignore_this_time = INFINIPATH_E_SDROPPEDDATAPKT |
			INFINIPATH_E_SDROPPEDSMPPKT |
			INFINIPATH_E_SMINPKTLEN;
	}

	return ignore_this_time;
}
123
124/* return the strings for the most common link states */
125static char *ib_linkstate(u32 linkstate)
126{
127 char *ret;
128
129 switch (linkstate) {
130 case IPATH_IBSTATE_INIT:
131 ret = "Init";
132 break;
133 case IPATH_IBSTATE_ARM:
134 ret = "Arm";
135 break;
136 case IPATH_IBSTATE_ACTIVE:
137 ret = "Active";
138 break;
139 default:
140 ret = "Down";
141 }
142
143 return ret;
144}
145
/*
 * handle_e_ibstatuschanged - track IB link state transitions
 * @dd: the infinipath device
 * @errs: the error bits that accompanied this status change
 * @noprint: nonzero if messages are being rate-limited
 *
 * Reads kr_ibcstatus, updates the software copies of the link state
 * (ipath_flags, *ipath_statusp, LEDs), and notifies the layered
 * driver when the link comes up.  Cycling between Polling.Active and
 * Polling.Quiet (no cable / peer off) is deliberately ignored.
 */
static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
				     ipath_err_t errs, int noprint)
{
	u64 val;
	u32 ltstate, lstate;

	/*
	 * even if diags are enabled, we want to notice LINKINIT, etc.
	 * We just don't want to change the LED state, or
	 * dd->ipath_kregs->kr_ibcctrl
	 */
	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
	lstate = val & IPATH_IBSTATE_MASK;
	if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
	    lstate == IPATH_IBSTATE_ACTIVE) {
		/*
		 * only print at SMA if there is a change, debug if not
		 * (sometimes we want to know that, usually not).
		 */
		if (lstate == ((unsigned) dd->ipath_lastibcstat
			       & IPATH_IBSTATE_MASK)) {
			ipath_dbg("Status change intr but no change (%s)\n",
				  ib_linkstate(lstate));
		}
		else
			ipath_cdbg(SMA, "Unit %u link state %s, last "
				   "was %s\n", dd->ipath_unit,
				   ib_linkstate(lstate),
				   ib_linkstate((unsigned)
						dd->ipath_lastibcstat
						& IPATH_IBSTATE_MASK));
	}
	else {
		/* new state is "down"; report based on what we were before */
		lstate = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
		if (lstate == IPATH_IBSTATE_INIT ||
		    lstate == IPATH_IBSTATE_ARM ||
		    lstate == IPATH_IBSTATE_ACTIVE)
			ipath_cdbg(SMA, "Unit %u link state down"
				   " (state 0x%x), from %s\n",
				   dd->ipath_unit,
				   (u32)val & IPATH_IBSTATE_MASK,
				   ib_linkstate(lstate));
		else
			ipath_cdbg(VERBOSE, "Unit %u link state changed "
				   "to 0x%x from down (%x)\n",
				   dd->ipath_unit, (u32) val, lstate);
	}
	/* extract training state and link state fields for the rest */
	ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
		INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
	lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
		INFINIPATH_IBCS_LINKSTATE_MASK;

	if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
	    ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
		u32 last_ltstate;

		/*
		 * Ignore cycling back and forth from Polling.Active
		 * to Polling.Quiet while waiting for the other end of
		 * the link to come up. We will cycle back and forth
		 * between them if no cable is plugged in,
		 * the other device is powered off or disabled, etc.
		 */
		last_ltstate = (dd->ipath_lastibcstat >>
				INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT)
			& INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
		if (last_ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE
		    || last_ltstate ==
		    INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
			/* after ~40 polls with no peer, declare "no cable" */
			if (dd->ipath_ibpollcnt > 40) {
				dd->ipath_flags |= IPATH_NOCABLE;
				*dd->ipath_statusp |=
					IPATH_STATUS_IB_NOCABLE;
			} else
				dd->ipath_ibpollcnt++;
			goto skip_ibchange;
		}
	}
	dd->ipath_ibpollcnt = 0;	/* some state other than 2 or 3 */
	ipath_stats.sps_iblink++;
	if (ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
		/* training not complete: record link as down */
		dd->ipath_flags |= IPATH_LINKDOWN;
		dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
				     | IPATH_LINKACTIVE |
				     IPATH_LINKARMED);
		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
		if (!noprint) {
			if (((dd->ipath_lastibcstat >>
			      INFINIPATH_IBCS_LINKSTATE_SHIFT) &
			     INFINIPATH_IBCS_LINKSTATE_MASK)
			    == INFINIPATH_IBCS_L_STATE_ACTIVE)
				/* if from up to down be more vocal */
				ipath_cdbg(SMA,
					   "Unit %u link now down (%s)\n",
					   dd->ipath_unit,
					   ipath_ibcstatus_str[ltstate]);
			else
				ipath_cdbg(VERBOSE, "Unit %u link is "
					   "down (%s)\n", dd->ipath_unit,
					   ipath_ibcstatus_str[ltstate]);
		}

		dd->ipath_f_setextled(dd, lstate, ltstate);
	} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ACTIVE) {
		/* link is fully up: mark active and notify upper layer */
		dd->ipath_flags |= IPATH_LINKACTIVE;
		dd->ipath_flags &=
			~(IPATH_LINKUNK | IPATH_LINKINIT | IPATH_LINKDOWN |
			  IPATH_LINKARMED | IPATH_NOCABLE);
		*dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
		*dd->ipath_statusp |=
			IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
		dd->ipath_f_setextled(dd, lstate, ltstate);

		__ipath_layer_intr(dd, IPATH_LAYER_INT_IF_UP);
	} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
		/*
		 * set INIT and DOWN.  Down is checked by most of the other
		 * code, but INIT is useful to know in a few places.
		 */
		dd->ipath_flags |= IPATH_LINKINIT | IPATH_LINKDOWN;
		dd->ipath_flags &=
			~(IPATH_LINKUNK | IPATH_LINKACTIVE | IPATH_LINKARMED
			  | IPATH_NOCABLE);
		*dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE
					| IPATH_STATUS_IB_READY);
		dd->ipath_f_setextled(dd, lstate, ltstate);
	} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ARM) {
		dd->ipath_flags |= IPATH_LINKARMED;
		dd->ipath_flags &=
			~(IPATH_LINKUNK | IPATH_LINKDOWN | IPATH_LINKINIT |
			  IPATH_LINKACTIVE | IPATH_NOCABLE);
		*dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE
					| IPATH_STATUS_IB_READY);
		dd->ipath_f_setextled(dd, lstate, ltstate);
	} else {
		if (!noprint)
			ipath_dbg("IBstatuschange unit %u: %s (%x)\n",
				  dd->ipath_unit,
				  ipath_ibcstatus_str[ltstate], ltstate);
	}
skip_ibchange:
	/* remember raw status for next transition's comparisons */
	dd->ipath_lastibcstat = val;
}
289
290static void handle_supp_msgs(struct ipath_devdata *dd,
291 unsigned supp_msgs, char msg[512])
292{
293 /*
294 * Print the message unless it's ibc status change only, which
295 * happens so often we never want to count it.
296 */
297 if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
298 ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror &
299 ~INFINIPATH_E_IBSTATUSCHANGED);
300 if (dd->ipath_lasterror &
301 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
302 ipath_dev_err(dd, "Suppressed %u messages for "
303 "fast-repeating errors (%s) (%llx)\n",
304 supp_msgs, msg,
305 (unsigned long long)
306 dd->ipath_lasterror);
307 else {
308 /*
309 * rcvegrfull and rcvhdrqfull are "normal", for some
310 * types of processes (mostly benchmarks) that send
311 * huge numbers of messages, while not processing
312 * them. So only complain about these at debug
313 * level.
314 */
315 ipath_dbg("Suppressed %u messages for %s\n",
316 supp_msgs, msg);
317 }
318 }
319}
320
/*
 * handle_frequent_errors - rate-limit error message printing
 * @dd: the infinipath device
 * @errs: the current error bits (currently unused here)
 * @msg: 512-byte scratch buffer passed through to handle_supp_msgs()
 * @noprint: out-flag, set to 1 when the caller should stay quiet
 *
 * Throttles "fast" messages to no more than ~10 per half-second
 * window; once throttled, counts suppressed messages and reports the
 * total via handle_supp_msgs() when the quiet period expires.
 *
 * Returns the current count of suppressed messages.
 *
 * NOTE(review): the state here (nextmsg_time/nmsgs/supp_msgs) is
 * function-static, so it is shared across all devices and not
 * interrupt-race protected — presumably acceptable since this only
 * affects message throttling; confirm.
 */
static unsigned handle_frequent_errors(struct ipath_devdata *dd,
				       ipath_err_t errs, char msg[512],
				       int *noprint)
{
	unsigned long nc;
	static unsigned long nextmsg_time;
	static unsigned nmsgs, supp_msgs;

	/*
	 * Throttle back "fast" messages to no more than 10 per 5 seconds.
	 * This isn't perfect, but it's a reasonable heuristic. If we get
	 * more than 10, give a 6x longer delay.
	 */
	nc = jiffies;
	if (nmsgs > 10) {
		if (time_before(nc, nextmsg_time)) {
			*noprint = 1;
			/* first suppressed message starts the quiet period */
			if (!supp_msgs++)
				nextmsg_time = nc + HZ * 3;
		}
		else if (supp_msgs) {
			/* quiet period over: report and reset counters */
			handle_supp_msgs(dd, supp_msgs, msg);
			supp_msgs = 0;
			nmsgs = 0;
		}
	}
	else if (!nmsgs++ || time_after(nc, nextmsg_time))
		nextmsg_time = nc + HZ / 2;

	return supp_msgs;
}
352
/*
 * handle_errors - decode and react to a chip error interrupt
 * @dd: the infinipath device
 * @errs: snapshot of kr_errorstatus taken by ipath_intr()
 *
 * Rate-limits noisy errors, disarms bad send buffers, masks errors
 * that repeat catastrophically fast (re-enabled ~3 minutes later by
 * ipath_get_faststats()), updates the software link-state machine,
 * and re-runs packet receive when an error may have left packets in
 * the kernel port's header queue.
 */
static void handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
{
	char msg[512];
	u64 ignore_this_time = 0;
	int i;
	int chkerrpkts = 0, noprint = 0;
	unsigned supp_msgs;

	supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);

	/*
	 * don't report errors that are masked (includes those always
	 * ignored)
	 */
	errs &= ~dd->ipath_maskederrs;

	/* do these first, they are most important */
	if (errs & INFINIPATH_E_HARDWARE) {
		/* reuse same msg buf */
		dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
	}

	if (!noprint && (errs & ~infinipath_e_bitsextant))
		ipath_dev_err(dd, "error interrupt with unknown errors "
			      "%llx set\n", (unsigned long long)
			      (errs & ~infinipath_e_bitsextant));

	if (errs & E_SUM_ERRS)
		ignore_this_time = handle_e_sum_errs(dd, errs);

	/* suppressed-message count hit the ceiling: mask the offenders */
	if (supp_msgs == 250000) {
		/*
		 * It's not entirely reasonable assuming that the errors set
		 * in the last clear period are all responsible for the
		 * problem, but the alternative is to assume it's the only
		 * ones on this particular interrupt, which also isn't great
		 */
		dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
				 ~dd->ipath_maskederrs);
		ipath_decode_err(msg, sizeof msg,
				 (dd->ipath_maskederrs & ~dd->
				  ipath_ignorederrs));

		if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
			ipath_dev_err(dd, "Disabling error(s) %llx because "
				      "occuring too frequently (%s)\n",
				      (unsigned long long)
				      (dd->ipath_maskederrs &
				       ~dd->ipath_ignorederrs), msg);
		else {
			/*
			 * rcvegrfull and rcvhdrqfull are "normal",
			 * for some types of processes (mostly benchmarks)
			 * that send huge numbers of messages, while not
			 * processing them. So only complain about
			 * these at debug level.
			 */
			ipath_dbg("Disabling frequent queue full errors "
				  "(%s)\n", msg);
		}

		/*
		 * Re-enable the masked errors after around 3 minutes. in
		 * ipath_get_faststats(). If we have a series of fast
		 * repeating but different errors, the interval will keep
		 * stretching out, but that's OK, as that's pretty
		 * catastrophic.
		 */
		dd->ipath_unmasktime = jiffies + HZ * 180;
	}

	/* ack the errors to the chip before deciding what to print */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs);
	if (ignore_this_time)
		errs &= ~ignore_this_time;
	if (errs & ~dd->ipath_lasterror) {
		errs &= ~dd->ipath_lasterror;
		/* never suppress duplicate hwerrors or ibstatuschange */
		dd->ipath_lasterror |= errs &
			~(INFINIPATH_E_HARDWARE |
			  INFINIPATH_E_IBSTATUSCHANGED);
	}
	if (!errs)
		return;

	if (!noprint)
		/*
		 * the ones we mask off are handled specially below or above
		 */
		ipath_decode_err(msg, sizeof msg,
				 errs & ~(INFINIPATH_E_IBSTATUSCHANGED |
					  INFINIPATH_E_RRCVEGRFULL |
					  INFINIPATH_E_RRCVHDRFULL |
					  INFINIPATH_E_HARDWARE));
	else
		/* so we don't need if (!noprint) at strlcat's below */
		*msg = 0;

	if (errs & E_SUM_PKTERRS) {
		ipath_stats.sps_pkterrs++;
		chkerrpkts = 1;
	}
	if (errs & E_SUM_ERRS)
		ipath_stats.sps_errs++;

	if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) {
		ipath_stats.sps_crcerrs++;
		chkerrpkts = 1;
	}

	/*
	 * We don't want to print these two as they happen, or we can make
	 * the situation even worse, because it takes so long to print
	 * messages to serial consoles.  Kernel ports get printed from
	 * fast_stats, no more than every 5 seconds, user ports get printed
	 * on close
	 */
	if (errs & INFINIPATH_E_RRCVHDRFULL) {
		int any;	/* NOTE(review): set but never read */
		u32 hd, tl;
		ipath_stats.sps_hdrqfull++;
		for (any = i = 0; i < dd->ipath_cfgports; i++) {
			struct ipath_portdata *pd = dd->ipath_pd[i];
			if (i == 0) {
				/* kernel port uses the driver's shadow head */
				hd = dd->ipath_port0head;
				tl = (u32) le64_to_cpu(
					*dd->ipath_hdrqtailptr);
			} else if (pd && pd->port_cnt &&
				   pd->port_rcvhdrtail_kvaddr) {
				/*
				 * don't report same point multiple times,
				 * except kernel
				 */
				tl = (u32) * pd->port_rcvhdrtail_kvaddr;
				if (tl == dd->ipath_lastrcvhdrqtails[i])
					continue;
				hd = ipath_read_ureg32(dd, ur_rcvhdrhead,
						       i);
			} else
				continue;
			/* full when head is one ahead of tail (with wrap) */
			if (hd == (tl + 1) ||
			    (!hd && tl == dd->ipath_hdrqlast)) {
				dd->ipath_lastrcvhdrqtails[i] = tl;
				/*
				 * NOTE(review): for i == 0 pd is not
				 * NULL-checked before this increment;
				 * presumably the kernel port's portdata
				 * always exists by the time interrupts
				 * are enabled — confirm.
				 */
				pd->port_hdrqfull++;
				if (i == 0)
					chkerrpkts = 1;
			}
		}
	}
	if (errs & INFINIPATH_E_RRCVEGRFULL) {
		/*
		 * since this is of less importance and not likely to
		 * happen without also getting hdrfull, only count
		 * occurrences; don't check each port (or even the kernel
		 * vs user)
		 */
		ipath_stats.sps_etidfull++;
		if (dd->ipath_port0head !=
		    (u32) le64_to_cpu(*dd->ipath_hdrqtailptr))
			chkerrpkts = 1;
	}

	/*
	 * do this before IBSTATUSCHANGED, in case both bits set in a single
	 * interrupt; we want the STATUSCHANGE to "win", so we do our
	 * internal copy of state machine correctly
	 */
	if (errs & INFINIPATH_E_RIBLOSTLINK) {
		/*
		 * force through block below
		 */
		errs |= INFINIPATH_E_IBSTATUSCHANGED;
		ipath_stats.sps_iblink++;
		dd->ipath_flags |= IPATH_LINKDOWN;
		dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
				     | IPATH_LINKARMED | IPATH_LINKACTIVE);
		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
		if (!noprint) {
			u64 st = ipath_read_kreg64(
				dd, dd->ipath_kregs->kr_ibcstatus);

			ipath_dbg("Lost link, link now down (%s)\n",
				  ipath_ibcstatus_str[st & 0xf]);
		}
	}
	if (errs & INFINIPATH_E_IBSTATUSCHANGED)
		handle_e_ibstatuschanged(dd, errs, noprint);

	if (errs & INFINIPATH_E_RESET) {
		if (!noprint)
			ipath_dev_err(dd, "Got reset, requires re-init "
				      "(unload and reload driver)\n");
		dd->ipath_flags &= ~IPATH_INITTED;	/* needs re-init */
		/* mark as having had error */
		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
		*dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
	}

	if (!noprint && *msg)
		ipath_dev_err(dd, "%s error\n", msg);
	/* wake a waiting SMA if the link reached a state it asked for */
	if (dd->ipath_sma_state_wanted & dd->ipath_flags) {
		ipath_cdbg(VERBOSE, "sma wanted state %x, iflags now %x, "
			   "waking\n", dd->ipath_sma_state_wanted,
			   dd->ipath_flags);
		wake_up_interruptible(&ipath_sma_state_wait);
	}

	if (chkerrpkts)
		/* process possible error packets in hdrq */
		ipath_kreceive(dd);
}
565
/* this is separate to allow for better optimization of ipath_intr() */

/*
 * ipath_bad_intr - handle interrupts that arrive before init/after unload
 * @dd: the infinipath device
 * @unexpectp: caller-owned running count of unexpected interrupts
 *
 * After ~100 such interrupts, assumes we are receiving someone else's
 * interrupts (broken mptable/PIRQ routing), unregisters the handler,
 * and finally masks the chip's interrupts entirely.
 */
static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp)
{
	/*
	 * sometimes happen during driver init and unload, don't want
	 * to process any interrupts at that point
	 */

	/* this is just a bandaid, not a fix, if something goes badly
	 * wrong */
	/*
	 * NOTE(review): *unexpectp is incremented again in the nested
	 * test below, so the counter advances by 2 per call once it
	 * exceeds 100 — presumably intentional (reaches 105 quickly),
	 * but confirm.
	 */
	if (++*unexpectp > 100) {
		if (++*unexpectp > 105) {
			/*
			 * ok, we must be taking somebody else's interrupts,
			 * due to a messed up mptable and/or PIRQ table, so
			 * unregister the interrupt.  We've seen this during
			 * linuxbios development work, and it may happen in
			 * the future again.
			 */
			if (dd->pcidev && dd->pcidev->irq) {
				ipath_dev_err(dd, "Now %u unexpected "
					      "interrupts, unregistering "
					      "interrupt handler\n",
					      *unexpectp);
				ipath_dbg("free_irq of irq %x\n",
					  dd->pcidev->irq);
				free_irq(dd->pcidev->irq, dd);
			}
		}
		if (ipath_read_kreg32(dd, dd->ipath_kregs->kr_intmask)) {
			ipath_dev_err(dd, "%u unexpected interrupts, "
				      "disabling interrupts completely\n",
				      *unexpectp);
			/*
			 * disable all interrupts, something is very wrong
			 */
			ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
					 0ULL);
		}
	} else if (*unexpectp > 1)
		ipath_dbg("Interrupt when not ready, should not happen, "
			  "ignoring\n");
}
610
/*
 * ipath_bad_regread - react to kr_intstatus reading back as all-ones
 * @dd: the infinipath device
 *
 * An all-bits-set read usually means the device has fallen off the
 * bus.  On repeated occurrences, disable the chip's interrupts and
 * then unregister the handler, printing a progress dot every 10000
 * occurrences thereafter.
 */
static void ipath_bad_regread(struct ipath_devdata *dd)
{
	/* counts occurrences across calls (per-driver, not per-device) */
	static int allbits;

	/* separate routine, for better optimization of ipath_intr() */

	/*
	 * We print the message and disable interrupts, in hope of
	 * having a better chance of debugging the problem.
	 */
	ipath_dev_err(dd,
		      "Read of interrupt status failed (all bits set)\n");
	/* body runs from the second occurrence on (post-increment) */
	if (allbits++) {
		/* disable all interrupts, something is very wrong */
		ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
		if (allbits == 2) {
			ipath_dev_err(dd, "Still bad interrupt status, "
				      "unregistering interrupt\n");
			free_irq(dd->pcidev->irq, dd);
		} else if (allbits > 2) {
			if ((allbits % 10000) == 0)
				printk(".");
		} else
			/*
			 * NOTE(review): unreachable — inside this block
			 * allbits is always >= 2 after the increment, so
			 * the == 2 / > 2 arms cover every case.
			 */
			ipath_dev_err(dd, "Disabling interrupts, "
				      "multiple errors\n");
	}
}
638
639static void handle_port_pioavail(struct ipath_devdata *dd)
640{
641 u32 i;
642 /*
643 * start from port 1, since for now port 0 is never using
644 * wait_event for PIO
645 */
646 for (i = 1; dd->ipath_portpiowait && i < dd->ipath_cfgports; i++) {
647 struct ipath_portdata *pd = dd->ipath_pd[i];
648
649 if (pd && pd->port_cnt &&
650 dd->ipath_portpiowait & (1U << i)) {
651 clear_bit(i, &dd->ipath_portpiowait);
652 if (test_bit(IPATH_PORT_WAITING_PIO,
653 &pd->port_flag)) {
654 clear_bit(IPATH_PORT_WAITING_PIO,
655 &pd->port_flag);
656 wake_up_interruptible(&pd->port_wait);
657 }
658 }
659 }
660}
661
662static void handle_layer_pioavail(struct ipath_devdata *dd)
663{
664 int ret;
665
666 ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
667 if (ret > 0)
668 goto clear;
669
670 ret = __ipath_verbs_piobufavail(dd);
671 if (ret > 0)
672 goto clear;
673
674 return;
675clear:
676 set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
677 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
678 dd->ipath_sendctrl);
679}
680
681static void handle_rcv(struct ipath_devdata *dd, u32 istat)
682{
683 u64 portr;
684 int i;
685 int rcvdint = 0;
686
687 portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) &
688 infinipath_i_rcvavail_mask)
689 | ((istat >> INFINIPATH_I_RCVURG_SHIFT) &
690 infinipath_i_rcvurg_mask);
691 for (i = 0; i < dd->ipath_cfgports; i++) {
692 struct ipath_portdata *pd = dd->ipath_pd[i];
693 if (portr & (1 << i) && pd &&
694 pd->port_cnt) {
695 if (i == 0)
696 ipath_kreceive(dd);
697 else if (test_bit(IPATH_PORT_WAITING_RCV,
698 &pd->port_flag)) {
699 int rcbit;
700 clear_bit(IPATH_PORT_WAITING_RCV,
701 &pd->port_flag);
702 rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT;
703 clear_bit(1UL << rcbit, &dd->ipath_rcvctrl);
704 wake_up_interruptible(&pd->port_wait);
705 rcvdint = 1;
706 }
707 }
708 }
709 if (rcvdint) {
710 /* only want to take one interrupt, so turn off the rcv
711 * interrupt for all the ports that we did the wakeup on
712 * (but never for kernel port)
713 */
714 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
715 dd->ipath_rcvctrl);
716 }
717}
718
/*
 * ipath_intr - top-level interrupt handler for an infinipath device
 * @irq: interrupt number (unused)
 * @data: the struct ipath_devdata for this device
 * @regs: saved registers (unused)
 *
 * Reads kr_intstatus once, dispatches error, GPIO, PIO-available and
 * receive interrupts, and acks the handled bits.  Returns IRQ_NONE
 * for a zero or all-ones status (shared/broken interrupt) or when the
 * device isn't initialized yet; IRQ_HANDLED otherwise.
 */
irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
{
	struct ipath_devdata *dd = data;
	u32 istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus);
	ipath_err_t estat = 0;
	static unsigned unexpected = 0;
	irqreturn_t ret;

	if (unlikely(!istat)) {
		ipath_stats.sps_nullintr++;
		ret = IRQ_NONE; /* not our interrupt, or already handled */
		goto bail;
	}
	/* all-ones read: register access is failing (device off bus?) */
	if (unlikely(istat == -1)) {
		ipath_bad_regread(dd);
		/* don't know if it was our interrupt or not */
		ret = IRQ_NONE;
		goto bail;
	}

	ipath_stats.sps_ints++;

	/*
	 * this needs to be flags&initted, not statusp, so we keep
	 * taking interrupts even after link goes down, etc.
	 * Also, we *must* clear the interrupt at some point, or we won't
	 * take it again, which can be real bad for errors, etc...
	 */

	if (!(dd->ipath_flags & IPATH_INITTED)) {
		ipath_bad_intr(dd, &unexpected);
		ret = IRQ_NONE;
		goto bail;
	}
	if (unexpected)
		unexpected = 0;

	ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat);

	if (istat & ~infinipath_i_bitsextant)
		ipath_dev_err(dd,
			      "interrupt with unknown interrupts %x set\n",
			      istat & (u32) ~ infinipath_i_bitsextant);

	if (istat & INFINIPATH_I_ERROR) {
		ipath_stats.sps_errints++;
		estat = ipath_read_kreg64(dd,
					  dd->ipath_kregs->kr_errorstatus);
		if (!estat)
			dev_info(&dd->pcidev->dev, "error interrupt (%x), "
				 "but no error bits set!\n", istat);
		else if (estat == -1LL)
			/*
			 * should we try clearing all, or hope next read
			 * works?
			 */
			ipath_dev_err(dd, "Read of error status failed "
				      "(all bits set); ignoring\n");
		else
			handle_errors(dd, estat);
	}

	if (istat & INFINIPATH_I_GPIO) {
		/* only expected when GPIO bit 2 signals port-0 packets */
		if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) {
			u32 gpiostatus;
			gpiostatus = ipath_read_kreg32(
				dd, dd->ipath_kregs->kr_gpio_status);
			ipath_dbg("Unexpected GPIO interrupt bits %x\n",
				  gpiostatus);
			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
					 gpiostatus);
		}
		else {
			/* Clear GPIO status bit 2 */
			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
					 (u64) (1 << 2));

			/*
			 * Packets are available in the port 0 rcv queue.
			 * Eventually this needs to be generalized to check
			 * IPATH_GPIO_INTR, and the specific GPIO bit, if
			 * GPIO interrupts are used for anything else.
			 */
			ipath_kreceive(dd);
		}
	}

	/*
	 * clear the ones we will deal with on this round
	 * We clear it early, mostly for receive interrupts, so we
	 * know the chip will have seen this by the time we process
	 * the queue, and will re-interrupt if necessary.  The processor
	 * itself won't take the interrupt again until we return.
	 */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);

	if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
		/* turn the interrupt off until a layer re-arms it */
		clear_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
				 dd->ipath_sendctrl);

		if (dd->ipath_portpiowait)
			handle_port_pioavail(dd);

		handle_layer_pioavail(dd);
	}

	/*
	 * we check for both transition from empty to non-empty, and urgent
	 * packets (those with the interrupt bit set in the header)
	 */

	if (istat & ((infinipath_i_rcvavail_mask <<
		      INFINIPATH_I_RCVAVAIL_SHIFT)
		     | (infinipath_i_rcvurg_mask <<
			INFINIPATH_I_RCVURG_SHIFT)))
		handle_rcv(dd, istat);

	ret = IRQ_HANDLED;

bail:
	return ret;
}
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
new file mode 100644
index 000000000000..0ce5f19c9d62
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -0,0 +1,883 @@
1#ifndef _IPATH_KERNEL_H
2#define _IPATH_KERNEL_H
3/*
4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35/*
36 * This header file is the base header file for infinipath kernel code
37 * ipath_user.h serves a similar purpose for user code.
38 */
39
40#include <linux/interrupt.h>
41#include <asm/io.h>
42
43#include "ipath_common.h"
44#include "ipath_debug.h"
45#include "ipath_registers.h"
46
47/* only s/w major version of InfiniPath we can handle */
48#define IPATH_CHIP_VERS_MAJ 2U
49
50/* don't care about this except printing */
51#define IPATH_CHIP_VERS_MIN 0U
52
53/* temporary, maybe always */
54extern struct infinipath_stats ipath_stats;
55
56#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
57
/*
 * struct ipath_portdata - per-port receive state for one InfiniPath port.
 * One is allocated per configured port of a chip; port 0 is owned by the
 * kernel driver, the rest by user processes that open the device.
 */
struct ipath_portdata {
	/* chunk base addresses of the eager receive buffers */
	void **port_rcvegrbuf;
	/* DMA addresses of the eager buffer chunks above */
	dma_addr_t *port_rcvegrbuf_phys;
	/* rcvhdrq base, needs mmap before useful */
	void *port_rcvhdrq;
	/* kernel virtual address where hdrqtail is updated */
	u64 *port_rcvhdrtail_kvaddr;
	/* page * used for uaddr */
	struct page *port_rcvhdrtail_pagep;
	/*
	 * temp buffer for expected send setup, allocated at open, instead
	 * of each setup call
	 */
	void *port_tid_pg_list;
	/* when waiting for rcv or pioavail */
	wait_queue_head_t port_wait;
	/*
	 * rcvegr bufs base, physical (must fit in 44 bits so 32 bit
	 * programs' mmap64 of it works)
	 */
	dma_addr_t port_rcvegr_phys;
	/* mmap of hdrq, must fit in 44 bits */
	dma_addr_t port_rcvhdrq_phys;
	/*
	 * the actual user address that we ipath_mlock'ed, so we can
	 * ipath_munlock it at close
	 */
	unsigned long port_rcvhdrtail_uaddr;
	/*
	 * number of opens on this instance (0 or 1; ignoring forks, dup,
	 * etc. for now)
	 */
	int port_cnt;
	/*
	 * which port of the unit this structure is for, kept here
	 * instead of calculating it each time it's needed
	 */
	unsigned port_port;
	/* chip offset of PIO buffers for this port */
	u32 port_piobufs;
	/* how many alloc_pages() chunks in port_rcvegrbuf_pages */
	u32 port_rcvegrbuf_chunks;
	/* how many egrbufs per chunk */
	u32 port_rcvegrbufs_perchunk;
	/* order for port_rcvegrbuf_pages */
	size_t port_rcvegrbuf_size;
	/* rcvhdrq size (for freeing) */
	size_t port_rcvhdrq_size;
	/* next expected TID to check when looking for free */
	u32 port_tidcursor;
	/* IPATH_PORT_WAITING_* flag bits (bit offsets defined below) */
	unsigned long port_flag;
	/* WAIT_RCV that timed out, no interrupt */
	u32 port_rcvwait_to;
	/* WAIT_PIO that timed out, no interrupt */
	u32 port_piowait_to;
	/* WAIT_RCV already happened, no wait */
	u32 port_rcvnowait;
	/* WAIT_PIO already happened, no wait */
	u32 port_pionowait;
	/* total number of rcvhdrqfull errors */
	u32 port_hdrqfull;
	/* pid of process using this port */
	pid_t port_pid;
	/* same size as task_struct .comm[] */
	char port_comm[16];
	/* pkeys set by this use of this port */
	u16 port_pkeys[4];
	/* so file ops can get at unit */
	struct ipath_devdata *port_dd;
};
130
131struct sk_buff;
132
/*
 * control information for layered drivers
 */
struct _ipath_layer {
	/* opaque pointer private to the registered layered driver */
	void *l_arg;
};

/* Verbs layer interface */
struct _verbs_layer {
	/* opaque pointer private to the verbs layer */
	void *l_arg;
	/* timer owned/armed by the verbs layer */
	struct timer_list l_timer;
};
145
/*
 * struct ipath_devdata - per-chip (per-unit) device state.  One is
 * allocated for each InfiniPath chip probed; linked on ipath_dev_list.
 * Holds the chip register mappings, the ipath_f_* chip-specific function
 * pointers, counter snapshots, and shadow copies of write-only registers.
 */
struct ipath_devdata {
	struct list_head ipath_list;

	struct ipath_kregs const *ipath_kregs;
	struct ipath_cregs const *ipath_cregs;

	/* mem-mapped pointer to base of chip regs */
	u64 __iomem *ipath_kregbase;
	/* end of mem-mapped chip space; range checking */
	u64 __iomem *ipath_kregend;
	/* physical address of chip for io_remap, etc. */
	unsigned long ipath_physaddr;
	/* base of memory alloced for ipath_kregbase, for free */
	u64 *ipath_kregalloc;
	/*
	 * version of kregbase that doesn't have high bits set (for 32 bit
	 * programs, so mmap64 44 bit works)
	 */
	u64 __iomem *ipath_kregvirt;
	/*
	 * virtual address where port0 rcvhdrqtail updated for this unit.
	 * only written to by the chip, not the driver.
	 */
	volatile __le64 *ipath_hdrqtailptr;
	dma_addr_t ipath_dma_addr;
	/* ipath_cfgports pointers */
	struct ipath_portdata **ipath_pd;
	/* sk_buffs used by port 0 eager receive queue */
	struct sk_buff **ipath_port0_skbs;
	/* kvirt address of 1st 2k pio buffer */
	void __iomem *ipath_pio2kbase;
	/* kvirt address of 1st 4k pio buffer */
	void __iomem *ipath_pio4kbase;
	/*
	 * points to area where PIOavail registers will be DMA'ed.
	 * Has to be on a page of its own, because the page will be
	 * mapped into user program space. This copy is *ONLY* ever
	 * written by DMA, not by the driver! Need a copy per device
	 * when we get to multiple devices
	 */
	volatile __le64 *ipath_pioavailregs_dma;
	/* physical address where updates occur */
	dma_addr_t ipath_pioavailregs_phys;
	struct _ipath_layer ipath_layer;
	/* setup intr */
	int (*ipath_f_intrsetup)(struct ipath_devdata *);
	/* setup on-chip bus config */
	int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *);
	/* hard reset chip */
	int (*ipath_f_reset)(struct ipath_devdata *);
	int (*ipath_f_get_boardname)(struct ipath_devdata *, char *,
				     size_t);
	void (*ipath_f_init_hwerrors)(struct ipath_devdata *);
	void (*ipath_f_handle_hwerrors)(struct ipath_devdata *, char *,
					size_t);
	void (*ipath_f_quiet_serdes)(struct ipath_devdata *);
	int (*ipath_f_bringup_serdes)(struct ipath_devdata *);
	int (*ipath_f_early_init)(struct ipath_devdata *);
	void (*ipath_f_clear_tids)(struct ipath_devdata *, unsigned);
	void (*ipath_f_put_tid)(struct ipath_devdata *, u64 __iomem*,
				u32, unsigned long);
	void (*ipath_f_tidtemplate)(struct ipath_devdata *);
	void (*ipath_f_cleanup)(struct ipath_devdata *);
	void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
	/* fill out chip-specific fields */
	int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
	struct _verbs_layer verbs_layer;
	/* total dwords sent (summed from counter) */
	u64 ipath_sword;
	/* total dwords rcvd (summed from counter) */
	u64 ipath_rword;
	/* total packets sent (summed from counter) */
	u64 ipath_spkts;
	/* total packets rcvd (summed from counter) */
	u64 ipath_rpkts;
	/* ipath_statusp initially points to this. */
	u64 _ipath_status;
	/* GUID for this interface, in network order */
	__be64 ipath_guid;
	/*
	 * aggregate of error bits reported since last cleared, for
	 * limiting of error reporting
	 */
	ipath_err_t ipath_lasterror;
	/*
	 * aggregate of hardware error bits reported since last cleared,
	 * for limiting of hwerror reporting
	 */
	ipath_err_t ipath_lasthwerror;
	/*
	 * errors masked because they occur too fast, also includes errors
	 * that are always ignored (ipath_ignorederrs)
	 */
	ipath_err_t ipath_maskederrs;
	/* time in jiffies at which to re-enable maskederrs */
	unsigned long ipath_unmasktime;
	/*
	 * errors always ignored (masked), at least for a given
	 * chip/device, because they are wrong or not useful
	 */
	ipath_err_t ipath_ignorederrs;
	/* count of egrfull errors, combined for all ports */
	u64 ipath_last_tidfull;
	/* for ipath_qcheck() */
	u64 ipath_lastport0rcv_cnt;
	/* template for writing TIDs */
	u64 ipath_tidtemplate;
	/* value to write to free TIDs */
	u64 ipath_tidinvalid;
	/* PE-800 rcv interrupt setup */
	u64 ipath_rhdrhead_intr_off;

	/* size of memory at ipath_kregbase */
	u32 ipath_kregsize;
	/* number of registers used for pioavail */
	u32 ipath_pioavregs;
	/* IPATH_POLL, etc. */
	u32 ipath_flags;
	/* ipath_flags sma is waiting for */
	u32 ipath_sma_state_wanted;
	/* last buffer for user use, first buf for kernel use is this
	 * index. */
	u32 ipath_lastport_piobuf;
	/* is a stats timer active */
	u32 ipath_stats_timer_active;
	/* dwords sent read from counter */
	u32 ipath_lastsword;
	/* dwords received read from counter */
	u32 ipath_lastrword;
	/* sent packets read from counter */
	u32 ipath_lastspkts;
	/* received packets read from counter */
	u32 ipath_lastrpkts;
	/* pio bufs allocated per port */
	u32 ipath_pbufsport;
	/*
	 * number of ports configured as max; zero is set to number chip
	 * supports, less gives more pio bufs/port, etc.
	 */
	u32 ipath_cfgports;
	/* port0 rcvhdrq head offset */
	u32 ipath_port0head;
	/* count of port 0 hdrqfull errors */
	u32 ipath_p0_hdrqfull;

	/*
	 * (*cfgports) used to suppress multiple instances of same
	 * port staying stuck at same point
	 */
	u32 *ipath_lastrcvhdrqtails;
	/*
	 * (*cfgports) used to suppress multiple instances of same
	 * port staying stuck at same point
	 */
	u32 *ipath_lastegrheads;
	/*
	 * index of last piobuffer we used. Speeds up searching, by
	 * starting at this point. Doesn't matter if multiple cpu's use and
	 * update, last updater is only write that matters. Whenever it
	 * wraps, we update shadow copies. Need a copy per device when we
	 * get to multiple devices
	 */
	u32 ipath_lastpioindex;
	/* max length of freezemsg */
	u32 ipath_freezelen;
	/*
	 * consecutive times we wanted a PIO buffer but were unable to
	 * get one
	 */
	u32 ipath_consec_nopiobuf;
	/*
	 * hint that we should update ipath_pioavailshadow before
	 * looking for a PIO buffer
	 */
	u32 ipath_upd_pio_shadow;
	/* so we can rewrite it after a chip reset */
	u32 ipath_pcibar0;
	/* so we can rewrite it after a chip reset */
	u32 ipath_pcibar1;
	/* sequential tries for SMA send and no bufs */
	u32 ipath_nosma_bufs;
	/* duration (seconds) ipath_nosma_bufs set */
	u32 ipath_nosma_secs;

	/* HT/PCI Vendor ID (here for NodeInfo) */
	u16 ipath_vendorid;
	/* HT/PCI Device ID (here for NodeInfo) */
	u16 ipath_deviceid;
	/* offset in HT config space of slave/primary interface block */
	u8 ipath_ht_slave_off;
	/* for write combining settings */
	unsigned long ipath_wc_cookie;
	/* ref count for each pkey */
	atomic_t ipath_pkeyrefs[4];
	/* shadow copy of all exptids physaddr; used only by funcsim */
	u64 *ipath_tidsimshadow;
	/* shadow copy of struct page *'s for exp tid pages */
	struct page **ipath_pageshadow;
	/* lock to workaround chip bug 9437 */
	spinlock_t ipath_tid_lock;

	/*
	 * IPATH_STATUS_*,
	 * this address is mapped readonly into user processes so they can
	 * get status cheaply, whenever they want.
	 */
	u64 *ipath_statusp;
	/* freeze msg if hw error put chip in freeze */
	char *ipath_freezemsg;
	/* pci access data structure */
	struct pci_dev *pcidev;
	struct cdev *cdev;
	struct class_device *class_dev;
	/* timer used to prevent stats overflow, error throttling, etc. */
	struct timer_list ipath_stats_timer;
	/* check for stale messages in rcv queue */
	/* only allow one intr at a time. */
	unsigned long ipath_rcv_pending;

	/*
	 * Shadow copies of registers; size indicates read access size.
	 * Most of them are readonly, but some are write-only register,
	 * where we manipulate the bits in the shadow copy, and then write
	 * the shadow copy to infinipath.
	 *
	 * We deliberately make most of these 32 bits, since they have
	 * restricted range. For any that we read, we want to generate 32
	 * bit accesses, since Opteron will generate 2 separate 32 bit HT
	 * transactions for a 64 bit read, and we want to avoid unnecessary
	 * HT transactions.
	 */

	/* This is the 64 bit group */

	/*
	 * shadow of pioavail, check to be sure it's large enough at
	 * init time.
	 */
	unsigned long ipath_pioavailshadow[8];
	/* shadow of kr_gpio_out, for rmw ops */
	u64 ipath_gpio_out;
	/* kr_revision shadow */
	u64 ipath_revision;
	/*
	 * shadow of ibcctrl, for interrupt handling of link changes,
	 * etc.
	 */
	u64 ipath_ibcctrl;
	/*
	 * last ibcstatus, to suppress "duplicate" status change messages,
	 * mostly from 2 to 3
	 */
	u64 ipath_lastibcstat;
	/* hwerrmask shadow */
	ipath_err_t ipath_hwerrmask;
	/* interrupt config reg shadow */
	u64 ipath_intconfig;
	/* kr_sendpiobufbase value */
	u64 ipath_piobufbase;

	/* these are the "32 bit" regs */

	/*
	 * number of GUIDs in the flash for this interface; may need some
	 * rethinking for setting on other ifaces
	 */
	u32 ipath_nguid;
	/*
	 * the following two are 32-bit bitmasks, but {test,clear,set}_bit
	 * all expect bit fields to be "unsigned long"
	 */
	/* shadow kr_rcvctrl */
	unsigned long ipath_rcvctrl;
	/* shadow kr_sendctrl */
	unsigned long ipath_sendctrl;

	/* value we put in kr_rcvhdrcnt */
	u32 ipath_rcvhdrcnt;
	/* value we put in kr_rcvhdrsize */
	u32 ipath_rcvhdrsize;
	/* value we put in kr_rcvhdrentsize */
	u32 ipath_rcvhdrentsize;
	/* offset of last entry in rcvhdrq */
	u32 ipath_hdrqlast;
	/* kr_portcnt value */
	u32 ipath_portcnt;
	/* kr_pagealign value */
	u32 ipath_palign;
	/* number of "2KB" PIO buffers */
	u32 ipath_piobcnt2k;
	/* size in bytes of "2KB" PIO buffers */
	u32 ipath_piosize2k;
	/* number of "4KB" PIO buffers */
	u32 ipath_piobcnt4k;
	/* size in bytes of "4KB" PIO buffers */
	u32 ipath_piosize4k;
	/* kr_rcvegrbase value */
	u32 ipath_rcvegrbase;
	/* kr_rcvegrcnt value */
	u32 ipath_rcvegrcnt;
	/* kr_rcvtidbase value */
	u32 ipath_rcvtidbase;
	/* kr_rcvtidcnt value */
	u32 ipath_rcvtidcnt;
	/* kr_sendregbase */
	u32 ipath_sregbase;
	/* kr_userregbase */
	u32 ipath_uregbase;
	/* kr_counterregbase */
	u32 ipath_cregbase;
	/* shadow the control register contents */
	u32 ipath_control;
	/* shadow the gpio output contents */
	u32 ipath_extctrl;
	/* PCI revision register (HTC rev on FPGA) */
	u32 ipath_pcirev;

	/* chip address space used by 4k pio buffers */
	u32 ipath_4kalign;
	/* The MTU programmed for this unit */
	u32 ipath_ibmtu;
	/*
	 * The max size IB packet, included IB headers that we can send.
	 * Starts same as ipath_piosize, but is affected when ibmtu is
	 * changed, or by size of eager buffers
	 */
	u32 ipath_ibmaxlen;
	/*
	 * ibmaxlen at init time, limited by chip and by receive buffer
	 * size. Not changed after init.
	 */
	u32 ipath_init_ibmaxlen;
	/* size of each rcvegrbuffer */
	u32 ipath_rcvegrbufsize;
	/* width (2,4,8,16,32) from HT config reg */
	u32 ipath_htwidth;
	/* HT speed (200,400,800,1000) from HT config */
	u32 ipath_htspeed;
	/* ports waiting for PIOavail intr */
	unsigned long ipath_portpiowait;
	/*
	 * number of sequential ibcstatus change for polling active/quiet
	 * (i.e., link not coming up).
	 */
	u32 ipath_ibpollcnt;
	/* low and high portions of MSI capability/vector */
	u32 ipath_msi_lo;
	/* saved after PCIe init for restore after reset */
	u32 ipath_msi_hi;
	/* MSI data (vector) saved for restore */
	u16 ipath_msi_data;
	/* MLID programmed for this instance */
	u16 ipath_mlid;
	/* LID programmed for this instance */
	u16 ipath_lid;
	/* list of pkeys programmed; 0 if not set */
	u16 ipath_pkeys[4];
	/* ASCII serial number, from flash */
	u8 ipath_serial[12];
	/* human readable board version */
	u8 ipath_boardversion[80];
	/* chip major rev, from ipath_revision */
	u8 ipath_majrev;
	/* chip minor rev, from ipath_revision */
	u8 ipath_minrev;
	/* board rev, from ipath_revision */
	u8 ipath_boardrev;
	/* unit # of this chip, if present */
	int ipath_unit;
	/* saved for restore after reset */
	u8 ipath_pci_cacheline;
	/* LID mask control */
	u8 ipath_lmc;
};
520
521extern volatile __le64 *ipath_port0_rcvhdrtail;
522extern dma_addr_t ipath_port0_rcvhdrtail_dma;
523
524#define IPATH_PORT0_RCVHDRTAIL_SIZE PAGE_SIZE
525
526extern struct list_head ipath_dev_list;
527extern spinlock_t ipath_devs_lock;
528extern struct ipath_devdata *ipath_lookup(int unit);
529
530extern u16 ipath_layer_rcv_opcode;
531extern int __ipath_layer_intr(struct ipath_devdata *, u32);
532extern int ipath_layer_intr(struct ipath_devdata *, u32);
533extern int __ipath_layer_rcv(struct ipath_devdata *, void *,
534 struct sk_buff *);
535extern int __ipath_layer_rcv_lid(struct ipath_devdata *, void *);
536extern int __ipath_verbs_piobufavail(struct ipath_devdata *);
537extern int __ipath_verbs_rcv(struct ipath_devdata *, void *, void *, u32);
538
539void ipath_layer_add(struct ipath_devdata *);
540void ipath_layer_del(struct ipath_devdata *);
541
542int ipath_init_chip(struct ipath_devdata *, int);
543int ipath_enable_wc(struct ipath_devdata *dd);
544void ipath_disable_wc(struct ipath_devdata *dd);
545int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
546void ipath_shutdown_device(struct ipath_devdata *);
547
548struct file_operations;
549int ipath_cdev_init(int minor, char *name, struct file_operations *fops,
550 struct cdev **cdevp, struct class_device **class_devp);
551void ipath_cdev_cleanup(struct cdev **cdevp,
552 struct class_device **class_devp);
553
554int ipath_diag_init(void);
555void ipath_diag_cleanup(void);
556void ipath_diag_bringup_link(struct ipath_devdata *);
557
558extern wait_queue_head_t ipath_sma_state_wait;
559
560int ipath_user_add(struct ipath_devdata *dd);
561void ipath_user_del(struct ipath_devdata *dd);
562
563struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
564
565extern int ipath_diag_inuse;
566
567irqreturn_t ipath_intr(int irq, void *devid, struct pt_regs *regs);
568void ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
569#if __IPATH_INFO || __IPATH_DBG
570extern const char *ipath_ibcstatus_str[];
571#endif
572
573/* clean up any per-chip chip-specific stuff */
574void ipath_chip_cleanup(struct ipath_devdata *);
575/* clean up any chip type-specific stuff */
576void ipath_chip_done(void);
577
578/* check to see if we have to force ordering for write combining */
579int ipath_unordered_wc(void);
580
581void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
582 unsigned cnt);
583
584int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
585void ipath_free_pddata(struct ipath_devdata *, u32, int);
586
587int ipath_parse_ushort(const char *str, unsigned short *valp);
588
589int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
590void ipath_set_ib_lstate(struct ipath_devdata *, int);
591void ipath_kreceive(struct ipath_devdata *);
592int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
593int ipath_reset_device(int);
594void ipath_get_faststats(unsigned long);
595
596/* for use in system calls, where we want to know device type, etc. */
597#define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data)
598
599/*
600 * values for ipath_flags
601 */
602/* The chip is up and initted */
603#define IPATH_INITTED 0x2
604 /* set if any user code has set kr_rcvhdrsize */
605#define IPATH_RCVHDRSZ_SET 0x4
606 /* The chip is present and valid for accesses */
607#define IPATH_PRESENT 0x8
608 /* HT link0 is only 8 bits wide, ignore upper byte crc
609 * errors, etc. */
610#define IPATH_8BIT_IN_HT0 0x10
611 /* HT link1 is only 8 bits wide, ignore upper byte crc
612 * errors, etc. */
613#define IPATH_8BIT_IN_HT1 0x20
614 /* The link is down */
615#define IPATH_LINKDOWN 0x40
616 /* The link level is up (0x11) */
617#define IPATH_LINKINIT 0x80
618 /* The link is in the armed (0x21) state */
619#define IPATH_LINKARMED 0x100
620 /* The link is in the active (0x31) state */
621#define IPATH_LINKACTIVE 0x200
622 /* link current state is unknown */
623#define IPATH_LINKUNK 0x400
624 /* no IB cable, or no device on IB cable */
625#define IPATH_NOCABLE 0x4000
626 /* Supports port zero per packet receive interrupts via
627 * GPIO */
628#define IPATH_GPIO_INTR 0x8000
629 /* uses the coded 4byte TID, not 8 byte */
630#define IPATH_4BYTE_TID 0x10000
631 /* packet/word counters are 32 bit, else those 4 counters
632 * are 64bit */
633#define IPATH_32BITCOUNTERS 0x20000
634 /* can miss port0 rx interrupts */
635#define IPATH_POLL_RX_INTR 0x40000
636#define IPATH_DISABLED 0x80000 /* administratively disabled */
637
638/* portdata flag bit offsets */
639 /* waiting for a packet to arrive */
640#define IPATH_PORT_WAITING_RCV 2
641 /* waiting for a PIO buffer to be available */
642#define IPATH_PORT_WAITING_PIO 3
643
644/* free up any allocated data at closes */
645void ipath_free_data(struct ipath_portdata *dd);
646int ipath_waitfor_mdio_cmdready(struct ipath_devdata *);
647int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *);
648u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
649/* init PE-800-specific func */
650void ipath_init_pe800_funcs(struct ipath_devdata *);
651/* init HT-400-specific func */
652void ipath_init_ht400_funcs(struct ipath_devdata *);
653void ipath_get_guid(struct ipath_devdata *);
654u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
655
656/*
657 * number of words used for protocol header if not set by ipath_userinit();
658 */
659#define IPATH_DFLT_RCVHDRSIZE 9
660
661#define IPATH_MDIO_CMD_WRITE 1
662#define IPATH_MDIO_CMD_READ 2
663#define IPATH_MDIO_CLD_DIV 25 /* to get 2.5 Mhz mdio clock */
664#define IPATH_MDIO_CMDVALID 0x40000000 /* bit 30 */
665#define IPATH_MDIO_DATAVALID 0x80000000 /* bit 31 */
666#define IPATH_MDIO_CTRL_STD 0x0
667
668static inline u64 ipath_mdio_req(int cmd, int dev, int reg, int data)
669{
670 return (((u64) IPATH_MDIO_CLD_DIV) << 32) |
671 (cmd << 26) |
672 (dev << 21) |
673 (reg << 16) |
674 (data & 0xFFFF);
675}
676
677 /* signal and fifo status, in bank 31 */
678#define IPATH_MDIO_CTRL_XGXS_REG_8 0x8
679 /* controls loopback, redundancy */
680#define IPATH_MDIO_CTRL_8355_REG_1 0x10
681 /* premph, encdec, etc. */
682#define IPATH_MDIO_CTRL_8355_REG_2 0x11
683 /* Kchars, etc. */
684#define IPATH_MDIO_CTRL_8355_REG_6 0x15
685#define IPATH_MDIO_CTRL_8355_REG_9 0x18
686#define IPATH_MDIO_CTRL_8355_REG_10 0x1D
687
688int ipath_get_user_pages(unsigned long, size_t, struct page **);
689int ipath_get_user_pages_nocopy(unsigned long, struct page **);
690void ipath_release_user_pages(struct page **, size_t);
691void ipath_release_user_pages_on_close(struct page **, size_t);
692int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int);
693int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
694
695/* these are used for the registers that vary with port */
696void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
697 unsigned, u64);
698u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg,
699 unsigned);
700
701/*
702 * We could have a single register get/put routine, that takes a group type,
703 * but this is somewhat clearer and cleaner. It also gives us some error
704 * checking. 64 bit register reads should always work, but are inefficient
705 * on opteron (the northbridge always generates 2 separate HT 32 bit reads),
706 * so we use kreg32 wherever possible. User register and counter register
707 * reads are always 32 bit reads, so only one form of those routines.
708 */
709
710/*
711 * At the moment, none of the s-registers are writable, so no
712 * ipath_write_sreg(), and none of the c-registers are writable, so no
713 * ipath_write_creg().
714 */
715
716/**
717 * ipath_read_ureg32 - read 32-bit virtualized per-port register
718 * @dd: device
719 * @regno: register number
720 * @port: port number
721 *
722 * Return the contents of a register that is virtualized to be per port.
723 * Prints a debug message and returns -1 on errors (not distinguishable from
724 * valid contents at runtime; we may add a separate error variable at some
725 * point).
726 *
727 * This is normally not used by the kernel, but may be for debugging, and
728 * has a different implementation than user mode, which is why it's not in
729 * _common.h.
730 */
731static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd,
732 ipath_ureg regno, int port)
733{
734 if (!dd->ipath_kregbase)
735 return 0;
736
737 return readl(regno + (u64 __iomem *)
738 (dd->ipath_uregbase +
739 (char __iomem *)dd->ipath_kregbase +
740 dd->ipath_palign * port));
741}
742
743/**
744 * ipath_write_ureg - write 32-bit virtualized per-port register
745 * @dd: device
746 * @regno: register number
747 * @value: value
748 * @port: port
749 *
750 * Write the contents of a register that is virtualized to be per port.
751 */
752static inline void ipath_write_ureg(const struct ipath_devdata *dd,
753 ipath_ureg regno, u64 value, int port)
754{
755 u64 __iomem *ubase = (u64 __iomem *)
756 (dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase +
757 dd->ipath_palign * port);
758 if (dd->ipath_kregbase)
759 writeq(value, &ubase[regno]);
760}
761
762static inline u32 ipath_read_kreg32(const struct ipath_devdata *dd,
763 ipath_kreg regno)
764{
765 if (!dd->ipath_kregbase)
766 return -1;
767 return readl((u32 __iomem *) & dd->ipath_kregbase[regno]);
768}
769
770static inline u64 ipath_read_kreg64(const struct ipath_devdata *dd,
771 ipath_kreg regno)
772{
773 if (!dd->ipath_kregbase)
774 return -1;
775
776 return readq(&dd->ipath_kregbase[regno]);
777}
778
779static inline void ipath_write_kreg(const struct ipath_devdata *dd,
780 ipath_kreg regno, u64 value)
781{
782 if (dd->ipath_kregbase)
783 writeq(value, &dd->ipath_kregbase[regno]);
784}
785
786static inline u64 ipath_read_creg(const struct ipath_devdata *dd,
787 ipath_sreg regno)
788{
789 if (!dd->ipath_kregbase)
790 return 0;
791
792 return readq(regno + (u64 __iomem *)
793 (dd->ipath_cregbase +
794 (char __iomem *)dd->ipath_kregbase));
795}
796
797static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
798 ipath_sreg regno)
799{
800 if (!dd->ipath_kregbase)
801 return 0;
802 return readl(regno + (u64 __iomem *)
803 (dd->ipath_cregbase +
804 (char __iomem *)dd->ipath_kregbase));
805}
806
807/*
808 * sysfs interface.
809 */
810
811struct device_driver;
812
813extern const char ipath_core_version[];
814
815int ipath_driver_create_group(struct device_driver *);
816void ipath_driver_remove_group(struct device_driver *);
817
818int ipath_device_create_group(struct device *, struct ipath_devdata *);
819void ipath_device_remove_group(struct device *, struct ipath_devdata *);
820int ipath_expose_reset(struct device *);
821
822int ipath_init_ipathfs(void);
823void ipath_exit_ipathfs(void);
824int ipathfs_add_device(struct ipath_devdata *);
825int ipathfs_remove_device(struct ipath_devdata *);
826
827/*
828 * Flush write combining store buffers (if present) and perform a write
829 * barrier.
830 */
831#if defined(CONFIG_X86_64)
832#define ipath_flush_wc() asm volatile("sfence" ::: "memory")
833#else
834#define ipath_flush_wc() wmb()
835#endif
836
837extern unsigned ipath_debug; /* debugging bit mask */
838
839const char *ipath_get_unit_name(int unit);
840
841extern struct mutex ipath_mutex;
842
843#define IPATH_DRV_NAME "ipath_core"
844#define IPATH_MAJOR 233
845#define IPATH_SMA_MINOR 128
846#define IPATH_DIAG_MINOR 129
847#define IPATH_NMINORS 130
848
/*
 * ipath_dev_err - print an error message tagged with the unit name.
 * Uses dev_err() against the PCI device when one is attached (so the
 * message carries the device path); falls back to plain printk with
 * the driver name before/without PCI setup.  The local __dd copy
 * evaluates the dd argument exactly once.
 */
#define ipath_dev_err(dd,fmt,...) \
	do { \
		const struct ipath_devdata *__dd = (dd); \
		if (__dd->pcidev) \
			dev_err(&__dd->pcidev->dev, "%s: " fmt, \
				ipath_get_unit_name(__dd->ipath_unit), \
				##__VA_ARGS__); \
		else \
			printk(KERN_ERR IPATH_DRV_NAME ": %s: " fmt, \
			       ipath_get_unit_name(__dd->ipath_unit), \
			       ##__VA_ARGS__); \
	} while (0)
861
#if _IPATH_DEBUGGING

/*
 * Emit a KERN_DEBUG message prefixed with driver and function name,
 * gated at runtime by the given bit(s) in the ipath_debug mask.
 */
# define __IPATH_DBG_WHICH(which,fmt,...) \
	do { \
		if(unlikely(ipath_debug&(which))) \
			printk(KERN_DEBUG IPATH_DRV_NAME ": %s: " fmt, \
			       __func__,##__VA_ARGS__); \
	} while(0)

/* general debug output, gated by the __IPATH_DBG bit */
# define ipath_dbg(fmt,...) \
	__IPATH_DBG_WHICH(__IPATH_DBG,fmt,##__VA_ARGS__)
/* category debug output; `which` names an __IPATH_<which>DBG bit */
# define ipath_cdbg(which,fmt,...) \
	__IPATH_DBG_WHICH(__IPATH_##which##DBG,fmt,##__VA_ARGS__)

#else /* ! _IPATH_DEBUGGING */

/* debugging compiled out: these expand to nothing */
# define ipath_dbg(fmt,...)
# define ipath_cdbg(which,fmt,...)

#endif /* _IPATH_DEBUGGING */
882
883#endif /* _IPATH_KERNEL_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
new file mode 100644
index 000000000000..aa33b0e9f2f6
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -0,0 +1,236 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <asm/io.h>
34
35#include "ipath_verbs.h"
36
37/**
38 * ipath_alloc_lkey - allocate an lkey
39 * @rkt: lkey table in which to allocate the lkey
40 * @mr: memory region that this lkey protects
41 *
42 * Returns 1 if successful, otherwise returns 0.
43 */
44
45int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
46{
47 unsigned long flags;
48 u32 r;
49 u32 n;
50 int ret;
51
52 spin_lock_irqsave(&rkt->lock, flags);
53
54 /* Find the next available LKEY */
55 r = n = rkt->next;
56 for (;;) {
57 if (rkt->table[r] == NULL)
58 break;
59 r = (r + 1) & (rkt->max - 1);
60 if (r == n) {
61 spin_unlock_irqrestore(&rkt->lock, flags);
62 _VERBS_INFO("LKEY table full\n");
63 ret = 0;
64 goto bail;
65 }
66 }
67 rkt->next = (r + 1) & (rkt->max - 1);
68 /*
69 * Make sure lkey is never zero which is reserved to indicate an
70 * unrestricted LKEY.
71 */
72 rkt->gen++;
73 mr->lkey = (r << (32 - ib_ipath_lkey_table_size)) |
74 ((((1 << (24 - ib_ipath_lkey_table_size)) - 1) & rkt->gen)
75 << 8);
76 if (mr->lkey == 0) {
77 mr->lkey |= 1 << 8;
78 rkt->gen++;
79 }
80 rkt->table[r] = mr;
81 spin_unlock_irqrestore(&rkt->lock, flags);
82
83 ret = 1;
84
85bail:
86 return ret;
87}
88
89/**
90 * ipath_free_lkey - free an lkey
91 * @rkt: table from which to free the lkey
92 * @lkey: lkey id to free
93 */
94void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey)
95{
96 unsigned long flags;
97 u32 r;
98
99 if (lkey == 0)
100 return;
101 r = lkey >> (32 - ib_ipath_lkey_table_size);
102 spin_lock_irqsave(&rkt->lock, flags);
103 rkt->table[r] = NULL;
104 spin_unlock_irqrestore(&rkt->lock, flags);
105}
106
107/**
108 * ipath_lkey_ok - check IB SGE for validity and initialize
109 * @rkt: table containing lkey to check SGE against
110 * @isge: outgoing internal SGE
111 * @sge: SGE to check
112 * @acc: access flags
113 *
114 * Return 1 if valid and successful, otherwise returns 0.
115 *
116 * Check the IB SGE for validity and initialize our internal version
117 * of it.
118 */
119int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
120 struct ib_sge *sge, int acc)
121{
122 struct ipath_mregion *mr;
123 size_t off;
124 int ret;
125
126 /*
127 * We use LKEY == zero to mean a physical kmalloc() address.
128 * This is a bit of a hack since we rely on dma_map_single()
129 * being reversible by calling bus_to_virt().
130 */
131 if (sge->lkey == 0) {
132 isge->mr = NULL;
133 isge->vaddr = bus_to_virt(sge->addr);
134 isge->length = sge->length;
135 isge->sge_length = sge->length;
136 ret = 1;
137 goto bail;
138 }
139 spin_lock(&rkt->lock);
140 mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
141 spin_unlock(&rkt->lock);
142 if (unlikely(mr == NULL || mr->lkey != sge->lkey)) {
143 ret = 0;
144 goto bail;
145 }
146
147 off = sge->addr - mr->user_base;
148 if (unlikely(sge->addr < mr->user_base ||
149 off + sge->length > mr->length ||
150 (mr->access_flags & acc) != acc)) {
151 ret = 0;
152 goto bail;
153 }
154
155 off += mr->offset;
156 isge->mr = mr;
157 isge->m = 0;
158 isge->n = 0;
159 while (off >= mr->map[isge->m]->segs[isge->n].length) {
160 off -= mr->map[isge->m]->segs[isge->n].length;
161 isge->n++;
162 if (isge->n >= IPATH_SEGSZ) {
163 isge->m++;
164 isge->n = 0;
165 }
166 }
167 isge->vaddr = mr->map[isge->m]->segs[isge->n].vaddr + off;
168 isge->length = mr->map[isge->m]->segs[isge->n].length - off;
169 isge->sge_length = sge->length;
170
171 ret = 1;
172
173bail:
174 return ret;
175}
176
177/**
178 * ipath_rkey_ok - check the IB virtual address, length, and RKEY
179 * @dev: infiniband device
180 * @ss: SGE state
181 * @len: length of data
182 * @vaddr: virtual address to place data
183 * @rkey: rkey to check
184 * @acc: access flags
185 *
186 * Return 1 if successful, otherwise 0.
187 *
188 * The QP r_rq.lock should be held.
189 */
190int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
191 u32 len, u64 vaddr, u32 rkey, int acc)
192{
193 struct ipath_lkey_table *rkt = &dev->lk_table;
194 struct ipath_sge *sge = &ss->sge;
195 struct ipath_mregion *mr;
196 size_t off;
197 int ret;
198
199 spin_lock(&rkt->lock);
200 mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
201 spin_unlock(&rkt->lock);
202 if (unlikely(mr == NULL || mr->lkey != rkey)) {
203 ret = 0;
204 goto bail;
205 }
206
207 off = vaddr - mr->iova;
208 if (unlikely(vaddr < mr->iova || off + len > mr->length ||
209 (mr->access_flags & acc) == 0)) {
210 ret = 0;
211 goto bail;
212 }
213
214 off += mr->offset;
215 sge->mr = mr;
216 sge->m = 0;
217 sge->n = 0;
218 while (off >= mr->map[sge->m]->segs[sge->n].length) {
219 off -= mr->map[sge->m]->segs[sge->n].length;
220 sge->n++;
221 if (sge->n >= IPATH_SEGSZ) {
222 sge->m++;
223 sge->n = 0;
224 }
225 }
226 sge->vaddr = mr->map[sge->m]->segs[sge->n].vaddr + off;
227 sge->length = mr->map[sge->m]->segs[sge->n].length - off;
228 sge->sge_length = len;
229 ss->sg_list = NULL;
230 ss->num_sge = 1;
231
232 ret = 1;
233
234bail:
235 return ret;
236}
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c
new file mode 100644
index 000000000000..69ed1100701a
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_layer.c
@@ -0,0 +1,1515 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33/*
34 * These are the routines used by layered drivers, currently just the
35 * layered ethernet driver and verbs layer.
36 */
37
38#include <linux/io.h>
39#include <linux/pci.h>
40#include <asm/byteorder.h>
41
42#include "ipath_kernel.h"
43#include "ips_common.h"
44#include "ipath_layer.h"
45
46/* Acquire before ipath_devs_lock. */
47static DEFINE_MUTEX(ipath_layer_mutex);
48
49u16 ipath_layer_rcv_opcode;
50static int (*layer_intr)(void *, u32);
51static int (*layer_rcv)(void *, void *, struct sk_buff *);
52static int (*layer_rcv_lid)(void *, void *);
53static int (*verbs_piobufavail)(void *);
54static void (*verbs_rcv)(void *, void *, void *, u32);
55static int ipath_verbs_registered;
56
57static void *(*layer_add_one)(int, struct ipath_devdata *);
58static void (*layer_remove_one)(void *);
59static void *(*verbs_add_one)(int, struct ipath_devdata *);
60static void (*verbs_remove_one)(void *);
61static void (*verbs_timer_cb)(void *);
62
63int __ipath_layer_intr(struct ipath_devdata *dd, u32 arg)
64{
65 int ret = -ENODEV;
66
67 if (dd->ipath_layer.l_arg && layer_intr)
68 ret = layer_intr(dd->ipath_layer.l_arg, arg);
69
70 return ret;
71}
72
73int ipath_layer_intr(struct ipath_devdata *dd, u32 arg)
74{
75 int ret;
76
77 mutex_lock(&ipath_layer_mutex);
78
79 ret = __ipath_layer_intr(dd, arg);
80
81 mutex_unlock(&ipath_layer_mutex);
82
83 return ret;
84}
85
86int __ipath_layer_rcv(struct ipath_devdata *dd, void *hdr,
87 struct sk_buff *skb)
88{
89 int ret = -ENODEV;
90
91 if (dd->ipath_layer.l_arg && layer_rcv)
92 ret = layer_rcv(dd->ipath_layer.l_arg, hdr, skb);
93
94 return ret;
95}
96
97int __ipath_layer_rcv_lid(struct ipath_devdata *dd, void *hdr)
98{
99 int ret = -ENODEV;
100
101 if (dd->ipath_layer.l_arg && layer_rcv_lid)
102 ret = layer_rcv_lid(dd->ipath_layer.l_arg, hdr);
103
104 return ret;
105}
106
107int __ipath_verbs_piobufavail(struct ipath_devdata *dd)
108{
109 int ret = -ENODEV;
110
111 if (dd->verbs_layer.l_arg && verbs_piobufavail)
112 ret = verbs_piobufavail(dd->verbs_layer.l_arg);
113
114 return ret;
115}
116
117int __ipath_verbs_rcv(struct ipath_devdata *dd, void *rc, void *ebuf,
118 u32 tlen)
119{
120 int ret = -ENODEV;
121
122 if (dd->verbs_layer.l_arg && verbs_rcv) {
123 verbs_rcv(dd->verbs_layer.l_arg, rc, ebuf, tlen);
124 ret = 0;
125 }
126
127 return ret;
128}
129
/**
 * ipath_layer_set_linkstate - request an IB link state change
 * @dd: the infinipath device
 * @newstate: requested IPATH_IB_LINK* state
 *
 * The LINKDOWN variants issue the command and return immediately; the
 * INIT/ARM/ACTIVE transitions issue the command and then wait (via
 * ipath_wait_linkstate(), timeout argument 2000 — units per that
 * function's contract) for the link to reach the target state.
 *
 * Returns 0 on success (or when already in the requested state),
 * -EINVAL for an invalid request or transition, otherwise the result of
 * ipath_wait_linkstate().
 */
int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 newstate)
{
	u32 lstate;
	int ret;

	switch (newstate) {
	case IPATH_IB_LINKDOWN:
		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
		/* don't wait */
		ret = 0;
		goto bail;

	case IPATH_IB_LINKDOWN_SLEEP:
		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
		/* don't wait */
		ret = 0;
		goto bail;

	case IPATH_IB_LINKDOWN_DISABLE:
		ipath_set_ib_lstate(dd,
				    INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
				    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
		/* don't wait */
		ret = 0;
		goto bail;

	case IPATH_IB_LINKINIT:
		/* Already there: nothing to do. */
		if (dd->ipath_flags & IPATH_LINKINIT) {
			ret = 0;
			goto bail;
		}
		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
				    INFINIPATH_IBCC_LINKCMD_SHIFT);
		lstate = IPATH_LINKINIT;
		break;

	case IPATH_IB_LINKARM:
		if (dd->ipath_flags & IPATH_LINKARMED) {
			ret = 0;
			goto bail;
		}
		/* ARM is only reachable from INIT or ACTIVE. */
		if (!(dd->ipath_flags &
		      (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
			ret = -EINVAL;
			goto bail;
		}
		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
				    INFINIPATH_IBCC_LINKCMD_SHIFT);
		/*
		 * Since the port can transition to ACTIVE by receiving
		 * a non VL 15 packet, wait for either state.
		 */
		lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
		break;

	case IPATH_IB_LINKACTIVE:
		if (dd->ipath_flags & IPATH_LINKACTIVE) {
			ret = 0;
			goto bail;
		}
		/* ACTIVE is only reachable from ARMED. */
		if (!(dd->ipath_flags & IPATH_LINKARMED)) {
			ret = -EINVAL;
			goto bail;
		}
		ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
				    INFINIPATH_IBCC_LINKCMD_SHIFT);
		lstate = IPATH_LINKACTIVE;
		break;

	default:
		ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
		ret = -EINVAL;
		goto bail;
	}
	ret = ipath_wait_linkstate(dd, lstate, 2000);

bail:
	return ret;
}

EXPORT_SYMBOL_GPL(ipath_layer_set_linkstate);
213
214/**
215 * ipath_layer_set_mtu - set the MTU
216 * @dd: the infinipath device
217 * @arg: the new MTU
218 *
219 * we can handle "any" incoming size, the issue here is whether we
220 * need to restrict our outgoing size. For now, we don't do any
221 * sanity checking on this, and we don't deal with what happens to
222 * programs that are already running when the size changes.
223 * NOTE: changing the MTU will usually cause the IBC to go back to
224 * link initialize (IPATH_IBSTATE_INIT) state...
225 */
226int ipath_layer_set_mtu(struct ipath_devdata *dd, u16 arg)
227{
228 u32 piosize;
229 int changed = 0;
230 int ret;
231
232 /*
233 * mtu is IB data payload max. It's the largest power of 2 less
234 * than piosize (or even larger, since it only really controls the
235 * largest we can receive; we can send the max of the mtu and
236 * piosize). We check that it's one of the valid IB sizes.
237 */
238 if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
239 arg != 4096) {
240 ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
241 ret = -EINVAL;
242 goto bail;
243 }
244 if (dd->ipath_ibmtu == arg) {
245 ret = 0; /* same as current */
246 goto bail;
247 }
248
249 piosize = dd->ipath_ibmaxlen;
250 dd->ipath_ibmtu = arg;
251
252 if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
253 /* Only if it's not the initial value (or reset to it) */
254 if (piosize != dd->ipath_init_ibmaxlen) {
255 dd->ipath_ibmaxlen = piosize;
256 changed = 1;
257 }
258 } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
259 piosize = arg + IPATH_PIO_MAXIBHDR;
260 ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
261 "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
262 arg);
263 dd->ipath_ibmaxlen = piosize;
264 changed = 1;
265 }
266
267 if (changed) {
268 /*
269 * set the IBC maxpktlength to the size of our pio
270 * buffers in words
271 */
272 u64 ibc = dd->ipath_ibcctrl;
273 ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
274 INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
275
276 piosize = piosize - 2 * sizeof(u32); /* ignore pbc */
277 dd->ipath_ibmaxlen = piosize;
278 piosize /= sizeof(u32); /* in words */
279 /*
280 * for ICRC, which we only send in diag test pkt mode, and
281 * we don't need to worry about that for mtu
282 */
283 piosize += 1;
284
285 ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
286 dd->ipath_ibcctrl = ibc;
287 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
288 dd->ipath_ibcctrl);
289 dd->ipath_f_tidtemplate(dd);
290 }
291
292 ret = 0;
293
294bail:
295 return ret;
296}
297
298EXPORT_SYMBOL_GPL(ipath_layer_set_mtu);
299
300int ipath_set_sps_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
301{
302 ipath_stats.sps_lid[dd->ipath_unit] = arg;
303 dd->ipath_lid = arg;
304 dd->ipath_lmc = lmc;
305
306 mutex_lock(&ipath_layer_mutex);
307
308 if (dd->ipath_layer.l_arg && layer_intr)
309 layer_intr(dd->ipath_layer.l_arg, IPATH_LAYER_INT_LID);
310
311 mutex_unlock(&ipath_layer_mutex);
312
313 return 0;
314}
315
316EXPORT_SYMBOL_GPL(ipath_set_sps_lid);
317
/* Set the unit's node GUID (network byte order).  Always returns 0. */
int ipath_layer_set_guid(struct ipath_devdata *dd, __be64 guid)
{
	/* XXX - need to inform anyone who cares this just happened. */
	dd->ipath_guid = guid;
	return 0;
}

EXPORT_SYMBOL_GPL(ipath_layer_set_guid);
326
/* Return the unit's node GUID (network byte order). */
__be64 ipath_layer_get_guid(struct ipath_devdata *dd)
{
	return dd->ipath_guid;
}

EXPORT_SYMBOL_GPL(ipath_layer_get_guid);
333
/* Return the unit's GUID count (ipath_nguid). */
u32 ipath_layer_get_nguid(struct ipath_devdata *dd)
{
	return dd->ipath_nguid;
}

EXPORT_SYMBOL_GPL(ipath_layer_get_nguid);
340
341int ipath_layer_query_device(struct ipath_devdata *dd, u32 * vendor,
342 u32 * boardrev, u32 * majrev, u32 * minrev)
343{
344 *vendor = dd->ipath_vendorid;
345 *boardrev = dd->ipath_boardrev;
346 *majrev = dd->ipath_majrev;
347 *minrev = dd->ipath_minrev;
348
349 return 0;
350}
351
352EXPORT_SYMBOL_GPL(ipath_layer_query_device);
353
/* Return the unit's IPATH_* status/capability flag word. */
u32 ipath_layer_get_flags(struct ipath_devdata *dd)
{
	return dd->ipath_flags;
}

EXPORT_SYMBOL_GPL(ipath_layer_get_flags);
360
/* Return the struct device embedded in the unit's PCI device. */
struct device *ipath_layer_get_device(struct ipath_devdata *dd)
{
	return &dd->pcidev->dev;
}

EXPORT_SYMBOL_GPL(ipath_layer_get_device);
367
/* Return the unit's PCI device id. */
u16 ipath_layer_get_deviceid(struct ipath_devdata *dd)
{
	return dd->ipath_deviceid;
}

EXPORT_SYMBOL_GPL(ipath_layer_get_deviceid);
374
/* Return the most recently recorded IBC status value. */
u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd)
{
	return dd->ipath_lastibcstat;
}

EXPORT_SYMBOL_GPL(ipath_layer_get_lastibcstat);
381
/* Return the unit's current IB MTU in bytes. */
u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd)
{
	return dd->ipath_ibmtu;
}

EXPORT_SYMBOL_GPL(ipath_layer_get_ibmtu);
388
389void ipath_layer_add(struct ipath_devdata *dd)
390{
391 mutex_lock(&ipath_layer_mutex);
392
393 if (layer_add_one)
394 dd->ipath_layer.l_arg =
395 layer_add_one(dd->ipath_unit, dd);
396
397 if (verbs_add_one)
398 dd->verbs_layer.l_arg =
399 verbs_add_one(dd->ipath_unit, dd);
400
401 mutex_unlock(&ipath_layer_mutex);
402}
403
404void ipath_layer_del(struct ipath_devdata *dd)
405{
406 mutex_lock(&ipath_layer_mutex);
407
408 if (dd->ipath_layer.l_arg && layer_remove_one) {
409 layer_remove_one(dd->ipath_layer.l_arg);
410 dd->ipath_layer.l_arg = NULL;
411 }
412
413 if (dd->verbs_layer.l_arg && verbs_remove_one) {
414 verbs_remove_one(dd->verbs_layer.l_arg);
415 dd->verbs_layer.l_arg = NULL;
416 }
417
418 mutex_unlock(&ipath_layer_mutex);
419}
420
/**
 * ipath_layer_register - register a layered driver's callbacks
 * @l_add: called for each existing/future unit; returns the per-unit arg
 * @l_remove: called to detach a unit
 * @l_intr: interrupt/event notification callback
 * @l_rcv: packet receive callback
 * @l_rcv_opcode: opcode the layer wants delivered to l_rcv
 * @l_rcv_lid: LID receive callback
 *
 * Stores the callbacks, then attaches every already-initialized unit
 * by calling @l_add for it.  Always returns 0.
 */
int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
			 void (*l_remove)(void *),
			 int (*l_intr)(void *, u32),
			 int (*l_rcv)(void *, void *, struct sk_buff *),
			 u16 l_rcv_opcode,
			 int (*l_rcv_lid)(void *, void *))
{
	struct ipath_devdata *dd, *tmp;
	unsigned long flags;

	mutex_lock(&ipath_layer_mutex);

	layer_add_one = l_add;
	layer_remove_one = l_remove;
	layer_intr = l_intr;
	layer_rcv = l_rcv;
	layer_rcv_lid = l_rcv_lid;
	ipath_layer_rcv_opcode = l_rcv_opcode;

	spin_lock_irqsave(&ipath_devs_lock, flags);

	list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
		if (!(dd->ipath_flags & IPATH_INITTED))
			continue;

		/* Already attached to this layer. */
		if (dd->ipath_layer.l_arg)
			continue;

		if (!(*dd->ipath_statusp & IPATH_STATUS_SMA))
			*dd->ipath_statusp |= IPATH_STATUS_OIB_SMA;

		/*
		 * Drop the spinlock around the callback, which may sleep
		 * or allocate; _safe iteration tolerates list changes.
		 */
		spin_unlock_irqrestore(&ipath_devs_lock, flags);
		dd->ipath_layer.l_arg = l_add(dd->ipath_unit, dd);
		spin_lock_irqsave(&ipath_devs_lock, flags);
	}

	spin_unlock_irqrestore(&ipath_devs_lock, flags);
	mutex_unlock(&ipath_layer_mutex);

	return 0;
}

EXPORT_SYMBOL_GPL(ipath_layer_register);
464
/**
 * ipath_layer_unregister - unregister the layered driver
 *
 * Detaches every unit still attached to the layer (calling the remove
 * callback with the spinlock dropped, since it may sleep), then clears
 * all the layer callback pointers.
 */
void ipath_layer_unregister(void)
{
	struct ipath_devdata *dd, *tmp;
	unsigned long flags;

	mutex_lock(&ipath_layer_mutex);
	spin_lock_irqsave(&ipath_devs_lock, flags);

	list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
		if (dd->ipath_layer.l_arg && layer_remove_one) {
			/* Callback runs without the spinlock held. */
			spin_unlock_irqrestore(&ipath_devs_lock, flags);
			layer_remove_one(dd->ipath_layer.l_arg);
			spin_lock_irqsave(&ipath_devs_lock, flags);
			dd->ipath_layer.l_arg = NULL;
		}
	}

	spin_unlock_irqrestore(&ipath_devs_lock, flags);

	layer_add_one = NULL;
	layer_remove_one = NULL;
	layer_intr = NULL;
	layer_rcv = NULL;
	layer_rcv_lid = NULL;

	mutex_unlock(&ipath_layer_mutex);
}

EXPORT_SYMBOL_GPL(ipath_layer_unregister);
494
/*
 * Per-unit verbs timer: polls the port-0 receive queue when receive
 * interrupts cannot be relied upon, drives the verbs layer's timeout
 * handling, and rearms itself one jiffy out.
 */
static void __ipath_verbs_timer(unsigned long arg)
{
	struct ipath_devdata *dd = (struct ipath_devdata *) arg;

	/*
	 * If port 0 receive packet interrupts are not available, or
	 * can be missed, poll the receive queue
	 */
	if (dd->ipath_flags & IPATH_POLL_RX_INTR)
		ipath_kreceive(dd);

	/* Handle verbs layer timeouts. */
	if (dd->verbs_layer.l_arg && verbs_timer_cb)
		verbs_timer_cb(dd->verbs_layer.l_arg);

	/* Rearm for the next jiffy. */
	mod_timer(&dd->verbs_layer.l_timer, jiffies + 1);
}
512
513/**
514 * ipath_verbs_register - verbs layer registration
515 * @l_piobufavail: callback for when PIO buffers become available
516 * @l_rcv: callback for receiving a packet
517 * @l_timer_cb: timer callback
518 * @ipath_devdata: device data structure is put here
519 */
520int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
521 void (*l_remove)(void *arg),
522 int (*l_piobufavail) (void *arg),
523 void (*l_rcv) (void *arg, void *rhdr,
524 void *data, u32 tlen),
525 void (*l_timer_cb) (void *arg))
526{
527 struct ipath_devdata *dd, *tmp;
528 unsigned long flags;
529
530 mutex_lock(&ipath_layer_mutex);
531
532 verbs_add_one = l_add;
533 verbs_remove_one = l_remove;
534 verbs_piobufavail = l_piobufavail;
535 verbs_rcv = l_rcv;
536 verbs_timer_cb = l_timer_cb;
537
538 spin_lock_irqsave(&ipath_devs_lock, flags);
539
540 list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
541 if (!(dd->ipath_flags & IPATH_INITTED))
542 continue;
543
544 if (dd->verbs_layer.l_arg)
545 continue;
546
547 spin_unlock_irqrestore(&ipath_devs_lock, flags);
548 dd->verbs_layer.l_arg = l_add(dd->ipath_unit, dd);
549 spin_lock_irqsave(&ipath_devs_lock, flags);
550 }
551
552 spin_unlock_irqrestore(&ipath_devs_lock, flags);
553 mutex_unlock(&ipath_layer_mutex);
554
555 ipath_verbs_registered = 1;
556
557 return 0;
558}
559
560EXPORT_SYMBOL_GPL(ipath_verbs_register);
561
/**
 * ipath_verbs_unregister - unregister the verbs layer
 *
 * Clears the OIB_SMA status bit on every unit, detaches each unit still
 * attached to the verbs layer (calling the remove callback with the
 * spinlock dropped, since it may sleep), then clears all the verbs
 * callback pointers.
 */
void ipath_verbs_unregister(void)
{
	struct ipath_devdata *dd, *tmp;
	unsigned long flags;

	mutex_lock(&ipath_layer_mutex);
	spin_lock_irqsave(&ipath_devs_lock, flags);

	list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
		*dd->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;

		if (dd->verbs_layer.l_arg && verbs_remove_one) {
			/* Callback runs without the spinlock held. */
			spin_unlock_irqrestore(&ipath_devs_lock, flags);
			verbs_remove_one(dd->verbs_layer.l_arg);
			spin_lock_irqsave(&ipath_devs_lock, flags);
			dd->verbs_layer.l_arg = NULL;
		}
	}

	spin_unlock_irqrestore(&ipath_devs_lock, flags);

	verbs_add_one = NULL;
	verbs_remove_one = NULL;
	verbs_piobufavail = NULL;
	verbs_rcv = NULL;
	verbs_timer_cb = NULL;

	mutex_unlock(&ipath_layer_mutex);
}

EXPORT_SYMBOL_GPL(ipath_verbs_unregister);
593
/**
 * ipath_layer_open - layered driver "open" of a unit
 * @dd: the infinipath device
 * @pktmax: set to the maximum packet length (ipath_ibmaxlen)
 *
 * Configures the receive header size, reports the max packet length,
 * and replays any state notifications (IF_UP, LID, BCAST) the layer
 * would have missed if the low-level driver was already up when the
 * layer was (re)loaded.
 *
 * Returns 0 on success, -EINVAL if no layer is attached to this unit,
 * or the error from ipath_setrcvhdrsize().
 */
int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax)
{
	int ret;
	u32 intval = 0;

	mutex_lock(&ipath_layer_mutex);

	if (!dd->ipath_layer.l_arg) {
		ret = -EINVAL;
		goto bail;
	}

	ret = ipath_setrcvhdrsize(dd, NUM_OF_EXTRA_WORDS_IN_HEADER_QUEUE);

	if (ret < 0)
		goto bail;

	*pktmax = dd->ipath_ibmaxlen;

	/* Gather the notifications the layer should see immediately. */
	if (*dd->ipath_statusp & IPATH_STATUS_IB_READY)
		intval |= IPATH_LAYER_INT_IF_UP;
	if (ipath_stats.sps_lid[dd->ipath_unit])
		intval |= IPATH_LAYER_INT_LID;
	if (ipath_stats.sps_mlid[dd->ipath_unit])
		intval |= IPATH_LAYER_INT_BCAST;
	/*
	 * do this on open, in case low level is already up and
	 * just layered driver was reloaded, etc.
	 */
	if (intval)
		layer_intr(dd->ipath_layer.l_arg, intval);

	ret = 0;
bail:
	mutex_unlock(&ipath_layer_mutex);

	return ret;
}

EXPORT_SYMBOL_GPL(ipath_layer_open);
634
/* Return the unit's IB LID. */
u16 ipath_layer_get_lid(struct ipath_devdata *dd)
{
	return dd->ipath_lid;
}

EXPORT_SYMBOL_GPL(ipath_layer_get_lid);
641
642/**
643 * ipath_layer_get_mac - get the MAC address
644 * @dd: the infinipath device
645 * @mac: the MAC is put here
646 *
647 * This is the EUID-64 OUI octets (top 3), then
648 * skip the next 2 (which should both be zero or 0xff).
649 * The returned MAC is in network order
650 * mac points to at least 6 bytes of buffer
651 * We assume that by the time the LID is set, that the GUID is as valid
652 * as it's ever going to be, rather than adding yet another status bit.
653 */
654
655int ipath_layer_get_mac(struct ipath_devdata *dd, u8 * mac)
656{
657 u8 *guid;
658
659 guid = (u8 *) &dd->ipath_guid;
660
661 mac[0] = guid[0];
662 mac[1] = guid[1];
663 mac[2] = guid[2];
664 mac[3] = guid[5];
665 mac[4] = guid[6];
666 mac[5] = guid[7];
667 if ((guid[3] || guid[4]) && !(guid[3] == 0xff && guid[4] == 0xff))
668 ipath_dbg("Warning, guid bytes 3 and 4 not 0 or 0xffff: "
669 "%x %x\n", guid[3], guid[4]);
670 return 0;
671}
672
673EXPORT_SYMBOL_GPL(ipath_layer_get_mac);
674
/* Return the unit's multicast/broadcast LID (ipath_mlid). */
u16 ipath_layer_get_bcast(struct ipath_devdata *dd)
{
	return dd->ipath_mlid;
}

EXPORT_SYMBOL_GPL(ipath_layer_get_bcast);
681
/* Read and return the chip's PKEY-violation error counter. */
u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd)
{
	return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
}

EXPORT_SYMBOL_GPL(ipath_layer_get_cr_errpkey);
688
/*
 * Advance the SGE state by 'length' bytes: consume from the current
 * SGE, then move to the next SGE when this one is exhausted, or to the
 * next MR segment when the current segment runs out first.
 */
static void update_sge(struct ipath_sge_state *ss, u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	sge->vaddr += length;
	sge->length -= length;
	sge->sge_length -= length;
	if (sge->sge_length == 0) {
		/* This SGE is done; load the next one from the list. */
		if (--ss->num_sge)
			*sge = *ss->sg_list++;
	} else if (sge->length == 0 && sge->mr != NULL) {
		/* Segment exhausted: step to the next segment of the MR. */
		if (++sge->n >= IPATH_SEGSZ) {
			if (++sge->m >= sge->mr->mapsz)
				return;
			sge->n = 0;
		}
		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
	}
}
709
/*
 * Endian-dependent helpers used by copy_io() to steer partial words
 * toward the "upper" (later in memory order) bytes of a 32-bit word.
 * The little- and big-endian variants mirror each other by swapping
 * the shift directions.
 */
#ifdef __LITTLE_ENDIAN
/* Extract the bytes above 'shift' bits (later bytes in memory order). */
static inline u32 get_upper_bits(u32 data, u32 shift)
{
	return data >> shift;
}

/* Position 'data' so it lands in the bytes above 'shift' bits. */
static inline u32 set_upper_bits(u32 data, u32 shift)
{
	return data << shift;
}

/* Keep only 'n' bytes of 'data', shifted up past 'off' bytes. */
static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
{
	data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
	data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
	return data;
}
#else
/* Big-endian mirror of the little-endian variant above. */
static inline u32 get_upper_bits(u32 data, u32 shift)
{
	return data << shift;
}

static inline u32 set_upper_bits(u32 data, u32 shift)
{
	return data >> shift;
}

static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
{
	data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
	data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
	return data;
}
#endif
745
/*
 * Copy 'length' bytes from a (possibly unaligned, possibly multi-SGE)
 * gather list into a PIO buffer using 32-bit MMIO writes only.
 *
 * 'data' accumulates a partial word and 'extra' counts how many bytes
 * of it are valid; whenever a full word is assembled it is written with
 * __raw_writel().  The final word of the packet ('last') is held back
 * and written after a write-combining flush, because it is the
 * hardware trigger word.
 */
static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
		    u32 length)
{
	u32 extra = 0;
	u32 data = 0;
	u32 last;

	while (1) {
		/* Bytes we can take from the current SGE this pass. */
		u32 len = ss->sge.length;
		u32 off;

		BUG_ON(len == 0);
		if (len > length)
			len = length;
		if (len > ss->sge.sge_length)
			len = ss->sge.sge_length;
		/* If the source address is not aligned, try to align it. */
		off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
		if (off) {
			/* Read the containing aligned word, keep the tail. */
			u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
					    ~(sizeof(u32) - 1));
			u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
			u32 y;

			y = sizeof(u32) - off;
			if (len > y)
				len = y;
			if (len + extra >= sizeof(u32)) {
				/* Completes a word: merge and emit it. */
				data |= set_upper_bits(v, extra *
						       BITS_PER_BYTE);
				len = sizeof(u32) - extra;
				if (len == length) {
					last = data;
					break;
				}
				__raw_writel(data, piobuf);
				piobuf++;
				extra = 0;
				data = 0;
			} else {
				/* Clear unused upper bytes */
				data |= clear_upper_bytes(v, len, extra);
				if (len == length) {
					last = data;
					break;
				}
				extra += len;
			}
		} else if (extra) {
			/* Source address is aligned. */
			u32 *addr = (u32 *) ss->sge.vaddr;
			int shift = extra * BITS_PER_BYTE;
			int ushift = 32 - shift;
			u32 l = len;

			/* Stream whole words, carrying 'extra' bytes over. */
			while (l >= sizeof(u32)) {
				u32 v = *addr;

				data |= set_upper_bits(v, shift);
				__raw_writel(data, piobuf);
				data = get_upper_bits(v, ushift);
				piobuf++;
				addr++;
				l -= sizeof(u32);
			}
			/*
			 * We still have 'extra' number of bytes leftover.
			 */
			if (l) {
				u32 v = *addr;

				if (l + extra >= sizeof(u32)) {
					data |= set_upper_bits(v, shift);
					len -= l + extra - sizeof(u32);
					if (len == length) {
						last = data;
						break;
					}
					__raw_writel(data, piobuf);
					piobuf++;
					extra = 0;
					data = 0;
				} else {
					/* Clear unused upper bytes */
					data |= clear_upper_bytes(v, l,
								  extra);
					if (len == length) {
						last = data;
						break;
					}
					extra += l;
				}
			} else if (len == length) {
				last = data;
				break;
			}
		} else if (len == length) {
			/* Aligned, no carry, and this SGE ends the packet. */
			u32 w;

			/*
			 * Need to round up for the last dword in the
			 * packet.
			 */
			w = (len + 3) >> 2;
			__iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
			piobuf += w - 1;
			last = ((u32 *) ss->sge.vaddr)[w - 1];
			break;
		} else {
			/* Aligned, no carry: bulk-copy whole words. */
			u32 w = len >> 2;

			__iowrite32_copy(piobuf, ss->sge.vaddr, w);
			piobuf += w;

			extra = len & (sizeof(u32) - 1);
			if (extra) {
				u32 v = ((u32 *) ss->sge.vaddr)[w];

				/* Clear unused upper bytes */
				data = clear_upper_bytes(v, extra, 0);
			}
		}
		update_sge(ss, len);
		length -= len;
	}
	/* must flush early everything before trigger word */
	ipath_flush_wc();
	__raw_writel(last, piobuf);
	/* be sure trigger word is written */
	ipath_flush_wc();
	update_sge(ss, length);
}
878
879/**
880 * ipath_verbs_send - send a packet from the verbs layer
881 * @dd: the infinipath device
882 * @hdrwords: the number of works in the header
883 * @hdr: the packet header
884 * @len: the length of the packet in bytes
885 * @ss: the SGE to send
886 *
887 * This is like ipath_sma_send_pkt() in that we need to be able to send
888 * packets after the chip is initialized (MADs) but also like
889 * ipath_layer_send_hdr() since its used by the verbs layer.
890 */
891int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
892 u32 *hdr, u32 len, struct ipath_sge_state *ss)
893{
894 u32 __iomem *piobuf;
895 u32 plen;
896 int ret;
897
898 /* +1 is for the qword padding of pbc */
899 plen = hdrwords + ((len + 3) >> 2) + 1;
900 if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
901 ipath_dbg("packet len 0x%x too long, failing\n", plen);
902 ret = -EINVAL;
903 goto bail;
904 }
905
906 /* Get a PIO buffer to use. */
907 piobuf = ipath_getpiobuf(dd, NULL);
908 if (unlikely(piobuf == NULL)) {
909 ret = -EBUSY;
910 goto bail;
911 }
912
913 /*
914 * Write len to control qword, no flags.
915 * We have to flush after the PBC for correctness on some cpus
916 * or WC buffer can be written out of order.
917 */
918 writeq(plen, piobuf);
919 ipath_flush_wc();
920 piobuf += 2;
921 if (len == 0) {
922 /*
923 * If there is just the header portion, must flush before
924 * writing last word of header for correctness, and after
925 * the last header word (trigger word).
926 */
927 __iowrite32_copy(piobuf, hdr, hdrwords - 1);
928 ipath_flush_wc();
929 __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
930 ipath_flush_wc();
931 ret = 0;
932 goto bail;
933 }
934
935 __iowrite32_copy(piobuf, hdr, hdrwords);
936 piobuf += hdrwords;
937
938 /* The common case is aligned and contained in one segment. */
939 if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
940 !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
941 u32 w;
942
943 /* Need to round up for the last dword in the packet. */
944 w = (len + 3) >> 2;
945 __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
946 /* must flush early everything before trigger word */
947 ipath_flush_wc();
948 __raw_writel(((u32 *) ss->sge.vaddr)[w - 1],
949 piobuf + w - 1);
950 /* be sure trigger word is written */
951 ipath_flush_wc();
952 update_sge(ss, len);
953 ret = 0;
954 goto bail;
955 }
956 copy_io(piobuf, ss, len);
957 ret = 0;
958
959bail:
960 return ret;
961}
962
963EXPORT_SYMBOL_GPL(ipath_verbs_send);
964
965int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
966 u64 *rwords, u64 *spkts, u64 *rpkts,
967 u64 *xmit_wait)
968{
969 int ret;
970
971 if (!(dd->ipath_flags & IPATH_INITTED)) {
972 /* no hardware, freeze, etc. */
973 ipath_dbg("unit %u not usable\n", dd->ipath_unit);
974 ret = -EINVAL;
975 goto bail;
976 }
977 *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
978 *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
979 *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
980 *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
981 *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
982
983 ret = 0;
984
985bail:
986 return ret;
987}
988
989EXPORT_SYMBOL_GPL(ipath_layer_snapshot_counters);
990
/**
 * ipath_layer_get_counters - get various chip counters
 * @dd: the infinipath device
 * @cntrs: counters are placed here
 *
 * Return the counters needed by recv_pma_get_portcounters().
 * Returns -EINVAL if the unit is not initialized, else 0.
 */
int ipath_layer_get_counters(struct ipath_devdata *dd,
			     struct ipath_layer_counters *cntrs)
{
	int ret;

	if (!(dd->ipath_flags & IPATH_INITTED)) {
		/* no hardware, freeze, etc. */
		ipath_dbg("unit %u not usable\n", dd->ipath_unit);
		ret = -EINVAL;
		goto bail;
	}
	cntrs->symbol_error_counter =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
	cntrs->link_error_recovery_counter =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
	cntrs->link_downed_counter =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
	/*
	 * PortRcvErrors is the aggregate of every receive-side error
	 * counter the chip keeps separately.
	 */
	cntrs->port_rcv_errors =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errrcvflowctrlcnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlinkcnt) +
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
	cntrs->port_rcv_remphys_errors =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
	cntrs->port_xmit_discards =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
	cntrs->port_xmit_data =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
	cntrs->port_rcv_data =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
	cntrs->port_xmit_packets =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
	cntrs->port_rcv_packets =
		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);

	ret = 0;

bail:
	return ret;
}

EXPORT_SYMBOL_GPL(ipath_layer_get_counters);
1047
1048int ipath_layer_want_buffer(struct ipath_devdata *dd)
1049{
1050 set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
1051 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1052 dd->ipath_sendctrl);
1053
1054 return 0;
1055}
1056
1057EXPORT_SYMBOL_GPL(ipath_layer_want_buffer);
1058
/**
 * ipath_layer_send_hdr - send an ether_header-sized packet via a PIO buffer
 * @dd: the infinipath device
 * @hdr: the packet to send (must start with an LRH whose lnh is IPS_LRH_BTH)
 *
 * Returns 0 on success, -EINVAL if the port is not open or the LRH is
 * malformed, -ENETDOWN if the link is not up, -EBUSY if no PIO buffer
 * is free.  The write-combining flushes around the last (trigger) word
 * are required for correct PIO ordering; do not reorder them.
 */
int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr)
{
	int ret = 0;
	u32 __iomem *piobuf;
	u32 plen, *uhdr;
	size_t count;
	__be16 vlsllnh;

	if (!(dd->ipath_flags & IPATH_RCVHDRSZ_SET)) {
		ipath_dbg("send while not open\n");
		ret = -EINVAL;
	} else
		if ((dd->ipath_flags & (IPATH_LINKUNK | IPATH_LINKDOWN)) ||
		    dd->ipath_lid == 0) {
			/*
			 * lid check is for when sma hasn't yet configured
			 */
			ret = -ENETDOWN;
			ipath_cdbg(VERBOSE, "send while not ready, "
				   "mylid=%u, flags=0x%x\n",
				   dd->ipath_lid, dd->ipath_flags);
		}

	/* Sanity-check the first LRH word before touching hardware. */
	vlsllnh = *((__be16 *) hdr);
	if (vlsllnh != htons(IPS_LRH_BTH)) {
		ipath_dbg("Warning: lrh[0] wrong (%x, not %x); "
			  "not sending\n", be16_to_cpu(vlsllnh),
			  IPS_LRH_BTH);
		ret = -EINVAL;
	}
	if (ret)
		goto done;

	/* Get a PIO buffer to use. */
	piobuf = ipath_getpiobuf(dd, NULL);
	if (piobuf == NULL) {
		ret = -EBUSY;
		goto done;
	}

	plen = (sizeof(*hdr) >> 2); /* actual length in dwords */
	ipath_cdbg(EPKT, "0x%x+1w pio %p\n", plen, piobuf);

	writeq(plen+1, piobuf); /* len (+1 for pad) to pbc, no flags */
	/* flush so the PBC is not reordered after the data by the WC buffer */
	ipath_flush_wc();
	piobuf += 2;
	uhdr = (u32 *)hdr;
	count = plen-1; /* amount we can copy before trigger word */
	__iowrite32_copy(piobuf, uhdr, count);
	/* flush before the trigger word, then after it, to force it out */
	ipath_flush_wc();
	__raw_writel(uhdr[count], piobuf + count);
	ipath_flush_wc(); /* ensure it's sent, now */

	ipath_stats.sps_ether_spkts++; /* ether packet sent */

done:
	return ret;
}

EXPORT_SYMBOL_GPL(ipath_layer_send_hdr);
1119
1120int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd)
1121{
1122 set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
1123
1124 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1125 dd->ipath_sendctrl);
1126 return 0;
1127}
1128
1129EXPORT_SYMBOL_GPL(ipath_layer_set_piointbufavail_int);
1130
/**
 * ipath_layer_enable_timer - start the verbs-layer periodic timer
 * @dd: the infinipath device
 *
 * Always returns 0.
 */
int ipath_layer_enable_timer(struct ipath_devdata *dd)
{
	/*
	 * HT-400 has a design flaw where the chip and kernel idea
	 * of the tail register don't always agree, and therefore we won't
	 * get an interrupt on the next packet received.
	 * If the board supports per packet receive interrupts, use it.
	 * Otherwise, the timer function periodically checks for packets
	 * to cover this case.
	 * Either way, the timer is needed for verbs layer related
	 * processing.
	 */
	if (dd->ipath_flags & IPATH_GPIO_INTR) {
		/*
		 * Chip-specific debug-port select value; meaning is not
		 * documented here -- TODO confirm against chip spec.
		 */
		ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
				 0x2074076542310ULL);
		/* Enable GPIO bit 2 interrupt */
		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
				 (u64) (1 << 2));
	}

	/* Fire the timer on the very next tick. */
	init_timer(&dd->verbs_layer.l_timer);
	dd->verbs_layer.l_timer.function = __ipath_verbs_timer;
	dd->verbs_layer.l_timer.data = (unsigned long)dd;
	dd->verbs_layer.l_timer.expires = jiffies + 1;
	add_timer(&dd->verbs_layer.l_timer);

	return 0;
}

EXPORT_SYMBOL_GPL(ipath_layer_enable_timer);
1161
1162int ipath_layer_disable_timer(struct ipath_devdata *dd)
1163{
1164 /* Disable GPIO bit 2 interrupt */
1165 if (dd->ipath_flags & IPATH_GPIO_INTR)
1166 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
1167
1168 del_timer_sync(&dd->verbs_layer.l_timer);
1169
1170 return 0;
1171}
1172
1173EXPORT_SYMBOL_GPL(ipath_layer_disable_timer);
1174
1175/**
1176 * ipath_layer_set_verbs_flags - set the verbs layer flags
1177 * @dd: the infinipath device
1178 * @flags: the flags to set
1179 */
1180int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags)
1181{
1182 struct ipath_devdata *ss;
1183 unsigned long lflags;
1184
1185 spin_lock_irqsave(&ipath_devs_lock, lflags);
1186
1187 list_for_each_entry(ss, &ipath_dev_list, ipath_list) {
1188 if (!(ss->ipath_flags & IPATH_INITTED))
1189 continue;
1190 if ((flags & IPATH_VERBS_KERNEL_SMA) &&
1191 !(*ss->ipath_statusp & IPATH_STATUS_SMA))
1192 *ss->ipath_statusp |= IPATH_STATUS_OIB_SMA;
1193 else
1194 *ss->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
1195 }
1196
1197 spin_unlock_irqrestore(&ipath_devs_lock, lflags);
1198
1199 return 0;
1200}
1201
1202EXPORT_SYMBOL_GPL(ipath_layer_set_verbs_flags);
1203
/**
 * ipath_layer_get_npkeys - return the size of the PKEY table for port 0
 * @dd: the infinipath device
 *
 * The table size is fixed at compile time by the port_pkeys array.
 */
unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd)
{
	return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
}

EXPORT_SYMBOL_GPL(ipath_layer_get_npkeys);
1214
1215/**
1216 * ipath_layer_get_pkey - return the indexed PKEY from the port 0 PKEY table
1217 * @dd: the infinipath device
1218 * @index: the PKEY index
1219 */
1220unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index)
1221{
1222 unsigned ret;
1223
1224 if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
1225 ret = 0;
1226 else
1227 ret = dd->ipath_pd[0]->port_pkeys[index];
1228
1229 return ret;
1230}
1231
1232EXPORT_SYMBOL_GPL(ipath_layer_get_pkey);
1233
1234/**
1235 * ipath_layer_get_pkeys - return the PKEY table for port 0
1236 * @dd: the infinipath device
1237 * @pkeys: the pkey table is placed here
1238 */
1239int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
1240{
1241 struct ipath_portdata *pd = dd->ipath_pd[0];
1242
1243 memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
1244
1245 return 0;
1246}
1247
1248EXPORT_SYMBOL_GPL(ipath_layer_get_pkeys);
1249
1250/**
1251 * rm_pkey - decrecment the reference count for the given PKEY
1252 * @dd: the infinipath device
1253 * @key: the PKEY index
1254 *
1255 * Return true if this was the last reference and the hardware table entry
1256 * needs to be changed.
1257 */
1258static int rm_pkey(struct ipath_devdata *dd, u16 key)
1259{
1260 int i;
1261 int ret;
1262
1263 for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
1264 if (dd->ipath_pkeys[i] != key)
1265 continue;
1266 if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
1267 dd->ipath_pkeys[i] = 0;
1268 ret = 1;
1269 goto bail;
1270 }
1271 break;
1272 }
1273
1274 ret = 0;
1275
1276bail:
1277 return ret;
1278}
1279
/**
 * add_pkey - add the given PKEY to the hardware table
 * @dd: the infinipath device
 * @key: the PKEY
 *
 * Return an error code if unable to add the entry, zero if no change,
 * or 1 if the hardware PKEY register needs to be updated.
 */
static int add_pkey(struct ipath_devdata *dd, u16 key)
{
	int i;
	u16 lkey = key & 0x7FFF;	/* key with the full-member bit masked off */
	int any = 0;			/* count of free slots seen */
	int ret;

	/* The all-ones limited-member key is never stored. */
	if (lkey == 0x7FFF) {
		ret = 0;
		goto bail;
	}

	/* Look for an empty slot or a matching PKEY. */
	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
		if (!dd->ipath_pkeys[i]) {
			any++;
			continue;
		}
		/* If it matches exactly, try to increment the ref count */
		if (dd->ipath_pkeys[i] == key) {
			if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
				ret = 0;
				goto bail;
			}
			/* Lost the race. Look for an empty slot below. */
			atomic_dec(&dd->ipath_pkeyrefs[i]);
			any++;
		}
		/*
		 * It makes no sense to have both the limited and unlimited
		 * PKEY set at the same time since the unlimited one will
		 * disable the limited one.
		 */
		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
			ret = -EEXIST;
			goto bail;
		}
	}
	if (!any) {
		ret = -EBUSY;
		goto bail;
	}
	/*
	 * Second pass: claim an empty slot.  The atomic_inc_return()==1
	 * check is what actually wins the slot against a concurrent adder.
	 */
	for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
		if (!dd->ipath_pkeys[i] &&
		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
			/* for ipathstats, etc. */
			ipath_stats.sps_pkeys[i] = lkey;
			dd->ipath_pkeys[i] = key;
			ret = 1;
			goto bail;
		}
	}
	ret = -EBUSY;

bail:
	return ret;
}
1345
/**
 * ipath_layer_set_pkeys - set the PKEY table for port 0
 * @dd: the infinipath device
 * @pkeys: the PKEY table
 *
 * Diffs @pkeys against the current port table, adjusting the shared
 * hardware reference counts via rm_pkey()/add_pkey(), and rewrites the
 * partition key register only if any hardware slot changed.
 * Always returns 0; an entry that cannot be added is stored as 0.
 */
int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 * pkeys)
{
	struct ipath_portdata *pd;
	int i;
	int changed = 0;

	pd = dd->ipath_pd[0];

	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
		u16 key = pkeys[i];
		u16 okey = pd->port_pkeys[i];

		if (key == okey)
			continue;
		/*
		 * The value of this PKEY table entry is changing.
		 * Remove the old entry in the hardware's array of PKEYs.
		 */
		if (okey & 0x7FFF)
			changed |= rm_pkey(dd, okey);
		if (key & 0x7FFF) {
			int ret = add_pkey(dd, key);

			/* on failure, record the entry as invalid (0) */
			if (ret < 0)
				key = 0;
			else
				changed |= ret;
		}
		pd->port_pkeys[i] = key;
	}
	if (changed) {
		u64 pkey;

		/* Pack the four 16-bit hardware slots into one register. */
		pkey = (u64) dd->ipath_pkeys[0] |
			((u64) dd->ipath_pkeys[1] << 16) |
			((u64) dd->ipath_pkeys[2] << 32) |
			((u64) dd->ipath_pkeys[3] << 48);
		ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
			   (unsigned long long) pkey);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
				 pkey);
	}
	return 0;
}

EXPORT_SYMBOL_GPL(ipath_layer_set_pkeys);
1397
/**
 * ipath_layer_get_linkdowndefaultstate - get the default linkdown state
 * @dd: the infinipath device
 *
 * Returns zero if the default is POLL, 1 if the default is SLEEP.
 */
int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd)
{
	return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
}

EXPORT_SYMBOL_GPL(ipath_layer_get_linkdowndefaultstate);
1410
1411/**
1412 * ipath_layer_set_linkdowndefaultstate - set the default linkdown state
1413 * @dd: the infinipath device
1414 * @sleep: the new state
1415 *
1416 * Note that this will only take effect when the link state changes.
1417 */
1418int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
1419 int sleep)
1420{
1421 if (sleep)
1422 dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
1423 else
1424 dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
1425 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1426 dd->ipath_ibcctrl);
1427 return 0;
1428}
1429
1430EXPORT_SYMBOL_GPL(ipath_layer_set_linkdowndefaultstate);
1431
/*
 * ipath_layer_get_phyerrthreshold - extract the physical-error threshold
 * field from the IBC control shadow register.
 */
int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd)
{
	return (dd->ipath_ibcctrl >>
		INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
		INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
}

EXPORT_SYMBOL_GPL(ipath_layer_get_phyerrthreshold);
1440
1441/**
1442 * ipath_layer_set_phyerrthreshold - set the physical error threshold
1443 * @dd: the infinipath device
1444 * @n: the new threshold
1445 *
1446 * Note that this will only take effect when the link state changes.
1447 */
1448int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
1449{
1450 unsigned v;
1451
1452 v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
1453 INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
1454 if (v != n) {
1455 dd->ipath_ibcctrl &=
1456 ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
1457 INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
1458 dd->ipath_ibcctrl |=
1459 (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
1460 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1461 dd->ipath_ibcctrl);
1462 }
1463 return 0;
1464}
1465
1466EXPORT_SYMBOL_GPL(ipath_layer_set_phyerrthreshold);
1467
/*
 * ipath_layer_get_overrunthreshold - extract the overrun threshold field
 * from the IBC control shadow register.
 */
int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd)
{
	return (dd->ipath_ibcctrl >>
		INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
		INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
}

EXPORT_SYMBOL_GPL(ipath_layer_get_overrunthreshold);
1476
1477/**
1478 * ipath_layer_set_overrunthreshold - set the overrun threshold
1479 * @dd: the infinipath device
1480 * @n: the new threshold
1481 *
1482 * Note that this will only take effect when the link state changes.
1483 */
1484int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
1485{
1486 unsigned v;
1487
1488 v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
1489 INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
1490 if (v != n) {
1491 dd->ipath_ibcctrl &=
1492 ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
1493 INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
1494 dd->ipath_ibcctrl |=
1495 (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
1496 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1497 dd->ipath_ibcctrl);
1498 }
1499 return 0;
1500}
1501
1502EXPORT_SYMBOL_GPL(ipath_layer_set_overrunthreshold);
1503
/*
 * ipath_layer_get_boardname - copy the board name into @name (at most
 * @namelen bytes), dispatching to the chip-specific implementation.
 * Return value comes from the chip-specific function.
 */
int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
			      size_t namelen)
{
	return dd->ipath_f_get_boardname(dd, name, namelen);
}
EXPORT_SYMBOL_GPL(ipath_layer_get_boardname);
1510
/* Return the receive header entry size (in use by this device). */
u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd)
{
	return dd->ipath_rcvhdrentsize;
}
EXPORT_SYMBOL_GPL(ipath_layer_get_rcvhdrentsize);
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h
new file mode 100644
index 000000000000..6fefd15bd2da
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_layer.h
@@ -0,0 +1,181 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef _IPATH_LAYER_H
34#define _IPATH_LAYER_H
35
36/*
37 * This header file is for symbols shared between the infinipath driver
38 * and drivers layered upon it (such as ipath).
39 */
40
41struct sk_buff;
42struct ipath_sge_state;
43struct ipath_devdata;
44struct ether_header;
45
/* Counter values returned by ipath_layer_get_counters(). */
struct ipath_layer_counters {
	u64 symbol_error_counter;
	u64 link_error_recovery_counter;
	u64 link_downed_counter;
	u64 port_rcv_errors;
	u64 port_rcv_remphys_errors;
	u64 port_xmit_discards;
	u64 port_xmit_data;
	u64 port_rcv_data;
	u64 port_xmit_packets;
	u64 port_rcv_packets;
};

/*
 * A segment is a linear region of low physical memory.
 * XXX Maybe we should use phys addr here and kmap()/kunmap().
 * Used by the verbs layer.
 */
struct ipath_seg {
	void *vaddr;
	size_t length;
};

/* The number of ipath_segs that fit in a page. */
#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg))

struct ipath_segarray {
	struct ipath_seg segs[IPATH_SEGSZ];
};

/* A registered memory region: a set of segment arrays plus the IB keys. */
struct ipath_mregion {
	u64 user_base;		/* User's address for this region */
	u64 iova;		/* IB start address of this region */
	size_t length;
	u32 lkey;
	u32 offset;		/* offset (bytes) to start of region */
	int access_flags;
	u32 max_segs;		/* number of ipath_segs in all the arrays */
	u32 mapsz;		/* size of the map array */
	struct ipath_segarray *map[0];	/* the segments */
};

/*
 * These keep track of the copy progress within a memory region.
 * Used by the verbs layer.
 */
struct ipath_sge {
	struct ipath_mregion *mr;
	void *vaddr;		/* current pointer into the segment */
	u32 sge_length;		/* length of the SGE */
	u32 length;		/* remaining length of the segment */
	u16 m;			/* current index: mr->map[m] */
	u16 n;			/* current index: mr->map[m]->segs[n] */
};

struct ipath_sge_state {
	struct ipath_sge *sg_list;	/* next SGE to be used if any */
	struct ipath_sge sge;	/* progress state for the current SGE */
	u8 num_sge;
};
106
107int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
108 void (*l_remove)(void *),
109 int (*l_intr)(void *, u32),
110 int (*l_rcv)(void *, void *,
111 struct sk_buff *),
112 u16 rcv_opcode,
113 int (*l_rcv_lid)(void *, void *));
114int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
115 void (*l_remove)(void *arg),
116 int (*l_piobufavail)(void *arg),
117 void (*l_rcv)(void *arg, void *rhdr,
118 void *data, u32 tlen),
119 void (*l_timer_cb)(void *arg));
120void ipath_layer_unregister(void);
121void ipath_verbs_unregister(void);
122int ipath_layer_open(struct ipath_devdata *, u32 * pktmax);
123u16 ipath_layer_get_lid(struct ipath_devdata *dd);
124int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *);
125u16 ipath_layer_get_bcast(struct ipath_devdata *dd);
126u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd);
127int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 state);
128int ipath_layer_set_mtu(struct ipath_devdata *, u16);
129int ipath_set_sps_lid(struct ipath_devdata *, u32, u8);
130int ipath_layer_send_hdr(struct ipath_devdata *dd,
131 struct ether_header *hdr);
132int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
133 u32 * hdr, u32 len, struct ipath_sge_state *ss);
134int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd);
135int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
136 size_t namelen);
137int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
138 u64 *rwords, u64 *spkts, u64 *rpkts,
139 u64 *xmit_wait);
140int ipath_layer_get_counters(struct ipath_devdata *dd,
141 struct ipath_layer_counters *cntrs);
142int ipath_layer_want_buffer(struct ipath_devdata *dd);
143int ipath_layer_set_guid(struct ipath_devdata *, __be64 guid);
144__be64 ipath_layer_get_guid(struct ipath_devdata *);
145u32 ipath_layer_get_nguid(struct ipath_devdata *);
146int ipath_layer_query_device(struct ipath_devdata *, u32 * vendor,
147 u32 * boardrev, u32 * majrev, u32 * minrev);
148u32 ipath_layer_get_flags(struct ipath_devdata *dd);
149struct device *ipath_layer_get_device(struct ipath_devdata *dd);
150u16 ipath_layer_get_deviceid(struct ipath_devdata *dd);
151u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd);
152u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd);
153int ipath_layer_enable_timer(struct ipath_devdata *dd);
154int ipath_layer_disable_timer(struct ipath_devdata *dd);
155int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags);
156unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd);
157unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index);
158int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 *pkeys);
159int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 *pkeys);
160int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd);
161int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
162 int sleep);
163int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd);
164int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n);
165int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd);
166int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n);
167u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd);
168
169/* ipath_ether interrupt values */
170#define IPATH_LAYER_INT_IF_UP 0x2
171#define IPATH_LAYER_INT_IF_DOWN 0x4
172#define IPATH_LAYER_INT_LID 0x8
173#define IPATH_LAYER_INT_SEND_CONTINUE 0x10
174#define IPATH_LAYER_INT_BCAST 0x40
175
176/* _verbs_layer.l_flags */
177#define IPATH_VERBS_KERNEL_SMA 0x1
178
179extern unsigned ipath_debug; /* debugging bit mask */
180
181#endif /* _IPATH_LAYER_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
new file mode 100644
index 000000000000..f7f8391fe43f
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -0,0 +1,1352 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_smi.h>
34
35#include "ipath_kernel.h"
36#include "ipath_verbs.h"
37#include "ips_common.h"
38
39#define IB_SMP_UNSUP_VERSION __constant_htons(0x0004)
40#define IB_SMP_UNSUP_METHOD __constant_htons(0x0008)
41#define IB_SMP_UNSUP_METH_ATTR __constant_htons(0x000C)
42#define IB_SMP_INVALID_FIELD __constant_htons(0x001C)
43
/*
 * reply - convert an incoming SMP into its GET_RESP reply, in place.
 * Returns the MAD-result flags telling the caller to send @smp back.
 */
static int reply(struct ib_smp *smp)
{
	/*
	 * The verbs framework will handle the directed/LID route
	 * packet changes.
	 */
	smp->method = IB_MGMT_METHOD_GET_RESP;
	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
		smp->status |= IB_SMP_DIRECTION;
	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
55
/* SubnGet(NodeDescription): return the device's node description string. */
static int recv_subn_get_nodedescription(struct ib_smp *smp,
					 struct ib_device *ibdev)
{
	/* NodeDescription takes no attribute modifier. */
	if (smp->attr_mod)
		smp->status |= IB_SMP_INVALID_FIELD;

	/*
	 * strncpy into the fixed-size SMP data field; NOTE(review):
	 * assumes node_desc fills or zero-pads the 64-byte field as the
	 * attribute requires -- confirm no termination is expected here.
	 */
	strncpy(smp->data, ibdev->node_desc, sizeof(smp->data));

	return reply(smp);
}
66
/*
 * Wire layout of the NodeInfo SM attribute (presumably per IBA spec
 * ch. 14 -- TODO confirm field order against the spec).  Packed so the
 * struct can be overlaid directly on smp->data.
 */
struct nodeinfo {
	u8 base_version;
	u8 class_version;
	u8 node_type;
	u8 num_ports;
	__be64 sys_guid;
	__be64 node_guid;
	__be64 port_guid;
	__be16 partition_cap;
	__be16 device_id;
	__be32 revision;
	u8 local_port_num;
	u8 vendor_id[3];	/* big-endian 24-bit vendor OUI */
} __attribute__ ((packed));
81
/* SubnGet(NodeInfo): fill smp->data with a struct nodeinfo for this HCA. */
static int recv_subn_get_nodeinfo(struct ib_smp *smp,
				  struct ib_device *ibdev, u8 port)
{
	struct nodeinfo *nip = (struct nodeinfo *)&smp->data;
	struct ipath_devdata *dd = to_idev(ibdev)->dd;
	u32 vendor, boardid, majrev, minrev;

	/* NodeInfo takes no attribute modifier. */
	if (smp->attr_mod)
		smp->status |= IB_SMP_INVALID_FIELD;

	nip->base_version = 1;
	nip->class_version = 1;
	nip->node_type = 1;	/* channel adapter */
	/*
	 * XXX The num_ports value will need a layer function to get
	 * the value if we ever have more than one IB port on a chip.
	 * We will also need to get the GUID for the port.
	 */
	nip->num_ports = ibdev->phys_port_cnt;
	/* This is already in network order */
	nip->sys_guid = to_idev(ibdev)->sys_image_guid;
	nip->node_guid = ipath_layer_get_guid(dd);
	nip->port_guid = nip->sys_guid;
	nip->partition_cap = cpu_to_be16(ipath_layer_get_npkeys(dd));
	nip->device_id = cpu_to_be16(ipath_layer_get_deviceid(dd));
	ipath_layer_query_device(dd, &vendor, &boardid, &majrev, &minrev);
	nip->revision = cpu_to_be32((majrev << 16) | minrev);
	nip->local_port_num = port;
	/* 24-bit vendor OUI, big-endian, high byte zero. */
	nip->vendor_id[0] = 0;
	nip->vendor_id[1] = vendor >> 8;
	nip->vendor_id[2] = vendor;

	return reply(smp);
}
116
117static int recv_subn_get_guidinfo(struct ib_smp *smp,
118 struct ib_device *ibdev)
119{
120 u32 startgx = 8 * be32_to_cpu(smp->attr_mod);
121 __be64 *p = (__be64 *) smp->data;
122
123 /* 32 blocks of 8 64-bit GUIDs per block */
124
125 memset(smp->data, 0, sizeof(smp->data));
126
127 /*
128 * We only support one GUID for now. If this changes, the
129 * portinfo.guid_cap field needs to be updated too.
130 */
131 if (startgx == 0)
132 /* The first is a copy of the read-only HW GUID. */
133 *p = ipath_layer_get_guid(to_idev(ibdev)->dd);
134 else
135 smp->status |= IB_SMP_INVALID_FIELD;
136
137 return reply(smp);
138}
139
/*
 * Wire layout of the PortInfo SM attribute (presumably per IBA spec
 * ch. 14 -- TODO confirm field packing against the spec).  Packed so
 * the struct can be overlaid directly on smp->data.  Fields that hold
 * two or more sub-fields note the bit split in their comment.
 */
struct port_info {
	__be64 mkey;
	__be64 gid_prefix;
	__be16 lid;
	__be16 sm_lid;
	__be32 cap_mask;
	__be16 diag_code;
	__be16 mkey_lease_period;
	u8 local_port_num;
	u8 link_width_enabled;
	u8 link_width_supported;
	u8 link_width_active;
	u8 linkspeed_portstate;			/* 4 bits, 4 bits */
	u8 portphysstate_linkdown;		/* 4 bits, 4 bits */
	u8 mkeyprot_resv_lmc;			/* 2 bits, 3, 3 */
	u8 linkspeedactive_enabled;		/* 4 bits, 4 bits */
	u8 neighbormtu_mastersmsl;		/* 4 bits, 4 bits */
	u8 vlcap_inittype;			/* 4 bits, 4 bits */
	u8 vl_high_limit;
	u8 vl_arb_high_cap;
	u8 vl_arb_low_cap;
	u8 inittypereply_mtucap;		/* 4 bits, 4 bits */
	u8 vlstallcnt_hoqlife;			/* 3 bits, 5 bits */
	u8 operationalvl_pei_peo_fpi_fpo;	/* 4 bits, 1, 1, 1, 1 */
	__be16 mkey_violations;
	__be16 pkey_violations;
	__be16 qkey_violations;
	u8 guid_cap;
	u8 clientrereg_resv_subnetto;		/* 1 bit, 2 bits, 5 */
	u8 resv_resptimevalue;			/* 3 bits, 5 bits */
	u8 localphyerrors_overrunerrors;	/* 4 bits, 4 bits */
	__be16 max_credit_hint;
	u8 resv;
	u8 link_roundtrip_latency[3];
} __attribute__ ((packed));
175
/*
 * SubnGet(PortInfo): fill smp->data with a struct port_info describing
 * the requested port.  Returns a MAD-result code from reply(), or an
 * INVALID_FIELD reply when the port number is out of range.
 */
static int recv_subn_get_portinfo(struct ib_smp *smp,
				  struct ib_device *ibdev, u8 port)
{
	struct ipath_ibdev *dev;
	struct port_info *pip = (struct port_info *)smp->data;
	u16 lid;
	u8 ibcstat;
	u8 mtu;
	int ret;

	if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) {
		smp->status |= IB_SMP_INVALID_FIELD;
		ret = reply(smp);
		goto bail;
	}

	dev = to_idev(ibdev);

	/* Clear all fields. Only set the non-zero fields. */
	memset(smp->data, 0, sizeof(smp->data));

	/* Only return the mkey if the protection field allows it. */
	if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey ||
	    (dev->mkeyprot_resv_lmc >> 6) == 0)
		pip->mkey = dev->mkey;
	pip->gid_prefix = dev->gid_prefix;
	lid = ipath_layer_get_lid(dev->dd);
	/* A zero LID means not yet assigned; report permissive. */
	pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
	pip->sm_lid = cpu_to_be16(dev->sm_lid);
	pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
	/* pip->diag_code; */
	pip->mkey_lease_period = cpu_to_be16(dev->mkey_lease_period);
	pip->local_port_num = port;
	pip->link_width_enabled = dev->link_width_enabled;
	pip->link_width_supported = 3;	/* 1x or 4x */
	pip->link_width_active = 2;	/* 4x */
	pip->linkspeed_portstate = 0x10;	/* 2.5Gbps */
	ibcstat = ipath_layer_get_lastibcstat(dev->dd);
	/* low nibble: port state derived from the last IBC status */
	pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1;
	pip->portphysstate_linkdown =
		(ipath_cvt_physportstate[ibcstat & 0xf] << 4) |
		(ipath_layer_get_linkdowndefaultstate(dev->dd) ? 1 : 2);
	pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc;
	pip->linkspeedactive_enabled = 0x11;	/* 2.5Gbps, 2.5Gbps */
	/* Map the chip's byte MTU to the IB MTU enumeration. */
	switch (ipath_layer_get_ibmtu(dev->dd)) {
	case 4096:
		mtu = IB_MTU_4096;
		break;
	case 2048:
		mtu = IB_MTU_2048;
		break;
	case 1024:
		mtu = IB_MTU_1024;
		break;
	case 512:
		mtu = IB_MTU_512;
		break;
	case 256:
		mtu = IB_MTU_256;
		break;
	default:		/* oops, something is wrong */
		mtu = IB_MTU_2048;
		break;
	}
	pip->neighbormtu_mastersmsl = (mtu << 4) | dev->sm_sl;
	pip->vlcap_inittype = 0x10;	/* VLCap = VL0, InitType = 0 */
	pip->vl_high_limit = dev->vl_high_limit;
	/* pip->vl_arb_high_cap; // only one VL */
	/* pip->vl_arb_low_cap; // only one VL */
	/* InitTypeReply = 0 */
	pip->inittypereply_mtucap = IB_MTU_4096;
	/* HCAs ignore VLStallCount and HOQLife */
	/* pip->vlstallcnt_hoqlife; */
	pip->operationalvl_pei_peo_fpi_fpo = 0x10;	/* OVLs = 1 */
	pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
	/* P_KeyViolations are counted by hardware. */
	pip->pkey_violations =
		cpu_to_be16((ipath_layer_get_cr_errpkey(dev->dd) -
			     dev->n_pkey_violations) & 0xFFFF);
	pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
	/* Only the hardware GUID is supported for now */
	pip->guid_cap = 1;
	pip->clientrereg_resv_subnetto = dev->subnet_timeout;
	/* 32.768 usec. response time (guessing) */
	pip->resv_resptimevalue = 3;
	pip->localphyerrors_overrunerrors =
		(ipath_layer_get_phyerrthreshold(dev->dd) << 4) |
		ipath_layer_get_overrunthreshold(dev->dd);
	/* pip->max_credit_hint; */
	/* pip->link_roundtrip_latency[3]; */

	ret = reply(smp);

bail:
	return ret;
}
272
273static int recv_subn_get_pkeytable(struct ib_smp *smp,
274 struct ib_device *ibdev)
275{
276 u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
277 u16 *p = (u16 *) smp->data;
278 __be16 *q = (__be16 *) smp->data;
279
280 /* 64 blocks of 32 16-bit P_Key entries */
281
282 memset(smp->data, 0, sizeof(smp->data));
283 if (startpx == 0) {
284 struct ipath_ibdev *dev = to_idev(ibdev);
285 unsigned i, n = ipath_layer_get_npkeys(dev->dd);
286
287 ipath_layer_get_pkeys(dev->dd, p);
288
289 for (i = 0; i < n; i++)
290 q[i] = cpu_to_be16(p[i]);
291 } else
292 smp->status |= IB_SMP_INVALID_FIELD;
293
294 return reply(smp);
295}
296
static int recv_subn_set_guidinfo(struct ib_smp *smp,
				  struct ib_device *ibdev)
{
	/*
	 * The GUID table contains a single read-only entry (the
	 * hardware GUID), so a Set is effectively a no-op: answer
	 * with the current table contents.
	 */
	return recv_subn_get_guidinfo(smp, ibdev);
}
303
/**
 * recv_subn_set_portinfo - set port information
 * @smp: the incoming SM packet
 * @ibdev: the infiniband device
 * @port: the port on the device
 *
 * Set Portinfo (see ch. 14.2.5.6).
 *
 * On any invalid field, IB_SMP_INVALID_FIELD is set in the reply status
 * and the current PortInfo is returned anyway; note that some device
 * state may already have been updated by the time an error is detected.
 */
static int recv_subn_set_portinfo(struct ib_smp *smp,
				  struct ib_device *ibdev, u8 port)
{
	struct port_info *pip = (struct port_info *)smp->data;
	struct ib_event event;
	struct ipath_ibdev *dev;
	u32 flags;
	char clientrereg = 0;	/* set if ClientReregister must echo back */
	u16 lid, smlid;
	u8 lwe;
	u8 lse;
	u8 state;
	u16 lstate;
	u32 mtu;
	int ret;

	/* attr_mod carries the port being set; we have a single port. */
	if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
		goto err;

	dev = to_idev(ibdev);
	event.device = ibdev;
	event.element.port_num = port;

	/* M_Key and GID prefix are kept in network byte order. */
	dev->mkey = pip->mkey;
	dev->gid_prefix = pip->gid_prefix;
	dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);

	lid = be16_to_cpu(pip->lid);
	if (lid != ipath_layer_get_lid(dev->dd)) {
		/* Must be a valid unicast LID address. */
		if (lid == 0 || lid >= IPS_MULTICAST_LID_BASE)
			goto err;
		ipath_set_sps_lid(dev->dd, lid, pip->mkeyprot_resv_lmc & 7);
		event.event = IB_EVENT_LID_CHANGE;
		ib_dispatch_event(&event);
	}

	smlid = be16_to_cpu(pip->sm_lid);
	if (smlid != dev->sm_lid) {
		/* Must be a valid unicast LID address. */
		if (smlid == 0 || smlid >= IPS_MULTICAST_LID_BASE)
			goto err;
		dev->sm_lid = smlid;
		event.event = IB_EVENT_SM_CHANGE;
		ib_dispatch_event(&event);
	}

	/* Only 4x supported but allow 1x or 4x to be set (see 14.2.6.6). */
	lwe = pip->link_width_enabled;
	if ((lwe >= 4 && lwe <= 8) || (lwe >= 0xC && lwe <= 0xFE))
		goto err;
	if (lwe == 0xFF)
		dev->link_width_enabled = 3;	/* 1x or 4x */
	else if (lwe)
		dev->link_width_enabled = lwe;

	/* Only 2.5 Gbs supported. */
	lse = pip->linkspeedactive_enabled & 0xF;
	if (lse >= 2 && lse <= 0xE)
		goto err;

	/* Set link down default state. */
	switch (pip->portphysstate_linkdown & 0xF) {
	case 0: /* NOP */
		break;
	case 1: /* SLEEP */
		if (ipath_layer_set_linkdowndefaultstate(dev->dd, 1))
			goto err;
		break;
	case 2: /* POLL */
		if (ipath_layer_set_linkdowndefaultstate(dev->dd, 0))
			goto err;
		break;
	default:
		goto err;
	}

	dev->mkeyprot_resv_lmc = pip->mkeyprot_resv_lmc;
	dev->vl_high_limit = pip->vl_high_limit;

	/* Translate the IB MTU enum to a byte count for the layer API. */
	switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) {
	case IB_MTU_256:
		mtu = 256;
		break;
	case IB_MTU_512:
		mtu = 512;
		break;
	case IB_MTU_1024:
		mtu = 1024;
		break;
	case IB_MTU_2048:
		mtu = 2048;
		break;
	case IB_MTU_4096:
		mtu = 4096;
		break;
	default:
		/* XXX We have already partially updated our state! */
		goto err;
	}
	ipath_layer_set_mtu(dev->dd, mtu);

	dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;

	/* We only support VL0 */
	if (((pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF) > 1)
		goto err;

	/* Writing zero to a violation counter resets it. */
	if (pip->mkey_violations == 0)
		dev->mkey_violations = 0;

	/*
	 * Hardware counter can't be reset so snapshot and subtract
	 * later.
	 */
	if (pip->pkey_violations == 0)
		dev->n_pkey_violations =
			ipath_layer_get_cr_errpkey(dev->dd);

	if (pip->qkey_violations == 0)
		dev->qkey_violations = 0;

	if (ipath_layer_set_phyerrthreshold(
		    dev->dd,
		    (pip->localphyerrors_overrunerrors >> 4) & 0xF))
		goto err;

	if (ipath_layer_set_overrunthreshold(
		    dev->dd,
		    (pip->localphyerrors_overrunerrors & 0xF)))
		goto err;

	dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;

	/* ClientReregister bit: remembered so it can be echoed in reply. */
	if (pip->clientrereg_resv_subnetto & 0x80) {
		clientrereg = 1;
		event.event = IB_EVENT_LID_CHANGE;
		ib_dispatch_event(&event);
	}

	/*
	 * Do the port state change now that the other link parameters
	 * have been set.
	 * Changing the port physical state only makes sense if the link
	 * is down or is being set to down.
	 */
	state = pip->linkspeed_portstate & 0xF;
	flags = ipath_layer_get_flags(dev->dd);
	lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
	if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
		goto err;

	/*
	 * Only state changes of DOWN, ARM, and ACTIVE are valid
	 * and must be in the correct state to take effect (see 7.2.6).
	 */
	switch (state) {
	case IB_PORT_NOP:
		if (lstate == 0)
			break;
		/* FALLTHROUGH */
	case IB_PORT_DOWN:
		/*
		 * NOTE(review): the first "else" below binds to the inner
		 * "if" (which has its own else), so the chain parses as
		 * intended — but braces would make this much clearer.
		 */
		if (lstate == 0)
			if (ipath_layer_get_linkdowndefaultstate(dev->dd))
				lstate = IPATH_IB_LINKDOWN_SLEEP;
			else
				lstate = IPATH_IB_LINKDOWN;
		else if (lstate == 1)
			lstate = IPATH_IB_LINKDOWN_SLEEP;
		else if (lstate == 2)
			lstate = IPATH_IB_LINKDOWN;
		else if (lstate == 3)
			lstate = IPATH_IB_LINKDOWN_DISABLE;
		else
			goto err;
		ipath_layer_set_linkstate(dev->dd, lstate);
		if (flags & IPATH_LINKACTIVE) {
			event.event = IB_EVENT_PORT_ERR;
			ib_dispatch_event(&event);
		}
		break;
	case IB_PORT_ARMED:
		if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE)))
			break;
		ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKARM);
		if (flags & IPATH_LINKACTIVE) {
			event.event = IB_EVENT_PORT_ERR;
			ib_dispatch_event(&event);
		}
		break;
	case IB_PORT_ACTIVE:
		if (!(flags & IPATH_LINKARMED))
			break;
		ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
		event.event = IB_EVENT_PORT_ACTIVE;
		ib_dispatch_event(&event);
		break;
	default:
		/* XXX We have already partially updated our state! */
		goto err;
	}

	/* Reply with the (possibly updated) PortInfo contents. */
	ret = recv_subn_get_portinfo(smp, ibdev, port);

	if (clientrereg)
		pip->clientrereg_resv_subnetto |= 0x80;

	goto done;

err:
	smp->status |= IB_SMP_INVALID_FIELD;
	ret = recv_subn_get_portinfo(smp, ibdev, port);

done:
	return ret;
}
528
529static int recv_subn_set_pkeytable(struct ib_smp *smp,
530 struct ib_device *ibdev)
531{
532 u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
533 __be16 *p = (__be16 *) smp->data;
534 u16 *q = (u16 *) smp->data;
535 struct ipath_ibdev *dev = to_idev(ibdev);
536 unsigned i, n = ipath_layer_get_npkeys(dev->dd);
537
538 for (i = 0; i < n; i++)
539 q[i] = be16_to_cpu(p[i]);
540
541 if (startpx != 0 ||
542 ipath_layer_set_pkeys(dev->dd, q) != 0)
543 smp->status |= IB_SMP_INVALID_FIELD;
544
545 return recv_subn_get_pkeytable(smp, ibdev);
546}
547
/* Performance management attribute IDs, in network byte order. */
#define IB_PMA_CLASS_PORT_INFO __constant_htons(0x0001)
#define IB_PMA_PORT_SAMPLES_CONTROL __constant_htons(0x0010)
#define IB_PMA_PORT_SAMPLES_RESULT __constant_htons(0x0011)
#define IB_PMA_PORT_COUNTERS __constant_htons(0x0012)
#define IB_PMA_PORT_COUNTERS_EXT __constant_htons(0x001D)
#define IB_PMA_PORT_SAMPLES_RESULT_EXT __constant_htons(0x001E)
554
/*
 * On-the-wire layout of a Performance Management MAD: the common MAD
 * header fields, 40 reserved bytes, then 192 bytes of attribute data.
 * __be* fields are big-endian on the wire; "packed" forbids padding.
 */
struct ib_perf {
	u8 base_version;
	u8 mgmt_class;
	u8 class_version;
	u8 method;
	__be16 status;
	__be16 unused;
	__be64 tid;
	__be16 attr_id;
	__be16 resv;
	__be32 attr_mod;
	u8 reserved[40];
	u8 data[192];
} __attribute__ ((packed));
569
/* Wire layout of the PMA ClassPortInfo attribute. */
struct ib_pma_classportinfo {
	u8 base_version;
	u8 class_version;
	__be16 cap_mask;
	u8 reserved[3];
	u8 resp_time_value;	/* only lower 5 bits */
	union ib_gid redirect_gid;
	__be32 redirect_tc_sl_fl;	/* 8, 4, 20 bits respectively */
	__be16 redirect_lid;
	__be16 redirect_pkey;
	__be32 redirect_qp;	/* only lower 24 bits */
	__be32 redirect_qkey;
	union ib_gid trap_gid;
	__be32 trap_tc_sl_fl;	/* 8, 4, 20 bits respectively */
	__be16 trap_lid;
	__be16 trap_pkey;
	__be32 trap_hl_qp;	/* 8, 24 bits respectively */
	__be32 trap_qkey;
} __attribute__ ((packed));
589
/* Wire layout of the PMA PortSamplesControl attribute. */
struct ib_pma_portsamplescontrol {
	u8 opcode;
	u8 port_select;
	u8 tick;
	u8 counter_width;	/* only lower 3 bits */
	__be32 counter_mask0_9;	/* 2, 10 * 3, bits */
	__be16 counter_mask10_14;	/* 1, 5 * 3, bits */
	u8 sample_mechanisms;
	u8 sample_status;	/* only lower 2 bits */
	__be64 option_mask;
	__be64 vendor_mask;
	__be32 sample_start;
	__be32 sample_interval;
	__be16 tag;
	__be16 counter_select[15];
} __attribute__ ((packed));
606
/* Wire layout of the PMA PortSamplesResult attribute (32-bit counters). */
struct ib_pma_portsamplesresult {
	__be16 tag;
	__be16 sample_status;	/* only lower 2 bits */
	__be32 counter[15];
} __attribute__ ((packed));
612
/* Wire layout of PortSamplesResultExtended (64-bit counters). */
struct ib_pma_portsamplesresult_ext {
	__be16 tag;
	__be16 sample_status;	/* only lower 2 bits */
	__be32 extended_width;	/* only upper 2 bits */
	__be64 counter[15];
} __attribute__ ((packed));
619
/* Wire layout of the PMA PortCounters attribute. */
struct ib_pma_portcounters {
	u8 reserved;
	u8 port_select;
	__be16 counter_select;
	__be16 symbol_error_counter;
	u8 link_error_recovery_counter;
	u8 link_downed_counter;
	__be16 port_rcv_errors;
	__be16 port_rcv_remphys_errors;
	__be16 port_rcv_switch_relay_errors;
	__be16 port_xmit_discards;
	u8 port_xmit_constraint_errors;
	u8 port_rcv_constraint_errors;
	u8 reserved1;
	u8 lli_ebor_errors;	/* 4, 4, bits */
	__be16 reserved2;
	__be16 vl15_dropped;
	__be32 port_xmit_data;
	__be32 port_rcv_data;
	__be32 port_xmit_packets;
	__be32 port_rcv_packets;
} __attribute__ ((packed));
642
/* CounterSelect bits for PortCounters Set (which counters to reset). */
#define IB_PMA_SEL_SYMBOL_ERROR __constant_htons(0x0001)
#define IB_PMA_SEL_LINK_ERROR_RECOVERY __constant_htons(0x0002)
#define IB_PMA_SEL_LINK_DOWNED __constant_htons(0x0004)
#define IB_PMA_SEL_PORT_RCV_ERRORS __constant_htons(0x0008)
#define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS __constant_htons(0x0010)
#define IB_PMA_SEL_PORT_XMIT_DISCARDS __constant_htons(0x0040)
#define IB_PMA_SEL_PORT_XMIT_DATA __constant_htons(0x1000)
#define IB_PMA_SEL_PORT_RCV_DATA __constant_htons(0x2000)
#define IB_PMA_SEL_PORT_XMIT_PACKETS __constant_htons(0x4000)
#define IB_PMA_SEL_PORT_RCV_PACKETS __constant_htons(0x8000)
653
/* Wire layout of PortCountersExtended (64-bit counters). */
struct ib_pma_portcounters_ext {
	u8 reserved;
	u8 port_select;
	__be16 counter_select;
	__be32 reserved1;
	__be64 port_xmit_data;
	__be64 port_rcv_data;
	__be64 port_xmit_packets;
	__be64 port_rcv_packets;
	__be64 port_unicast_xmit_packets;
	__be64 port_unicast_rcv_packets;
	__be64 port_multicast_xmit_packets;
	__be64 port_multicast_rcv_packets;
} __attribute__ ((packed));
668
/* CounterSelect bits for PortCountersExtended Set. */
#define IB_PMA_SELX_PORT_XMIT_DATA __constant_htons(0x0001)
#define IB_PMA_SELX_PORT_RCV_DATA __constant_htons(0x0002)
#define IB_PMA_SELX_PORT_XMIT_PACKETS __constant_htons(0x0004)
#define IB_PMA_SELX_PORT_RCV_PACKETS __constant_htons(0x0008)
#define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS __constant_htons(0x0010)
#define IB_PMA_SELX_PORT_UNI_RCV_PACKETS __constant_htons(0x0020)
#define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS __constant_htons(0x0040)
#define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS __constant_htons(0x0080)
677
678static int recv_pma_get_classportinfo(struct ib_perf *pmp)
679{
680 struct ib_pma_classportinfo *p =
681 (struct ib_pma_classportinfo *)pmp->data;
682
683 memset(pmp->data, 0, sizeof(pmp->data));
684
685 if (pmp->attr_mod != 0)
686 pmp->status |= IB_SMP_INVALID_FIELD;
687
688 /* Indicate AllPortSelect is valid (only one port anyway) */
689 p->cap_mask = __constant_cpu_to_be16(1 << 8);
690 p->base_version = 1;
691 p->class_version = 1;
692 /*
693 * Expected response time is 4.096 usec. * 2^18 == 1.073741824
694 * sec.
695 */
696 p->resp_time_value = 18;
697
698 return reply((struct ib_smp *) pmp);
699}
700
/*
 * The PortSamplesControl.CounterMasks field is an array of 3 bit fields
 * which specify the N'th counter's capabilities. See ch. 16.1.3.2.
 * We support 5 counters which only count the mandatory quantities.
 *
 * COUNTER_MASK(q, n) places capability value @q in the slot for the
 * N'th counter.  Arguments are fully parenthesized so that expression
 * arguments (e.g. "i + 1") expand correctly.
 */
#define COUNTER_MASK(q, n) ((q) << ((9 - (n)) * 3))
#define COUNTER_MASK0_9 \
	__constant_cpu_to_be32(COUNTER_MASK(1, 0) | \
			       COUNTER_MASK(1, 1) | \
			       COUNTER_MASK(1, 2) | \
			       COUNTER_MASK(1, 3) | \
			       COUNTER_MASK(1, 4))
713
714static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp,
715 struct ib_device *ibdev, u8 port)
716{
717 struct ib_pma_portsamplescontrol *p =
718 (struct ib_pma_portsamplescontrol *)pmp->data;
719 struct ipath_ibdev *dev = to_idev(ibdev);
720 unsigned long flags;
721 u8 port_select = p->port_select;
722
723 memset(pmp->data, 0, sizeof(pmp->data));
724
725 p->port_select = port_select;
726 if (pmp->attr_mod != 0 ||
727 (port_select != port && port_select != 0xFF))
728 pmp->status |= IB_SMP_INVALID_FIELD;
729 /*
730 * Ticks are 10x the link transfer period which for 2.5Gbs is 4
731 * nsec. 0 == 4 nsec., 1 == 8 nsec., ..., 255 == 1020 nsec. Sample
732 * intervals are counted in ticks. Since we use Linux timers, that
733 * count in jiffies, we can't sample for less than 1000 ticks if HZ
734 * == 1000 (4000 ticks if HZ is 250).
735 */
736 /* XXX This is WRONG. */
737 p->tick = 250; /* 1 usec. */
738 p->counter_width = 4; /* 32 bit counters */
739 p->counter_mask0_9 = COUNTER_MASK0_9;
740 spin_lock_irqsave(&dev->pending_lock, flags);
741 p->sample_status = dev->pma_sample_status;
742 p->sample_start = cpu_to_be32(dev->pma_sample_start);
743 p->sample_interval = cpu_to_be32(dev->pma_sample_interval);
744 p->tag = cpu_to_be16(dev->pma_tag);
745 p->counter_select[0] = dev->pma_counter_select[0];
746 p->counter_select[1] = dev->pma_counter_select[1];
747 p->counter_select[2] = dev->pma_counter_select[2];
748 p->counter_select[3] = dev->pma_counter_select[3];
749 p->counter_select[4] = dev->pma_counter_select[4];
750 spin_unlock_irqrestore(&dev->pending_lock, flags);
751
752 return reply((struct ib_smp *) pmp);
753}
754
755static int recv_pma_set_portsamplescontrol(struct ib_perf *pmp,
756 struct ib_device *ibdev, u8 port)
757{
758 struct ib_pma_portsamplescontrol *p =
759 (struct ib_pma_portsamplescontrol *)pmp->data;
760 struct ipath_ibdev *dev = to_idev(ibdev);
761 unsigned long flags;
762 u32 start;
763 int ret;
764
765 if (pmp->attr_mod != 0 ||
766 (p->port_select != port && p->port_select != 0xFF)) {
767 pmp->status |= IB_SMP_INVALID_FIELD;
768 ret = reply((struct ib_smp *) pmp);
769 goto bail;
770 }
771
772 start = be32_to_cpu(p->sample_start);
773 if (start != 0) {
774 spin_lock_irqsave(&dev->pending_lock, flags);
775 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_DONE) {
776 dev->pma_sample_status =
777 IB_PMA_SAMPLE_STATUS_STARTED;
778 dev->pma_sample_start = start;
779 dev->pma_sample_interval =
780 be32_to_cpu(p->sample_interval);
781 dev->pma_tag = be16_to_cpu(p->tag);
782 if (p->counter_select[0])
783 dev->pma_counter_select[0] =
784 p->counter_select[0];
785 if (p->counter_select[1])
786 dev->pma_counter_select[1] =
787 p->counter_select[1];
788 if (p->counter_select[2])
789 dev->pma_counter_select[2] =
790 p->counter_select[2];
791 if (p->counter_select[3])
792 dev->pma_counter_select[3] =
793 p->counter_select[3];
794 if (p->counter_select[4])
795 dev->pma_counter_select[4] =
796 p->counter_select[4];
797 }
798 spin_unlock_irqrestore(&dev->pending_lock, flags);
799 }
800 ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port);
801
802bail:
803 return ret;
804}
805
806static u64 get_counter(struct ipath_ibdev *dev, __be16 sel)
807{
808 u64 ret;
809
810 switch (sel) {
811 case IB_PMA_PORT_XMIT_DATA:
812 ret = dev->ipath_sword;
813 break;
814 case IB_PMA_PORT_RCV_DATA:
815 ret = dev->ipath_rword;
816 break;
817 case IB_PMA_PORT_XMIT_PKTS:
818 ret = dev->ipath_spkts;
819 break;
820 case IB_PMA_PORT_RCV_PKTS:
821 ret = dev->ipath_rpkts;
822 break;
823 case IB_PMA_PORT_XMIT_WAIT:
824 ret = dev->ipath_xmit_wait;
825 break;
826 default:
827 ret = 0;
828 }
829
830 return ret;
831}
832
833static int recv_pma_get_portsamplesresult(struct ib_perf *pmp,
834 struct ib_device *ibdev)
835{
836 struct ib_pma_portsamplesresult *p =
837 (struct ib_pma_portsamplesresult *)pmp->data;
838 struct ipath_ibdev *dev = to_idev(ibdev);
839 int i;
840
841 memset(pmp->data, 0, sizeof(pmp->data));
842 p->tag = cpu_to_be16(dev->pma_tag);
843 p->sample_status = cpu_to_be16(dev->pma_sample_status);
844 for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
845 p->counter[i] = cpu_to_be32(
846 get_counter(dev, dev->pma_counter_select[i]));
847
848 return reply((struct ib_smp *) pmp);
849}
850
851static int recv_pma_get_portsamplesresult_ext(struct ib_perf *pmp,
852 struct ib_device *ibdev)
853{
854 struct ib_pma_portsamplesresult_ext *p =
855 (struct ib_pma_portsamplesresult_ext *)pmp->data;
856 struct ipath_ibdev *dev = to_idev(ibdev);
857 int i;
858
859 memset(pmp->data, 0, sizeof(pmp->data));
860 p->tag = cpu_to_be16(dev->pma_tag);
861 p->sample_status = cpu_to_be16(dev->pma_sample_status);
862 /* 64 bits */
863 p->extended_width = __constant_cpu_to_be32(0x80000000);
864 for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
865 p->counter[i] = cpu_to_be64(
866 get_counter(dev, dev->pma_counter_select[i]));
867
868 return reply((struct ib_smp *) pmp);
869}
870
871static int recv_pma_get_portcounters(struct ib_perf *pmp,
872 struct ib_device *ibdev, u8 port)
873{
874 struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
875 pmp->data;
876 struct ipath_ibdev *dev = to_idev(ibdev);
877 struct ipath_layer_counters cntrs;
878 u8 port_select = p->port_select;
879
880 ipath_layer_get_counters(dev->dd, &cntrs);
881
882 /* Adjust counters for any resets done. */
883 cntrs.symbol_error_counter -= dev->n_symbol_error_counter;
884 cntrs.link_error_recovery_counter -=
885 dev->n_link_error_recovery_counter;
886 cntrs.link_downed_counter -= dev->n_link_downed_counter;
887 cntrs.port_rcv_errors += dev->rcv_errors;
888 cntrs.port_rcv_errors -= dev->n_port_rcv_errors;
889 cntrs.port_rcv_remphys_errors -= dev->n_port_rcv_remphys_errors;
890 cntrs.port_xmit_discards -= dev->n_port_xmit_discards;
891 cntrs.port_xmit_data -= dev->n_port_xmit_data;
892 cntrs.port_rcv_data -= dev->n_port_rcv_data;
893 cntrs.port_xmit_packets -= dev->n_port_xmit_packets;
894 cntrs.port_rcv_packets -= dev->n_port_rcv_packets;
895
896 memset(pmp->data, 0, sizeof(pmp->data));
897
898 p->port_select = port_select;
899 if (pmp->attr_mod != 0 ||
900 (port_select != port && port_select != 0xFF))
901 pmp->status |= IB_SMP_INVALID_FIELD;
902
903 if (cntrs.symbol_error_counter > 0xFFFFUL)
904 p->symbol_error_counter = __constant_cpu_to_be16(0xFFFF);
905 else
906 p->symbol_error_counter =
907 cpu_to_be16((u16)cntrs.symbol_error_counter);
908 if (cntrs.link_error_recovery_counter > 0xFFUL)
909 p->link_error_recovery_counter = 0xFF;
910 else
911 p->link_error_recovery_counter =
912 (u8)cntrs.link_error_recovery_counter;
913 if (cntrs.link_downed_counter > 0xFFUL)
914 p->link_downed_counter = 0xFF;
915 else
916 p->link_downed_counter = (u8)cntrs.link_downed_counter;
917 if (cntrs.port_rcv_errors > 0xFFFFUL)
918 p->port_rcv_errors = __constant_cpu_to_be16(0xFFFF);
919 else
920 p->port_rcv_errors =
921 cpu_to_be16((u16) cntrs.port_rcv_errors);
922 if (cntrs.port_rcv_remphys_errors > 0xFFFFUL)
923 p->port_rcv_remphys_errors = __constant_cpu_to_be16(0xFFFF);
924 else
925 p->port_rcv_remphys_errors =
926 cpu_to_be16((u16)cntrs.port_rcv_remphys_errors);
927 if (cntrs.port_xmit_discards > 0xFFFFUL)
928 p->port_xmit_discards = __constant_cpu_to_be16(0xFFFF);
929 else
930 p->port_xmit_discards =
931 cpu_to_be16((u16)cntrs.port_xmit_discards);
932 if (cntrs.port_xmit_data > 0xFFFFFFFFUL)
933 p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF);
934 else
935 p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data);
936 if (cntrs.port_rcv_data > 0xFFFFFFFFUL)
937 p->port_rcv_data = __constant_cpu_to_be32(0xFFFFFFFF);
938 else
939 p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data);
940 if (cntrs.port_xmit_packets > 0xFFFFFFFFUL)
941 p->port_xmit_packets = __constant_cpu_to_be32(0xFFFFFFFF);
942 else
943 p->port_xmit_packets =
944 cpu_to_be32((u32)cntrs.port_xmit_packets);
945 if (cntrs.port_rcv_packets > 0xFFFFFFFFUL)
946 p->port_rcv_packets = __constant_cpu_to_be32(0xFFFFFFFF);
947 else
948 p->port_rcv_packets =
949 cpu_to_be32((u32) cntrs.port_rcv_packets);
950
951 return reply((struct ib_smp *) pmp);
952}
953
954static int recv_pma_get_portcounters_ext(struct ib_perf *pmp,
955 struct ib_device *ibdev, u8 port)
956{
957 struct ib_pma_portcounters_ext *p =
958 (struct ib_pma_portcounters_ext *)pmp->data;
959 struct ipath_ibdev *dev = to_idev(ibdev);
960 u64 swords, rwords, spkts, rpkts, xwait;
961 u8 port_select = p->port_select;
962
963 ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
964 &rpkts, &xwait);
965
966 /* Adjust counters for any resets done. */
967 swords -= dev->n_port_xmit_data;
968 rwords -= dev->n_port_rcv_data;
969 spkts -= dev->n_port_xmit_packets;
970 rpkts -= dev->n_port_rcv_packets;
971
972 memset(pmp->data, 0, sizeof(pmp->data));
973
974 p->port_select = port_select;
975 if (pmp->attr_mod != 0 ||
976 (port_select != port && port_select != 0xFF))
977 pmp->status |= IB_SMP_INVALID_FIELD;
978
979 p->port_xmit_data = cpu_to_be64(swords);
980 p->port_rcv_data = cpu_to_be64(rwords);
981 p->port_xmit_packets = cpu_to_be64(spkts);
982 p->port_rcv_packets = cpu_to_be64(rpkts);
983 p->port_unicast_xmit_packets = cpu_to_be64(dev->n_unicast_xmit);
984 p->port_unicast_rcv_packets = cpu_to_be64(dev->n_unicast_rcv);
985 p->port_multicast_xmit_packets = cpu_to_be64(dev->n_multicast_xmit);
986 p->port_multicast_rcv_packets = cpu_to_be64(dev->n_multicast_rcv);
987
988 return reply((struct ib_smp *) pmp);
989}
990
991static int recv_pma_set_portcounters(struct ib_perf *pmp,
992 struct ib_device *ibdev, u8 port)
993{
994 struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
995 pmp->data;
996 struct ipath_ibdev *dev = to_idev(ibdev);
997 struct ipath_layer_counters cntrs;
998
999 /*
1000 * Since the HW doesn't support clearing counters, we save the
1001 * current count and subtract it from future responses.
1002 */
1003 ipath_layer_get_counters(dev->dd, &cntrs);
1004
1005 if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
1006 dev->n_symbol_error_counter = cntrs.symbol_error_counter;
1007
1008 if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY)
1009 dev->n_link_error_recovery_counter =
1010 cntrs.link_error_recovery_counter;
1011
1012 if (p->counter_select & IB_PMA_SEL_LINK_DOWNED)
1013 dev->n_link_downed_counter = cntrs.link_downed_counter;
1014
1015 if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS)
1016 dev->n_port_rcv_errors =
1017 cntrs.port_rcv_errors + dev->rcv_errors;
1018
1019 if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS)
1020 dev->n_port_rcv_remphys_errors =
1021 cntrs.port_rcv_remphys_errors;
1022
1023 if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS)
1024 dev->n_port_xmit_discards = cntrs.port_xmit_discards;
1025
1026 if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA)
1027 dev->n_port_xmit_data = cntrs.port_xmit_data;
1028
1029 if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA)
1030 dev->n_port_rcv_data = cntrs.port_rcv_data;
1031
1032 if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS)
1033 dev->n_port_xmit_packets = cntrs.port_xmit_packets;
1034
1035 if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS)
1036 dev->n_port_rcv_packets = cntrs.port_rcv_packets;
1037
1038 return recv_pma_get_portcounters(pmp, ibdev, port);
1039}
1040
1041static int recv_pma_set_portcounters_ext(struct ib_perf *pmp,
1042 struct ib_device *ibdev, u8 port)
1043{
1044 struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
1045 pmp->data;
1046 struct ipath_ibdev *dev = to_idev(ibdev);
1047 u64 swords, rwords, spkts, rpkts, xwait;
1048
1049 ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
1050 &rpkts, &xwait);
1051
1052 if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
1053 dev->n_port_xmit_data = swords;
1054
1055 if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA)
1056 dev->n_port_rcv_data = rwords;
1057
1058 if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS)
1059 dev->n_port_xmit_packets = spkts;
1060
1061 if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
1062 dev->n_port_rcv_packets = rpkts;
1063
1064 if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
1065 dev->n_unicast_xmit = 0;
1066
1067 if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS)
1068 dev->n_unicast_rcv = 0;
1069
1070 if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS)
1071 dev->n_multicast_xmit = 0;
1072
1073 if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS)
1074 dev->n_multicast_rcv = 0;
1075
1076 return recv_pma_get_portcounters_ext(pmp, ibdev, port);
1077}
1078
/*
 * Handle a subnet-management MAD (LID- or directed-route).  Performs
 * the M_Key check, then dispatches Get/Set requests to the attribute
 * handlers above.  Returns an IB_MAD_RESULT_* mask for the ib_mad core.
 */
static int process_subn(struct ib_device *ibdev, int mad_flags,
			u8 port_num, struct ib_mad *in_mad,
			struct ib_mad *out_mad)
{
	struct ib_smp *smp = (struct ib_smp *)out_mad;
	struct ipath_ibdev *dev = to_idev(ibdev);
	int ret;

	/* The reply is built in place on top of a copy of the request. */
	*out_mad = *in_mad;
	if (smp->class_version != 1) {
		smp->status |= IB_SMP_UNSUP_VERSION;
		ret = reply(smp);
		goto bail;
	}

	/* Is the mkey in the process of expiring? */
	if (dev->mkey_lease_timeout && jiffies >= dev->mkey_lease_timeout) {
		/* Clear timeout and mkey protection field. */
		dev->mkey_lease_timeout = 0;
		dev->mkeyprot_resv_lmc &= 0x3F;
	}

	/*
	 * M_Key checking depends on
	 * Portinfo:M_Key_protect_bits
	 */
	if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && dev->mkey != 0 &&
	    dev->mkey != smp->mkey &&
	    (smp->method == IB_MGMT_METHOD_SET ||
	     (smp->method == IB_MGMT_METHOD_GET &&
	      (dev->mkeyprot_resv_lmc >> 7) != 0))) {
		/* Violation counter saturates at 0xFFFF. */
		if (dev->mkey_violations != 0xFFFF)
			++dev->mkey_violations;
		/* Lease already running (or disabled): just drop the MAD. */
		if (dev->mkey_lease_timeout ||
		    dev->mkey_lease_period == 0) {
			ret = IB_MAD_RESULT_SUCCESS |
				IB_MAD_RESULT_CONSUMED;
			goto bail;
		}
		/* Start the M_Key lease timer; the MAD is consumed. */
		dev->mkey_lease_timeout = jiffies +
			dev->mkey_lease_period * HZ;
		/* Future: Generate a trap notice. */
		ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
		goto bail;
	} else if (dev->mkey_lease_timeout)
		dev->mkey_lease_timeout = 0;

	switch (smp->method) {
	case IB_MGMT_METHOD_GET:
		switch (smp->attr_id) {
		case IB_SMP_ATTR_NODE_DESC:
			ret = recv_subn_get_nodedescription(smp, ibdev);
			goto bail;
		case IB_SMP_ATTR_NODE_INFO:
			ret = recv_subn_get_nodeinfo(smp, ibdev, port_num);
			goto bail;
		case IB_SMP_ATTR_GUID_INFO:
			ret = recv_subn_get_guidinfo(smp, ibdev);
			goto bail;
		case IB_SMP_ATTR_PORT_INFO:
			ret = recv_subn_get_portinfo(smp, ibdev, port_num);
			goto bail;
		case IB_SMP_ATTR_PKEY_TABLE:
			ret = recv_subn_get_pkeytable(smp, ibdev);
			goto bail;
		case IB_SMP_ATTR_SM_INFO:
			/* SMInfo is only meaningful if an SM runs here. */
			if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
				ret = IB_MAD_RESULT_SUCCESS |
					IB_MAD_RESULT_CONSUMED;
				goto bail;
			}
			if (dev->port_cap_flags & IB_PORT_SM) {
				ret = IB_MAD_RESULT_SUCCESS;
				goto bail;
			}
			/* FALLTHROUGH */
		default:
			smp->status |= IB_SMP_UNSUP_METH_ATTR;
			ret = reply(smp);
			goto bail;
		}

	case IB_MGMT_METHOD_SET:
		switch (smp->attr_id) {
		case IB_SMP_ATTR_GUID_INFO:
			ret = recv_subn_set_guidinfo(smp, ibdev);
			goto bail;
		case IB_SMP_ATTR_PORT_INFO:
			ret = recv_subn_set_portinfo(smp, ibdev, port_num);
			goto bail;
		case IB_SMP_ATTR_PKEY_TABLE:
			ret = recv_subn_set_pkeytable(smp, ibdev);
			goto bail;
		case IB_SMP_ATTR_SM_INFO:
			if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
				ret = IB_MAD_RESULT_SUCCESS |
					IB_MAD_RESULT_CONSUMED;
				goto bail;
			}
			if (dev->port_cap_flags & IB_PORT_SM) {
				ret = IB_MAD_RESULT_SUCCESS;
				goto bail;
			}
			/* FALLTHROUGH */
		default:
			smp->status |= IB_SMP_UNSUP_METH_ATTR;
			ret = reply(smp);
			goto bail;
		}

	case IB_MGMT_METHOD_GET_RESP:
		/*
		 * The ib_mad module will call us to process responses
		 * before checking for other consumers.
		 * Just tell the caller to process it normally.
		 */
		ret = IB_MAD_RESULT_FAILURE;
		goto bail;
	default:
		smp->status |= IB_SMP_UNSUP_METHOD;
		ret = reply(smp);
	}

bail:
	return ret;
}
1205
1206static int process_perf(struct ib_device *ibdev, u8 port_num,
1207 struct ib_mad *in_mad,
1208 struct ib_mad *out_mad)
1209{
1210 struct ib_perf *pmp = (struct ib_perf *)out_mad;
1211 int ret;
1212
1213 *out_mad = *in_mad;
1214 if (pmp->class_version != 1) {
1215 pmp->status |= IB_SMP_UNSUP_VERSION;
1216 ret = reply((struct ib_smp *) pmp);
1217 goto bail;
1218 }
1219
1220 switch (pmp->method) {
1221 case IB_MGMT_METHOD_GET:
1222 switch (pmp->attr_id) {
1223 case IB_PMA_CLASS_PORT_INFO:
1224 ret = recv_pma_get_classportinfo(pmp);
1225 goto bail;
1226 case IB_PMA_PORT_SAMPLES_CONTROL:
1227 ret = recv_pma_get_portsamplescontrol(pmp, ibdev,
1228 port_num);
1229 goto bail;
1230 case IB_PMA_PORT_SAMPLES_RESULT:
1231 ret = recv_pma_get_portsamplesresult(pmp, ibdev);
1232 goto bail;
1233 case IB_PMA_PORT_SAMPLES_RESULT_EXT:
1234 ret = recv_pma_get_portsamplesresult_ext(pmp,
1235 ibdev);
1236 goto bail;
1237 case IB_PMA_PORT_COUNTERS:
1238 ret = recv_pma_get_portcounters(pmp, ibdev,
1239 port_num);
1240 goto bail;
1241 case IB_PMA_PORT_COUNTERS_EXT:
1242 ret = recv_pma_get_portcounters_ext(pmp, ibdev,
1243 port_num);
1244 goto bail;
1245 default:
1246 pmp->status |= IB_SMP_UNSUP_METH_ATTR;
1247 ret = reply((struct ib_smp *) pmp);
1248 goto bail;
1249 }
1250
1251 case IB_MGMT_METHOD_SET:
1252 switch (pmp->attr_id) {
1253 case IB_PMA_PORT_SAMPLES_CONTROL:
1254 ret = recv_pma_set_portsamplescontrol(pmp, ibdev,
1255 port_num);
1256 goto bail;
1257 case IB_PMA_PORT_COUNTERS:
1258 ret = recv_pma_set_portcounters(pmp, ibdev,
1259 port_num);
1260 goto bail;
1261 case IB_PMA_PORT_COUNTERS_EXT:
1262 ret = recv_pma_set_portcounters_ext(pmp, ibdev,
1263 port_num);
1264 goto bail;
1265 default:
1266 pmp->status |= IB_SMP_UNSUP_METH_ATTR;
1267 ret = reply((struct ib_smp *) pmp);
1268 goto bail;
1269 }
1270
1271 case IB_MGMT_METHOD_GET_RESP:
1272 /*
1273 * The ib_mad module will call us to process responses
1274 * before checking for other consumers.
1275 * Just tell the caller to process it normally.
1276 */
1277 ret = IB_MAD_RESULT_FAILURE;
1278 goto bail;
1279 default:
1280 pmp->status |= IB_SMP_UNSUP_METHOD;
1281 ret = reply((struct ib_smp *) pmp);
1282 }
1283
1284bail:
1285 return ret;
1286}
1287
1288/**
1289 * ipath_process_mad - process an incoming MAD packet
1290 * @ibdev: the infiniband device this packet came in on
1291 * @mad_flags: MAD flags
1292 * @port_num: the port number this packet came in on
1293 * @in_wc: the work completion entry for this packet
1294 * @in_grh: the global route header for this packet
1295 * @in_mad: the incoming MAD
1296 * @out_mad: any outgoing MAD reply
1297 *
1298 * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
1299 * interested in processing.
1300 *
1301 * Note that the verbs framework has already done the MAD sanity checks,
1302 * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
1303 * MADs.
1304 *
1305 * This is called by the ib_mad module.
1306 */
1307int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
1308 struct ib_wc *in_wc, struct ib_grh *in_grh,
1309 struct ib_mad *in_mad, struct ib_mad *out_mad)
1310{
1311 struct ipath_ibdev *dev = to_idev(ibdev);
1312 int ret;
1313
1314 /*
1315 * Snapshot current HW counters to "clear" them.
1316 * This should be done when the driver is loaded except that for
1317 * some reason we get a zillion errors when brining up the link.
1318 */
1319 if (dev->rcv_errors == 0) {
1320 struct ipath_layer_counters cntrs;
1321
1322 ipath_layer_get_counters(to_idev(ibdev)->dd, &cntrs);
1323 dev->rcv_errors++;
1324 dev->n_symbol_error_counter = cntrs.symbol_error_counter;
1325 dev->n_link_error_recovery_counter =
1326 cntrs.link_error_recovery_counter;
1327 dev->n_link_downed_counter = cntrs.link_downed_counter;
1328 dev->n_port_rcv_errors = cntrs.port_rcv_errors + 1;
1329 dev->n_port_rcv_remphys_errors =
1330 cntrs.port_rcv_remphys_errors;
1331 dev->n_port_xmit_discards = cntrs.port_xmit_discards;
1332 dev->n_port_xmit_data = cntrs.port_xmit_data;
1333 dev->n_port_rcv_data = cntrs.port_rcv_data;
1334 dev->n_port_xmit_packets = cntrs.port_xmit_packets;
1335 dev->n_port_rcv_packets = cntrs.port_rcv_packets;
1336 }
1337 switch (in_mad->mad_hdr.mgmt_class) {
1338 case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
1339 case IB_MGMT_CLASS_SUBN_LID_ROUTED:
1340 ret = process_subn(ibdev, mad_flags, port_num,
1341 in_mad, out_mad);
1342 goto bail;
1343 case IB_MGMT_CLASS_PERF_MGMT:
1344 ret = process_perf(ibdev, port_num, in_mad, out_mad);
1345 goto bail;
1346 default:
1347 ret = IB_MAD_RESULT_SUCCESS;
1348 }
1349
1350bail:
1351 return ret;
1352}
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
new file mode 100644
index 000000000000..69ffec66d45d
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -0,0 +1,383 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_pack.h>
34#include <rdma/ib_smi.h>
35
36#include "ipath_verbs.h"
37
38/**
39 * ipath_get_dma_mr - get a DMA memory region
40 * @pd: protection domain for this memory region
41 * @acc: access flags
42 *
43 * Returns the memory region on success, otherwise returns an errno.
44 */
45struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc)
46{
47 struct ipath_mr *mr;
48 struct ib_mr *ret;
49
50 mr = kzalloc(sizeof *mr, GFP_KERNEL);
51 if (!mr) {
52 ret = ERR_PTR(-ENOMEM);
53 goto bail;
54 }
55
56 mr->mr.access_flags = acc;
57 ret = &mr->ibmr;
58
59bail:
60 return ret;
61}
62
63static struct ipath_mr *alloc_mr(int count,
64 struct ipath_lkey_table *lk_table)
65{
66 struct ipath_mr *mr;
67 int m, i = 0;
68
69 /* Allocate struct plus pointers to first level page tables. */
70 m = (count + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
71 mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
72 if (!mr)
73 goto done;
74
75 /* Allocate first level page tables. */
76 for (; i < m; i++) {
77 mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL);
78 if (!mr->mr.map[i])
79 goto bail;
80 }
81 mr->mr.mapsz = m;
82
83 /*
84 * ib_reg_phys_mr() will initialize mr->ibmr except for
85 * lkey and rkey.
86 */
87 if (!ipath_alloc_lkey(lk_table, &mr->mr))
88 goto bail;
89 mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey;
90
91 goto done;
92
93bail:
94 while (i) {
95 i--;
96 kfree(mr->mr.map[i]);
97 }
98 kfree(mr);
99 mr = NULL;
100
101done:
102 return mr;
103}
104
/**
 * ipath_reg_phys_mr - register a physical memory region
 * @pd: protection domain for this memory region
 * @buffer_list: pointer to the list of physical buffers to register
 * @num_phys_buf: the number of physical buffers to register
 * @acc: access flags for this memory region
 * @iova_start: the starting address passed over IB which maps to this MR
 *
 * Returns the memory region on success, otherwise returns an errno.
 */
114struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
115 struct ib_phys_buf *buffer_list,
116 int num_phys_buf, int acc, u64 *iova_start)
117{
118 struct ipath_mr *mr;
119 int n, m, i;
120 struct ib_mr *ret;
121
122 mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table);
123 if (mr == NULL) {
124 ret = ERR_PTR(-ENOMEM);
125 goto bail;
126 }
127
128 mr->mr.user_base = *iova_start;
129 mr->mr.iova = *iova_start;
130 mr->mr.length = 0;
131 mr->mr.offset = 0;
132 mr->mr.access_flags = acc;
133 mr->mr.max_segs = num_phys_buf;
134
135 m = 0;
136 n = 0;
137 for (i = 0; i < num_phys_buf; i++) {
138 mr->mr.map[m]->segs[n].vaddr =
139 phys_to_virt(buffer_list[i].addr);
140 mr->mr.map[m]->segs[n].length = buffer_list[i].size;
141 mr->mr.length += buffer_list[i].size;
142 n++;
143 if (n == IPATH_SEGSZ) {
144 m++;
145 n = 0;
146 }
147 }
148
149 ret = &mr->ibmr;
150
151bail:
152 return ret;
153}
154
155/**
156 * ipath_reg_user_mr - register a userspace memory region
157 * @pd: protection domain for this memory region
158 * @region: the user memory region
159 * @mr_access_flags: access flags for this memory region
160 * @udata: unused by the InfiniPath driver
161 *
162 * Returns the memory region on success, otherwise returns an errno.
163 */
164struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
165 int mr_access_flags, struct ib_udata *udata)
166{
167 struct ipath_mr *mr;
168 struct ib_umem_chunk *chunk;
169 int n, m, i;
170 struct ib_mr *ret;
171
172 n = 0;
173 list_for_each_entry(chunk, &region->chunk_list, list)
174 n += chunk->nents;
175
176 mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
177 if (!mr) {
178 ret = ERR_PTR(-ENOMEM);
179 goto bail;
180 }
181
182 mr->mr.user_base = region->user_base;
183 mr->mr.iova = region->virt_base;
184 mr->mr.length = region->length;
185 mr->mr.offset = region->offset;
186 mr->mr.access_flags = mr_access_flags;
187 mr->mr.max_segs = n;
188
189 m = 0;
190 n = 0;
191 list_for_each_entry(chunk, &region->chunk_list, list) {
192 for (i = 0; i < chunk->nmap; i++) {
193 mr->mr.map[m]->segs[n].vaddr =
194 page_address(chunk->page_list[i].page);
195 mr->mr.map[m]->segs[n].length = region->page_size;
196 n++;
197 if (n == IPATH_SEGSZ) {
198 m++;
199 n = 0;
200 }
201 }
202 }
203 ret = &mr->ibmr;
204
205bail:
206 return ret;
207}
208
/**
 * ipath_dereg_mr - unregister and free a memory region
 * @ibmr: the memory region to free
 *
 * Returns 0 on success.
 *
 * Note that this is called to free MRs created by ipath_get_dma_mr(),
 * ipath_reg_phys_mr() or ipath_reg_user_mr().
 */
218int ipath_dereg_mr(struct ib_mr *ibmr)
219{
220 struct ipath_mr *mr = to_imr(ibmr);
221 int i;
222
223 ipath_free_lkey(&to_idev(ibmr->device)->lk_table, ibmr->lkey);
224 i = mr->mr.mapsz;
225 while (i) {
226 i--;
227 kfree(mr->mr.map[i]);
228 }
229 kfree(mr);
230 return 0;
231}
232
233/**
234 * ipath_alloc_fmr - allocate a fast memory region
235 * @pd: the protection domain for this memory region
236 * @mr_access_flags: access flags for this memory region
237 * @fmr_attr: fast memory region attributes
238 *
239 * Returns the memory region on success, otherwise returns an errno.
240 */
241struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
242 struct ib_fmr_attr *fmr_attr)
243{
244 struct ipath_fmr *fmr;
245 int m, i = 0;
246 struct ib_fmr *ret;
247
248 /* Allocate struct plus pointers to first level page tables. */
249 m = (fmr_attr->max_pages + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
250 fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
251 if (!fmr)
252 goto bail;
253
254 /* Allocate first level page tables. */
255 for (; i < m; i++) {
256 fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0],
257 GFP_KERNEL);
258 if (!fmr->mr.map[i])
259 goto bail;
260 }
261 fmr->mr.mapsz = m;
262
263 /*
264 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
265 * rkey.
266 */
267 if (!ipath_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr))
268 goto bail;
269 fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mr.lkey;
270 /*
271 * Resources are allocated but no valid mapping (RKEY can't be
272 * used).
273 */
274 fmr->mr.user_base = 0;
275 fmr->mr.iova = 0;
276 fmr->mr.length = 0;
277 fmr->mr.offset = 0;
278 fmr->mr.access_flags = mr_access_flags;
279 fmr->mr.max_segs = fmr_attr->max_pages;
280 fmr->page_shift = fmr_attr->page_shift;
281
282 ret = &fmr->ibfmr;
283 goto done;
284
285bail:
286 while (i)
287 kfree(fmr->mr.map[--i]);
288 kfree(fmr);
289 ret = ERR_PTR(-ENOMEM);
290
291done:
292 return ret;
293}
294
/**
 * ipath_map_phys_fmr - set up a fast memory region
 * @ibfmr: the fast memory region to set up
 * @page_list: the list of pages to associate with the fast memory region
 * @list_len: the number of pages to associate with the fast memory region
 * @iova: the virtual address of the start of the fast memory region
 *
 * This may be called from interrupt context.
 */
304
305int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
306 int list_len, u64 iova)
307{
308 struct ipath_fmr *fmr = to_ifmr(ibfmr);
309 struct ipath_lkey_table *rkt;
310 unsigned long flags;
311 int m, n, i;
312 u32 ps;
313 int ret;
314
315 if (list_len > fmr->mr.max_segs) {
316 ret = -EINVAL;
317 goto bail;
318 }
319 rkt = &to_idev(ibfmr->device)->lk_table;
320 spin_lock_irqsave(&rkt->lock, flags);
321 fmr->mr.user_base = iova;
322 fmr->mr.iova = iova;
323 ps = 1 << fmr->page_shift;
324 fmr->mr.length = list_len * ps;
325 m = 0;
326 n = 0;
327 ps = 1 << fmr->page_shift;
328 for (i = 0; i < list_len; i++) {
329 fmr->mr.map[m]->segs[n].vaddr = phys_to_virt(page_list[i]);
330 fmr->mr.map[m]->segs[n].length = ps;
331 if (++n == IPATH_SEGSZ) {
332 m++;
333 n = 0;
334 }
335 }
336 spin_unlock_irqrestore(&rkt->lock, flags);
337 ret = 0;
338
339bail:
340 return ret;
341}
342
343/**
344 * ipath_unmap_fmr - unmap fast memory regions
345 * @fmr_list: the list of fast memory regions to unmap
346 *
347 * Returns 0 on success.
348 */
349int ipath_unmap_fmr(struct list_head *fmr_list)
350{
351 struct ipath_fmr *fmr;
352 struct ipath_lkey_table *rkt;
353 unsigned long flags;
354
355 list_for_each_entry(fmr, fmr_list, ibfmr.list) {
356 rkt = &to_idev(fmr->ibfmr.device)->lk_table;
357 spin_lock_irqsave(&rkt->lock, flags);
358 fmr->mr.user_base = 0;
359 fmr->mr.iova = 0;
360 fmr->mr.length = 0;
361 spin_unlock_irqrestore(&rkt->lock, flags);
362 }
363 return 0;
364}
365
366/**
367 * ipath_dealloc_fmr - deallocate a fast memory region
368 * @ibfmr: the fast memory region to deallocate
369 *
370 * Returns 0 on success.
371 */
372int ipath_dealloc_fmr(struct ib_fmr *ibfmr)
373{
374 struct ipath_fmr *fmr = to_ifmr(ibfmr);
375 int i;
376
377 ipath_free_lkey(&to_idev(ibfmr->device)->lk_table, ibfmr->lkey);
378 i = fmr->mr.mapsz;
379 while (i)
380 kfree(fmr->mr.map[--i]);
381 kfree(fmr);
382 return 0;
383}
diff --git a/drivers/infiniband/hw/ipath/ipath_pe800.c b/drivers/infiniband/hw/ipath/ipath_pe800.c
new file mode 100644
index 000000000000..e1dc4f757062
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_pe800.c
@@ -0,0 +1,1247 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32/*
33 * This file contains all of the code that is specific to the
34 * InfiniPath PE-800 chip.
35 */
36
37#include <linux/interrupt.h>
38#include <linux/pci.h>
39#include <linux/delay.h>
40
41
42#include "ipath_kernel.h"
43#include "ipath_registers.h"
44
45/*
46 * This file contains all the chip-specific register information and
47 * access functions for the PathScale PE800, the PCI-Express chip.
48 *
49 * This lists the InfiniPath PE800 registers, in the actual chip layout.
50 * This structure should never be directly accessed.
51 */
struct _infinipath_do_not_use_kernel_regs {
	/* chip identification, global control and register-block bases */
	unsigned long long Revision;
	unsigned long long Control;
	unsigned long long PageAlign;
	unsigned long long PortCnt;
	unsigned long long DebugPortSelect;
	unsigned long long Reserved0;
	unsigned long long SendRegBase;
	unsigned long long UserRegBase;
	unsigned long long CounterRegBase;
	unsigned long long Scratch;
	unsigned long long Reserved1;
	unsigned long long Reserved2;
	/* interrupt status/mask/clear */
	unsigned long long IntBlocked;
	unsigned long long IntMask;
	unsigned long long IntStatus;
	unsigned long long IntClear;
	/* software and hardware error mask/status/clear, diagnostics */
	unsigned long long ErrorMask;
	unsigned long long ErrorStatus;
	unsigned long long ErrorClear;
	unsigned long long HwErrMask;
	unsigned long long HwErrStatus;
	unsigned long long HwErrClear;
	unsigned long long HwDiagCtrl;
	unsigned long long MDIO;
	/* IB link control/status, external status and GPIO */
	unsigned long long IBCStatus;
	unsigned long long IBCCtrl;
	unsigned long long ExtStatus;
	unsigned long long ExtCtrl;
	unsigned long long GPIOOut;
	unsigned long long GPIOMask;
	unsigned long long GPIOStatus;
	unsigned long long GPIOClear;
	/* receive side */
	unsigned long long RcvCtrl;
	unsigned long long RcvBTHQP;
	unsigned long long RcvHdrSize;
	unsigned long long RcvHdrCnt;
	unsigned long long RcvHdrEntSize;
	unsigned long long RcvTIDBase;
	unsigned long long RcvTIDCnt;
	unsigned long long RcvEgrBase;
	unsigned long long RcvEgrCnt;
	unsigned long long RcvBufBase;
	unsigned long long RcvBufSize;
	unsigned long long RxIntMemBase;
	unsigned long long RxIntMemSize;
	unsigned long long RcvPartitionKey;
	unsigned long long Reserved3;
	unsigned long long RcvPktLEDCnt;
	unsigned long long Reserved4[8];
	/* send side */
	unsigned long long SendCtrl;
	unsigned long long SendPIOBufBase;
	unsigned long long SendPIOSize;
	unsigned long long SendPIOBufCnt;
	unsigned long long SendPIOAvailAddr;
	unsigned long long TxIntMemBase;
	unsigned long long TxIntMemSize;
	unsigned long long Reserved5;
	unsigned long long PCIeRBufTestReg0;
	unsigned long long PCIeRBufTestReg1;
	unsigned long long Reserved51[6];
	unsigned long long SendBufferError;
	unsigned long long SendBufferErrorCONT1;
	unsigned long long Reserved6SBE[6];
	/* per-port receive header queue addresses (ports 0-4) */
	unsigned long long RcvHdrAddr0;
	unsigned long long RcvHdrAddr1;
	unsigned long long RcvHdrAddr2;
	unsigned long long RcvHdrAddr3;
	unsigned long long RcvHdrAddr4;
	unsigned long long Reserved7RHA[11];
	unsigned long long RcvHdrTailAddr0;
	unsigned long long RcvHdrTailAddr1;
	unsigned long long RcvHdrTailAddr2;
	unsigned long long RcvHdrTailAddr3;
	unsigned long long RcvHdrTailAddr4;
	unsigned long long Reserved8RHTA[11];
	unsigned long long Reserved9SW[8];
	/* IB serdes and PCIe serdes configuration/status */
	unsigned long long SerdesConfig0;
	unsigned long long SerdesConfig1;
	unsigned long long SerdesStatus;
	unsigned long long XGXSConfig;
	unsigned long long IBPLLCfg;
	unsigned long long Reserved10SW2[3];
	unsigned long long PCIEQ0SerdesConfig0;
	unsigned long long PCIEQ0SerdesConfig1;
	unsigned long long PCIEQ0SerdesStatus;
	unsigned long long Reserved11;
	unsigned long long PCIEQ1SerdesConfig0;
	unsigned long long PCIEQ1SerdesConfig1;
	unsigned long long PCIEQ1SerdesStatus;
	unsigned long long Reserved12;
};
144
/* Convert a field of the chip register struct above into a u64 register
 * index suitable for the ipath_kregs table below. */
#define IPATH_KREG_OFFSET(field) (offsetof(struct \
	_infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
/* Same conversion for the hardware counter registers. */
#define IPATH_CREG_OFFSET(field) (offsetof( \
	struct infinipath_counters, field) / sizeof(u64))
149
/* Map of generic kernel-register names to PE-800 register indices. */
static const struct ipath_kregs ipath_pe_kregs = {
	.kr_control = IPATH_KREG_OFFSET(Control),
	.kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
	.kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
	.kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
	.kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
	.kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
	.kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
	.kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
	.kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
	.kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
	.kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
	.kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
	.kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
	.kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
	.kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
	.kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
	.kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
	.kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
	.kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
	.kr_intclear = IPATH_KREG_OFFSET(IntClear),
	.kr_intmask = IPATH_KREG_OFFSET(IntMask),
	.kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
	.kr_mdio = IPATH_KREG_OFFSET(MDIO),
	.kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
	.kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
	.kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
	.kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
	.kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
	.kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
	.kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
	.kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
	.kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
	.kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
	.kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
	.kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
	.kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
	.kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
	.kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
	.kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
	.kr_revision = IPATH_KREG_OFFSET(Revision),
	.kr_scratch = IPATH_KREG_OFFSET(Scratch),
	.kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
	.kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
	.kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr),
	.kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase),
	.kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt),
	.kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize),
	.kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
	.kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
	.kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
	.kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
	.kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0),
	.kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1),
	.kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
	.kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
	.kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg),

	/*
	 * These should not be used directly via ipath_read_kreg64(),
	 * use them with ipath_read_kreg64_port()
	 */
	.kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
	.kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),

	/* This group is pe-800-specific; and used only in this file */
	/* The rcvpktled register controls one of the debug port signals, so
	 * a packet activity LED can be connected to it. */
	.kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt),
	.kr_pcierbuftestreg0 = IPATH_KREG_OFFSET(PCIeRBufTestReg0),
	.kr_pcierbuftestreg1 = IPATH_KREG_OFFSET(PCIeRBufTestReg1),
	.kr_pcieq0serdesconfig0 = IPATH_KREG_OFFSET(PCIEQ0SerdesConfig0),
	.kr_pcieq0serdesconfig1 = IPATH_KREG_OFFSET(PCIEQ0SerdesConfig1),
	.kr_pcieq0serdesstatus = IPATH_KREG_OFFSET(PCIEQ0SerdesStatus),
	.kr_pcieq1serdesconfig0 = IPATH_KREG_OFFSET(PCIEQ1SerdesConfig0),
	.kr_pcieq1serdesconfig1 = IPATH_KREG_OFFSET(PCIEQ1SerdesConfig1),
	.kr_pcieq1serdesstatus = IPATH_KREG_OFFSET(PCIEQ1SerdesStatus)
};
228
/* Map of generic counter names to PE-800 counter-register indices. */
static const struct ipath_cregs ipath_pe_cregs = {
	.cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
	.cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
	.cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
	.cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
	.cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
	.cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
	.cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
	.cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
	.cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
	.cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
	.cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
	.cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
	.cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
	.cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
	.cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
	.cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
	.cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
	.cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
	.cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
	.cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
	.cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
	.cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
	.cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
	.cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
	.cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
	.cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
	.cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
	.cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
	.cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
	.cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
	.cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
	.cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
	.cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
};
264
/* kr_intstatus, kr_intclear, kr_intmask bits */
#define INFINIPATH_I_RCVURG_MASK 0x1F
#define INFINIPATH_I_RCVAVAIL_MASK 0x1F

/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
#define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK 0x000000000000003fULL
#define INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT 0
#define INFINIPATH_HWE_PCIEPOISONEDTLP 0x0000000010000000ULL
#define INFINIPATH_HWE_PCIECPLTIMEOUT 0x0000000020000000ULL
#define INFINIPATH_HWE_PCIEBUSPARITYXTLH 0x0000000040000000ULL
#define INFINIPATH_HWE_PCIEBUSPARITYXADM 0x0000000080000000ULL
#define INFINIPATH_HWE_PCIEBUSPARITYRADM 0x0000000100000000ULL
#define INFINIPATH_HWE_COREPLL_FBSLIP 0x0080000000000000ULL
#define INFINIPATH_HWE_COREPLL_RFSLIP 0x0100000000000000ULL
#define INFINIPATH_HWE_PCIE1PLLFAILED 0x0400000000000000ULL
#define INFINIPATH_HWE_PCIE0PLLFAILED 0x0800000000000000ULL
#define INFINIPATH_HWE_SERDESPLLFAILED 0x1000000000000000ULL

/* kr_extstatus bits */
#define INFINIPATH_EXTS_FREQSEL 0x2
#define INFINIPATH_EXTS_SERDESSEL 0x4
#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
#define INFINIPATH_EXTS_MEMBIST_FOUND 0x0000000000008000

/* GPIO bit numbers for the serial data/clock pair (I2C-style signals;
 * presumably for the board EEPROM -- confirm against ipath_eeprom.c) */
#define _IPATH_GPIO_SDA_NUM 1
#define _IPATH_GPIO_SCL_NUM 0

/* Output-enable masks for the SDA/SCL GPIO lines in kr_extctrl. */
#define IPATH_GPIO_SDA (1ULL << \
	(_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
#define IPATH_GPIO_SCL (1ULL << \
	(_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
296
/**
 * ipath_pe_handle_hwerrors - display hardware errors.
 * @dd: the infinipath device
 * @msg: the output buffer
 * @msgl: the size of the output buffer
 *
 * Most hardware errors are catastrophic, but for right now we print
 * them and continue.  We reuse the same message buffer as
 * ipath_handle_errors() to avoid excessive stack usage.
 */
static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
				     size_t msgl)
{
	ipath_err_t hwerrs;
	u32 bits, ctrl;
	int isfatal = 0;
	char bitsmsg[64];

	hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
	if (!hwerrs) {
		/*
		 * better than printing confusing messages
		 * This seems to be related to clearing the crc error, or
		 * the pll error during init.
		 */
		ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
		return;
	} else if (hwerrs == ~0ULL) {
		/* all-ones usually means the device fell off the bus */
		ipath_dev_err(dd, "Read of hardware error status failed "
			      "(all bits set); ignoring\n");
		return;
	}
	ipath_stats.sps_hwerrs++;

	/* Always clear the error status register, except MEMBISTFAIL,
	 * regardless of whether we continue or stop using the chip.
	 * We want that set so we know it failed, even across driver reload.
	 * We'll still ignore it in the hwerrmask.  We do this partly for
	 * diagnostics, but also for support */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
			 hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);

	hwerrs &= dd->ipath_hwerrmask;

	/*
	 * make sure we get this much out, unless told to be quiet,
	 * or it's occurred within the last 5 seconds
	 */
	if ((hwerrs & ~dd->ipath_lasthwerror) ||
	    (ipath_debug & __IPATH_VERBDBG))
		dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
			 "(cleared)\n", (unsigned long long) hwerrs);
	dd->ipath_lasthwerror |= hwerrs;

	if (hwerrs & ~infinipath_hwe_bitsextant)
		ipath_dev_err(dd, "hwerror interrupt with unknown errors "
			      "%llx set\n", (unsigned long long)
			      (hwerrs & ~infinipath_hwe_bitsextant));

	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
	if (ctrl & INFINIPATH_C_FREEZEMODE) {
		if (hwerrs) {
			/*
			 * if any set that we aren't ignoring only make the
			 * complaint once, in case it's stuck or recurring,
			 * and we get here multiple times
			 */
			if (dd->ipath_flags & IPATH_INITTED) {
				ipath_dev_err(dd, "Fatal Error (freeze "
					      "mode), no longer usable\n");
				isfatal = 1;
			}
			/*
			 * Mark as having had an error for driver, and also
			 * for /sys and status word mapped to user programs.
			 * This marks unit as not usable, until reset
			 */
			*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
			*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
			dd->ipath_flags &= ~IPATH_INITTED;
		} else {
			/* freeze with nothing we care about set: unfreeze */
			ipath_dbg("Clearing freezemode on ignored hardware "
				  "error\n");
			ctrl &= ~INFINIPATH_C_FREEZEMODE;
			ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
					 ctrl);
		}
	}

	/* build up a human-readable summary of the bits seen, in msg */
	*msg = '\0';

	if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
		strlcat(msg, "[Memory BIST test failed, PE-800 unusable]",
			msgl);
		/* ignore from now on, so disable until driver reloaded */
		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
		dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
				 dd->ipath_hwerrmask);
	}
	if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
		      << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
		bits = (u32) ((hwerrs >>
			       INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
			      INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
		snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
			 bits);
		strlcat(msg, bitsmsg, msgl);
	}
	if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
		      << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
		bits = (u32) ((hwerrs >>
			       INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
			      INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
		snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
			 bits);
		strlcat(msg, bitsmsg, msgl);
	}
	if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK
		      << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) {
		bits = (u32) ((hwerrs >>
			       INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) &
			      INFINIPATH_HWE_PCIEMEMPARITYERR_MASK);
		snprintf(bitsmsg, sizeof bitsmsg,
			 "[PCIe Mem Parity Errs %x] ", bits);
		strlcat(msg, bitsmsg, msgl);
	}
	if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
		strlcat(msg, "[IB2IPATH Parity]", msgl);
	if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
		strlcat(msg, "[IPATH2IB Parity]", msgl);

#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \
			 INFINIPATH_HWE_COREPLL_RFSLIP )

	if (hwerrs & _IPATH_PLL_FAIL) {
		snprintf(bitsmsg, sizeof bitsmsg,
			 "[PLL failed (%llx), PE-800 unusable]",
			 (unsigned long long) hwerrs & _IPATH_PLL_FAIL);
		strlcat(msg, bitsmsg, msgl);
		/* ignore from now on, so disable until driver reloaded */
		dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
				 dd->ipath_hwerrmask);
	}

	if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
		/*
		 * If it occurs, it is left masked since the external
		 * interface is unused
		 */
		dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
				 dd->ipath_hwerrmask);
	}

	if (hwerrs & INFINIPATH_HWE_PCIEPOISONEDTLP)
		strlcat(msg, "[PCIe Poisoned TLP]", msgl);
	if (hwerrs & INFINIPATH_HWE_PCIECPLTIMEOUT)
		strlcat(msg, "[PCIe completion timeout]", msgl);

	/*
	 * In practice, it's unlikely that we'll see PCIe PLL, or bus
	 * parity or memory parity error failures, because most likely we
	 * won't be able to talk to the core of the chip.  Nonetheless, we
	 * might see them, if they are in parts of the PCIe core that aren't
	 * essential.
	 */
	if (hwerrs & INFINIPATH_HWE_PCIE1PLLFAILED)
		strlcat(msg, "[PCIePLL1]", msgl);
	if (hwerrs & INFINIPATH_HWE_PCIE0PLLFAILED)
		strlcat(msg, "[PCIePLL0]", msgl);
	if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXTLH)
		strlcat(msg, "[PCIe XTLH core parity]", msgl);
	if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXADM)
		strlcat(msg, "[PCIe ADM TX core parity]", msgl);
	if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYRADM)
		strlcat(msg, "[PCIe ADM RX core parity]", msgl);

	if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
		strlcat(msg, "[Rx Dsync]", msgl);
	if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
		strlcat(msg, "[SerDes PLL]", msgl);

	ipath_dev_err(dd, "%s hardware error\n", msg);
	if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
		/*
		 * for /sys status file ; if no trailing } is copied, we'll
		 * know it was truncated.
		 */
		snprintf(dd->ipath_freezemsg, dd->ipath_freezelen,
			 "{%s}", msg);
	}
}
492
493/**
494 * ipath_pe_boardname - fill in the board name
495 * @dd: the infinipath device
496 * @name: the output buffer
497 * @namelen: the size of the output buffer
498 *
499 * info is based on the board revision register
500 */
501static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
502 size_t namelen)
503{
504 char *n = NULL;
505 u8 boardrev = dd->ipath_boardrev;
506 int ret;
507
508 switch (boardrev) {
509 case 0:
510 n = "InfiniPath_Emulation";
511 break;
512 case 1:
513 n = "InfiniPath_PE-800-Bringup";
514 break;
515 case 2:
516 n = "InfiniPath_PE-880";
517 break;
518 case 3:
519 n = "InfiniPath_PE-850";
520 break;
521 case 4:
522 n = "InfiniPath_PE-860";
523 break;
524 default:
525 ipath_dev_err(dd,
526 "Don't yet know about board with ID %u\n",
527 boardrev);
528 snprintf(name, namelen, "Unknown_InfiniPath_PE-8xx_%u",
529 boardrev);
530 break;
531 }
532 if (n)
533 snprintf(name, namelen, "%s", n);
534
535 if (dd->ipath_majrev != 4 || dd->ipath_minrev != 1) {
536 ipath_dev_err(dd, "Unsupported PE-800 revision %u.%u!\n",
537 dd->ipath_majrev, dd->ipath_minrev);
538 ret = 1;
539 } else
540 ret = 0;
541
542 return ret;
543}
544
545/**
546 * ipath_pe_init_hwerrors - enable hardware errors
547 * @dd: the infinipath device
548 *
549 * now that we have finished initializing everything that might reasonably
550 * cause a hardware error, and cleared those errors bits as they occur,
551 * we can enable hardware errors in the mask (potentially enabling
552 * freeze mode), and enable hardware errors as errors (along with
553 * everything else) in errormask
554 */
static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
{
	ipath_err_t val;
	u64 extsval;

	extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);

	if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
		ipath_dev_err(dd, "MemBIST did not complete!\n");

	val = ~0ULL;	/* barring bugs, all hwerrors become interrupts, */

	if (!dd->ipath_boardrev)	/* no PLL for Emulator */
		val &= ~INFINIPATH_HWE_SERDESPLLFAILED;

	/* workaround bug 9460 in internal interface bus parity checking */
	val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;

	/* The computed mask takes effect when the caller writes it to
	 * kr_hwerrmask; this function only records it. */
	dd->ipath_hwerrmask = val;
}
575
/**
 * ipath_pe_bringup_serdes - bring up the serdes
 * @dd: the infinipath device
 *
 * Sequence: clear any stale SerDes-PLL-failed error, pulse the PLL and
 * per-lane resets with the required settling delays, point the XGXS MDIO
 * address at 3 and take XGXS out of reset, program drive current and
 * de-emphasis, then do a sanity MDIO read.  Always returns 0 (ret is
 * never changed below); MDIO problems are only logged.
 */
static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
{
	u64 val, tmp, config1;
	int ret = 0, change = 0;

	ipath_dbg("Trying to bringup serdes\n");

	/* a stale PLL-failed error would just re-trip during bringup,
	 * so clear it first and carry on */
	if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
	    INFINIPATH_HWE_SERDESPLLFAILED) {
		ipath_dbg("At start, serdes PLL failed bit set "
			  "in hwerrstatus, clearing and continuing\n");
		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
				 INFINIPATH_HWE_SERDESPLLFAILED);
	}

	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
	config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);

	ipath_cdbg(VERBOSE, "SerDes status config0=%llx config1=%llx, "
		   "xgxsconfig %llx\n", (unsigned long long) val,
		   (unsigned long long) config1, (unsigned long long)
		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));

	/*
	 * Force reset on, also set rxdetect enable. Must do before reading
	 * serdesstatus at least for simulation, or some of the bits in
	 * serdes status will come back as undefined and cause simulation
	 * failures
	 */
	val |= INFINIPATH_SERDC0_RESET_PLL | INFINIPATH_SERDC0_RXDETECT_EN
		| INFINIPATH_SERDC0_L1PWR_DN;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
	/* be sure chip saw it (scratch read flushes the write) */
	tmp = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
	udelay(5);		/* need pll reset set at least for a bit */
	/*
	 * after PLL is reset, set the per-lane Resets and TxIdle and
	 * clear the PLL reset and rxdetect (to get falling edge).
	 * Leave L1PWR bits set (permanently)
	 */
	val &= ~(INFINIPATH_SERDC0_RXDETECT_EN | INFINIPATH_SERDC0_RESET_PLL
		 | INFINIPATH_SERDC0_L1PWR_DN);
	val |= INFINIPATH_SERDC0_RESET_MASK | INFINIPATH_SERDC0_TXIDLE;
	ipath_cdbg(VERBOSE, "Clearing pll reset and setting lane resets "
		   "and txidle (%llx)\n", (unsigned long long) val);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
	/* be sure chip saw it */
	tmp = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
	/* need PLL reset clear for at least 11 usec before lane
	 * resets cleared; give it a few more to be sure */
	udelay(15);
	val &= ~(INFINIPATH_SERDC0_RESET_MASK | INFINIPATH_SERDC0_TXIDLE);

	ipath_cdbg(VERBOSE, "Clearing lane resets and txidle "
		   "(writing %llx)\n", (unsigned long long) val);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
	/* be sure chip saw it */
	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);

	/* set MDIO address to 3 and take XGXS out of reset; only rewrite
	 * the register if something actually needs changing */
	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
	if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) &
	     INFINIPATH_XGXS_MDIOADDR_MASK) != 3) {
		val &=
			~(INFINIPATH_XGXS_MDIOADDR_MASK <<
			  INFINIPATH_XGXS_MDIOADDR_SHIFT);
		/* MDIO address 3 */
		val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT;
		change = 1;
	}
	if (val & INFINIPATH_XGXS_RESET) {
		val &= ~INFINIPATH_XGXS_RESET;
		change = 1;
	}
	if (change)
		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);

	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);

	/* clear current and de-emphasis bits */
	config1 &= ~0x0ffffffff00ULL;
	/* set current to 20ma */
	config1 |= 0x00000000000ULL;
	/* set de-emphasis to -5.68dB */
	config1 |= 0x0cccc000000ULL;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1);

	ipath_cdbg(VERBOSE, "done: SerDes status config0=%llx "
		   "config1=%llx, sstatus=%llx xgxs=%llx\n",
		   (unsigned long long) val, (unsigned long long) config1,
		   (unsigned long long)
		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
		   (unsigned long long)
		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));

	/* sanity check: read XGXS status register 8 ('bank' 31) over MDIO;
	 * failures here are debug-logged only, not treated as errors */
	if (!ipath_waitfor_mdio_cmdready(dd)) {
		ipath_write_kreg(
			dd, dd->ipath_kregs->kr_mdio,
			ipath_mdio_req(IPATH_MDIO_CMD_READ, 31,
				       IPATH_MDIO_CTRL_XGXS_REG_8, 0));
		if (ipath_waitfor_complete(dd, dd->ipath_kregs->kr_mdio,
					   IPATH_MDIO_DATAVALID, &val))
			ipath_dbg("Never got MDIO data for XGXS "
				  "status read\n");
		else
			ipath_cdbg(VERBOSE, "MDIO Read reg8, "
				   "'bank' 31 %x\n", (u32) val);
	} else
		ipath_dbg("Never got MDIO cmdready for XGXS status read\n");

	return ret;
}
691
692/**
693 * ipath_pe_quiet_serdes - set serdes to txidle
694 * @dd: the infinipath device
695 * Called when driver is being unloaded
696 */
697static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
698{
699 u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
700
701 val |= INFINIPATH_SERDC0_TXIDLE;
702 ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
703 (unsigned long long) val);
704 ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
705}
706
/*
 * Chip-specific interrupt setup hook (dd->ipath_f_intrsetup).
 * This is not yet needed on the PE800, so just return 0 (success).
 */
static int ipath_pe_intconfig(struct ipath_devdata *dd)
{
	return 0;
}
712
713/**
714 * ipath_setup_pe_setextled - set the state of the two external LEDs
715 * @dd: the infinipath device
716 * @lst: the L state
717 * @ltst: the LT state
718
719 * These LEDs indicate the physical and logical state of IB link.
720 * For this chip (at least with recommended board pinouts), LED1
721 * is Yellow (logical state) and LED2 is Green (physical state),
722 *
723 * Note: We try to match the Mellanox HCA LED behavior as best
724 * we can. Green indicates physical link state is OK (something is
725 * plugged in, and we can train).
726 * Amber indicates the link is logically up (ACTIVE).
727 * Mellanox further blinks the amber LED to indicate data packet
728 * activity, but we have no hardware support for that, so it would
729 * require waking up every 10-20 msecs and checking the counters
730 * on the chip, and then turning the LED off if appropriate. That's
731 * visible overhead, so not something we will do.
732 *
733 */
734static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
735 u64 ltst)
736{
737 u64 extctl;
738
739 /* the diags use the LED to indicate diag info, so we leave
740 * the external LED alone when the diags are running */
741 if (ipath_diag_inuse)
742 return;
743
744 extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
745 INFINIPATH_EXTC_LED2PRIPORT_ON);
746
747 if (ltst & INFINIPATH_IBCS_LT_STATE_LINKUP)
748 extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
749 if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
750 extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
751 dd->ipath_extctrl = extctl;
752 ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
753}
754
/**
 * ipath_setup_pe_cleanup - clean up any per-chip chip-specific stuff
 * @dd: the infinipath device
 *
 * This is called during driver unload.
 * We do the pci_disable_msi here, not in generic code, because it
 * isn't used for the HT-400. If we do end up needing pci_enable_msi
 * at some point in the future for HT-400, we'll move the call back
 * into the main init_one code.
 */
static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
{
	/* just in case unload fails: ipath_reinit_msi() treats a zero
	 * ipath_msi_lo as "nothing to restore" */
	dd->ipath_msi_lo = 0;
	pci_disable_msi(dd->pcidev);
}
770
771/**
772 * ipath_setup_pe_config - setup PCIe config related stuff
773 * @dd: the infinipath device
774 * @pdev: the PCI device
775 *
776 * The pci_enable_msi() call will fail on systems with MSI quirks
777 * such as those with AMD8131, even if the device of interest is not
778 * attached to that device, (in the 2.6.13 - 2.6.15 kernels, at least, fixed
779 * late in 2.6.16).
780 * All that can be done is to edit the kernel source to remove the quirk
781 * check until that is fixed.
782 * We do not need to call enable_msi() for our HyperTransport chip (HT-400),
783 * even those it uses MSI, and we want to avoid the quirk warning, so
784 * So we call enable_msi only for the PE-800. If we do end up needing
785 * pci_enable_msi at some point in the future for HT-400, we'll move the
786 * call back into the main init_one code.
787 * We save the msi lo and hi values, so we can restore them after
788 * chip reset (the kernel PCI infrastructure doesn't yet handle that
789 * correctly).
790 */
791static int ipath_setup_pe_config(struct ipath_devdata *dd,
792 struct pci_dev *pdev)
793{
794 int pos, ret;
795
796 dd->ipath_msi_lo = 0; /* used as a flag during reset processing */
797 ret = pci_enable_msi(dd->pcidev);
798 if (ret)
799 ipath_dev_err(dd, "pci_enable_msi failed: %d, "
800 "interrupts may not work\n", ret);
801 /* continue even if it fails, we may still be OK... */
802
803 if ((pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) {
804 u16 control;
805 pci_read_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
806 &dd->ipath_msi_lo);
807 pci_read_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
808 &dd->ipath_msi_hi);
809 pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
810 &control);
811 /* now save the data (vector) info */
812 pci_read_config_word(dd->pcidev,
813 pos + ((control & PCI_MSI_FLAGS_64BIT)
814 ? 12 : 8),
815 &dd->ipath_msi_data);
816 ipath_cdbg(VERBOSE, "Read msi data 0x%x from config offset "
817 "0x%x, control=0x%x\n", dd->ipath_msi_data,
818 pos + ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
819 control);
820 /* we save the cachelinesize also, although it doesn't
821 * really matter */
822 pci_read_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE,
823 &dd->ipath_pci_cacheline);
824 } else
825 ipath_dev_err(dd, "Can't find MSI capability, "
826 "can't save MSI settings for reset\n");
827 if ((pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP))) {
828 u16 linkstat;
829 pci_read_config_word(dd->pcidev, pos + PCI_EXP_LNKSTA,
830 &linkstat);
831 linkstat >>= 4;
832 linkstat &= 0x1f;
833 if (linkstat != 8)
834 ipath_dev_err(dd, "PCIe width %u, "
835 "performance reduced\n", linkstat);
836 }
837 else
838 ipath_dev_err(dd, "Can't find PCI Express "
839 "capability!\n");
840 return 0;
841}
842
/*
 * Set the PE-800-specific values of the driver-global configuration
 * variables: GPIO pin assignments for the I2C EEPROM, and the "extant
 * bits" masks the generic code uses to sanity-check hardware-error,
 * interrupt, and error registers.
 */
static void ipath_init_pe_variables(void)
{
	/*
	 * bits for selecting i2c direction and values,
	 * used for I2C serial flash
	 */
	ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
	ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
	ipath_gpio_sda = IPATH_GPIO_SDA;
	ipath_gpio_scl = IPATH_GPIO_SCL;

	/* variables for sanity checking interrupt and errors */
	infinipath_hwe_bitsextant =
		(INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
		 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
		(INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
		 INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
		INFINIPATH_HWE_PCIE1PLLFAILED |
		INFINIPATH_HWE_PCIE0PLLFAILED |
		INFINIPATH_HWE_PCIEPOISONEDTLP |
		INFINIPATH_HWE_PCIECPLTIMEOUT |
		INFINIPATH_HWE_PCIEBUSPARITYXTLH |
		INFINIPATH_HWE_PCIEBUSPARITYXADM |
		INFINIPATH_HWE_PCIEBUSPARITYRADM |
		INFINIPATH_HWE_MEMBISTFAILED |
		INFINIPATH_HWE_COREPLL_FBSLIP |
		INFINIPATH_HWE_COREPLL_RFSLIP |
		INFINIPATH_HWE_SERDESPLLFAILED |
		INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
		INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
	infinipath_i_bitsextant =
		(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
		(INFINIPATH_I_RCVAVAIL_MASK <<
		 INFINIPATH_I_RCVAVAIL_SHIFT) |
		INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
		INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
	infinipath_e_bitsextant =
		INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
		INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
		INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
		INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
		INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
		INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
		INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
		INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
		INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
		INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
		INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
		INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
		INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
		INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
		INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
		INFINIPATH_E_HARDWARE;

	infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
	infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
}
900
901/* setup the MSI stuff again after a reset. I'd like to just call
902 * pci_enable_msi() and request_irq() again, but when I do that,
903 * the MSI enable bit doesn't get set in the command word, and
904 * we switch to to a different interrupt vector, which is confusing,
905 * so I instead just do it all inline. Perhaps somehow can tie this
906 * into the PCIe hotplug support at some point
907 * Note, because I'm doing it all here, I don't call pci_disable_msi()
908 * or free_irq() at the start of ipath_setup_pe_reset().
909 */
910static int ipath_reinit_msi(struct ipath_devdata *dd)
911{
912 int pos;
913 u16 control;
914 int ret;
915
916 if (!dd->ipath_msi_lo) {
917 dev_info(&dd->pcidev->dev, "Can't restore MSI config, "
918 "initial setup failed?\n");
919 ret = 0;
920 goto bail;
921 }
922
923 if (!(pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) {
924 ipath_dev_err(dd, "Can't find MSI capability, "
925 "can't restore MSI settings\n");
926 ret = 0;
927 goto bail;
928 }
929 ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
930 dd->ipath_msi_lo, pos + PCI_MSI_ADDRESS_LO);
931 pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
932 dd->ipath_msi_lo);
933 ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
934 dd->ipath_msi_hi, pos + PCI_MSI_ADDRESS_HI);
935 pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
936 dd->ipath_msi_hi);
937 pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control);
938 if (!(control & PCI_MSI_FLAGS_ENABLE)) {
939 ipath_cdbg(VERBOSE, "MSI control at off %x was %x, "
940 "setting MSI enable (%x)\n", pos + PCI_MSI_FLAGS,
941 control, control | PCI_MSI_FLAGS_ENABLE);
942 control |= PCI_MSI_FLAGS_ENABLE;
943 pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
944 control);
945 }
946 /* now rewrite the data (vector) info */
947 pci_write_config_word(dd->pcidev, pos +
948 ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
949 dd->ipath_msi_data);
950 /* we restore the cachelinesize also, although it doesn't really
951 * matter */
952 pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE,
953 dd->ipath_pci_cacheline);
954 /* and now set the pci master bit again */
955 pci_set_master(dd->pcidev);
956 ret = 1;
957
958bail:
959 return ret;
960}
961
/* This routine sleeps, so it can only be called from user context, not
 * from interrupt context. If we need interrupt context, we can split
 * it into two routines.
 *
 * Returns ipath_reinit_msi()'s result (1 on success) once the revision
 * register reads back correctly, or 0 if the chip never responds after
 * 5 tries.
 */
static int ipath_setup_pe_reset(struct ipath_devdata *dd)
{
	u64 val;
	int i;
	int ret;

	/* Use ERROR so it shows up in logs, etc. */
	ipath_dev_err(dd, "Resetting PE-800 unit %u\n",
		      dd->ipath_unit);
	/* set the chip-wide reset bit in the control register */
	val = dd->ipath_control | INFINIPATH_C_RESET;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_control, val);
	mb();	/* order the reset write before the config pokes below */

	for (i = 1; i <= 5; i++) {
		int r;
		/* allow MBIST, etc. to complete; longer on each retry.
		 * We sometimes get machine checks from bus timeout if no
		 * response, so for now, make it *really* long.
		 */
		msleep(1000 + (1 + i) * 2000);
		/* the reset wipes PCI config space: restore the BARs
		 * saved at probe time */
		if ((r =
		     pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
					    dd->ipath_pcibar0)))
			ipath_dev_err(dd, "rewrite of BAR0 failed: %d\n",
				      r);
		if ((r =
		     pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
					    dd->ipath_pcibar1)))
			ipath_dev_err(dd, "rewrite of BAR1 failed: %d\n",
				      r);
		/* now re-enable memory access */
		if ((r = pci_enable_device(dd->pcidev)))
			ipath_dev_err(dd, "pci_enable_device failed after "
				      "reset: %d\n", r);
		/* a matching revision register means the chip is back */
		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
		if (val == dd->ipath_revision) {
			ipath_cdbg(VERBOSE, "Got matching revision "
				   "register %llx on try %d\n",
				   (unsigned long long) val, i);
			ret = ipath_reinit_msi(dd);
			goto bail;
		}
		/* Probably getting -1 back */
		ipath_dbg("Didn't get expected revision register, "
			  "got %llx, try %d\n", (unsigned long long) val,
			  i + 1);
	}
	ret = 0;	/* failed */

bail:
	return ret;
}
1018
1019/**
1020 * ipath_pe_put_tid - write a TID in chip
1021 * @dd: the infinipath device
1022 * @tidptr: pointer to the expected TID (in chip) to udpate
1023 * @tidtype: 0 for eager, 1 for expected
1024 * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
1025 *
1026 * This exists as a separate routine to allow for special locking etc.
1027 * It's used for both the full cleanup on exit, as well as the normal
1028 * setup and teardown.
1029 */
1030static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
1031 u32 type, unsigned long pa)
1032{
1033 u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
1034 unsigned long flags = 0; /* keep gcc quiet */
1035
1036 if (pa != dd->ipath_tidinvalid) {
1037 if (pa & ((1U << 11) - 1)) {
1038 dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
1039 "not 4KB aligned!\n", pa);
1040 return;
1041 }
1042 pa >>= 11;
1043 /* paranoia check */
1044 if (pa & (7<<29))
1045 ipath_dev_err(dd,
1046 "BUG: Physical page address 0x%lx "
1047 "has bits set in 31-29\n", pa);
1048
1049 if (type == 0)
1050 pa |= dd->ipath_tidtemplate;
1051 else /* for now, always full 4KB page */
1052 pa |= 2 << 29;
1053 }
1054
1055 /* workaround chip bug 9437 by writing each TID twice
1056 * and holding a spinlock around the writes, so they don't
1057 * intermix with other TID (eager or expected) writes
1058 * Unfortunately, this call can be done from interrupt level
1059 * for the port 0 eager TIDs, so we have to use irqsave
1060 */
1061 spin_lock_irqsave(&dd->ipath_tid_lock, flags);
1062 ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeeddeaf);
1063 if (dd->ipath_kregbase)
1064 writel(pa, tidp32);
1065 ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xdeadbeef);
1066 mmiowb();
1067 spin_unlock_irqrestore(&dd->ipath_tid_lock, flags);
1068}
1069
1070/**
1071 * ipath_pe_clear_tid - clear all TID entries for a port, expected and eager
1072 * @dd: the infinipath device
1073 * @port: the port
1074 *
1075 * clear all TID entries for a port, expected and eager.
1076 * Used from ipath_close(). On PE800, TIDs are only 32 bits,
1077 * not 64, but they are still on 64 bit boundaries, so tidbase
1078 * is declared as u64 * for the pointer math, even though we write 32 bits
1079 */
1080static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port)
1081{
1082 u64 __iomem *tidbase;
1083 unsigned long tidinv;
1084 int i;
1085
1086 if (!dd->ipath_kregbase)
1087 return;
1088
1089 ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
1090
1091 tidinv = dd->ipath_tidinvalid;
1092 tidbase = (u64 __iomem *)
1093 ((char __iomem *)(dd->ipath_kregbase) +
1094 dd->ipath_rcvtidbase +
1095 port * dd->ipath_rcvtidcnt * sizeof(*tidbase));
1096
1097 for (i = 0; i < dd->ipath_rcvtidcnt; i++)
1098 ipath_pe_put_tid(dd, &tidbase[i], 0, tidinv);
1099
1100 tidbase = (u64 __iomem *)
1101 ((char __iomem *)(dd->ipath_kregbase) +
1102 dd->ipath_rcvegrbase +
1103 port * dd->ipath_rcvegrcnt * sizeof(*tidbase));
1104
1105 for (i = 0; i < dd->ipath_rcvegrcnt; i++)
1106 ipath_pe_put_tid(dd, &tidbase[i], 1, tidinv);
1107}
1108
1109/**
1110 * ipath_pe_tidtemplate - setup constants for TID updates
1111 * @dd: the infinipath device
1112 *
1113 * We setup stuff that we use a lot, to avoid calculating each time
1114 */
1115static void ipath_pe_tidtemplate(struct ipath_devdata *dd)
1116{
1117 u32 egrsize = dd->ipath_rcvegrbufsize;
1118
1119 /* For now, we always allocate 4KB buffers (at init) so we can
1120 * receive max size packets. We may want a module parameter to
1121 * specify 2KB or 4KB and/or make be per port instead of per device
1122 * for those who want to reduce memory footprint. Note that the
1123 * ipath_rcvhdrentsize size must be large enough to hold the largest
1124 * IB header (currently 96 bytes) that we expect to handle (plus of
1125 * course the 2 dwords of RHF).
1126 */
1127 if (egrsize == 2048)
1128 dd->ipath_tidtemplate = 1U << 29;
1129 else if (egrsize == 4096)
1130 dd->ipath_tidtemplate = 2U << 29;
1131 else {
1132 egrsize = 4096;
1133 dev_info(&dd->pcidev->dev, "BUG: unsupported egrbufsize "
1134 "%u, using %u\n", dd->ipath_rcvegrbufsize,
1135 egrsize);
1136 dd->ipath_tidtemplate = 2U << 29;
1137 }
1138 dd->ipath_tidinvalid = 0;
1139}
1140
/*
 * Early chip-specific init for the PE-800: sets the 4-byte TID flag,
 * receive header and eager buffer sizing, and the receive-interrupt
 * threshold.  Always returns 0.
 */
static int ipath_pe_early_init(struct ipath_devdata *dd)
{
	dd->ipath_flags |= IPATH_4BYTE_TID;

	/*
	 * For openib, we need to be able to handle an IB header of 96 bytes
	 * or 24 dwords. HT-400 has arbitrary sized receive buffers, so we
	 * made them the same size as the PIO buffers. The PE-800 does not
	 * handle arbitrary size buffers, so we need the header large enough
	 * to handle largest IB header, but still have room for a 2KB MTU
	 * standard IB packet.
	 */
	dd->ipath_rcvhdrentsize = 24;
	dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;

	/* For HT-400, we allocate a somewhat overly large eager buffer,
	 * such that we can guarantee that we can receive the largest packet
	 * that we can send out. To truly support a 4KB MTU, we need to
	 * bump this to a larger value. We'll do this when I get around to
	 * testing 4KB sends on the PE-800, which I have not yet done.
	 */
	dd->ipath_rcvegrbufsize = 2048;
	/*
	 * the min() check here is currently a nop, but it may not always
	 * be, depending on just how we do ipath_rcvegrbufsize
	 */
	dd->ipath_ibmaxlen = min(dd->ipath_piosize2k,
				 dd->ipath_rcvegrbufsize +
				 (dd->ipath_rcvhdrentsize << 2));
	dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;

	/*
	 * For PE-800, we can request a receive interrupt for 1 or
	 * more packets from current offset. For now, we set this
	 * up for a single packet, to match the HT-400 behavior.
	 */
	dd->ipath_rhdrhead_intr_off = 1ULL<<32;

	return 0;
}
1181
/*
 * Weak default: report that write-combining stores are NOT reordered.
 * Declared __attribute__((weak)) so an architecture-specific definition
 * elsewhere can override it to return nonzero (see the caller in
 * ipath_pe_get_base_info(), which then forces strict WC ordering).
 */
int __attribute__((weak)) ipath_unordered_wc(void)
{
	return 0;
}
1186
/**
 * ipath_pe_get_base_info - set chip-specific flags for user code
 * @pd: the infinipath port data
 * @kbase: ipath_base_info pointer
 *
 * We set the PCIE flag because the lower bandwidth on PCIe vs
 * HyperTransport can affect some user packet algorithms.
 * (Kernel-doc name and parameters corrected: the comment previously
 * documented "ipath_init_pe_get_base_info" with a @dd parameter.)
 */
static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
{
	struct ipath_base_info *kinfo = kbase;

	if (ipath_unordered_wc()) {
		/* CPU may reorder write-combining stores: tell user code
		 * to force ordering */
		kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER;
		ipath_cdbg(PROC, "Intel processor, forcing WC order\n");
	}
	else
		ipath_cdbg(PROC, "Not Intel processor, WC ordered\n");

	kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;

	return 0;
}
1210
/**
 * ipath_init_pe800_funcs - set up the chip-specific function pointers
 * @dd: the infinipath device
 *
 * This is global, and is called directly at init to set up the
 * chip-specific function pointers for later use.
 */
void ipath_init_pe800_funcs(struct ipath_devdata *dd)
{
	/* hook every chip-specific operation defined in this file into
	 * the generic driver's dispatch table */
	dd->ipath_f_intrsetup = ipath_pe_intconfig;
	dd->ipath_f_bus = ipath_setup_pe_config;
	dd->ipath_f_reset = ipath_setup_pe_reset;
	dd->ipath_f_get_boardname = ipath_pe_boardname;
	dd->ipath_f_init_hwerrors = ipath_pe_init_hwerrors;
	dd->ipath_f_early_init = ipath_pe_early_init;
	dd->ipath_f_handle_hwerrors = ipath_pe_handle_hwerrors;
	dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes;
	dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes;
	dd->ipath_f_clear_tids = ipath_pe_clear_tids;
	dd->ipath_f_put_tid = ipath_pe_put_tid;
	dd->ipath_f_cleanup = ipath_setup_pe_cleanup;
	dd->ipath_f_setextled = ipath_setup_pe_setextled;
	dd->ipath_f_get_base_info = ipath_pe_get_base_info;

	/* initialize chip-specific variables */
	dd->ipath_f_tidtemplate = ipath_pe_tidtemplate;

	/*
	 * setup the register offsets, since they are different for each
	 * chip
	 */
	dd->ipath_kregs = &ipath_pe_kregs;
	dd->ipath_cregs = &ipath_pe_cregs;

	/* set the file-global GPIO/mask variables for this chip */
	ipath_init_pe_variables();
}
1247
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
new file mode 100644
index 000000000000..18890716db1e
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -0,0 +1,913 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/err.h>
34#include <linux/vmalloc.h>
35
36#include "ipath_verbs.h"
37#include "ips_common.h"
38
/* QPN bitmap bookkeeping: one bit per QPN, one page of bits per map entry */
#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE)
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
/* recover a QPN from a map entry pointer plus a bit offset within it */
#define mk_qpn(qpt, map, off) (((map) - (qpt)->map) * BITS_PER_PAGE + \
			       (off))
/* next zero (free) bit in a map page, starting at off */
#define find_next_offset(map, off) find_next_zero_bit((map)->page, \
						      BITS_PER_PAGE, off)

/* QP state-transition identifiers (RESET/INIT/RTR/RTS/SQD/SQErr/ERR) */
#define TRANS_INVALID 0
#define TRANS_ANY2RST 1
#define TRANS_RST2INIT 2
#define TRANS_INIT2INIT 3
#define TRANS_INIT2RTR 4
#define TRANS_RTR2RTS 5
#define TRANS_RTS2RTS 6
#define TRANS_SQERR2RTS 7
#define TRANS_ANY2ERR 8
#define TRANS_RTS2SQD 9		/* XXX Wait for expected ACKs & signal event */
#define TRANS_SQD2SQD 10	/* error if not drained & parameter change */
#define TRANS_SQD2RTS 11	/* error if not drained */
58
/*
 * Convert the AETH credit code into the number of credits.
 * Indexed by the 5-bit credit code; only codes 0x00-0x1E appear here
 * (31 entries), so code 0x1F is presumably handled as a special case
 * by the callers -- TODO confirm against the AETH handling code.
 */
static u32 credit_table[31] = {
	0,			/* 0 */
	1,			/* 1 */
	2,			/* 2 */
	3,			/* 3 */
	4,			/* 4 */
	6,			/* 5 */
	8,			/* 6 */
	12,			/* 7 */
	16,			/* 8 */
	24,			/* 9 */
	32,			/* A */
	48,			/* B */
	64,			/* C */
	96,			/* D */
	128,			/* E */
	192,			/* F */
	256,			/* 10 */
	384,			/* 11 */
	512,			/* 12 */
	768,			/* 13 */
	1024,			/* 14 */
	1536,			/* 15 */
	2048,			/* 16 */
	3072,			/* 17 */
	4096,			/* 18 */
	6144,			/* 19 */
	8192,			/* 1A */
	12288,			/* 1B */
	16384,			/* 1C */
	24576,			/* 1D */
	32768			/* 1E */
};
95
/*
 * Allocate the next available QPN from the bitmap, starting just past
 * the last QPN handed out.  QPNs 0 and 1 are reserved (SMI/GSI), so the
 * search wraps to 2, not 0.  Returns the QPN, or 0 on failure (all maps
 * scanned with no free bit, or page allocation failed).
 */
static u32 alloc_qpn(struct ipath_qp_table *qpt)
{
	u32 i, offset, max_scan, qpn;
	struct qpn_map *map;
	u32 ret;

	qpn = qpt->last + 1;
	if (qpn >= QPN_MAX)
		qpn = 2;
	offset = qpn & BITS_PER_PAGE_MASK;
	map = &qpt->map[qpn / BITS_PER_PAGE];
	max_scan = qpt->nmaps - !offset;
	for (i = 0;;) {
		/* lazily allocate the bitmap page for this map entry */
		if (unlikely(!map->page)) {
			unsigned long page = get_zeroed_page(GFP_KERNEL);
			unsigned long flags;

			/*
			 * Free the page if someone raced with us
			 * installing it:
			 */
			spin_lock_irqsave(&qpt->lock, flags);
			if (map->page)
				free_page(page);
			else
				map->page = (void *)page;
			spin_unlock_irqrestore(&qpt->lock, flags);
			if (unlikely(!map->page))
				break;
		}
		if (likely(atomic_read(&map->n_free))) {
			do {
				/* atomically claim the bit; winner owns
				 * this QPN */
				if (!test_and_set_bit(offset, map->page)) {
					atomic_dec(&map->n_free);
					qpt->last = qpn;
					ret = qpn;
					goto bail;
				}
				offset = find_next_offset(map, offset);
				qpn = mk_qpn(qpt, map, offset);
				/*
				 * This test differs from alloc_pidmap().
				 * If find_next_offset() does find a zero
				 * bit, we don't need to check for QPN
				 * wrapping around past our starting QPN.
				 * We just need to be sure we don't loop
				 * forever.
				 */
			} while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
		}
		/*
		 * In order to keep the number of pages allocated to a
		 * minimum, we scan the all existing pages before increasing
		 * the size of the bitmap table.
		 */
		if (++i > max_scan) {
			if (qpt->nmaps == QPNMAP_ENTRIES)
				break;
			map = &qpt->map[qpt->nmaps++];
			offset = 0;
		} else if (map < &qpt->map[qpt->nmaps]) {
			++map;
			offset = 0;
		} else {
			/* wrap to the first map; skip reserved QPNs 0/1 */
			map = &qpt->map[0];
			offset = 2;
		}
		qpn = mk_qpn(qpt, map, offset);
	}

	ret = 0;

bail:
	return ret;
}
171
172static void free_qpn(struct ipath_qp_table *qpt, u32 qpn)
173{
174 struct qpn_map *map;
175
176 map = qpt->map + qpn / BITS_PER_PAGE;
177 if (map->page)
178 clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
179 atomic_inc(&map->n_free);
180}
181
182/**
183 * ipath_alloc_qpn - allocate a QP number
184 * @qpt: the QP table
185 * @qp: the QP
186 * @type: the QP type (IB_QPT_SMI and IB_QPT_GSI are special)
187 *
188 * Allocate the next available QPN and put the QP into the hash table.
189 * The hash table holds a reference to the QP.
190 */
191static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
192 enum ib_qp_type type)
193{
194 unsigned long flags;
195 u32 qpn;
196 int ret;
197
198 if (type == IB_QPT_SMI)
199 qpn = 0;
200 else if (type == IB_QPT_GSI)
201 qpn = 1;
202 else {
203 /* Allocate the next available QPN */
204 qpn = alloc_qpn(qpt);
205 if (qpn == 0) {
206 ret = -ENOMEM;
207 goto bail;
208 }
209 }
210 qp->ibqp.qp_num = qpn;
211
212 /* Add the QP to the hash table. */
213 spin_lock_irqsave(&qpt->lock, flags);
214
215 qpn %= qpt->max;
216 qp->next = qpt->table[qpn];
217 qpt->table[qpn] = qp;
218 atomic_inc(&qp->refcount);
219
220 spin_unlock_irqrestore(&qpt->lock, flags);
221 ret = 0;
222
223bail:
224 return ret;
225}
226
/**
 * ipath_free_qp - remove a QP from the QP table
 * @qpt: the QP table
 * @qp: the QP to remove
 *
 * Remove the QP from the table so it can't be found asynchronously by
 * the receive interrupt routine.  Sleeps until all outstanding
 * references have been dropped, so it must be called from a context
 * that can block.
 */
static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
{
	struct ipath_qp *q, **qpp;
	unsigned long flags;
	int fnd = 0;

	spin_lock_irqsave(&qpt->lock, flags);

	/* Remove QP from the hash table (unlink from its bucket chain,
	 * dropping the table's reference). */
	qpp = &qpt->table[qp->ibqp.qp_num % qpt->max];
	for (; (q = *qpp) != NULL; qpp = &q->next) {
		if (q == qp) {
			*qpp = qp->next;
			qp->next = NULL;
			atomic_dec(&qp->refcount);
			fnd = 1;
			break;
		}
	}

	spin_unlock_irqrestore(&qpt->lock, flags);

	if (!fnd)
		return;

	/* If QPN is not reserved, mark QPN free in the bitmap. */
	if (qp->ibqp.qp_num > 1)
		free_qpn(qpt, qp->ibqp.qp_num);

	/* wait for any lookups still holding a reference to finish */
	wait_event(qp->wait, !atomic_read(&qp->refcount));
}
266
267/**
268 * ipath_free_all_qps - remove all QPs from the table
269 * @qpt: the QP table to empty
270 */
271void ipath_free_all_qps(struct ipath_qp_table *qpt)
272{
273 unsigned long flags;
274 struct ipath_qp *qp, *nqp;
275 u32 n;
276
277 for (n = 0; n < qpt->max; n++) {
278 spin_lock_irqsave(&qpt->lock, flags);
279 qp = qpt->table[n];
280 qpt->table[n] = NULL;
281 spin_unlock_irqrestore(&qpt->lock, flags);
282
283 while (qp) {
284 nqp = qp->next;
285 if (qp->ibqp.qp_num > 1)
286 free_qpn(qpt, qp->ibqp.qp_num);
287 if (!atomic_dec_and_test(&qp->refcount) ||
288 !ipath_destroy_qp(&qp->ibqp))
289 _VERBS_INFO("QP memory leak!\n");
290 qp = nqp;
291 }
292 }
293
294 for (n = 0; n < ARRAY_SIZE(qpt->map); n++) {
295 if (qpt->map[n].page)
296 free_page((unsigned long)qpt->map[n].page);
297 }
298}
299
/**
 * ipath_lookup_qpn - return the QP with the given QPN
 * @qpt: the QP table
 * @qpn: the QP number to look up
 *
 * The caller is responsible for decrementing the QP reference count
 * when done.
 *
 * Returns the QP with its reference count incremented, or NULL if no
 * QP with that number is in the table.
 */
struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn)
{
	unsigned long flags;
	struct ipath_qp *qp;

	spin_lock_irqsave(&qpt->lock, flags);

	/* Walk the collision chain of the hash bucket for this QPN. */
	for (qp = qpt->table[qpn % qpt->max]; qp; qp = qp->next) {
		if (qp->ibqp.qp_num == qpn) {
			/* Reference taken on behalf of the caller. */
			atomic_inc(&qp->refcount);
			break;
		}
	}

	spin_unlock_irqrestore(&qpt->lock, flags);
	return qp;
}
325
326/**
327 * ipath_reset_qp - initialize the QP state to the reset state
328 * @qp: the QP to reset
329 */
330static void ipath_reset_qp(struct ipath_qp *qp)
331{
332 qp->remote_qpn = 0;
333 qp->qkey = 0;
334 qp->qp_access_flags = 0;
335 qp->s_hdrwords = 0;
336 qp->s_psn = 0;
337 qp->r_psn = 0;
338 atomic_set(&qp->msn, 0);
339 if (qp->ibqp.qp_type == IB_QPT_RC) {
340 qp->s_state = IB_OPCODE_RC_SEND_LAST;
341 qp->r_state = IB_OPCODE_RC_SEND_LAST;
342 } else {
343 qp->s_state = IB_OPCODE_UC_SEND_LAST;
344 qp->r_state = IB_OPCODE_UC_SEND_LAST;
345 }
346 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
347 qp->s_nak_state = 0;
348 qp->s_rnr_timeout = 0;
349 qp->s_head = 0;
350 qp->s_tail = 0;
351 qp->s_cur = 0;
352 qp->s_last = 0;
353 qp->s_ssn = 1;
354 qp->s_lsn = 0;
355 qp->r_rq.head = 0;
356 qp->r_rq.tail = 0;
357 qp->r_reuse_sge = 0;
358}
359
/**
 * ipath_error_qp - put a QP into an error state
 * @qp: the QP to put into an error state
 *
 * Flushes both send and receive work queues.
 * QP r_rq.lock and s_lock should be held.
 */

static void ipath_error_qp(struct ipath_qp *qp)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ib_wc wc;

	_VERBS_INFO("QP%d/%d in error state\n",
		    qp->ibqp.qp_num, qp->remote_qpn);

	/* Take the QP off the timeout and PIO-wait lists if queued
	 * (LIST_POISON1 marks an unqueued entry in this driver). */
	spin_lock(&dev->pending_lock);
	/* XXX What if its already removed by the timeout code? */
	if (qp->timerwait.next != LIST_POISON1)
		list_del(&qp->timerwait);
	if (qp->piowait.next != LIST_POISON1)
		list_del(&qp->piowait);
	spin_unlock(&dev->pending_lock);

	/* Fields common to every flush completion generated below. */
	wc.status = IB_WC_WR_FLUSH_ERR;
	wc.vendor_err = 0;
	wc.byte_len = 0;
	wc.imm_data = 0;
	wc.qp_num = qp->ibqp.qp_num;
	wc.src_qp = 0;
	wc.wc_flags = 0;
	wc.pkey_index = 0;
	wc.slid = 0;
	wc.sl = 0;
	wc.dlid_path_bits = 0;
	wc.port_num = 0;

	/* Flush every outstanding send WQE to the send CQ. */
	while (qp->s_last != qp->s_head) {
		struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);

		wc.wr_id = wqe->wr.wr_id;
		wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
		if (++qp->s_last >= qp->s_size)
			qp->s_last = 0;
		ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
	}
	qp->s_cur = qp->s_tail = qp->s_head;
	qp->s_hdrwords = 0;
	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;

	/* Flush every posted receive WQE to the receive CQ. */
	wc.opcode = IB_WC_RECV;
	while (qp->r_rq.tail != qp->r_rq.head) {
		wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id;
		if (++qp->r_rq.tail >= qp->r_rq.size)
			qp->r_rq.tail = 0;
		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
	}
}
418
419/**
420 * ipath_modify_qp - modify the attributes of a queue pair
421 * @ibqp: the queue pair who's attributes we're modifying
422 * @attr: the new attributes
423 * @attr_mask: the mask of attributes to modify
424 *
425 * Returns 0 on success, otherwise returns an errno.
426 */
427int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
428 int attr_mask)
429{
430 struct ipath_qp *qp = to_iqp(ibqp);
431 enum ib_qp_state cur_state, new_state;
432 unsigned long flags;
433 int ret;
434
435 spin_lock_irqsave(&qp->r_rq.lock, flags);
436 spin_lock(&qp->s_lock);
437
438 cur_state = attr_mask & IB_QP_CUR_STATE ?
439 attr->cur_qp_state : qp->state;
440 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
441
442 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
443 attr_mask))
444 goto inval;
445
446 switch (new_state) {
447 case IB_QPS_RESET:
448 ipath_reset_qp(qp);
449 break;
450
451 case IB_QPS_ERR:
452 ipath_error_qp(qp);
453 break;
454
455 default:
456 break;
457
458 }
459
460 if (attr_mask & IB_QP_PKEY_INDEX) {
461 struct ipath_ibdev *dev = to_idev(ibqp->device);
462
463 if (attr->pkey_index >= ipath_layer_get_npkeys(dev->dd))
464 goto inval;
465 qp->s_pkey_index = attr->pkey_index;
466 }
467
468 if (attr_mask & IB_QP_DEST_QPN)
469 qp->remote_qpn = attr->dest_qp_num;
470
471 if (attr_mask & IB_QP_SQ_PSN) {
472 qp->s_next_psn = attr->sq_psn;
473 qp->s_last_psn = qp->s_next_psn - 1;
474 }
475
476 if (attr_mask & IB_QP_RQ_PSN)
477 qp->r_psn = attr->rq_psn;
478
479 if (attr_mask & IB_QP_ACCESS_FLAGS)
480 qp->qp_access_flags = attr->qp_access_flags;
481
482 if (attr_mask & IB_QP_AV) {
483 if (attr->ah_attr.dlid == 0 ||
484 attr->ah_attr.dlid >= IPS_MULTICAST_LID_BASE)
485 goto inval;
486 qp->remote_ah_attr = attr->ah_attr;
487 }
488
489 if (attr_mask & IB_QP_PATH_MTU)
490 qp->path_mtu = attr->path_mtu;
491
492 if (attr_mask & IB_QP_RETRY_CNT)
493 qp->s_retry = qp->s_retry_cnt = attr->retry_cnt;
494
495 if (attr_mask & IB_QP_RNR_RETRY) {
496 qp->s_rnr_retry = attr->rnr_retry;
497 if (qp->s_rnr_retry > 7)
498 qp->s_rnr_retry = 7;
499 qp->s_rnr_retry_cnt = qp->s_rnr_retry;
500 }
501
502 if (attr_mask & IB_QP_MIN_RNR_TIMER) {
503 if (attr->min_rnr_timer > 31)
504 goto inval;
505 qp->s_min_rnr_timer = attr->min_rnr_timer;
506 }
507
508 if (attr_mask & IB_QP_QKEY)
509 qp->qkey = attr->qkey;
510
511 if (attr_mask & IB_QP_PKEY_INDEX)
512 qp->s_pkey_index = attr->pkey_index;
513
514 qp->state = new_state;
515 spin_unlock(&qp->s_lock);
516 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
517
518 /*
519 * If QP1 changed to the RTS state, try to move to the link to INIT
520 * even if it was ACTIVE so the SM will reinitialize the SMA's
521 * state.
522 */
523 if (qp->ibqp.qp_num == 1 && new_state == IB_QPS_RTS) {
524 struct ipath_ibdev *dev = to_idev(ibqp->device);
525
526 ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
527 }
528 ret = 0;
529 goto bail;
530
531inval:
532 spin_unlock(&qp->s_lock);
533 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
534 ret = -EINVAL;
535
536bail:
537 return ret;
538}
539
540int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
541 int attr_mask, struct ib_qp_init_attr *init_attr)
542{
543 struct ipath_qp *qp = to_iqp(ibqp);
544
545 attr->qp_state = qp->state;
546 attr->cur_qp_state = attr->qp_state;
547 attr->path_mtu = qp->path_mtu;
548 attr->path_mig_state = 0;
549 attr->qkey = qp->qkey;
550 attr->rq_psn = qp->r_psn;
551 attr->sq_psn = qp->s_next_psn;
552 attr->dest_qp_num = qp->remote_qpn;
553 attr->qp_access_flags = qp->qp_access_flags;
554 attr->cap.max_send_wr = qp->s_size - 1;
555 attr->cap.max_recv_wr = qp->r_rq.size - 1;
556 attr->cap.max_send_sge = qp->s_max_sge;
557 attr->cap.max_recv_sge = qp->r_rq.max_sge;
558 attr->cap.max_inline_data = 0;
559 attr->ah_attr = qp->remote_ah_attr;
560 memset(&attr->alt_ah_attr, 0, sizeof(attr->alt_ah_attr));
561 attr->pkey_index = qp->s_pkey_index;
562 attr->alt_pkey_index = 0;
563 attr->en_sqd_async_notify = 0;
564 attr->sq_draining = 0;
565 attr->max_rd_atomic = 1;
566 attr->max_dest_rd_atomic = 1;
567 attr->min_rnr_timer = qp->s_min_rnr_timer;
568 attr->port_num = 1;
569 attr->timeout = 0;
570 attr->retry_cnt = qp->s_retry_cnt;
571 attr->rnr_retry = qp->s_rnr_retry;
572 attr->alt_port_num = 0;
573 attr->alt_timeout = 0;
574
575 init_attr->event_handler = qp->ibqp.event_handler;
576 init_attr->qp_context = qp->ibqp.qp_context;
577 init_attr->send_cq = qp->ibqp.send_cq;
578 init_attr->recv_cq = qp->ibqp.recv_cq;
579 init_attr->srq = qp->ibqp.srq;
580 init_attr->cap = attr->cap;
581 init_attr->sq_sig_type =
582 (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
583 ? IB_SIGNAL_REQ_WR : 0;
584 init_attr->qp_type = qp->ibqp.qp_type;
585 init_attr->port_num = 1;
586 return 0;
587}
588
/**
 * ipath_compute_aeth - compute the AETH (syndrome + MSN)
 * @qp: the queue pair to compute the AETH for
 *
 * Returns the AETH in network byte order.
 *
 * The QP s_lock should be held.
 */
__be32 ipath_compute_aeth(struct ipath_qp *qp)
{
	/* The low bits of the AETH carry the message sequence number. */
	u32 aeth = atomic_read(&qp->msn) & IPS_MSN_MASK;

	if (qp->s_nak_state) {
		/* A pending NAK code overrides the credit field. */
		aeth |= qp->s_nak_state << IPS_AETH_CREDIT_SHIFT;
	} else if (qp->ibqp.srq) {
		/*
		 * Shared receive queues don't generate credits.
		 * Set the credit field to the invalid value.
		 */
		aeth |= IPS_AETH_CREDIT_INVAL << IPS_AETH_CREDIT_SHIFT;
	} else {
		u32 min, max, x;
		u32 credits;

		/*
		 * Compute the number of credits available (RWQEs).
		 * XXX Not holding the r_rq.lock here so there is a small
		 * chance that the pair of reads are not atomic.
		 */
		credits = qp->r_rq.head - qp->r_rq.tail;
		if ((int)credits < 0)
			credits += qp->r_rq.size;
		/*
		 * Binary search the credit table to find the code to
		 * use.  Converges on the largest index whose table
		 * entry does not exceed the available credit count.
		 */
		min = 0;
		max = 31;
		for (;;) {
			x = (min + max) / 2;
			if (credit_table[x] == credits)
				break;
			if (credit_table[x] > credits)
				max = x;
			else if (min == x)
				break;
			else
				min = x;
		}
		aeth |= x << IPS_AETH_CREDIT_SHIFT;
	}
	return cpu_to_be32(aeth);
}
642
/**
 * ipath_create_qp - create a queue pair for a device
 * @ibpd: the protection domain who's device we create the queue pair for
 * @init_attr: the attributes of the queue pair
 * @udata: unused by InfiniPath
 *
 * Returns the queue pair on success, otherwise returns an errno
 * wrapped with ERR_PTR().
 *
 * Called by the ib_create_qp() core verbs function.
 */
struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
			      struct ib_qp_init_attr *init_attr,
			      struct ib_udata *udata)
{
	struct ipath_qp *qp;
	int err;
	struct ipath_swqe *swq = NULL;
	struct ipath_ibdev *dev;
	size_t sz;
	struct ib_qp *ret;

	/* NOTE(review): the 255 cap presumably matches an 8-bit SGE
	 * count field somewhere - confirm before changing. */
	if (init_attr->cap.max_send_sge > 255 ||
	    init_attr->cap.max_recv_sge > 255) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	switch (init_attr->qp_type) {
	case IB_QPT_UC:
	case IB_QPT_RC:
		/* Each send WQE carries its SGE array inline after it. */
		sz = sizeof(struct ipath_sge) *
			init_attr->cap.max_send_sge +
			sizeof(struct ipath_swqe);
		/* One extra slot distinguishes a full ring from empty. */
		swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
		if (swq == NULL) {
			ret = ERR_PTR(-ENOMEM);
			goto bail;
		}
		/* FALLTHROUGH */
	case IB_QPT_UD:
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		qp = kmalloc(sizeof(*qp), GFP_KERNEL);
		if (!qp) {
			ret = ERR_PTR(-ENOMEM);
			goto bail;
		}
		/* Receive ring also keeps one extra slot (see above). */
		qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
		sz = sizeof(struct ipath_sge) *
			init_attr->cap.max_recv_sge +
			sizeof(struct ipath_rwqe);
		qp->r_rq.wq = vmalloc(qp->r_rq.size * sz);
		if (!qp->r_rq.wq) {
			kfree(qp);
			ret = ERR_PTR(-ENOMEM);
			goto bail;
		}

		/*
		 * ib_create_qp() will initialize qp->ibqp
		 * except for qp->ibqp.qp_num.
		 */
		spin_lock_init(&qp->s_lock);
		spin_lock_init(&qp->r_rq.lock);
		atomic_set(&qp->refcount, 0);
		init_waitqueue_head(&qp->wait);
		/* RC and UC/UD QPs use different send engines. */
		tasklet_init(&qp->s_task,
			     init_attr->qp_type == IB_QPT_RC ?
			     ipath_do_rc_send : ipath_do_uc_send,
			     (unsigned long)qp);
		/* Poison values mark the wait entries as "not queued";
		 * list users in this driver test for LIST_POISON1. */
		qp->piowait.next = LIST_POISON1;
		qp->piowait.prev = LIST_POISON2;
		qp->timerwait.next = LIST_POISON1;
		qp->timerwait.prev = LIST_POISON2;
		qp->state = IB_QPS_RESET;
		qp->s_wq = swq;
		qp->s_size = init_attr->cap.max_send_wr + 1;
		qp->s_max_sge = init_attr->cap.max_send_sge;
		qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
		qp->s_flags = init_attr->sq_sig_type == IB_SIGNAL_REQ_WR ?
			1 << IPATH_S_SIGNAL_REQ_WR : 0;
		dev = to_idev(ibpd->device);
		err = ipath_alloc_qpn(&dev->qp_table, qp,
				      init_attr->qp_type);
		if (err) {
			/* Unwind all three allocations above. */
			vfree(swq);
			vfree(qp->r_rq.wq);
			kfree(qp);
			ret = ERR_PTR(err);
			goto bail;
		}
		ipath_reset_qp(qp);

		/* Tell the core driver that the kernel SMA is present. */
		if (qp->ibqp.qp_type == IB_QPT_SMI)
			ipath_layer_set_verbs_flags(dev->dd,
						    IPATH_VERBS_KERNEL_SMA);
		break;

	default:
		/* Don't support raw QPs */
		ret = ERR_PTR(-ENOSYS);
		goto bail;
	}

	init_attr->cap.max_inline_data = 0;

	ret = &qp->ibqp;

bail:
	return ret;
}
755
/**
 * ipath_destroy_qp - destroy a queue pair
 * @ibqp: the queue pair to destroy
 *
 * Returns 0 on success.
 *
 * Note that this can be called while the QP is actively sending or
 * receiving!
 */
int ipath_destroy_qp(struct ib_qp *ibqp)
{
	struct ipath_qp *qp = to_iqp(ibqp);
	struct ipath_ibdev *dev = to_idev(ibqp->device);
	unsigned long flags;

	/* Tell the core driver that the kernel SMA is gone. */
	if (qp->ibqp.qp_type == IB_QPT_SMI)
		ipath_layer_set_verbs_flags(dev->dd, 0);

	/* Move the QP to the error state so no new work is started. */
	spin_lock_irqsave(&qp->r_rq.lock, flags);
	spin_lock(&qp->s_lock);
	qp->state = IB_QPS_ERR;
	spin_unlock(&qp->s_lock);
	spin_unlock_irqrestore(&qp->r_rq.lock, flags);

	/* Stop the sending tasklet. */
	tasklet_kill(&qp->s_task);

	/* Make sure the QP isn't on the timeout list. */
	spin_lock_irqsave(&dev->pending_lock, flags);
	if (qp->timerwait.next != LIST_POISON1)
		list_del(&qp->timerwait);
	if (qp->piowait.next != LIST_POISON1)
		list_del(&qp->piowait);
	spin_unlock_irqrestore(&dev->pending_lock, flags);

	/*
	 * Make sure that the QP is not in the QPN table so receive
	 * interrupts will discard packets for this QP. XXX Also remove QP
	 * from multicast table.
	 */
	if (atomic_read(&qp->refcount) != 0)
		ipath_free_qp(&dev->qp_table, qp);

	/* ipath_free_qp() (when taken) waits for every outstanding
	 * reference to drop, so the queue memory is safe to free. */
	vfree(qp->s_wq);
	vfree(qp->r_rq.wq);
	kfree(qp);
	return 0;
}
805
806/**
807 * ipath_init_qp_table - initialize the QP table for a device
808 * @idev: the device who's QP table we're initializing
809 * @size: the size of the QP table
810 *
811 * Returns 0 on success, otherwise returns an errno.
812 */
813int ipath_init_qp_table(struct ipath_ibdev *idev, int size)
814{
815 int i;
816 int ret;
817
818 idev->qp_table.last = 1; /* QPN 0 and 1 are special. */
819 idev->qp_table.max = size;
820 idev->qp_table.nmaps = 1;
821 idev->qp_table.table = kzalloc(size * sizeof(*idev->qp_table.table),
822 GFP_KERNEL);
823 if (idev->qp_table.table == NULL) {
824 ret = -ENOMEM;
825 goto bail;
826 }
827
828 for (i = 0; i < ARRAY_SIZE(idev->qp_table.map); i++) {
829 atomic_set(&idev->qp_table.map[i].n_free, BITS_PER_PAGE);
830 idev->qp_table.map[i].page = NULL;
831 }
832
833 ret = 0;
834
835bail:
836 return ret;
837}
838
839/**
840 * ipath_sqerror_qp - put a QP's send queue into an error state
841 * @qp: QP who's send queue will be put into an error state
842 * @wc: the WC responsible for putting the QP in this state
843 *
844 * Flushes the send work queue.
845 * The QP s_lock should be held.
846 */
847
848void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
849{
850 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
851 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
852
853 _VERBS_INFO("Send queue error on QP%d/%d: err: %d\n",
854 qp->ibqp.qp_num, qp->remote_qpn, wc->status);
855
856 spin_lock(&dev->pending_lock);
857 /* XXX What if its already removed by the timeout code? */
858 if (qp->timerwait.next != LIST_POISON1)
859 list_del(&qp->timerwait);
860 if (qp->piowait.next != LIST_POISON1)
861 list_del(&qp->piowait);
862 spin_unlock(&dev->pending_lock);
863
864 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
865 if (++qp->s_last >= qp->s_size)
866 qp->s_last = 0;
867
868 wc->status = IB_WC_WR_FLUSH_ERR;
869
870 while (qp->s_last != qp->s_head) {
871 wc->wr_id = wqe->wr.wr_id;
872 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
873 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
874 if (++qp->s_last >= qp->s_size)
875 qp->s_last = 0;
876 wqe = get_swqe_ptr(qp, qp->s_last);
877 }
878 qp->s_cur = qp->s_tail = qp->s_head;
879 qp->state = IB_QPS_SQE;
880}
881
/**
 * ipath_get_credit - update the send credit limit from a received AETH
 * @qp: the qp whose credit state to update
 * @aeth: the Acknowledge Extended Transport Header
 *
 * Advances the limit sequence number (s_lsn) from the AETH's credit
 * field and reschedules the send tasklet if sending was blocked on
 * credits.  (The old kernel-doc said "flush the send work queue",
 * which did not match the code.)
 *
 * The QP s_lock should be held.
 */
void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
{
	/* Extract the credit code from the AETH. */
	u32 credit = (aeth >> IPS_AETH_CREDIT_SHIFT) & IPS_AETH_CREDIT_MASK;

	/*
	 * If the credit is invalid, we can send
	 * as many packets as we like. Otherwise, we have to
	 * honor the credit field.
	 */
	if (credit == IPS_AETH_CREDIT_INVAL) {
		/* (u32) -1 means "unlimited credits" in this file. */
		qp->s_lsn = (u32) -1;
	} else if (qp->s_lsn != (u32) -1) {
		/* Compute new LSN (i.e., MSN + credit); the AETH's low
		 * bits hold the MSN. */
		credit = (aeth + credit_table[credit]) & IPS_MSN_MASK;
		if (ipath_cmp24(credit, qp->s_lsn) > 0)
			qp->s_lsn = credit;
	}

	/* Restart sending if it was blocked due to lack of credits. */
	if (qp->s_cur != qp->s_head &&
	    (qp->s_lsn == (u32) -1 ||
	     ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
			 qp->s_lsn + 1) <= 0))
		tasklet_hi_schedule(&qp->s_task);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
new file mode 100644
index 000000000000..a4055ca00614
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -0,0 +1,1857 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "ipath_verbs.h"
34#include "ips_common.h"
35
36/* cut down ridiculously long IB macro names */
37#define OP(x) IB_OPCODE_RC_##x
38
/**
 * ipath_init_restart- initialize the qp->s_sge after a restart
 * @qp: the QP who's SGE we're restarting
 * @wqe: the work queue to initialize the QP's SGE from
 *
 * The QP s_lock should be held.
 */
static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
{
	struct ipath_ibdev *dev;
	u32 len;

	/* Bytes already sent for this WQE: packets sent so far
	 * (PSN delta) times the path MTU. */
	len = ((qp->s_psn - wqe->psn) & IPS_PSN_MASK) *
		ib_mtu_enum_to_int(qp->path_mtu);
	qp->s_sge.sge = wqe->sg_list[0];
	qp->s_sge.sg_list = wqe->sg_list + 1;
	qp->s_sge.num_sge = wqe->wr.num_sge;
	/* Advance the SGE past the data that was already sent. */
	ipath_skip_sge(&qp->s_sge, len);
	qp->s_len = wqe->length - len;
	dev = to_idev(qp->ibqp.device);
	spin_lock(&dev->pending_lock);
	/* Re-arm the retransmit timeout if not already queued
	 * (LIST_POISON1 marks an unqueued entry in this driver). */
	if (qp->timerwait.next == LIST_POISON1)
		list_add_tail(&qp->timerwait,
			      &dev->pending[dev->pending_index]);
	spin_unlock(&dev->pending_lock);
}
65
/**
 * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
 * @qp: a pointer to the QP
 * @ohdr: a pointer to the IB header being constructed
 * @pmtu: the path MTU
 *
 * Return bth0 if constructed; otherwise, return 0.
 * Note the QP s_lock must be held.
 */
static inline u32 ipath_make_rc_ack(struct ipath_qp *qp,
				    struct ipath_other_headers *ohdr,
				    u32 pmtu)
{
	struct ipath_sge_state *ss;
	u32 hwords;
	u32 len;
	u32 bth0;

	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
	hwords = 5;

	/*
	 * Send a response. Note that we are in the responder's
	 * side of the QP context.
	 */
	switch (qp->s_ack_state) {
	case OP(RDMA_READ_REQUEST):
		/* Start returning RDMA read data from s_rdma_sge. */
		ss = &qp->s_rdma_sge;
		len = qp->s_rdma_len;
		if (len > pmtu) {
			len = pmtu;
			qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
		} else
			qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
		qp->s_rdma_len -= len;
		bth0 = qp->s_ack_state << 24;
		/* FIRST/ONLY responses carry an AETH. */
		ohdr->u.aeth = ipath_compute_aeth(qp);
		hwords++;
		break;

	case OP(RDMA_READ_RESPONSE_FIRST):
		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_READ_RESPONSE_MIDDLE):
		ss = &qp->s_rdma_sge;
		len = qp->s_rdma_len;
		if (len > pmtu)
			len = pmtu;
		else {
			/* The final (LAST) response also carries an AETH. */
			ohdr->u.aeth = ipath_compute_aeth(qp);
			hwords++;
			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
		}
		qp->s_rdma_len -= len;
		bth0 = qp->s_ack_state << 24;
		break;

	case OP(RDMA_READ_RESPONSE_LAST):
	case OP(RDMA_READ_RESPONSE_ONLY):
		/*
		 * We have to prevent new requests from changing
		 * the r_sge state while a ipath_verbs_send()
		 * is in progress.
		 * Changing r_state allows the receiver
		 * to continue processing new packets.
		 * We do it here now instead of above so
		 * that we are sure the packet was sent before
		 * changing the state.
		 */
		qp->r_state = OP(RDMA_READ_RESPONSE_LAST);
		qp->s_ack_state = OP(ACKNOWLEDGE);
		return 0;

	case OP(COMPARE_SWAP):
	case OP(FETCH_ADD):
		/* Atomic ACK: return the saved result in the ack ETH. */
		ss = NULL;
		len = 0;
		qp->r_state = OP(SEND_LAST);
		qp->s_ack_state = OP(ACKNOWLEDGE);
		bth0 = IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24;
		ohdr->u.at.aeth = ipath_compute_aeth(qp);
		ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic);
		hwords += sizeof(ohdr->u.at) / 4;
		break;

	default:
		/* Send a regular ACK. */
		ss = NULL;
		len = 0;
		qp->s_ack_state = OP(ACKNOWLEDGE);
		bth0 = qp->s_ack_state << 24;
		ohdr->u.aeth = ipath_compute_aeth(qp);
		hwords++;
	}
	/* Hand the header size and payload back to the caller. */
	qp->s_hdrwords = hwords;
	qp->s_cur_sge = ss;
	qp->s_cur_size = len;

	return bth0;
}
167
/**
 * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
 * @qp: a pointer to the QP
 * @ohdr: a pointer to the IB header being constructed
 * @pmtu: the path MTU
 * @bth0p: pointer to the BTH opcode word
 * @bth2p: pointer to the BTH PSN word
 *
 * Return 1 if constructed; otherwise, return 0.
 * Note the QP s_lock must be held.
 */
static inline int ipath_make_rc_req(struct ipath_qp *qp,
				    struct ipath_other_headers *ohdr,
				    u32 pmtu, u32 *bth0p, u32 *bth2p)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ipath_sge_state *ss;
	struct ipath_swqe *wqe;
	u32 hwords;
	u32 len;
	u32 bth0;
	u32 bth2;
	char newreq;

	/* Nothing to do if sends are disabled or in RNR backoff. */
	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
	    qp->s_rnr_timeout)
		goto done;

	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
	hwords = 5;
	bth0 = 0;

	/* Send a request. */
	wqe = get_swqe_ptr(qp, qp->s_cur);
	switch (qp->s_state) {
	default:
		/*
		 * Resend an old request or start a new one.
		 *
		 * We keep track of the current SWQE so that
		 * we don't reset the "furthest progress" state
		 * if we need to back up.
		 */
		newreq = 0;
		if (qp->s_cur == qp->s_tail) {
			/* Check if send work queue is empty. */
			if (qp->s_tail == qp->s_head)
				goto done;
			qp->s_psn = wqe->psn = qp->s_next_psn;
			newreq = 1;
		}
		/*
		 * Note that we have to be careful not to modify the
		 * original work request since we may need to resend
		 * it.
		 */
		qp->s_sge.sge = wqe->sg_list[0];
		qp->s_sge.sg_list = wqe->sg_list + 1;
		qp->s_sge.num_sge = wqe->wr.num_sge;
		qp->s_len = len = wqe->length;
		ss = &qp->s_sge;
		bth2 = 0;
		switch (wqe->wr.opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			/* If no credit, return. */
			if (qp->s_lsn != (u32) -1 &&
			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
				goto done;
			wqe->lpsn = wqe->psn;
			if (len > pmtu) {
				/* Multi-packet: record the last PSN. */
				wqe->lpsn += (len - 1) / pmtu;
				qp->s_state = OP(SEND_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_SEND)
				qp->s_state = OP(SEND_ONLY);
			else {
				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the BTH */
				ohdr->u.imm_data = wqe->wr.imm_data;
				hwords += 1;
			}
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;
			bth2 = 1 << 31;	/* Request ACK. */
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_RDMA_WRITE:
			if (newreq)
				qp->s_lsn++;
			/* FALLTHROUGH */
		case IB_WR_RDMA_WRITE_WITH_IMM:
			/* If no credit, return. */
			if (qp->s_lsn != (u32) -1 &&
			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
				goto done;
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->wr.wr.rdma.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			hwords += sizeof(struct ib_reth) / 4;
			wqe->lpsn = wqe->psn;
			if (len > pmtu) {
				wqe->lpsn += (len - 1) / pmtu;
				qp->s_state = OP(RDMA_WRITE_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
				qp->s_state = OP(RDMA_WRITE_ONLY);
			else {
				qp->s_state =
					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes
				 * after RETH */
				ohdr->u.rc.imm_data = wqe->wr.imm_data;
				hwords += 1;
				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
					bth0 |= 1 << 23;
			}
			bth2 = 1 << 31;	/* Request ACK. */
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_RDMA_READ:
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->wr.wr.rdma.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			qp->s_state = OP(RDMA_READ_REQUEST);
			hwords += sizeof(ohdr->u.rc.reth) / 4;
			if (newreq) {
				qp->s_lsn++;
				/*
				 * Adjust s_next_psn to count the
				 * expected number of responses.
				 */
				if (len > pmtu)
					qp->s_next_psn += (len - 1) / pmtu;
				wqe->lpsn = qp->s_next_psn++;
			}
			/* The read request itself carries no payload. */
			ss = NULL;
			len = 0;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP)
				qp->s_state = OP(COMPARE_SWAP);
			else
				qp->s_state = OP(FETCH_ADD);
			ohdr->u.atomic_eth.vaddr = cpu_to_be64(
				wqe->wr.wr.atomic.remote_addr);
			ohdr->u.atomic_eth.rkey = cpu_to_be32(
				wqe->wr.wr.atomic.rkey);
			ohdr->u.atomic_eth.swap_data = cpu_to_be64(
				wqe->wr.wr.atomic.swap);
			ohdr->u.atomic_eth.compare_data = cpu_to_be64(
				wqe->wr.wr.atomic.compare_add);
			hwords += sizeof(struct ib_atomic_eth) / 4;
			if (newreq) {
				qp->s_lsn++;
				wqe->lpsn = wqe->psn;
			}
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			/* Atomic requests carry no payload either. */
			ss = NULL;
			len = 0;
			break;

		default:
			goto done;
		}
		if (newreq) {
			qp->s_tail++;
			if (qp->s_tail >= qp->s_size)
				qp->s_tail = 0;
		}
		bth2 |= qp->s_psn++ & IPS_PSN_MASK;
		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
			qp->s_next_psn = qp->s_psn;
		/* Arm the retransmit timeout for this request. */
		spin_lock(&dev->pending_lock);
		if (qp->timerwait.next == LIST_POISON1)
			list_add_tail(&qp->timerwait,
				      &dev->pending[dev->pending_index]);
		spin_unlock(&dev->pending_lock);
		break;

	case OP(RDMA_READ_RESPONSE_FIRST):
		/*
		 * This case can only happen if a send is restarted. See
		 * ipath_restart_rc().
		 */
		ipath_init_restart(qp, wqe);
		/* FALLTHROUGH */
	case OP(SEND_FIRST):
		qp->s_state = OP(SEND_MIDDLE);
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		bth2 = qp->s_psn++ & IPS_PSN_MASK;
		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
			qp->s_next_psn = qp->s_psn;
		ss = &qp->s_sge;
		len = qp->s_len;
		if (len > pmtu) {
			/*
			 * Request an ACK every 1/2 MB to avoid retransmit
			 * timeouts.
			 */
			if (((wqe->length - len) % (512 * 1024)) == 0)
				bth2 |= 1 << 31;
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_SEND)
			qp->s_state = OP(SEND_LAST);
		else {
			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.imm_data;
			hwords += 1;
		}
		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
			bth0 |= 1 << 23;
		bth2 |= 1 << 31;	/* Request ACK. */
		qp->s_cur++;
		if (qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_READ_RESPONSE_LAST):
		/*
		 * This case can only happen if a RDMA write is restarted.
		 * See ipath_restart_rc().
		 */
		ipath_init_restart(qp, wqe);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_FIRST):
		qp->s_state = OP(RDMA_WRITE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		bth2 = qp->s_psn++ & IPS_PSN_MASK;
		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
			qp->s_next_psn = qp->s_psn;
		ss = &qp->s_sge;
		len = qp->s_len;
		if (len > pmtu) {
			/*
			 * Request an ACK every 1/2 MB to avoid retransmit
			 * timeouts.
			 */
			if (((wqe->length - len) % (512 * 1024)) == 0)
				bth2 |= 1 << 31;
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
			qp->s_state = OP(RDMA_WRITE_LAST);
		else {
			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.imm_data;
			hwords += 1;
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;
		}
		bth2 |= 1 << 31;	/* Request ACK. */
		qp->s_cur++;
		if (qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_READ_RESPONSE_MIDDLE):
		/*
		 * This case can only happen if a RDMA read is restarted.
		 * See ipath_restart_rc().
		 */
		ipath_init_restart(qp, wqe);
		/* Re-issue the read request for the unacked remainder,
		 * offsetting the remote address by the bytes already
		 * received. */
		len = ((qp->s_psn - wqe->psn) & IPS_PSN_MASK) * pmtu;
		ohdr->u.rc.reth.vaddr =
			cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
		ohdr->u.rc.reth.rkey =
			cpu_to_be32(wqe->wr.wr.rdma.rkey);
		ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
		qp->s_state = OP(RDMA_READ_REQUEST);
		hwords += sizeof(ohdr->u.rc.reth) / 4;
		bth2 = qp->s_psn++ & IPS_PSN_MASK;
		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
			qp->s_next_psn = qp->s_psn;
		ss = NULL;
		len = 0;
		qp->s_cur++;
		if (qp->s_cur == qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_READ_REQUEST):
	case OP(COMPARE_SWAP):
	case OP(FETCH_ADD):
		/*
		 * We shouldn't start anything new until this request is
		 * finished. The ACK will handle rescheduling us. XXX The
		 * number of outstanding ones is negotiated at connection
		 * setup time (see pg. 258,289)? XXX Also, if we support
		 * multiple outstanding requests, we need to check the WQE
		 * IB_SEND_FENCE flag and not send a new request if a RDMA
		 * read or atomic is pending.
		 */
		goto done;
	}
	/* Hand the constructed header and payload back to the caller. */
	qp->s_len -= len;
	qp->s_hdrwords = hwords;
	qp->s_cur_sge = ss;
	qp->s_cur_size = len;
	*bth0p = bth0 | (qp->s_state << 24);
	*bth2p = bth2;
	return 1;

done:
	return 0;
}
498
499static inline void ipath_make_rc_grh(struct ipath_qp *qp,
500 struct ib_global_route *grh,
501 u32 nwords)
502{
503 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
504
505 /* GRH header size in 32-bit words. */
506 qp->s_hdrwords += 10;
507 qp->s_hdr.u.l.grh.version_tclass_flow =
508 cpu_to_be32((6 << 28) |
509 (grh->traffic_class << 20) |
510 grh->flow_label);
511 qp->s_hdr.u.l.grh.paylen =
512 cpu_to_be16(((qp->s_hdrwords - 12) + nwords +
513 SIZE_OF_CRC) << 2);
514 /* next_hdr is defined by C8-7 in ch. 8.4.1 */
515 qp->s_hdr.u.l.grh.next_hdr = 0x1B;
516 qp->s_hdr.u.l.grh.hop_limit = grh->hop_limit;
517 /* The SGID is 32-bit aligned. */
518 qp->s_hdr.u.l.grh.sgid.global.subnet_prefix = dev->gid_prefix;
519 qp->s_hdr.u.l.grh.sgid.global.interface_id =
520 ipath_layer_get_guid(dev->dd);
521 qp->s_hdr.u.l.grh.dgid = grh->dgid;
522}
523
524/**
525 * ipath_do_rc_send - perform a send on an RC QP
526 * @data: contains a pointer to the QP
527 *
528 * Process entries in the send work queue until credit or queue is
529 * exhausted. Only allow one CPU to send a packet per QP (tasklet).
530 * Otherwise, after we drop the QP s_lock, two threads could send
531 * packets out of order.
532 */
533void ipath_do_rc_send(unsigned long data)
534{
535 struct ipath_qp *qp = (struct ipath_qp *)data;
536 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
537 unsigned long flags;
538 u16 lrh0;
539 u32 nwords;
540 u32 extra_bytes;
541 u32 bth0;
542 u32 bth2;
543 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
544 struct ipath_other_headers *ohdr;
545
546 if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
547 goto bail;
548
549 if (unlikely(qp->remote_ah_attr.dlid ==
550 ipath_layer_get_lid(dev->dd))) {
551 struct ib_wc wc;
552
553 /*
554 * Pass in an uninitialized ib_wc to be consistent with
555 * other places where ipath_ruc_loopback() is called.
556 */
557 ipath_ruc_loopback(qp, &wc);
558 goto clear;
559 }
560
561 ohdr = &qp->s_hdr.u.oth;
562 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
563 ohdr = &qp->s_hdr.u.l.oth;
564
565again:
566 /* Check for a constructed packet to be sent. */
567 if (qp->s_hdrwords != 0) {
568 /*
569 * If no PIO bufs are available, return. An interrupt will
570 * call ipath_ib_piobufavail() when one is available.
571 */
572 _VERBS_INFO("h %u %p\n", qp->s_hdrwords, &qp->s_hdr);
573 _VERBS_INFO("d %u %p %u %p %u %u %u %u\n", qp->s_cur_size,
574 qp->s_cur_sge->sg_list,
575 qp->s_cur_sge->num_sge,
576 qp->s_cur_sge->sge.vaddr,
577 qp->s_cur_sge->sge.sge_length,
578 qp->s_cur_sge->sge.length,
579 qp->s_cur_sge->sge.m,
580 qp->s_cur_sge->sge.n);
581 if (ipath_verbs_send(dev->dd, qp->s_hdrwords,
582 (u32 *) &qp->s_hdr, qp->s_cur_size,
583 qp->s_cur_sge)) {
584 ipath_no_bufs_available(qp, dev);
585 goto bail;
586 }
587 dev->n_unicast_xmit++;
588 /* Record that we sent the packet and s_hdr is empty. */
589 qp->s_hdrwords = 0;
590 }
591
592 /*
593 * The lock is needed to synchronize between setting
594 * qp->s_ack_state, resend timer, and post_send().
595 */
596 spin_lock_irqsave(&qp->s_lock, flags);
597
598 /* Sending responses has higher priority over sending requests. */
599 if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
600 (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0)
601 bth2 = qp->s_ack_psn++ & IPS_PSN_MASK;
602 else if (!ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2))
603 goto done;
604
605 spin_unlock_irqrestore(&qp->s_lock, flags);
606
607 /* Construct the header. */
608 extra_bytes = (4 - qp->s_cur_size) & 3;
609 nwords = (qp->s_cur_size + extra_bytes) >> 2;
610 lrh0 = IPS_LRH_BTH;
611 if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
612 ipath_make_rc_grh(qp, &qp->remote_ah_attr.grh, nwords);
613 lrh0 = IPS_LRH_GRH;
614 }
615 lrh0 |= qp->remote_ah_attr.sl << 4;
616 qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
617 qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
618 qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
619 SIZE_OF_CRC);
620 qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
621 bth0 |= ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
622 bth0 |= extra_bytes << 20;
623 ohdr->bth[0] = cpu_to_be32(bth0);
624 ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
625 ohdr->bth[2] = cpu_to_be32(bth2);
626
627 /* Check for more work to do. */
628 goto again;
629
630done:
631 spin_unlock_irqrestore(&qp->s_lock, flags);
632clear:
633 clear_bit(IPATH_S_BUSY, &qp->s_flags);
634bail:
635 return;
636}
637
638static void send_rc_ack(struct ipath_qp *qp)
639{
640 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
641 u16 lrh0;
642 u32 bth0;
643 struct ipath_other_headers *ohdr;
644
645 /* Construct the header. */
646 ohdr = &qp->s_hdr.u.oth;
647 lrh0 = IPS_LRH_BTH;
648 /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
649 qp->s_hdrwords = 6;
650 if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
651 ipath_make_rc_grh(qp, &qp->remote_ah_attr.grh, 0);
652 ohdr = &qp->s_hdr.u.l.oth;
653 lrh0 = IPS_LRH_GRH;
654 }
655 bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
656 ohdr->u.aeth = ipath_compute_aeth(qp);
657 if (qp->s_ack_state >= OP(COMPARE_SWAP)) {
658 bth0 |= IB_OPCODE_ATOMIC_ACKNOWLEDGE << 24;
659 ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->s_ack_atomic);
660 qp->s_hdrwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4;
661 }
662 else
663 bth0 |= OP(ACKNOWLEDGE) << 24;
664 lrh0 |= qp->remote_ah_attr.sl << 4;
665 qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
666 qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
667 qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + SIZE_OF_CRC);
668 qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
669 ohdr->bth[0] = cpu_to_be32(bth0);
670 ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
671 ohdr->bth[2] = cpu_to_be32(qp->s_ack_psn & IPS_PSN_MASK);
672
673 /*
674 * If we can send the ACK, clear the ACK state.
675 */
676 if (ipath_verbs_send(dev->dd, qp->s_hdrwords, (u32 *) &qp->s_hdr,
677 0, NULL) == 0) {
678 qp->s_ack_state = OP(ACKNOWLEDGE);
679 dev->n_rc_qacks++;
680 dev->n_unicast_xmit++;
681 }
682}
683
684/**
685 * ipath_restart_rc - back up requester to resend the last un-ACKed request
686 * @qp: the QP to restart
687 * @psn: packet sequence number for the request
688 * @wc: the work completion request
689 *
690 * The QP s_lock should be held.
691 */
692void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
693{
694 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
695 struct ipath_ibdev *dev;
696 u32 n;
697
698 /*
699 * If there are no requests pending, we are done.
700 */
701 if (ipath_cmp24(psn, qp->s_next_psn) >= 0 ||
702 qp->s_last == qp->s_tail)
703 goto done;
704
705 if (qp->s_retry == 0) {
706 wc->wr_id = wqe->wr.wr_id;
707 wc->status = IB_WC_RETRY_EXC_ERR;
708 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
709 wc->vendor_err = 0;
710 wc->byte_len = 0;
711 wc->qp_num = qp->ibqp.qp_num;
712 wc->src_qp = qp->remote_qpn;
713 wc->pkey_index = 0;
714 wc->slid = qp->remote_ah_attr.dlid;
715 wc->sl = qp->remote_ah_attr.sl;
716 wc->dlid_path_bits = 0;
717 wc->port_num = 0;
718 ipath_sqerror_qp(qp, wc);
719 goto bail;
720 }
721 qp->s_retry--;
722
723 /*
724 * Remove the QP from the timeout queue.
725 * Note: it may already have been removed by ipath_ib_timer().
726 */
727 dev = to_idev(qp->ibqp.device);
728 spin_lock(&dev->pending_lock);
729 if (qp->timerwait.next != LIST_POISON1)
730 list_del(&qp->timerwait);
731 spin_unlock(&dev->pending_lock);
732
733 if (wqe->wr.opcode == IB_WR_RDMA_READ)
734 dev->n_rc_resends++;
735 else
736 dev->n_rc_resends += (int)qp->s_psn - (int)psn;
737
738 /*
739 * If we are starting the request from the beginning, let the normal
740 * send code handle initialization.
741 */
742 qp->s_cur = qp->s_last;
743 if (ipath_cmp24(psn, wqe->psn) <= 0) {
744 qp->s_state = OP(SEND_LAST);
745 qp->s_psn = wqe->psn;
746 } else {
747 n = qp->s_cur;
748 for (;;) {
749 if (++n == qp->s_size)
750 n = 0;
751 if (n == qp->s_tail) {
752 if (ipath_cmp24(psn, qp->s_next_psn) >= 0) {
753 qp->s_cur = n;
754 wqe = get_swqe_ptr(qp, n);
755 }
756 break;
757 }
758 wqe = get_swqe_ptr(qp, n);
759 if (ipath_cmp24(psn, wqe->psn) < 0)
760 break;
761 qp->s_cur = n;
762 }
763 qp->s_psn = psn;
764
765 /*
766 * Reset the state to restart in the middle of a request.
767 * Don't change the s_sge, s_cur_sge, or s_cur_size.
768 * See ipath_do_rc_send().
769 */
770 switch (wqe->wr.opcode) {
771 case IB_WR_SEND:
772 case IB_WR_SEND_WITH_IMM:
773 qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
774 break;
775
776 case IB_WR_RDMA_WRITE:
777 case IB_WR_RDMA_WRITE_WITH_IMM:
778 qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
779 break;
780
781 case IB_WR_RDMA_READ:
782 qp->s_state =
783 OP(RDMA_READ_RESPONSE_MIDDLE);
784 break;
785
786 default:
787 /*
788 * This case shouldn't happen since its only
789 * one PSN per req.
790 */
791 qp->s_state = OP(SEND_LAST);
792 }
793 }
794
795done:
796 tasklet_hi_schedule(&qp->s_task);
797
798bail:
799 return;
800}
801
802/**
803 * reset_psn - reset the QP state to send starting from PSN
804 * @qp: the QP
805 * @psn: the packet sequence number to restart at
806 *
807 * This is called from ipath_rc_rcv() to process an incoming RC ACK
808 * for the given QP.
809 * Called at interrupt level with the QP s_lock held.
810 */
811static void reset_psn(struct ipath_qp *qp, u32 psn)
812{
813 struct ipath_swqe *wqe;
814 u32 n;
815
816 n = qp->s_cur;
817 wqe = get_swqe_ptr(qp, n);
818 for (;;) {
819 if (++n == qp->s_size)
820 n = 0;
821 if (n == qp->s_tail) {
822 if (ipath_cmp24(psn, qp->s_next_psn) >= 0) {
823 qp->s_cur = n;
824 wqe = get_swqe_ptr(qp, n);
825 }
826 break;
827 }
828 wqe = get_swqe_ptr(qp, n);
829 if (ipath_cmp24(psn, wqe->psn) < 0)
830 break;
831 qp->s_cur = n;
832 }
833 qp->s_psn = psn;
834
835 /*
836 * Set the state to restart in the middle of a
837 * request. Don't change the s_sge, s_cur_sge, or
838 * s_cur_size. See ipath_do_rc_send().
839 */
840 switch (wqe->wr.opcode) {
841 case IB_WR_SEND:
842 case IB_WR_SEND_WITH_IMM:
843 qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
844 break;
845
846 case IB_WR_RDMA_WRITE:
847 case IB_WR_RDMA_WRITE_WITH_IMM:
848 qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
849 break;
850
851 case IB_WR_RDMA_READ:
852 qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
853 break;
854
855 default:
856 /*
857 * This case shouldn't happen since its only
858 * one PSN per req.
859 */
860 qp->s_state = OP(SEND_LAST);
861 }
862}
863
864/**
865 * do_rc_ack - process an incoming RC ACK
866 * @qp: the QP the ACK came in on
867 * @psn: the packet sequence number of the ACK
868 * @opcode: the opcode of the request that resulted in the ACK
869 *
870 * This is called from ipath_rc_rcv() to process an incoming RC ACK
871 * for the given QP.
872 * Called at interrupt level with the QP s_lock held.
873 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
874 */
875static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
876{
877 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
878 struct ib_wc wc;
879 struct ipath_swqe *wqe;
880 int ret = 0;
881
882 /*
883 * Remove the QP from the timeout queue (or RNR timeout queue).
884 * If ipath_ib_timer() has already removed it,
885 * it's OK since we hold the QP s_lock and ipath_restart_rc()
886 * just won't find anything to restart if we ACK everything.
887 */
888 spin_lock(&dev->pending_lock);
889 if (qp->timerwait.next != LIST_POISON1)
890 list_del(&qp->timerwait);
891 spin_unlock(&dev->pending_lock);
892
893 /*
894 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
895 * requests and implicitly NAK RDMA read and atomic requests issued
896 * before the NAK'ed request. The MSN won't include the NAK'ed
897 * request but will include an ACK'ed request(s).
898 */
899 wqe = get_swqe_ptr(qp, qp->s_last);
900
901 /* Nothing is pending to ACK/NAK. */
902 if (qp->s_last == qp->s_tail)
903 goto bail;
904
905 /*
906 * The MSN might be for a later WQE than the PSN indicates so
907 * only complete WQEs that the PSN finishes.
908 */
909 while (ipath_cmp24(psn, wqe->lpsn) >= 0) {
910 /* If we are ACKing a WQE, the MSN should be >= the SSN. */
911 if (ipath_cmp24(aeth, wqe->ssn) < 0)
912 break;
913 /*
914 * If this request is a RDMA read or atomic, and the ACK is
915 * for a later operation, this ACK NAKs the RDMA read or
916 * atomic. In other words, only a RDMA_READ_LAST or ONLY
917 * can ACK a RDMA read and likewise for atomic ops. Note
918 * that the NAK case can only happen if relaxed ordering is
919 * used and requests are sent after an RDMA read or atomic
920 * is sent but before the response is received.
921 */
922 if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
923 opcode != OP(RDMA_READ_RESPONSE_LAST)) ||
924 ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
925 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
926 (opcode != OP(ATOMIC_ACKNOWLEDGE) ||
927 ipath_cmp24(wqe->psn, psn) != 0))) {
928 /*
929 * The last valid PSN seen is the previous
930 * request's.
931 */
932 qp->s_last_psn = wqe->psn - 1;
933 /* Retry this request. */
934 ipath_restart_rc(qp, wqe->psn, &wc);
935 /*
936 * No need to process the ACK/NAK since we are
937 * restarting an earlier request.
938 */
939 goto bail;
940 }
941 /* Post a send completion queue entry if requested. */
942 if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
943 (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
944 wc.wr_id = wqe->wr.wr_id;
945 wc.status = IB_WC_SUCCESS;
946 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
947 wc.vendor_err = 0;
948 wc.byte_len = wqe->length;
949 wc.qp_num = qp->ibqp.qp_num;
950 wc.src_qp = qp->remote_qpn;
951 wc.pkey_index = 0;
952 wc.slid = qp->remote_ah_attr.dlid;
953 wc.sl = qp->remote_ah_attr.sl;
954 wc.dlid_path_bits = 0;
955 wc.port_num = 0;
956 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
957 }
958 qp->s_retry = qp->s_retry_cnt;
959 /*
960 * If we are completing a request which is in the process of
961 * being resent, we can stop resending it since we know the
962 * responder has already seen it.
963 */
964 if (qp->s_last == qp->s_cur) {
965 if (++qp->s_cur >= qp->s_size)
966 qp->s_cur = 0;
967 wqe = get_swqe_ptr(qp, qp->s_cur);
968 qp->s_state = OP(SEND_LAST);
969 qp->s_psn = wqe->psn;
970 }
971 if (++qp->s_last >= qp->s_size)
972 qp->s_last = 0;
973 wqe = get_swqe_ptr(qp, qp->s_last);
974 if (qp->s_last == qp->s_tail)
975 break;
976 }
977
978 switch (aeth >> 29) {
979 case 0: /* ACK */
980 dev->n_rc_acks++;
981 /* If this is a partial ACK, reset the retransmit timer. */
982 if (qp->s_last != qp->s_tail) {
983 spin_lock(&dev->pending_lock);
984 list_add_tail(&qp->timerwait,
985 &dev->pending[dev->pending_index]);
986 spin_unlock(&dev->pending_lock);
987 }
988 ipath_get_credit(qp, aeth);
989 qp->s_rnr_retry = qp->s_rnr_retry_cnt;
990 qp->s_retry = qp->s_retry_cnt;
991 qp->s_last_psn = psn;
992 ret = 1;
993 goto bail;
994
995 case 1: /* RNR NAK */
996 dev->n_rnr_naks++;
997 if (qp->s_rnr_retry == 0) {
998 if (qp->s_last == qp->s_tail)
999 goto bail;
1000
1001 wc.status = IB_WC_RNR_RETRY_EXC_ERR;
1002 goto class_b;
1003 }
1004 if (qp->s_rnr_retry_cnt < 7)
1005 qp->s_rnr_retry--;
1006 if (qp->s_last == qp->s_tail)
1007 goto bail;
1008
1009 /* The last valid PSN seen is the previous request's. */
1010 qp->s_last_psn = wqe->psn - 1;
1011
1012 dev->n_rc_resends += (int)qp->s_psn - (int)psn;
1013
1014 /*
1015 * If we are starting the request from the beginning, let
1016 * the normal send code handle initialization.
1017 */
1018 qp->s_cur = qp->s_last;
1019 wqe = get_swqe_ptr(qp, qp->s_cur);
1020 if (ipath_cmp24(psn, wqe->psn) <= 0) {
1021 qp->s_state = OP(SEND_LAST);
1022 qp->s_psn = wqe->psn;
1023 } else
1024 reset_psn(qp, psn);
1025
1026 qp->s_rnr_timeout =
1027 ib_ipath_rnr_table[(aeth >> IPS_AETH_CREDIT_SHIFT) &
1028 IPS_AETH_CREDIT_MASK];
1029 ipath_insert_rnr_queue(qp);
1030 goto bail;
1031
1032 case 3: /* NAK */
1033 /* The last valid PSN seen is the previous request's. */
1034 if (qp->s_last != qp->s_tail)
1035 qp->s_last_psn = wqe->psn - 1;
1036 switch ((aeth >> IPS_AETH_CREDIT_SHIFT) &
1037 IPS_AETH_CREDIT_MASK) {
1038 case 0: /* PSN sequence error */
1039 dev->n_seq_naks++;
1040 /*
1041 * Back up to the responder's expected PSN. XXX
1042 * Note that we might get a NAK in the middle of an
1043 * RDMA READ response which terminates the RDMA
1044 * READ.
1045 */
1046 if (qp->s_last == qp->s_tail)
1047 break;
1048
1049 if (ipath_cmp24(psn, wqe->psn) < 0)
1050 break;
1051
1052 /* Retry the request. */
1053 ipath_restart_rc(qp, psn, &wc);
1054 break;
1055
1056 case 1: /* Invalid Request */
1057 wc.status = IB_WC_REM_INV_REQ_ERR;
1058 dev->n_other_naks++;
1059 goto class_b;
1060
1061 case 2: /* Remote Access Error */
1062 wc.status = IB_WC_REM_ACCESS_ERR;
1063 dev->n_other_naks++;
1064 goto class_b;
1065
1066 case 3: /* Remote Operation Error */
1067 wc.status = IB_WC_REM_OP_ERR;
1068 dev->n_other_naks++;
1069 class_b:
1070 wc.wr_id = wqe->wr.wr_id;
1071 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
1072 wc.vendor_err = 0;
1073 wc.byte_len = 0;
1074 wc.qp_num = qp->ibqp.qp_num;
1075 wc.src_qp = qp->remote_qpn;
1076 wc.pkey_index = 0;
1077 wc.slid = qp->remote_ah_attr.dlid;
1078 wc.sl = qp->remote_ah_attr.sl;
1079 wc.dlid_path_bits = 0;
1080 wc.port_num = 0;
1081 ipath_sqerror_qp(qp, &wc);
1082 break;
1083
1084 default:
1085 /* Ignore other reserved NAK error codes */
1086 goto reserved;
1087 }
1088 qp->s_rnr_retry = qp->s_rnr_retry_cnt;
1089 goto bail;
1090
1091 default: /* 2: reserved */
1092 reserved:
1093 /* Ignore reserved NAK codes. */
1094 goto bail;
1095 }
1096
1097bail:
1098 return ret;
1099}
1100
1101/**
1102 * ipath_rc_rcv_resp - process an incoming RC response packet
1103 * @dev: the device this packet came in on
1104 * @ohdr: the other headers for this packet
1105 * @data: the packet data
1106 * @tlen: the packet length
1107 * @qp: the QP for this packet
1108 * @opcode: the opcode for this packet
1109 * @psn: the packet sequence number for this packet
1110 * @hdrsize: the header length
1111 * @pmtu: the path MTU
1112 * @header_in_data: true if part of the header data is in the data buffer
1113 *
1114 * This is called from ipath_rc_rcv() to process an incoming RC response
1115 * packet for the given QP.
1116 * Called at interrupt level.
1117 */
1118static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1119 struct ipath_other_headers *ohdr,
1120 void *data, u32 tlen,
1121 struct ipath_qp *qp,
1122 u32 opcode,
1123 u32 psn, u32 hdrsize, u32 pmtu,
1124 int header_in_data)
1125{
1126 unsigned long flags;
1127 struct ib_wc wc;
1128 int diff;
1129 u32 pad;
1130 u32 aeth;
1131
1132 spin_lock_irqsave(&qp->s_lock, flags);
1133
1134 /* Ignore invalid responses. */
1135 if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
1136 goto ack_done;
1137
1138 /* Ignore duplicate responses. */
1139 diff = ipath_cmp24(psn, qp->s_last_psn);
1140 if (unlikely(diff <= 0)) {
1141 /* Update credits for "ghost" ACKs */
1142 if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
1143 if (!header_in_data)
1144 aeth = be32_to_cpu(ohdr->u.aeth);
1145 else {
1146 aeth = be32_to_cpu(((__be32 *) data)[0]);
1147 data += sizeof(__be32);
1148 }
1149 if ((aeth >> 29) == 0)
1150 ipath_get_credit(qp, aeth);
1151 }
1152 goto ack_done;
1153 }
1154
1155 switch (opcode) {
1156 case OP(ACKNOWLEDGE):
1157 case OP(ATOMIC_ACKNOWLEDGE):
1158 case OP(RDMA_READ_RESPONSE_FIRST):
1159 if (!header_in_data)
1160 aeth = be32_to_cpu(ohdr->u.aeth);
1161 else {
1162 aeth = be32_to_cpu(((__be32 *) data)[0]);
1163 data += sizeof(__be32);
1164 }
1165 if (opcode == OP(ATOMIC_ACKNOWLEDGE))
1166 *(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data;
1167 if (!do_rc_ack(qp, aeth, psn, opcode) ||
1168 opcode != OP(RDMA_READ_RESPONSE_FIRST))
1169 goto ack_done;
1170 hdrsize += 4;
1171 /*
1172 * do_rc_ack() has already checked the PSN so skip
1173 * the sequence check.
1174 */
1175 goto rdma_read;
1176
1177 case OP(RDMA_READ_RESPONSE_MIDDLE):
1178 /* no AETH, no ACK */
1179 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1180 dev->n_rdma_seq++;
1181 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1182 goto ack_done;
1183 }
1184 rdma_read:
1185 if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
1186 goto ack_done;
1187 if (unlikely(tlen != (hdrsize + pmtu + 4)))
1188 goto ack_done;
1189 if (unlikely(pmtu >= qp->s_len))
1190 goto ack_done;
1191 /* We got a response so update the timeout. */
1192 if (unlikely(qp->s_last == qp->s_tail ||
1193 get_swqe_ptr(qp, qp->s_last)->wr.opcode !=
1194 IB_WR_RDMA_READ))
1195 goto ack_done;
1196 spin_lock(&dev->pending_lock);
1197 if (qp->s_rnr_timeout == 0 &&
1198 qp->timerwait.next != LIST_POISON1)
1199 list_move_tail(&qp->timerwait,
1200 &dev->pending[dev->pending_index]);
1201 spin_unlock(&dev->pending_lock);
1202 /*
1203 * Update the RDMA receive state but do the copy w/o holding the
1204 * locks and blocking interrupts. XXX Yet another place that
1205 * affects relaxed RDMA order since we don't want s_sge modified.
1206 */
1207 qp->s_len -= pmtu;
1208 qp->s_last_psn = psn;
1209 spin_unlock_irqrestore(&qp->s_lock, flags);
1210 ipath_copy_sge(&qp->s_sge, data, pmtu);
1211 goto bail;
1212
1213 case OP(RDMA_READ_RESPONSE_LAST):
1214 /* ACKs READ req. */
1215 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1216 dev->n_rdma_seq++;
1217 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
1218 goto ack_done;
1219 }
1220 /* FALLTHROUGH */
1221 case OP(RDMA_READ_RESPONSE_ONLY):
1222 if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
1223 goto ack_done;
1224 /*
1225 * Get the number of bytes the message was padded by.
1226 */
1227 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1228 /*
1229 * Check that the data size is >= 1 && <= pmtu.
1230 * Remember to account for the AETH header (4) and
1231 * ICRC (4).
1232 */
1233 if (unlikely(tlen <= (hdrsize + pad + 8))) {
1234 /*
1235 * XXX Need to generate an error CQ
1236 * entry.
1237 */
1238 goto ack_done;
1239 }
1240 tlen -= hdrsize + pad + 8;
1241 if (unlikely(tlen != qp->s_len)) {
1242 /*
1243 * XXX Need to generate an error CQ
1244 * entry.
1245 */
1246 goto ack_done;
1247 }
1248 if (!header_in_data)
1249 aeth = be32_to_cpu(ohdr->u.aeth);
1250 else {
1251 aeth = be32_to_cpu(((__be32 *) data)[0]);
1252 data += sizeof(__be32);
1253 }
1254 ipath_copy_sge(&qp->s_sge, data, tlen);
1255 if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) {
1256 /*
1257 * Change the state so we contimue
1258 * processing new requests.
1259 */
1260 qp->s_state = OP(SEND_LAST);
1261 }
1262 goto ack_done;
1263 }
1264
1265ack_done:
1266 spin_unlock_irqrestore(&qp->s_lock, flags);
1267bail:
1268 return;
1269}
1270
1271/**
1272 * ipath_rc_rcv_error - process an incoming duplicate or error RC packet
1273 * @dev: the device this packet came in on
1274 * @ohdr: the other headers for this packet
1275 * @data: the packet data
1276 * @qp: the QP for this packet
1277 * @opcode: the opcode for this packet
1278 * @psn: the packet sequence number for this packet
1279 * @diff: the difference between the PSN and the expected PSN
1280 * @header_in_data: true if part of the header data is in the data buffer
1281 *
1282 * This is called from ipath_rc_rcv() to process an unexpected
1283 * incoming RC packet for the given QP.
1284 * Called at interrupt level.
1285 * Return 1 if no more processing is needed; otherwise return 0 to
1286 * schedule a response to be sent and the s_lock unlocked.
1287 */
1288static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1289 struct ipath_other_headers *ohdr,
1290 void *data,
1291 struct ipath_qp *qp,
1292 u32 opcode,
1293 u32 psn,
1294 int diff,
1295 int header_in_data)
1296{
1297 struct ib_reth *reth;
1298
1299 if (diff > 0) {
1300 /*
1301 * Packet sequence error.
1302 * A NAK will ACK earlier sends and RDMA writes.
1303 * Don't queue the NAK if a RDMA read, atomic, or
1304 * NAK is pending though.
1305 */
1306 spin_lock(&qp->s_lock);
1307 if ((qp->s_ack_state >= OP(RDMA_READ_REQUEST) &&
1308 qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) ||
1309 qp->s_nak_state != 0) {
1310 spin_unlock(&qp->s_lock);
1311 goto done;
1312 }
1313 qp->s_ack_state = OP(SEND_ONLY);
1314 qp->s_nak_state = IB_NAK_PSN_ERROR;
1315 /* Use the expected PSN. */
1316 qp->s_ack_psn = qp->r_psn;
1317 goto resched;
1318 }
1319
1320 /*
1321 * Handle a duplicate request. Don't re-execute SEND, RDMA
1322 * write or atomic op. Don't NAK errors, just silently drop
1323 * the duplicate request. Note that r_sge, r_len, and
1324 * r_rcv_len may be in use so don't modify them.
1325 *
1326 * We are supposed to ACK the earliest duplicate PSN but we
1327 * can coalesce an outstanding duplicate ACK. We have to
1328 * send the earliest so that RDMA reads can be restarted at
1329 * the requester's expected PSN.
1330 */
1331 spin_lock(&qp->s_lock);
1332 if (qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE &&
1333 ipath_cmp24(psn, qp->s_ack_psn) >= 0) {
1334 if (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST)
1335 qp->s_ack_psn = psn;
1336 spin_unlock(&qp->s_lock);
1337 goto done;
1338 }
1339 switch (opcode) {
1340 case OP(RDMA_READ_REQUEST):
1341 /*
1342 * We have to be careful to not change s_rdma_sge
1343 * while ipath_do_rc_send() is using it and not
1344 * holding the s_lock.
1345 */
1346 if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
1347 qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) {
1348 spin_unlock(&qp->s_lock);
1349 dev->n_rdma_dup_busy++;
1350 goto done;
1351 }
1352 /* RETH comes after BTH */
1353 if (!header_in_data)
1354 reth = &ohdr->u.rc.reth;
1355 else {
1356 reth = (struct ib_reth *)data;
1357 data += sizeof(*reth);
1358 }
1359 qp->s_rdma_len = be32_to_cpu(reth->length);
1360 if (qp->s_rdma_len != 0) {
1361 u32 rkey = be32_to_cpu(reth->rkey);
1362 u64 vaddr = be64_to_cpu(reth->vaddr);
1363 int ok;
1364
1365 /*
1366 * Address range must be a subset of the original
1367 * request and start on pmtu boundaries.
1368 */
1369 ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
1370 qp->s_rdma_len, vaddr, rkey,
1371 IB_ACCESS_REMOTE_READ);
1372 if (unlikely(!ok))
1373 goto done;
1374 } else {
1375 qp->s_rdma_sge.sg_list = NULL;
1376 qp->s_rdma_sge.num_sge = 0;
1377 qp->s_rdma_sge.sge.mr = NULL;
1378 qp->s_rdma_sge.sge.vaddr = NULL;
1379 qp->s_rdma_sge.sge.length = 0;
1380 qp->s_rdma_sge.sge.sge_length = 0;
1381 }
1382 break;
1383
1384 case OP(COMPARE_SWAP):
1385 case OP(FETCH_ADD):
1386 /*
1387 * Check for the PSN of the last atomic operations
1388 * performed and resend the result if found.
1389 */
1390 if ((psn & IPS_PSN_MASK) != qp->r_atomic_psn) {
1391 spin_unlock(&qp->s_lock);
1392 goto done;
1393 }
1394 qp->s_ack_atomic = qp->r_atomic_data;
1395 break;
1396 }
1397 qp->s_ack_state = opcode;
1398 qp->s_nak_state = 0;
1399 qp->s_ack_psn = psn;
1400resched:
1401 return 0;
1402
1403done:
1404 return 1;
1405}
1406
1407/**
1408 * ipath_rc_rcv - process an incoming RC packet
1409 * @dev: the device this packet came in on
1410 * @hdr: the header of this packet
1411 * @has_grh: true if the header has a GRH
1412 * @data: the packet data
1413 * @tlen: the packet length
1414 * @qp: the QP for this packet
1415 *
1416 * This is called from ipath_qp_rcv() to process an incoming RC packet
1417 * for the given QP.
1418 * Called at interrupt level.
1419 */
1420void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1421 int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
1422{
1423 struct ipath_other_headers *ohdr;
1424 u32 opcode;
1425 u32 hdrsize;
1426 u32 psn;
1427 u32 pad;
1428 unsigned long flags;
1429 struct ib_wc wc;
1430 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
1431 int diff;
1432 struct ib_reth *reth;
1433 int header_in_data;
1434
1435 /* Check for GRH */
1436 if (!has_grh) {
1437 ohdr = &hdr->u.oth;
1438 hdrsize = 8 + 12; /* LRH + BTH */
1439 psn = be32_to_cpu(ohdr->bth[2]);
1440 header_in_data = 0;
1441 } else {
1442 ohdr = &hdr->u.l.oth;
1443 hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */
1444 /*
1445 * The header with GRH is 60 bytes and the core driver sets
1446 * the eager header buffer size to 56 bytes so the last 4
1447 * bytes of the BTH header (PSN) is in the data buffer.
1448 */
1449 header_in_data =
1450 ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
1451 if (header_in_data) {
1452 psn = be32_to_cpu(((__be32 *) data)[0]);
1453 data += sizeof(__be32);
1454 } else
1455 psn = be32_to_cpu(ohdr->bth[2]);
1456 }
1457 /*
1458 * The opcode is in the low byte when its in network order
1459 * (top byte when in host order).
1460 */
1461 opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
1462
1463 /*
1464 * Process responses (ACKs) before anything else. Note that the
1465 * packet sequence number will be for something in the send work
1466 * queue rather than the expected receive packet sequence number.
1467 * In other words, this QP is the requester.
1468 */
1469 if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
1470 opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
1471 ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
1472 hdrsize, pmtu, header_in_data);
1473 goto bail;
1474 }
1475
1476 spin_lock_irqsave(&qp->r_rq.lock, flags);
1477
1478 /* Compute 24 bits worth of difference. */
1479 diff = ipath_cmp24(psn, qp->r_psn);
1480 if (unlikely(diff)) {
1481 if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
1482 psn, diff, header_in_data))
1483 goto done;
1484 goto resched;
1485 }
1486
1487 /* Check for opcode sequence errors. */
1488 switch (qp->r_state) {
1489 case OP(SEND_FIRST):
1490 case OP(SEND_MIDDLE):
1491 if (opcode == OP(SEND_MIDDLE) ||
1492 opcode == OP(SEND_LAST) ||
1493 opcode == OP(SEND_LAST_WITH_IMMEDIATE))
1494 break;
1495 nack_inv:
1496 /*
1497 * A NAK will ACK earlier sends and RDMA writes. Don't queue the
1498 * NAK if a RDMA read, atomic, or NAK is pending though.
1499 */
1500 spin_lock(&qp->s_lock);
1501 if (qp->s_ack_state >= OP(RDMA_READ_REQUEST) &&
1502 qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) {
1503 spin_unlock(&qp->s_lock);
1504 goto done;
1505 }
1506 /* XXX Flush WQEs */
1507 qp->state = IB_QPS_ERR;
1508 qp->s_ack_state = OP(SEND_ONLY);
1509 qp->s_nak_state = IB_NAK_INVALID_REQUEST;
1510 qp->s_ack_psn = qp->r_psn;
1511 goto resched;
1512
1513 case OP(RDMA_WRITE_FIRST):
1514 case OP(RDMA_WRITE_MIDDLE):
1515 if (opcode == OP(RDMA_WRITE_MIDDLE) ||
1516 opcode == OP(RDMA_WRITE_LAST) ||
1517 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1518 break;
1519 goto nack_inv;
1520
1521 case OP(RDMA_READ_REQUEST):
1522 case OP(COMPARE_SWAP):
1523 case OP(FETCH_ADD):
1524 /*
1525 * Drop all new requests until a response has been sent. A
1526 * new request then ACKs the RDMA response we sent. Relaxed
1527 * ordering would allow new requests to be processed but we
1528 * would need to keep a queue of rwqe's for all that are in
1529 * progress. Note that we can't RNR NAK this request since
1530 * the RDMA READ or atomic response is already queued to be
1531 * sent (unless we implement a response send queue).
1532 */
1533 goto done;
1534
1535 default:
1536 if (opcode == OP(SEND_MIDDLE) ||
1537 opcode == OP(SEND_LAST) ||
1538 opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
1539 opcode == OP(RDMA_WRITE_MIDDLE) ||
1540 opcode == OP(RDMA_WRITE_LAST) ||
1541 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1542 goto nack_inv;
1543 break;
1544 }
1545
1546 wc.imm_data = 0;
1547 wc.wc_flags = 0;
1548
1549 /* OK, process the packet. */
1550 switch (opcode) {
1551 case OP(SEND_FIRST):
1552 if (!ipath_get_rwqe(qp, 0)) {
1553 rnr_nak:
1554 /*
1555 * A RNR NAK will ACK earlier sends and RDMA writes.
1556 * Don't queue the NAK if a RDMA read or atomic
1557 * is pending though.
1558 */
1559 spin_lock(&qp->s_lock);
1560 if (qp->s_ack_state >=
1561 OP(RDMA_READ_REQUEST) &&
1562 qp->s_ack_state != IB_OPCODE_ACKNOWLEDGE) {
1563 spin_unlock(&qp->s_lock);
1564 goto done;
1565 }
1566 qp->s_ack_state = OP(SEND_ONLY);
1567 qp->s_nak_state = IB_RNR_NAK | qp->s_min_rnr_timer;
1568 qp->s_ack_psn = qp->r_psn;
1569 goto resched;
1570 }
1571 qp->r_rcv_len = 0;
1572 /* FALLTHROUGH */
1573 case OP(SEND_MIDDLE):
1574 case OP(RDMA_WRITE_MIDDLE):
1575 send_middle:
1576 /* Check for invalid length PMTU or posted rwqe len. */
1577 if (unlikely(tlen != (hdrsize + pmtu + 4)))
1578 goto nack_inv;
1579 qp->r_rcv_len += pmtu;
1580 if (unlikely(qp->r_rcv_len > qp->r_len))
1581 goto nack_inv;
1582 ipath_copy_sge(&qp->r_sge, data, pmtu);
1583 break;
1584
1585 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
1586 /* consume RWQE */
1587 if (!ipath_get_rwqe(qp, 1))
1588 goto rnr_nak;
1589 goto send_last_imm;
1590
1591 case OP(SEND_ONLY):
1592 case OP(SEND_ONLY_WITH_IMMEDIATE):
1593 if (!ipath_get_rwqe(qp, 0))
1594 goto rnr_nak;
1595 qp->r_rcv_len = 0;
1596 if (opcode == OP(SEND_ONLY))
1597 goto send_last;
1598 /* FALLTHROUGH */
1599 case OP(SEND_LAST_WITH_IMMEDIATE):
1600 send_last_imm:
1601 if (header_in_data) {
1602 wc.imm_data = *(__be32 *) data;
1603 data += sizeof(__be32);
1604 } else {
1605 /* Immediate data comes after BTH */
1606 wc.imm_data = ohdr->u.imm_data;
1607 }
1608 hdrsize += 4;
1609 wc.wc_flags = IB_WC_WITH_IMM;
1610 /* FALLTHROUGH */
1611 case OP(SEND_LAST):
1612 case OP(RDMA_WRITE_LAST):
1613 send_last:
1614 /* Get the number of bytes the message was padded by. */
1615 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1616 /* Check for invalid length. */
1617 /* XXX LAST len should be >= 1 */
1618 if (unlikely(tlen < (hdrsize + pad + 4)))
1619 goto nack_inv;
1620 /* Don't count the CRC. */
1621 tlen -= (hdrsize + pad + 4);
1622 wc.byte_len = tlen + qp->r_rcv_len;
1623 if (unlikely(wc.byte_len > qp->r_len))
1624 goto nack_inv;
1625 ipath_copy_sge(&qp->r_sge, data, tlen);
1626 atomic_inc(&qp->msn);
1627 if (opcode == OP(RDMA_WRITE_LAST) ||
1628 opcode == OP(RDMA_WRITE_ONLY))
1629 break;
1630 wc.wr_id = qp->r_wr_id;
1631 wc.status = IB_WC_SUCCESS;
1632 wc.opcode = IB_WC_RECV;
1633 wc.vendor_err = 0;
1634 wc.qp_num = qp->ibqp.qp_num;
1635 wc.src_qp = qp->remote_qpn;
1636 wc.pkey_index = 0;
1637 wc.slid = qp->remote_ah_attr.dlid;
1638 wc.sl = qp->remote_ah_attr.sl;
1639 wc.dlid_path_bits = 0;
1640 wc.port_num = 0;
1641 /* Signal completion event if the solicited bit is set. */
1642 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
1643 (ohdr->bth[0] &
1644 __constant_cpu_to_be32(1 << 23)) != 0);
1645 break;
1646
1647 case OP(RDMA_WRITE_FIRST):
1648 case OP(RDMA_WRITE_ONLY):
1649 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
1650 /* consume RWQE */
1651 /* RETH comes after BTH */
1652 if (!header_in_data)
1653 reth = &ohdr->u.rc.reth;
1654 else {
1655 reth = (struct ib_reth *)data;
1656 data += sizeof(*reth);
1657 }
1658 hdrsize += sizeof(*reth);
1659 qp->r_len = be32_to_cpu(reth->length);
1660 qp->r_rcv_len = 0;
1661 if (qp->r_len != 0) {
1662 u32 rkey = be32_to_cpu(reth->rkey);
1663 u64 vaddr = be64_to_cpu(reth->vaddr);
1664 int ok;
1665
1666 /* Check rkey & NAK */
1667 ok = ipath_rkey_ok(dev, &qp->r_sge,
1668 qp->r_len, vaddr, rkey,
1669 IB_ACCESS_REMOTE_WRITE);
1670 if (unlikely(!ok)) {
1671 nack_acc:
1672 /*
1673 * A NAK will ACK earlier sends and RDMA
1674 * writes. Don't queue the NAK if a RDMA
1675 * read, atomic, or NAK is pending though.
1676 */
1677 spin_lock(&qp->s_lock);
1678 if (qp->s_ack_state >=
1679 OP(RDMA_READ_REQUEST) &&
1680 qp->s_ack_state !=
1681 IB_OPCODE_ACKNOWLEDGE) {
1682 spin_unlock(&qp->s_lock);
1683 goto done;
1684 }
1685 /* XXX Flush WQEs */
1686 qp->state = IB_QPS_ERR;
1687 qp->s_ack_state = OP(RDMA_WRITE_ONLY);
1688 qp->s_nak_state =
1689 IB_NAK_REMOTE_ACCESS_ERROR;
1690 qp->s_ack_psn = qp->r_psn;
1691 goto resched;
1692 }
1693 } else {
1694 qp->r_sge.sg_list = NULL;
1695 qp->r_sge.sge.mr = NULL;
1696 qp->r_sge.sge.vaddr = NULL;
1697 qp->r_sge.sge.length = 0;
1698 qp->r_sge.sge.sge_length = 0;
1699 }
1700 if (unlikely(!(qp->qp_access_flags &
1701 IB_ACCESS_REMOTE_WRITE)))
1702 goto nack_acc;
1703 if (opcode == OP(RDMA_WRITE_FIRST))
1704 goto send_middle;
1705 else if (opcode == OP(RDMA_WRITE_ONLY))
1706 goto send_last;
1707 if (!ipath_get_rwqe(qp, 1))
1708 goto rnr_nak;
1709 goto send_last_imm;
1710
1711 case OP(RDMA_READ_REQUEST):
1712 /* RETH comes after BTH */
1713 if (!header_in_data)
1714 reth = &ohdr->u.rc.reth;
1715 else {
1716 reth = (struct ib_reth *)data;
1717 data += sizeof(*reth);
1718 }
1719 spin_lock(&qp->s_lock);
1720 if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
1721 qp->s_ack_state >= IB_OPCODE_RDMA_READ_REQUEST) {
1722 spin_unlock(&qp->s_lock);
1723 goto done;
1724 }
1725 qp->s_rdma_len = be32_to_cpu(reth->length);
1726 if (qp->s_rdma_len != 0) {
1727 u32 rkey = be32_to_cpu(reth->rkey);
1728 u64 vaddr = be64_to_cpu(reth->vaddr);
1729 int ok;
1730
1731 /* Check rkey & NAK */
1732 ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
1733 qp->s_rdma_len, vaddr, rkey,
1734 IB_ACCESS_REMOTE_READ);
1735 if (unlikely(!ok)) {
1736 spin_unlock(&qp->s_lock);
1737 goto nack_acc;
1738 }
1739 /*
1740 * Update the next expected PSN. We add 1 later
1741 * below, so only add the remainder here.
1742 */
1743 if (qp->s_rdma_len > pmtu)
1744 qp->r_psn += (qp->s_rdma_len - 1) / pmtu;
1745 } else {
1746 qp->s_rdma_sge.sg_list = NULL;
1747 qp->s_rdma_sge.num_sge = 0;
1748 qp->s_rdma_sge.sge.mr = NULL;
1749 qp->s_rdma_sge.sge.vaddr = NULL;
1750 qp->s_rdma_sge.sge.length = 0;
1751 qp->s_rdma_sge.sge.sge_length = 0;
1752 }
1753 if (unlikely(!(qp->qp_access_flags &
1754 IB_ACCESS_REMOTE_READ)))
1755 goto nack_acc;
1756 /*
1757 * We need to increment the MSN here instead of when we
1758 * finish sending the result since a duplicate request would
1759 * increment it more than once.
1760 */
1761 atomic_inc(&qp->msn);
1762 qp->s_ack_state = opcode;
1763 qp->s_nak_state = 0;
1764 qp->s_ack_psn = psn;
1765 qp->r_psn++;
1766 qp->r_state = opcode;
1767 goto rdmadone;
1768
1769 case OP(COMPARE_SWAP):
1770 case OP(FETCH_ADD): {
1771 struct ib_atomic_eth *ateth;
1772 u64 vaddr;
1773 u64 sdata;
1774 u32 rkey;
1775
1776 if (!header_in_data)
1777 ateth = &ohdr->u.atomic_eth;
1778 else {
1779 ateth = (struct ib_atomic_eth *)data;
1780 data += sizeof(*ateth);
1781 }
1782 vaddr = be64_to_cpu(ateth->vaddr);
1783 if (unlikely(vaddr & (sizeof(u64) - 1)))
1784 goto nack_inv;
1785 rkey = be32_to_cpu(ateth->rkey);
1786 /* Check rkey & NAK */
1787 if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge,
1788 sizeof(u64), vaddr, rkey,
1789 IB_ACCESS_REMOTE_ATOMIC)))
1790 goto nack_acc;
1791 if (unlikely(!(qp->qp_access_flags &
1792 IB_ACCESS_REMOTE_ATOMIC)))
1793 goto nack_acc;
1794 /* Perform atomic OP and save result. */
1795 sdata = be64_to_cpu(ateth->swap_data);
1796 spin_lock(&dev->pending_lock);
1797 qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
1798 if (opcode == OP(FETCH_ADD))
1799 *(u64 *) qp->r_sge.sge.vaddr =
1800 qp->r_atomic_data + sdata;
1801 else if (qp->r_atomic_data ==
1802 be64_to_cpu(ateth->compare_data))
1803 *(u64 *) qp->r_sge.sge.vaddr = sdata;
1804 spin_unlock(&dev->pending_lock);
1805 atomic_inc(&qp->msn);
1806 qp->r_atomic_psn = psn & IPS_PSN_MASK;
1807 psn |= 1 << 31;
1808 break;
1809 }
1810
1811 default:
1812 /* Drop packet for unknown opcodes. */
1813 goto done;
1814 }
1815 qp->r_psn++;
1816 qp->r_state = opcode;
1817 /* Send an ACK if requested or required. */
1818 if (psn & (1 << 31)) {
1819 /*
1820 * Coalesce ACKs unless there is a RDMA READ or
1821 * ATOMIC pending.
1822 */
1823 spin_lock(&qp->s_lock);
1824 if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
1825 qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST) {
1826 qp->s_ack_state = opcode;
1827 qp->s_nak_state = 0;
1828 qp->s_ack_psn = psn;
1829 qp->s_ack_atomic = qp->r_atomic_data;
1830 goto resched;
1831 }
1832 spin_unlock(&qp->s_lock);
1833 }
1834done:
1835 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
1836 goto bail;
1837
1838resched:
1839 /*
1840 * Try to send ACK right away but not if ipath_do_rc_send() is
1841 * active.
1842 */
1843 if (qp->s_hdrwords == 0 &&
1844 (qp->s_ack_state < IB_OPCODE_RDMA_READ_REQUEST ||
1845 qp->s_ack_state >= IB_OPCODE_COMPARE_SWAP))
1846 send_rc_ack(qp);
1847
1848rdmadone:
1849 spin_unlock(&qp->s_lock);
1850 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
1851
1852 /* Call ipath_do_rc_send() in another thread. */
1853 tasklet_hi_schedule(&qp->s_task);
1854
1855bail:
1856 return;
1857}
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
new file mode 100644
index 000000000000..1e59750c5f63
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -0,0 +1,446 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef _IPATH_REGISTERS_H
34#define _IPATH_REGISTERS_H
35
36/*
37 * This file should only be included by kernel source, and by the diags.
38 * It defines the registers, and their contents, for the InfiniPath HT-400 chip
39 */
40
41/*
42 * These are the InfiniPath register and buffer bit definitions,
43 * that are visible to software, and needed only by the kernel
44 * and diag code. A few, that are visible to protocol and user
45 * code are in ipath_common.h. Some bits are specific
46 * to a given chip implementation, and have been moved to the
47 * chip-specific source file
48 */
49
/*
 * kr_revision bits: each field is extracted as (revision >> SHIFT) & MASK.
 * BOARDID sits at bit 32, so kr_revision must be read as a 64 bit value.
 */
#define INFINIPATH_R_CHIPREVMINOR_MASK 0xFF
#define INFINIPATH_R_CHIPREVMINOR_SHIFT 0
#define INFINIPATH_R_CHIPREVMAJOR_MASK 0xFF
#define INFINIPATH_R_CHIPREVMAJOR_SHIFT 8
#define INFINIPATH_R_ARCH_MASK 0xFF
#define INFINIPATH_R_ARCH_SHIFT 16
#define INFINIPATH_R_SOFTWARE_MASK 0xFF
#define INFINIPATH_R_SOFTWARE_SHIFT 24
#define INFINIPATH_R_BOARDID_MASK 0xFF
#define INFINIPATH_R_BOARDID_SHIFT 32

/* kr_control bits */
#define INFINIPATH_C_FREEZEMODE 0x00000002
#define INFINIPATH_C_LINKENABLE 0x00000004
#define INFINIPATH_C_RESET 0x00000001

/* kr_sendctrl bits */
#define INFINIPATH_S_DISARMPIOBUF_SHIFT 16

/* bit numbers within kr_sendctrl; shifted forms follow below */
#define IPATH_S_ABORT 0
#define IPATH_S_PIOINTBUFAVAIL 1
#define IPATH_S_PIOBUFAVAILUPD 2
#define IPATH_S_PIOENABLE 3
#define IPATH_S_DISARM 31

#define INFINIPATH_S_ABORT (1U << IPATH_S_ABORT)
#define INFINIPATH_S_PIOINTBUFAVAIL (1U << IPATH_S_PIOINTBUFAVAIL)
#define INFINIPATH_S_PIOBUFAVAILUPD (1U << IPATH_S_PIOBUFAVAILUPD)
#define INFINIPATH_S_PIOENABLE (1U << IPATH_S_PIOENABLE)
#define INFINIPATH_S_DISARM (1U << IPATH_S_DISARM)

/* kr_rcvctrl bits (PORTENABLE/INTRAVAIL appear to be per-port bit bases) */
#define INFINIPATH_R_PORTENABLE_SHIFT 0
#define INFINIPATH_R_INTRAVAIL_SHIFT 16
#define INFINIPATH_R_TAILUPD 0x80000000

/* kr_intstatus, kr_intclear, kr_intmask bits (same layout in all three) */
#define INFINIPATH_I_RCVURG_SHIFT 0
#define INFINIPATH_I_RCVAVAIL_SHIFT 12
#define INFINIPATH_I_ERROR 0x80000000
#define INFINIPATH_I_SPIOSENT 0x40000000
#define INFINIPATH_I_SPIOBUFAVAIL 0x20000000
#define INFINIPATH_I_GPIO 0x10000000
94
/*
 * kr_errorstatus, kr_errorclear, kr_errormask bits (same layout in all
 * three).  Judging by the names, INFINIPATH_E_R* are receive-side errors
 * and INFINIPATH_E_S* are send-side errors — TODO confirm against the
 * chip documentation.
 */
#define INFINIPATH_E_RFORMATERR 0x0000000000000001ULL
#define INFINIPATH_E_RVCRC 0x0000000000000002ULL
#define INFINIPATH_E_RICRC 0x0000000000000004ULL
#define INFINIPATH_E_RMINPKTLEN 0x0000000000000008ULL
#define INFINIPATH_E_RMAXPKTLEN 0x0000000000000010ULL
#define INFINIPATH_E_RLONGPKTLEN 0x0000000000000020ULL
#define INFINIPATH_E_RSHORTPKTLEN 0x0000000000000040ULL
#define INFINIPATH_E_RUNEXPCHAR 0x0000000000000080ULL
#define INFINIPATH_E_RUNSUPVL 0x0000000000000100ULL
#define INFINIPATH_E_REBP 0x0000000000000200ULL
#define INFINIPATH_E_RIBFLOW 0x0000000000000400ULL
#define INFINIPATH_E_RBADVERSION 0x0000000000000800ULL
#define INFINIPATH_E_RRCVEGRFULL 0x0000000000001000ULL
#define INFINIPATH_E_RRCVHDRFULL 0x0000000000002000ULL
#define INFINIPATH_E_RBADTID 0x0000000000004000ULL
#define INFINIPATH_E_RHDRLEN 0x0000000000008000ULL
#define INFINIPATH_E_RHDR 0x0000000000010000ULL
#define INFINIPATH_E_RIBLOSTLINK 0x0000000000020000ULL
#define INFINIPATH_E_SMINPKTLEN 0x0000000020000000ULL
#define INFINIPATH_E_SMAXPKTLEN 0x0000000040000000ULL
#define INFINIPATH_E_SUNDERRUN 0x0000000080000000ULL
#define INFINIPATH_E_SPKTLEN 0x0000000100000000ULL
#define INFINIPATH_E_SDROPPEDSMPPKT 0x0000000200000000ULL
#define INFINIPATH_E_SDROPPEDDATAPKT 0x0000000400000000ULL
#define INFINIPATH_E_SPIOARMLAUNCH 0x0000000800000000ULL
#define INFINIPATH_E_SUNEXPERRPKTNUM 0x0000001000000000ULL
#define INFINIPATH_E_SUNSUPVL 0x0000002000000000ULL
#define INFINIPATH_E_IBSTATUSCHANGED 0x0001000000000000ULL
#define INFINIPATH_E_INVALIDADDR 0x0002000000000000ULL
#define INFINIPATH_E_RESET 0x0004000000000000ULL
#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL

/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
 * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: eagerTID, 3: expTID
 * bit 4: flag buffer, 5: datainfo, 6: header info */
#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL
#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL
#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL

/*
 * kr_hwdiagctrl bits: the FORCE* fields mirror the kr_hwerr* layout
 * above and inject the corresponding parity errors for testing.
 */
#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
#define INFINIPATH_DC_FORCETXEMEMPARITYERR_SHIFT 40
#define INFINIPATH_DC_FORCERXEMEMPARITYERR_MASK 0x7FULL
#define INFINIPATH_DC_FORCERXEMEMPARITYERR_SHIFT 44
#define INFINIPATH_DC_FORCERXDSYNCMEMPARITYERR 0x0000000400000000ULL
#define INFINIPATH_DC_COUNTERDISABLE 0x1000000000000000ULL
#define INFINIPATH_DC_COUNTERWREN 0x2000000000000000ULL
#define INFINIPATH_DC_FORCEIBCBUSTOSPCPARITYERR 0x4000000000000000ULL
#define INFINIPATH_DC_FORCEIBCBUSFRSPCPARITYERR 0x8000000000000000ULL

/* kr_ibcctrl bits */
#define INFINIPATH_IBCC_FLOWCTRLPERIOD_MASK 0xFFULL
#define INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT 0
#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_MASK 0xFFULL
#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT 8
#define INFINIPATH_IBCC_LINKINITCMD_MASK 0x3ULL
#define INFINIPATH_IBCC_LINKINITCMD_DISABLE 1
#define INFINIPATH_IBCC_LINKINITCMD_POLL 2 /* cycle through TS1/TS2 till OK */
#define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3 /* wait for TS1, then go on */
#define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16
#define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL
#define INFINIPATH_IBCC_LINKCMD_INIT 1 /* move to 0x11 */
#define INFINIPATH_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */
#define INFINIPATH_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */
#define INFINIPATH_IBCC_LINKCMD_SHIFT 18
#define INFINIPATH_IBCC_MAXPKTLEN_MASK 0x7FFULL
#define INFINIPATH_IBCC_MAXPKTLEN_SHIFT 20
#define INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK 0xFULL
#define INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT 32
#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK 0xFULL
#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT 36
#define INFINIPATH_IBCC_CREDITSCALE_MASK 0x7ULL
#define INFINIPATH_IBCC_CREDITSCALE_SHIFT 40
#define INFINIPATH_IBCC_LOOPBACK 0x8000000000000000ULL
#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL
177
/*
 * kr_ibcstatus bits: link training state occupies bits 3:0 (mask 0xF),
 * link state occupies bits 6:4 (mask 0x7).
 */
#define INFINIPATH_IBCS_LINKTRAININGSTATE_MASK 0xF
#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0
#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7
#define INFINIPATH_IBCS_LINKSTATE_SHIFT 4
#define INFINIPATH_IBCS_TXREADY 0x40000000
#define INFINIPATH_IBCS_TXCREDITOK 0x80000000
/* link training states (shift by INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) */
#define INFINIPATH_IBCS_LT_STATE_DISABLED 0x00
#define INFINIPATH_IBCS_LT_STATE_LINKUP 0x01
#define INFINIPATH_IBCS_LT_STATE_POLLACTIVE 0x02
#define INFINIPATH_IBCS_LT_STATE_POLLQUIET 0x03
#define INFINIPATH_IBCS_LT_STATE_SLEEPDELAY 0x04
#define INFINIPATH_IBCS_LT_STATE_SLEEPQUIET 0x05
#define INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE 0x08
#define INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG 0x09
#define INFINIPATH_IBCS_LT_STATE_CFGWAITRMT 0x0a
#define INFINIPATH_IBCS_LT_STATE_CFGIDLE 0x0b
#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN 0x0c
#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT 0x0e
#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE 0x0f
/* link state machine states (shift by INFINIPATH_IBCS_LINKSTATE_SHIFT) */
#define INFINIPATH_IBCS_L_STATE_DOWN 0x0
#define INFINIPATH_IBCS_L_STATE_INIT 0x1
#define INFINIPATH_IBCS_L_STATE_ARM 0x2
#define INFINIPATH_IBCS_L_STATE_ACTIVE 0x3
#define INFINIPATH_IBCS_L_STATE_ACT_DEFER 0x4

/*
 * Combination link status states that we use with some frequency.
 * Each combined value carries the link state in bits 6:4 and the link
 * training state in bits 3:0 (so INIT/ARM/ACTIVE are 0x11/0x21/0x31,
 * matching the LINKCMD comments above).
 *
 * BUG FIX: the mask previously paired each field's mask with the OTHER
 * field's shift, producing 0xF7 instead of 0x7F.  That dropped bit 3 of
 * the training state, so e.g. link INIT with training state CFGRCVFCFG
 * (0x09) masked to 0x11 and was misread as IPATH_IBSTATE_INIT.  Each
 * mask is now paired with its own shift.
 */
#define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \
			     << INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | \
			    (INFINIPATH_IBCS_LINKSTATE_MASK \
			     << INFINIPATH_IBCS_LINKSTATE_SHIFT))
#define IPATH_IBSTATE_INIT ((INFINIPATH_IBCS_L_STATE_INIT \
			     << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
			    (INFINIPATH_IBCS_LT_STATE_LINKUP \
			     << INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
#define IPATH_IBSTATE_ARM ((INFINIPATH_IBCS_L_STATE_ARM \
			    << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
			   (INFINIPATH_IBCS_LT_STATE_LINKUP \
			    << INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
#define IPATH_IBSTATE_ACTIVE ((INFINIPATH_IBCS_L_STATE_ACTIVE \
			       << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
			      (INFINIPATH_IBCS_LT_STATE_LINKUP \
			       << INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
223
/* kr_extstatus bits */
#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1
#define INFINIPATH_EXTS_GPIOIN_MASK 0xFFFFULL
#define INFINIPATH_EXTS_GPIOIN_SHIFT 48

/* kr_extctrl bits */
#define INFINIPATH_EXTC_GPIOINVERT_MASK 0xFFFFULL
#define INFINIPATH_EXTC_GPIOINVERT_SHIFT 32
#define INFINIPATH_EXTC_GPIOOE_MASK 0xFFFFULL
#define INFINIPATH_EXTC_GPIOOE_SHIFT 48
#define INFINIPATH_EXTC_SERDESENABLE 0x80000000ULL
#define INFINIPATH_EXTC_SERDESCONNECT 0x40000000ULL
#define INFINIPATH_EXTC_SERDESENTRUNKING 0x20000000ULL
#define INFINIPATH_EXTC_SERDESDISRXFIFO 0x10000000ULL
#define INFINIPATH_EXTC_SERDESENPLPBK1 0x08000000ULL
#define INFINIPATH_EXTC_SERDESENPLPBK2 0x04000000ULL
#define INFINIPATH_EXTC_SERDESENENCDEC 0x02000000ULL
#define INFINIPATH_EXTC_LED1SECPORT_ON 0x00000020ULL
#define INFINIPATH_EXTC_LED2SECPORT_ON 0x00000010ULL
#define INFINIPATH_EXTC_LED1PRIPORT_ON 0x00000008ULL
#define INFINIPATH_EXTC_LED2PRIPORT_ON 0x00000004ULL
#define INFINIPATH_EXTC_LEDGBLOK_ON 0x00000002ULL
#define INFINIPATH_EXTC_LEDGBLERR_OFF 0x00000001ULL

/*
 * kr_mdio bits: CMDVALID/RDDATAVALID are handshake flags for MDIO
 * transactions; the remaining fields are (value & MASK) << SHIFT.
 */
#define INFINIPATH_MDIO_CLKDIV_MASK 0x7FULL
#define INFINIPATH_MDIO_CLKDIV_SHIFT 32
#define INFINIPATH_MDIO_COMMAND_MASK 0x7ULL
#define INFINIPATH_MDIO_COMMAND_SHIFT 26
#define INFINIPATH_MDIO_DEVADDR_MASK 0x1FULL
#define INFINIPATH_MDIO_DEVADDR_SHIFT 21
#define INFINIPATH_MDIO_REGADDR_MASK 0x1FULL
#define INFINIPATH_MDIO_REGADDR_SHIFT 16
#define INFINIPATH_MDIO_DATA_MASK 0xFFFFULL
#define INFINIPATH_MDIO_DATA_SHIFT 0
#define INFINIPATH_MDIO_CMDVALID 0x0000000040000000ULL
#define INFINIPATH_MDIO_RDDATAVALID 0x0000000080000000ULL

/* kr_partitionkey bits (16-bit PKeys; 0xFFFF is the IBA default PKey) */
#define INFINIPATH_PKEY_SIZE 16
#define INFINIPATH_PKEY_MASK 0xFFFF
#define INFINIPATH_PKEY_DEFAULT_PKEY 0xFFFF
266
/* kr_serdesconfig0 bits */
#define INFINIPATH_SERDC0_RESET_MASK 0xfULL /* overall reset bits */
#define INFINIPATH_SERDC0_RESET_PLL 0x10000000ULL /* pll reset */
#define INFINIPATH_SERDC0_TXIDLE 0xF000ULL /* tx idle enables (per lane) */
#define INFINIPATH_SERDC0_RXDETECT_EN 0xF0000ULL /* rx detect enables (per lane) */
#define INFINIPATH_SERDC0_L1PWR_DN 0xF0ULL /* L1 power down; use with RXDETECT,
					      otherwise not used on IB side */

/* kr_xgxsconfig bits */
#define INFINIPATH_XGXS_RESET 0x7ULL
#define INFINIPATH_XGXS_MDIOADDR_MASK 0xfULL
#define INFINIPATH_XGXS_MDIOADDR_SHIFT 4

#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */

/* TID entries (memory), HT400-only */
#define INFINIPATH_RT_VALID 0x8000000000000000ULL
#define INFINIPATH_RT_ADDR_SHIFT 0
#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
#define INFINIPATH_RT_BUFSIZE_SHIFT 48

/*
 * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
 * PIO send buffers. This is well beyond anything currently
 * defined in the InfiniBand spec.
 */
#define IPATH_PIO_MAXIBHDR 128
294
/* 64-bit error-register value, as used by kr_errorstatus and friends */
typedef u64 ipath_err_t;

/* mask of defined bits for various registers */
extern u64 infinipath_i_bitsextant;
extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;

/* masks that are different in various chips, or only exist in some chips */
extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;

/*
 * register bits for selecting i2c direction and values, used for I2C serial
 * flash
 */
extern u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
extern u64 ipath_gpio_sda, ipath_gpio_scl;

/*
 * These are the infinipath general register numbers (not offsets).
 * The kernel registers are used directly, those beyond the kernel
 * registers are calculated from one of the base registers. The use of
 * an integer type doesn't allow type-checking as thorough as, say,
 * an enum but allows for better hiding of chip differences.
 */
typedef const u16 ipath_kreg,	/* infinipath general registers */
	ipath_creg,		/* infinipath counter registers */
	ipath_sreg;		/* kernel-only, infinipath send registers */
321
322/*
323 * These are the chip registers common to all infinipath chips, and
324 * used both by the kernel and the diagnostics or other user code.
325 * They are all implemented such that 64 bit accesses work.
326 * Some implement no more than 32 bits. Because 64 bit reads
327 * require 2 HT cmds on opteron, we access those with 32 bit
328 * reads for efficiency (they are written as 64 bits, since
329 * the extra 32 bits are nearly free on writes, and it slightly reduces
330 * complexity). The rest are all accessed as 64 bits.
331 */
/*
 * Per-chip table of general (kernel) register numbers.  Each member
 * holds the register number for that chip; presumably each chip-specific
 * source file provides one instance with its own layout — TODO confirm
 * against the ipath_ht400/ipath_pe800 files.
 */
struct ipath_kregs {
	/* These are the 32 bit group */
	ipath_kreg kr_control;
	ipath_kreg kr_counterregbase;
	ipath_kreg kr_intmask;
	ipath_kreg kr_intstatus;
	ipath_kreg kr_pagealign;
	ipath_kreg kr_portcnt;
	ipath_kreg kr_rcvtidbase;
	ipath_kreg kr_rcvtidcnt;
	ipath_kreg kr_rcvegrbase;
	ipath_kreg kr_rcvegrcnt;
	ipath_kreg kr_scratch;
	ipath_kreg kr_sendctrl;
	ipath_kreg kr_sendpiobufbase;
	ipath_kreg kr_sendpiobufcnt;
	ipath_kreg kr_sendpiosize;
	ipath_kreg kr_sendregbase;
	ipath_kreg kr_userregbase;
	/* These are the 64 bit group */
	ipath_kreg kr_debugport;
	ipath_kreg kr_debugportselect;
	ipath_kreg kr_errorclear;
	ipath_kreg kr_errormask;
	ipath_kreg kr_errorstatus;
	ipath_kreg kr_extctrl;
	ipath_kreg kr_extstatus;
	ipath_kreg kr_gpio_clear;
	ipath_kreg kr_gpio_mask;
	ipath_kreg kr_gpio_out;
	ipath_kreg kr_gpio_status;
	ipath_kreg kr_hwdiagctrl;
	ipath_kreg kr_hwerrclear;
	ipath_kreg kr_hwerrmask;
	ipath_kreg kr_hwerrstatus;
	ipath_kreg kr_ibcctrl;
	ipath_kreg kr_ibcstatus;
	ipath_kreg kr_intblocked;
	ipath_kreg kr_intclear;
	ipath_kreg kr_interruptconfig;
	ipath_kreg kr_mdio;
	ipath_kreg kr_partitionkey;
	ipath_kreg kr_rcvbthqp;
	ipath_kreg kr_rcvbufbase;
	ipath_kreg kr_rcvbufsize;
	ipath_kreg kr_rcvctrl;
	ipath_kreg kr_rcvhdrcnt;
	ipath_kreg kr_rcvhdrentsize;
	ipath_kreg kr_rcvhdrsize;
	ipath_kreg kr_rcvintmembase;
	ipath_kreg kr_rcvintmemsize;
	ipath_kreg kr_revision;
	ipath_kreg kr_sendbuffererror;
	ipath_kreg kr_sendpioavailaddr;
	ipath_kreg kr_serdesconfig0;
	ipath_kreg kr_serdesconfig1;
	ipath_kreg kr_serdesstatus;
	ipath_kreg kr_txintmembase;
	ipath_kreg kr_txintmemsize;
	ipath_kreg kr_xgxsconfig;
	ipath_kreg kr_ibpllcfg;
	/*
	 * use these two (and the following N ports) only with
	 * ipath_k*_kreg64_port(); not *kreg64()
	 */
	ipath_kreg kr_rcvhdraddr;
	ipath_kreg kr_rcvhdrtailaddr;

	/* remaining registers are not present on all types of infinipath
	   chips */
	ipath_kreg kr_rcvpktledcnt;
	ipath_kreg kr_pcierbuftestreg0;
	ipath_kreg kr_pcierbuftestreg1;
	ipath_kreg kr_pcieq0serdesconfig0;
	ipath_kreg kr_pcieq0serdesconfig1;
	ipath_kreg kr_pcieq0serdesstatus;
	ipath_kreg kr_pcieq1serdesconfig0;
	ipath_kreg kr_pcieq1serdesconfig1;
	ipath_kreg kr_pcieq1serdesstatus;
};
409
/*
 * Per-chip table of counter register numbers (error, packet, and word
 * counters), analogous to struct ipath_kregs above.
 */
struct ipath_cregs {
	ipath_creg cr_badformatcnt;
	ipath_creg cr_erricrccnt;
	ipath_creg cr_errlinkcnt;
	ipath_creg cr_errlpcrccnt;
	ipath_creg cr_errpkey;
	ipath_creg cr_errrcvflowctrlcnt;
	ipath_creg cr_err_rlencnt;
	ipath_creg cr_errslencnt;
	ipath_creg cr_errtidfull;
	ipath_creg cr_errtidvalid;
	ipath_creg cr_errvcrccnt;
	ipath_creg cr_ibstatuschange;
	ipath_creg cr_intcnt;
	ipath_creg cr_invalidrlencnt;
	ipath_creg cr_invalidslencnt;
	ipath_creg cr_lbflowstallcnt;
	ipath_creg cr_iblinkdowncnt;
	ipath_creg cr_iblinkerrrecovcnt;
	ipath_creg cr_ibsymbolerrcnt;
	ipath_creg cr_pktrcvcnt;
	ipath_creg cr_pktrcvflowctrlcnt;
	ipath_creg cr_pktsendcnt;
	ipath_creg cr_pktsendflowcnt;
	ipath_creg cr_portovflcnt;
	ipath_creg cr_rcvebpcnt;
	ipath_creg cr_rcvovflcnt;
	ipath_creg cr_rxdroppktcnt;
	ipath_creg cr_senddropped;
	ipath_creg cr_sendstallcnt;
	ipath_creg cr_sendunderruncnt;
	ipath_creg cr_unsupvlcnt;
	ipath_creg cr_wordrcvcnt;
	ipath_creg cr_wordsendcnt;
};
445
446#endif /* _IPATH_REGISTERS_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
new file mode 100644
index 000000000000..f232e77b78ee
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -0,0 +1,552 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "ipath_verbs.h"
34
35/*
36 * Convert the AETH RNR timeout code into the number of milliseconds.
37 */
38const u32 ib_ipath_rnr_table[32] = {
39 656, /* 0 */
40 1, /* 1 */
41 1, /* 2 */
42 1, /* 3 */
43 1, /* 4 */
44 1, /* 5 */
45 1, /* 6 */
46 1, /* 7 */
47 1, /* 8 */
48 1, /* 9 */
49 1, /* A */
50 1, /* B */
51 1, /* C */
52 1, /* D */
53 2, /* E */
54 2, /* F */
55 3, /* 10 */
56 4, /* 11 */
57 6, /* 12 */
58 8, /* 13 */
59 11, /* 14 */
60 16, /* 15 */
61 21, /* 16 */
62 31, /* 17 */
63 41, /* 18 */
64 62, /* 19 */
65 82, /* 1A */
66 123, /* 1B */
67 164, /* 1C */
68 246, /* 1D */
69 328, /* 1E */
70 492 /* 1F */
71};
72
73/**
74 * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
75 * @qp: the QP
76 *
77 * XXX Use a simple list for now. We might need a priority
78 * queue if we have lots of QPs waiting for RNR timeouts
79 * but that should be rare.
80 */
81void ipath_insert_rnr_queue(struct ipath_qp *qp)
82{
83 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
84 unsigned long flags;
85
86 spin_lock_irqsave(&dev->pending_lock, flags);
87 if (list_empty(&dev->rnrwait))
88 list_add(&qp->timerwait, &dev->rnrwait);
89 else {
90 struct list_head *l = &dev->rnrwait;
91 struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp,
92 timerwait);
93
94 while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
95 qp->s_rnr_timeout -= nqp->s_rnr_timeout;
96 l = l->next;
97 if (l->next == &dev->rnrwait)
98 break;
99 nqp = list_entry(l->next, struct ipath_qp,
100 timerwait);
101 }
102 list_add(&qp->timerwait, l);
103 }
104 spin_unlock_irqrestore(&dev->pending_lock, flags);
105}
106
107/**
108 * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
109 * @qp: the QP
110 * @wr_id_only: update wr_id only, not SGEs
111 *
112 * Return 0 if no RWQE is available, otherwise return 1.
113 *
114 * Called at interrupt level with the QP r_rq.lock held.
115 */
116int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
117{
118 struct ipath_rq *rq;
119 struct ipath_srq *srq;
120 struct ipath_rwqe *wqe;
121 int ret;
122
123 if (!qp->ibqp.srq) {
124 rq = &qp->r_rq;
125 if (unlikely(rq->tail == rq->head)) {
126 ret = 0;
127 goto bail;
128 }
129 wqe = get_rwqe_ptr(rq, rq->tail);
130 qp->r_wr_id = wqe->wr_id;
131 if (!wr_id_only) {
132 qp->r_sge.sge = wqe->sg_list[0];
133 qp->r_sge.sg_list = wqe->sg_list + 1;
134 qp->r_sge.num_sge = wqe->num_sge;
135 qp->r_len = wqe->length;
136 }
137 if (++rq->tail >= rq->size)
138 rq->tail = 0;
139 ret = 1;
140 goto bail;
141 }
142
143 srq = to_isrq(qp->ibqp.srq);
144 rq = &srq->rq;
145 spin_lock(&rq->lock);
146 if (unlikely(rq->tail == rq->head)) {
147 spin_unlock(&rq->lock);
148 ret = 0;
149 goto bail;
150 }
151 wqe = get_rwqe_ptr(rq, rq->tail);
152 qp->r_wr_id = wqe->wr_id;
153 if (!wr_id_only) {
154 qp->r_sge.sge = wqe->sg_list[0];
155 qp->r_sge.sg_list = wqe->sg_list + 1;
156 qp->r_sge.num_sge = wqe->num_sge;
157 qp->r_len = wqe->length;
158 }
159 if (++rq->tail >= rq->size)
160 rq->tail = 0;
161 if (srq->ibsrq.event_handler) {
162 struct ib_event ev;
163 u32 n;
164
165 if (rq->head < rq->tail)
166 n = rq->size + rq->head - rq->tail;
167 else
168 n = rq->head - rq->tail;
169 if (n < srq->limit) {
170 srq->limit = 0;
171 spin_unlock(&rq->lock);
172 ev.device = qp->ibqp.device;
173 ev.element.srq = qp->ibqp.srq;
174 ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
175 srq->ibsrq.event_handler(&ev,
176 srq->ibsrq.srq_context);
177 } else
178 spin_unlock(&rq->lock);
179 } else
180 spin_unlock(&rq->lock);
181 ret = 1;
182
183bail:
184 return ret;
185}
186
187/**
188 * ipath_ruc_loopback - handle UC and RC lookback requests
189 * @sqp: the loopback QP
190 * @wc: the work completion entry
191 *
192 * This is called from ipath_do_uc_send() or ipath_do_rc_send() to
193 * forward a WQE addressed to the same HCA.
194 * Note that although we are single threaded due to the tasklet, we still
195 * have to protect against post_send(). We don't have to worry about
196 * receive interrupts since this is a connected protocol and all packets
197 * will pass through here.
198 */
199void ipath_ruc_loopback(struct ipath_qp *sqp, struct ib_wc *wc)
200{
201 struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
202 struct ipath_qp *qp;
203 struct ipath_swqe *wqe;
204 struct ipath_sge *sge;
205 unsigned long flags;
206 u64 sdata;
207
208 qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
209 if (!qp) {
210 dev->n_pkt_drops++;
211 return;
212 }
213
214again:
215 spin_lock_irqsave(&sqp->s_lock, flags);
216
217 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) {
218 spin_unlock_irqrestore(&sqp->s_lock, flags);
219 goto done;
220 }
221
222 /* Get the next send request. */
223 if (sqp->s_last == sqp->s_head) {
224 /* Send work queue is empty. */
225 spin_unlock_irqrestore(&sqp->s_lock, flags);
226 goto done;
227 }
228
229 /*
230 * We can rely on the entry not changing without the s_lock
231 * being held until we update s_last.
232 */
233 wqe = get_swqe_ptr(sqp, sqp->s_last);
234 spin_unlock_irqrestore(&sqp->s_lock, flags);
235
236 wc->wc_flags = 0;
237 wc->imm_data = 0;
238
239 sqp->s_sge.sge = wqe->sg_list[0];
240 sqp->s_sge.sg_list = wqe->sg_list + 1;
241 sqp->s_sge.num_sge = wqe->wr.num_sge;
242 sqp->s_len = wqe->length;
243 switch (wqe->wr.opcode) {
244 case IB_WR_SEND_WITH_IMM:
245 wc->wc_flags = IB_WC_WITH_IMM;
246 wc->imm_data = wqe->wr.imm_data;
247 /* FALLTHROUGH */
248 case IB_WR_SEND:
249 spin_lock_irqsave(&qp->r_rq.lock, flags);
250 if (!ipath_get_rwqe(qp, 0)) {
251 rnr_nak:
252 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
253 /* Handle RNR NAK */
254 if (qp->ibqp.qp_type == IB_QPT_UC)
255 goto send_comp;
256 if (sqp->s_rnr_retry == 0) {
257 wc->status = IB_WC_RNR_RETRY_EXC_ERR;
258 goto err;
259 }
260 if (sqp->s_rnr_retry_cnt < 7)
261 sqp->s_rnr_retry--;
262 dev->n_rnr_naks++;
263 sqp->s_rnr_timeout =
264 ib_ipath_rnr_table[sqp->s_min_rnr_timer];
265 ipath_insert_rnr_queue(sqp);
266 goto done;
267 }
268 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
269 break;
270
271 case IB_WR_RDMA_WRITE_WITH_IMM:
272 wc->wc_flags = IB_WC_WITH_IMM;
273 wc->imm_data = wqe->wr.imm_data;
274 spin_lock_irqsave(&qp->r_rq.lock, flags);
275 if (!ipath_get_rwqe(qp, 1))
276 goto rnr_nak;
277 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
278 /* FALLTHROUGH */
279 case IB_WR_RDMA_WRITE:
280 if (wqe->length == 0)
281 break;
282 if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length,
283 wqe->wr.wr.rdma.remote_addr,
284 wqe->wr.wr.rdma.rkey,
285 IB_ACCESS_REMOTE_WRITE))) {
286 acc_err:
287 wc->status = IB_WC_REM_ACCESS_ERR;
288 err:
289 wc->wr_id = wqe->wr.wr_id;
290 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
291 wc->vendor_err = 0;
292 wc->byte_len = 0;
293 wc->qp_num = sqp->ibqp.qp_num;
294 wc->src_qp = sqp->remote_qpn;
295 wc->pkey_index = 0;
296 wc->slid = sqp->remote_ah_attr.dlid;
297 wc->sl = sqp->remote_ah_attr.sl;
298 wc->dlid_path_bits = 0;
299 wc->port_num = 0;
300 ipath_sqerror_qp(sqp, wc);
301 goto done;
302 }
303 break;
304
305 case IB_WR_RDMA_READ:
306 if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length,
307 wqe->wr.wr.rdma.remote_addr,
308 wqe->wr.wr.rdma.rkey,
309 IB_ACCESS_REMOTE_READ)))
310 goto acc_err;
311 if (unlikely(!(qp->qp_access_flags &
312 IB_ACCESS_REMOTE_READ)))
313 goto acc_err;
314 qp->r_sge.sge = wqe->sg_list[0];
315 qp->r_sge.sg_list = wqe->sg_list + 1;
316 qp->r_sge.num_sge = wqe->wr.num_sge;
317 break;
318
319 case IB_WR_ATOMIC_CMP_AND_SWP:
320 case IB_WR_ATOMIC_FETCH_AND_ADD:
321 if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64),
322 wqe->wr.wr.rdma.remote_addr,
323 wqe->wr.wr.rdma.rkey,
324 IB_ACCESS_REMOTE_ATOMIC)))
325 goto acc_err;
326 /* Perform atomic OP and save result. */
327 sdata = wqe->wr.wr.atomic.swap;
328 spin_lock_irqsave(&dev->pending_lock, flags);
329 qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
330 if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
331 *(u64 *) qp->r_sge.sge.vaddr =
332 qp->r_atomic_data + sdata;
333 else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add)
334 *(u64 *) qp->r_sge.sge.vaddr = sdata;
335 spin_unlock_irqrestore(&dev->pending_lock, flags);
336 *(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data;
337 goto send_comp;
338
339 default:
340 goto done;
341 }
342
343 sge = &sqp->s_sge.sge;
344 while (sqp->s_len) {
345 u32 len = sqp->s_len;
346
347 if (len > sge->length)
348 len = sge->length;
349 BUG_ON(len == 0);
350 ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
351 sge->vaddr += len;
352 sge->length -= len;
353 sge->sge_length -= len;
354 if (sge->sge_length == 0) {
355 if (--sqp->s_sge.num_sge)
356 *sge = *sqp->s_sge.sg_list++;
357 } else if (sge->length == 0 && sge->mr != NULL) {
358 if (++sge->n >= IPATH_SEGSZ) {
359 if (++sge->m >= sge->mr->mapsz)
360 break;
361 sge->n = 0;
362 }
363 sge->vaddr =
364 sge->mr->map[sge->m]->segs[sge->n].vaddr;
365 sge->length =
366 sge->mr->map[sge->m]->segs[sge->n].length;
367 }
368 sqp->s_len -= len;
369 }
370
371 if (wqe->wr.opcode == IB_WR_RDMA_WRITE ||
372 wqe->wr.opcode == IB_WR_RDMA_READ)
373 goto send_comp;
374
375 if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
376 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
377 else
378 wc->opcode = IB_WC_RECV;
379 wc->wr_id = qp->r_wr_id;
380 wc->status = IB_WC_SUCCESS;
381 wc->vendor_err = 0;
382 wc->byte_len = wqe->length;
383 wc->qp_num = qp->ibqp.qp_num;
384 wc->src_qp = qp->remote_qpn;
385 /* XXX do we know which pkey matched? Only needed for GSI. */
386 wc->pkey_index = 0;
387 wc->slid = qp->remote_ah_attr.dlid;
388 wc->sl = qp->remote_ah_attr.sl;
389 wc->dlid_path_bits = 0;
390 /* Signal completion event if the solicited bit is set. */
391 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
392 wqe->wr.send_flags & IB_SEND_SOLICITED);
393
394send_comp:
395 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
396
397 if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) ||
398 (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
399 wc->wr_id = wqe->wr.wr_id;
400 wc->status = IB_WC_SUCCESS;
401 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
402 wc->vendor_err = 0;
403 wc->byte_len = wqe->length;
404 wc->qp_num = sqp->ibqp.qp_num;
405 wc->src_qp = 0;
406 wc->pkey_index = 0;
407 wc->slid = 0;
408 wc->sl = 0;
409 wc->dlid_path_bits = 0;
410 wc->port_num = 0;
411 ipath_cq_enter(to_icq(sqp->ibqp.send_cq), wc, 0);
412 }
413
414 /* Update s_last now that we are finished with the SWQE */
415 spin_lock_irqsave(&sqp->s_lock, flags);
416 if (++sqp->s_last >= sqp->s_size)
417 sqp->s_last = 0;
418 spin_unlock_irqrestore(&sqp->s_lock, flags);
419 goto again;
420
421done:
422 if (atomic_dec_and_test(&qp->refcount))
423 wake_up(&qp->wait);
424}
425
426/**
427 * ipath_no_bufs_available - tell the layer driver we need buffers
428 * @qp: the QP that caused the problem
429 * @dev: the device we ran out of buffers on
430 *
431 * Called when we run out of PIO buffers.
432 */
433void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
434{
435 unsigned long flags;
436
437 spin_lock_irqsave(&dev->pending_lock, flags);
438 if (qp->piowait.next == LIST_POISON1)	/* NOTE(review): detects "not queued" via list_del() poisoning; the conventional idiom is list_empty() with list_del_init() — confirm the QP init/removal paths before changing */
439 	list_add_tail(&qp->piowait, &dev->piowait);
440 spin_unlock_irqrestore(&dev->pending_lock, flags);
441 /*
442 * Note that as soon as ipath_layer_want_buffer() is called and
443 * possibly before it returns, ipath_ib_piobufavail()
444 * could be called. If we are still in the tasklet function,
445 * tasklet_hi_schedule() will not call us until the next time
446 * tasklet_hi_schedule() is called.
447 * We clear the tasklet flag now since we are committing to return
448 * from the tasklet function.
449 */
450 clear_bit(IPATH_S_BUSY, &qp->s_flags);
451 tasklet_unlock(&qp->s_task);
452 ipath_layer_want_buffer(dev->dd);
453 dev->n_piowait++;	/* statistics only */
454}
455
456/**
457 * ipath_post_rc_send - post RC and UC sends
458 * @qp: the QP to post on
459 * @wr: the work request to send
460 */
461int ipath_post_rc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
462{
463 struct ipath_swqe *wqe;
464 unsigned long flags;
465 u32 next;
466 int i, j;
467 int acc;
468 int ret;
469
470 /*
471 * Don't allow RDMA reads or atomic operations on UC or
472 * undefined operations.
473 * Make sure buffer is large enough to hold the result for atomics.
474 */
475 if (qp->ibqp.qp_type == IB_QPT_UC) {
476 if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) {
477 ret = -EINVAL;
478 goto bail;
479 }
480 } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
481 ret = -EINVAL;
482 goto bail;
483 } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
484 (wr->num_sge == 0 ||
485 wr->sg_list[0].length < sizeof(u64) ||
486 wr->sg_list[0].addr & (sizeof(u64) - 1))) {
487 ret = -EINVAL;
488 goto bail;
489 }
490 /* IB spec says that num_sge == 0 is OK. */
491 if (wr->num_sge > qp->s_max_sge) {
492 ret = -ENOMEM;
493 goto bail;
494 }
495 spin_lock_irqsave(&qp->s_lock, flags);
496 next = qp->s_head + 1;
497 if (next >= qp->s_size)
498 next = 0;
499 if (next == qp->s_last) {
500 spin_unlock_irqrestore(&qp->s_lock, flags);
501 ret = -EINVAL;
502 goto bail;
503 }
504
505 wqe = get_swqe_ptr(qp, qp->s_head);
506 wqe->wr = *wr;
507 wqe->ssn = qp->s_ssn++;
508 wqe->sg_list[0].mr = NULL;
509 wqe->sg_list[0].vaddr = NULL;
510 wqe->sg_list[0].length = 0;
511 wqe->sg_list[0].sge_length = 0;
512 wqe->length = 0;
513 acc = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0;
514 for (i = 0, j = 0; i < wr->num_sge; i++) {
515 if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) {
516 spin_unlock_irqrestore(&qp->s_lock, flags);
517 ret = -EINVAL;
518 goto bail;
519 }
520 if (wr->sg_list[i].length == 0)
521 continue;
522 if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table,
523 &wqe->sg_list[j], &wr->sg_list[i],
524 acc)) {
525 spin_unlock_irqrestore(&qp->s_lock, flags);
526 ret = -EINVAL;
527 goto bail;
528 }
529 wqe->length += wr->sg_list[i].length;
530 j++;
531 }
532 wqe->wr.num_sge = j;
533 qp->s_head = next;
534 /*
535 * Wake up the send tasklet if the QP is not waiting
536 * for an RNR timeout.
537 */
538 next = qp->s_rnr_timeout;
539 spin_unlock_irqrestore(&qp->s_lock, flags);
540
541 if (next == 0) {
542 if (qp->ibqp.qp_type == IB_QPT_UC)
543 ipath_do_uc_send((unsigned long) qp);
544 else
545 ipath_do_rc_send((unsigned long) qp);
546 }
547
548 ret = 0;
549
550bail:
551 return ret;
552}
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
new file mode 100644
index 000000000000..01c4c6c56118
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -0,0 +1,273 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/err.h>
34#include <linux/vmalloc.h>
35
36#include "ipath_verbs.h"
37
38/**
39 * ipath_post_srq_receive - post a receive on a shared receive queue
40 * @ibsrq: the SRQ to post the receive on
41 * @wr: the list of work requests to post
42 * @bad_wr: the first WR to cause a problem is put here
43 *
44 * This may be called from interrupt context.
45 */
46int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
47 struct ib_recv_wr **bad_wr)
48{
49 struct ipath_srq *srq = to_isrq(ibsrq);
50 struct ipath_ibdev *dev = to_idev(ibsrq->device);
51 unsigned long flags;
52 int ret;
53
54 for (; wr; wr = wr->next) {
55 struct ipath_rwqe *wqe;
56 u32 next;
57 int i, j;
58
59 if (wr->num_sge > srq->rq.max_sge) {
60 *bad_wr = wr;
61 ret = -ENOMEM;
62 goto bail;
63 }
64
65 spin_lock_irqsave(&srq->rq.lock, flags);
66 next = srq->rq.head + 1;
67 if (next >= srq->rq.size)
68 next = 0;
69 if (next == srq->rq.tail) {
70 spin_unlock_irqrestore(&srq->rq.lock, flags);
71 *bad_wr = wr;
72 ret = -ENOMEM;
73 goto bail;
74 }
75
76 wqe = get_rwqe_ptr(&srq->rq, srq->rq.head);
77 wqe->wr_id = wr->wr_id;
78 wqe->sg_list[0].mr = NULL;
79 wqe->sg_list[0].vaddr = NULL;
80 wqe->sg_list[0].length = 0;
81 wqe->sg_list[0].sge_length = 0;
82 wqe->length = 0;
83 for (i = 0, j = 0; i < wr->num_sge; i++) {
84 /* Check LKEY */
85 if (to_ipd(srq->ibsrq.pd)->user &&
86 wr->sg_list[i].lkey == 0) {
87 spin_unlock_irqrestore(&srq->rq.lock,
88 flags);
89 *bad_wr = wr;
90 ret = -EINVAL;
91 goto bail;
92 }
93 if (wr->sg_list[i].length == 0)
94 continue;
95 if (!ipath_lkey_ok(&dev->lk_table,
96 &wqe->sg_list[j],
97 &wr->sg_list[i],
98 IB_ACCESS_LOCAL_WRITE)) {
99 spin_unlock_irqrestore(&srq->rq.lock,
100 flags);
101 *bad_wr = wr;
102 ret = -EINVAL;
103 goto bail;
104 }
105 wqe->length += wr->sg_list[i].length;
106 j++;
107 }
108 wqe->num_sge = j;
109 srq->rq.head = next;
110 spin_unlock_irqrestore(&srq->rq.lock, flags);
111 }
112 ret = 0;
113
114bail:
115 return ret;
116}
117
118/**
119 * ipath_create_srq - create a shared receive queue
120 * @ibpd: the protection domain of the SRQ to create
121 * @attr: the attributes of the SRQ
122 * @udata: not used by the InfiniPath verbs driver
123 */
124struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
125 struct ib_srq_init_attr *srq_init_attr,
126 struct ib_udata *udata)
127{
128 struct ipath_srq *srq;
129 u32 sz;
130 struct ib_srq *ret;
131
132 if (srq_init_attr->attr.max_sge < 1) {
133 ret = ERR_PTR(-EINVAL);
134 goto bail;
135 }
136
137 srq = kmalloc(sizeof(*srq), GFP_KERNEL);
138 if (!srq) {
139 ret = ERR_PTR(-ENOMEM);
140 goto bail;
141 }
142
143 /*
144 * Need to use vmalloc() if we want to support large #s of entries.
145 */
146 srq->rq.size = srq_init_attr->attr.max_wr + 1;
147 sz = sizeof(struct ipath_sge) * srq_init_attr->attr.max_sge +
148 sizeof(struct ipath_rwqe);
149 srq->rq.wq = vmalloc(srq->rq.size * sz);
150 if (!srq->rq.wq) {
151 kfree(srq);
152 ret = ERR_PTR(-ENOMEM);
153 goto bail;
154 }
155
156 /*
157 * ib_create_srq() will initialize srq->ibsrq.
158 */
159 spin_lock_init(&srq->rq.lock);
160 srq->rq.head = 0;
161 srq->rq.tail = 0;
162 srq->rq.max_sge = srq_init_attr->attr.max_sge;
163 srq->limit = srq_init_attr->attr.srq_limit;
164
165 ret = &srq->ibsrq;
166
167bail:
168 return ret;
169}
170
171/**
172 * ipath_modify_srq - modify a shared receive queue
173 * @ibsrq: the SRQ to modify
174 * @attr: the new attributes of the SRQ
175 * @attr_mask: indicates which attributes to modify
176 */
177int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
178 enum ib_srq_attr_mask attr_mask)
179{
180 struct ipath_srq *srq = to_isrq(ibsrq);
181 unsigned long flags;
182 int ret;
183
184 if (attr_mask & IB_SRQ_LIMIT) {
185 spin_lock_irqsave(&srq->rq.lock, flags);
186 srq->limit = attr->srq_limit;
187 spin_unlock_irqrestore(&srq->rq.lock, flags);
188 }
189 if (attr_mask & IB_SRQ_MAX_WR) {
190 u32 size = attr->max_wr + 1;
191 struct ipath_rwqe *wq, *p;
192 u32 n;
193 u32 sz;
194
195 if (attr->max_sge < srq->rq.max_sge) {
196 ret = -EINVAL;
197 goto bail;
198 }
199
200 sz = sizeof(struct ipath_rwqe) +
201 attr->max_sge * sizeof(struct ipath_sge);
202 wq = vmalloc(size * sz);
203 if (!wq) {
204 ret = -ENOMEM;
205 goto bail;
206 }
207
208 spin_lock_irqsave(&srq->rq.lock, flags);
209 if (srq->rq.head < srq->rq.tail)
210 n = srq->rq.size + srq->rq.head - srq->rq.tail;
211 else
212 n = srq->rq.head - srq->rq.tail;
213 if (size <= n || size <= srq->limit) {
214 spin_unlock_irqrestore(&srq->rq.lock, flags);
215 vfree(wq);
216 ret = -EINVAL;
217 goto bail;
218 }
219 n = 0;
220 p = wq;
221 while (srq->rq.tail != srq->rq.head) {
222 struct ipath_rwqe *wqe;
223 int i;
224
225 wqe = get_rwqe_ptr(&srq->rq, srq->rq.tail);
226 p->wr_id = wqe->wr_id;
227 p->length = wqe->length;
228 p->num_sge = wqe->num_sge;
229 for (i = 0; i < wqe->num_sge; i++)
230 p->sg_list[i] = wqe->sg_list[i];
231 n++;
232 p = (struct ipath_rwqe *)((char *) p + sz);
233 if (++srq->rq.tail >= srq->rq.size)
234 srq->rq.tail = 0;
235 }
236 vfree(srq->rq.wq);
237 srq->rq.wq = wq;
238 srq->rq.size = size;
239 srq->rq.head = n;
240 srq->rq.tail = 0;
241 srq->rq.max_sge = attr->max_sge;
242 spin_unlock_irqrestore(&srq->rq.lock, flags);
243 }
244
245 ret = 0;
246
247bail:
248 return ret;
249}
250
251int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
252{
253 struct ipath_srq *srq = to_isrq(ibsrq);
254
255 attr->max_wr = srq->rq.size - 1;
256 attr->max_sge = srq->rq.max_sge;
257 attr->srq_limit = srq->limit;
258 return 0;
259}
260
261/**
262 * ipath_destroy_srq - destroy a shared receive queue
263 * @ibsrq: the SRQ to destroy
264 */
265int ipath_destroy_srq(struct ib_srq *ibsrq)
266{
267 struct ipath_srq *srq = to_isrq(ibsrq);
268
269 vfree(srq->rq.wq);
270 kfree(srq);
271
272 return 0;
273}
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
new file mode 100644
index 000000000000..fe209137ee74
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -0,0 +1,303 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/pci.h>
34
35#include "ipath_kernel.h"
36
37struct infinipath_stats ipath_stats;	/* driver-wide statistics; fields are consulted by ipath_qcheck() and ipath_get_faststats() below */
38
39/**
40 * ipath_snap_cntr - snapshot a chip counter
41 * @dd: the infinipath device
42 * @creg: the counter to snapshot
43 *
44 * called from add_timer and user counter read calls, to deal with
45 * counters that wrap in "human time". The words sent and received, and
46 * the packets sent and received are all that we worry about. For now,
47 * at least, we don't worry about error counters, because if they wrap
48 * that quickly, we probably don't care. We may eventually just make this
49 * handle all the counters. word counters can wrap in about 20 seconds
50 * of full bandwidth traffic, packet counters in a few hours.
51 */
52
53u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
54{
55 u32 val, reg64 = 0;
56 u64 val64;
57 unsigned long t0, t1;
58 u64 ret;
59
60 t0 = jiffies;
61 /* If fast increment counters are only 32 bits, snapshot them,
62 * and maintain them as 64bit values in the driver */
63 if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) &&
64 (creg == dd->ipath_cregs->cr_wordsendcnt ||
65 creg == dd->ipath_cregs->cr_wordrcvcnt ||
66 creg == dd->ipath_cregs->cr_pktsendcnt ||
67 creg == dd->ipath_cregs->cr_pktrcvcnt)) {
68 val64 = ipath_read_creg(dd, creg);
69 val = val64 == ~0ULL ? ~0U : 0;
70 reg64 = 1;
71 } else /* val64 just to keep gcc quiet... */
72 val64 = val = ipath_read_creg32(dd, creg);
73 /*
74 * See if a second has passed. This is just a way to detect things
75 * that are quite broken. Normally this should take just a few
76 * cycles (the check is for long enough that we don't care if we get
77 * pre-empted.) An Opteron HT O read timeout is 4 seconds with
78 * normal NB values
79 */
80 t1 = jiffies;
81 if (time_before(t0 + HZ, t1) && val == -1) {
82 ipath_dev_err(dd, "Error! Read counter 0x%x timed out\n",
83 creg);
84 ret = 0ULL;
85 goto bail;
86 }
87 if (reg64) {
88 ret = val64;
89 goto bail;
90 }
91
92 if (creg == dd->ipath_cregs->cr_wordsendcnt) {
93 if (val != dd->ipath_lastsword) {
94 dd->ipath_sword += val - dd->ipath_lastsword;
95 dd->ipath_lastsword = val;
96 }
97 val64 = dd->ipath_sword;
98 } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
99 if (val != dd->ipath_lastrword) {
100 dd->ipath_rword += val - dd->ipath_lastrword;
101 dd->ipath_lastrword = val;
102 }
103 val64 = dd->ipath_rword;
104 } else if (creg == dd->ipath_cregs->cr_pktsendcnt) {
105 if (val != dd->ipath_lastspkts) {
106 dd->ipath_spkts += val - dd->ipath_lastspkts;
107 dd->ipath_lastspkts = val;
108 }
109 val64 = dd->ipath_spkts;
110 } else if (creg == dd->ipath_cregs->cr_pktrcvcnt) {
111 if (val != dd->ipath_lastrpkts) {
112 dd->ipath_rpkts += val - dd->ipath_lastrpkts;
113 dd->ipath_lastrpkts = val;
114 }
115 val64 = dd->ipath_rpkts;
116 } else
117 val64 = (u64) val;
118
119 ret = val64;
120
121bail:
122 return ret;
123}
124
125/**
126 * ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports
127 * @dd: the infinipath device
128 *
129 * print the delta of egrfull/hdrqfull errors for kernel ports no more than
130 * every 5 seconds. User processes are printed at close, but kernel doesn't
131 * close, so... Separate routine so may call from other places someday, and
132 * so function name when printed by _IPATH_INFO is meaningfull
133 */
134static void ipath_qcheck(struct ipath_devdata *dd)
135{
136 static u64 last_tot_hdrqfull;
137 size_t blen = 0;
138 char buf[128];
139
140 *buf = 0;
141 if (dd->ipath_pd[0]->port_hdrqfull != dd->ipath_p0_hdrqfull) {
142 blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u",
143 dd->ipath_pd[0]->port_hdrqfull -
144 dd->ipath_p0_hdrqfull);
145 dd->ipath_p0_hdrqfull = dd->ipath_pd[0]->port_hdrqfull;
146 }
147 if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) {
148 blen += snprintf(buf + blen, sizeof buf - blen,
149 "%srcvegrfull %llu",
150 blen ? ", " : "",
151 (unsigned long long)
152 (ipath_stats.sps_etidfull -
153 dd->ipath_last_tidfull));
154 dd->ipath_last_tidfull = ipath_stats.sps_etidfull;
155 }
156
157 /*
158 * this is actually the number of hdrq full interrupts, not actual
159 * events, but at the moment that's mostly what I'm interested in.
160 * Actual count, etc. is in the counters, if needed. For production
161 * users this won't ordinarily be printed.
162 */
163
164 if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) &&
165 ipath_stats.sps_hdrqfull != last_tot_hdrqfull) {
166 blen += snprintf(buf + blen, sizeof buf - blen,
167 "%shdrqfull %llu (all ports)",
168 blen ? ", " : "",
169 (unsigned long long)
170 (ipath_stats.sps_hdrqfull -
171 last_tot_hdrqfull));
172 last_tot_hdrqfull = ipath_stats.sps_hdrqfull;
173 }
174 if (blen)
175 ipath_dbg("%s\n", buf);
176
177 if (dd->ipath_port0head != (u32)
178 le64_to_cpu(*dd->ipath_hdrqtailptr)) {
179 if (dd->ipath_lastport0rcv_cnt ==
180 ipath_stats.sps_port0pkts) {
181 ipath_cdbg(PKT, "missing rcv interrupts? "
182 "port0 hd=%llx tl=%x; port0pkts %llx\n",
183 (unsigned long long)
184 le64_to_cpu(*dd->ipath_hdrqtailptr),
185 dd->ipath_port0head,
186 (unsigned long long)
187 ipath_stats.sps_port0pkts);
188 ipath_kreceive(dd);
189 }
190 dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts;
191 }
192}
193
194/**
195 * ipath_get_faststats - get word counters from chip before they overflow
196 * @opaque - contains a pointer to the infinipath device ipath_devdata
197 *
198 * called from add_timer
199 */
200void ipath_get_faststats(unsigned long opaque)
201{
202 struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
203 u32 val;
204 static unsigned cnt;
205
206 /*
207 * don't access the chip while running diags, or memory diags can
208 * fail
209 */
210 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT) ||
211 ipath_diag_inuse)
212 /* but re-arm the timer, for diags case; won't hurt other */
213 goto done;
214
215 if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
216 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
217 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
218 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
219 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
220 }
221
222 ipath_qcheck(dd);
223
224 /*
225 * deal with repeat error suppression. Doesn't really matter if
226 * last error was almost a full interval ago, or just a few usecs
227 * ago; still won't get more than 2 per interval. We may want
228 * longer intervals for this eventually, could do with mod, counter
229 * or separate timer. Also see code in ipath_handle_errors() and
230 * ipath_handle_hwerrors().
231 */
232
233 if (dd->ipath_lasterror)
234 dd->ipath_lasterror = 0;
235 if (dd->ipath_lasthwerror)
236 dd->ipath_lasthwerror = 0;
237 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
238 && time_after(jiffies, dd->ipath_unmasktime)) {
239 char ebuf[256];
240 ipath_decode_err(ebuf, sizeof ebuf,
241 (dd->ipath_maskederrs & ~dd->
242 ipath_ignorederrs));
243 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
244 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
245 ipath_dev_err(dd, "Re-enabling masked errors "
246 "(%s)\n", ebuf);
247 else {
248 /*
249 * rcvegrfull and rcvhdrqfull are "normal", for some
250 * types of processes (mostly benchmarks) that send
251 * huge numbers of messages, while not processing
252 * them. So only complain about these at debug
253 * level.
254 */
255 ipath_dbg("Disabling frequent queue full errors "
256 "(%s)\n", ebuf);
257 }
258 dd->ipath_maskederrs = dd->ipath_ignorederrs;
259 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
260 ~dd->ipath_maskederrs);
261 }
262
263 /* limit qfull messages to ~one per minute per port */
264 if ((++cnt & 0x10)) {
265 for (val = dd->ipath_cfgports - 1; ((int)val) >= 0;
266 val--) {
267 if (dd->ipath_lastegrheads[val] != -1)
268 dd->ipath_lastegrheads[val] = -1;
269 if (dd->ipath_lastrcvhdrqtails[val] != -1)
270 dd->ipath_lastrcvhdrqtails[val] = -1;
271 }
272 }
273
274 if (dd->ipath_nosma_bufs) {
275 dd->ipath_nosma_secs += 5;
276 if (dd->ipath_nosma_secs >= 30) {
277 ipath_cdbg(SMA, "No SMA bufs avail %u seconds; "
278 "cancelling pending sends\n",
279 dd->ipath_nosma_secs);
280 /*
281 * issue an abort as well, in case we have a packet
282 * stuck in launch fifo. This could corrupt an
283 * outgoing user packet in the worst case,
284 * but this is a pretty catastrophic, anyway.
285 */
286 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
287 INFINIPATH_S_ABORT);
288 ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
289 dd->ipath_piobcnt2k +
290 dd->ipath_piobcnt4k -
291 dd->ipath_lastport_piobuf);
292 /* start again, if necessary */
293 dd->ipath_nosma_secs = 0;
294 } else
295 ipath_cdbg(SMA, "No SMA bufs avail %u tries, "
296 "after %u seconds\n",
297 dd->ipath_nosma_bufs,
298 dd->ipath_nosma_secs);
299 }
300
301done:
302 mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
303}
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
new file mode 100644
index 000000000000..32acd8048b49
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -0,0 +1,778 @@
1/*
2 * Copyright (c) 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/ctype.h>
34#include <linux/pci.h>
35
36#include "ipath_kernel.h"
37#include "ips_common.h"
38#include "ipath_layer.h"
39
40/**
41 * ipath_parse_ushort - parse an unsigned short value in an arbitrary base
42 * @str: the string containing the number
43 * @valp: where to put the result
44 *
45 * returns the number of bytes consumed, or negative value on error
46 */
/**
 * ipath_parse_ushort - parse an unsigned short value in an arbitrary base
 * @str: the string containing the number
 * @valp: where to put the result
 *
 * returns the number of bytes consumed, or negative value on error
 *
 * NOTE(review): the value returned on success is "end + 1 - str", i.e.
 * one more than the number of digit characters parsed -- it also counts
 * the character that terminated the number (delimiter or NUL).  Callers
 * in this file only test the sign; confirm before relying on the exact
 * byte count.
 */
int ipath_parse_ushort(const char *str, unsigned short *valp)
{
	unsigned long val;
	char *end;
	int ret;

	/* require a leading digit; a "0x" base prefix still passes since
	 * '0' is a digit and simple_strtoul() handles the prefix. */
	if (!isdigit(str[0])) {
		ret = -EINVAL;
		goto bail;
	}

	val = simple_strtoul(str, &end, 0);

	/* reject values that do not fit in an unsigned short */
	if (val > 0xffff) {
		ret = -EINVAL;
		goto bail;
	}

	*valp = val;

	ret = end + 1 - str;
	if (ret == 0)
		ret = -EINVAL;

bail:
	return ret;
}
74
75static ssize_t show_version(struct device_driver *dev, char *buf)
76{
77 /* The string printed here is already newline-terminated. */
78 return scnprintf(buf, PAGE_SIZE, "%s", ipath_core_version);
79}
80
81static ssize_t show_num_units(struct device_driver *dev, char *buf)
82{
83 return scnprintf(buf, PAGE_SIZE, "%d\n",
84 ipath_count_units(NULL, NULL, NULL));
85}
86
/*
 * DRIVER_STAT - generate a read-only driver-wide sysfs statistic file.
 * @name: the sysfs file name
 * @attr: the ipath_stats.sps_* member to report
 *
 * Expands to a show function printing the counter as a decimal u64 and
 * a matching DRIVER_ATTR declaration (driver_attr_<name>).
 */
#define DRIVER_STAT(name, attr) \
	static ssize_t show_stat_##name(struct device_driver *dev, \
					char *buf) \
	{ \
		return scnprintf( \
			buf, PAGE_SIZE, "%llu\n", \
			(unsigned long long) ipath_stats.sps_ ##attr); \
	} \
	static DRIVER_ATTR(name, S_IRUGO, show_stat_##name, NULL)

DRIVER_STAT(intrs, ints);
DRIVER_STAT(err_intrs, errints);
DRIVER_STAT(errs, errs);
DRIVER_STAT(pkt_errs, pkterrs);
DRIVER_STAT(crc_errs, crcerrs);
DRIVER_STAT(hw_errs, hwerrs);
DRIVER_STAT(ib_link, iblink);
DRIVER_STAT(port0_pkts, port0pkts);
DRIVER_STAT(ether_spkts, ether_spkts);
DRIVER_STAT(ether_rpkts, ether_rpkts);
DRIVER_STAT(sma_spkts, sma_spkts);
DRIVER_STAT(sma_rpkts, sma_rpkts);
DRIVER_STAT(hdrq_full, hdrqfull);
DRIVER_STAT(etid_full, etidfull);
DRIVER_STAT(no_piobufs, nopiobufs);
DRIVER_STAT(ports, ports);
DRIVER_STAT(pkey0, pkeys[0]);
DRIVER_STAT(pkey1, pkeys[1]);
DRIVER_STAT(pkey2, pkeys[2]);
DRIVER_STAT(pkey3, pkeys[3]);
/* XXX fix the following when dynamic table of devices used */
DRIVER_STAT(lid0, lid[0]);
DRIVER_STAT(lid1, lid[1]);
DRIVER_STAT(lid2, lid[2]);
DRIVER_STAT(lid3, lid[3]);

DRIVER_STAT(nports, nports);
DRIVER_STAT(null_intr, nullintr);
DRIVER_STAT(max_pkts_call, maxpkts_call);
DRIVER_STAT(avg_pkts_call, avgpkts_call);
DRIVER_STAT(page_locks, pagelocks);
DRIVER_STAT(page_unlocks, pageunlocks);
DRIVER_STAT(krdrops, krdrops);
/* XXX fix the following when dynamic table of devices used */
DRIVER_STAT(mlid0, mlid[0]);
DRIVER_STAT(mlid1, mlid[1]);
DRIVER_STAT(mlid2, mlid[2]);
DRIVER_STAT(mlid3, mlid[3]);
135
/* Collects every DRIVER_STAT attribute into the "stats" sysfs group
 * created by ipath_driver_create_group().  NULL-terminated. */
static struct attribute *driver_stat_attributes[] = {
	&driver_attr_intrs.attr,
	&driver_attr_err_intrs.attr,
	&driver_attr_errs.attr,
	&driver_attr_pkt_errs.attr,
	&driver_attr_crc_errs.attr,
	&driver_attr_hw_errs.attr,
	&driver_attr_ib_link.attr,
	&driver_attr_port0_pkts.attr,
	&driver_attr_ether_spkts.attr,
	&driver_attr_ether_rpkts.attr,
	&driver_attr_sma_spkts.attr,
	&driver_attr_sma_rpkts.attr,
	&driver_attr_hdrq_full.attr,
	&driver_attr_etid_full.attr,
	&driver_attr_no_piobufs.attr,
	&driver_attr_ports.attr,
	&driver_attr_pkey0.attr,
	&driver_attr_pkey1.attr,
	&driver_attr_pkey2.attr,
	&driver_attr_pkey3.attr,
	&driver_attr_lid0.attr,
	&driver_attr_lid1.attr,
	&driver_attr_lid2.attr,
	&driver_attr_lid3.attr,
	&driver_attr_nports.attr,
	&driver_attr_null_intr.attr,
	&driver_attr_max_pkts_call.attr,
	&driver_attr_avg_pkts_call.attr,
	&driver_attr_page_locks.attr,
	&driver_attr_page_unlocks.attr,
	&driver_attr_krdrops.attr,
	&driver_attr_mlid0.attr,
	&driver_attr_mlid1.attr,
	&driver_attr_mlid2.attr,
	&driver_attr_mlid3.attr,
	NULL
};

/* Exposed as the "stats" subdirectory under the driver's sysfs dir. */
static struct attribute_group driver_stat_attr_group = {
	.name = "stats",
	.attrs = driver_stat_attributes
};
179
180static ssize_t show_status(struct device *dev,
181 struct device_attribute *attr,
182 char *buf)
183{
184 struct ipath_devdata *dd = dev_get_drvdata(dev);
185 ssize_t ret;
186
187 if (!dd->ipath_statusp) {
188 ret = -EINVAL;
189 goto bail;
190 }
191
192 ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n",
193 (unsigned long long) *(dd->ipath_statusp));
194
195bail:
196 return ret;
197}
198
/* Names for the device status bits, index i naming bit (1 << i); used
 * by show_status_str() to render the bitmask as words.  NULL ends the
 * list of bits that have names. */
static const char *ipath_status_str[] = {
	"Initted",
	"Disabled",
	"Admin_Disabled",
	"OIB_SMA",
	"SMA",
	"Present",
	"IB_link_up",
	"IB_configured",
	"NoIBcable",
	"Fatal_Hardware_Error",
	NULL,
};
212
/*
 * sysfs "status_str": render the device status bitmask as a
 * space-separated list of the bit names from ipath_status_str[],
 * walking the mask one bit at a time.  Stops early on buffer overflow
 * (strlcat reports the would-be length) or when either the mask or the
 * name table is exhausted.
 */
static ssize_t show_status_str(struct device *dev,
			       struct device_attribute *attr,
			       char *buf)
{
	struct ipath_devdata *dd = dev_get_drvdata(dev);
	int i, any;
	u64 s;
	ssize_t ret;

	if (!dd->ipath_statusp) {
		ret = -EINVAL;
		goto bail;
	}

	s = *(dd->ipath_statusp);
	*buf = '\0';
	for (any = i = 0; s && ipath_status_str[i]; i++) {
		if (s & 1) {
			/* separator before every name except the first */
			if (any && strlcat(buf, " ", PAGE_SIZE) >=
			    PAGE_SIZE)
				/* overflow */
				break;
			if (strlcat(buf, ipath_status_str[i],
				    PAGE_SIZE) >= PAGE_SIZE)
				break;
			any = 1;
		}
		s >>= 1;
	}
	if (any)
		strlcat(buf, "\n", PAGE_SIZE);

	ret = strlen(buf);

bail:
	return ret;
}
250
251static ssize_t show_boardversion(struct device *dev,
252 struct device_attribute *attr,
253 char *buf)
254{
255 struct ipath_devdata *dd = dev_get_drvdata(dev);
256 /* The string printed here is already newline-terminated. */
257 return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion);
258}
259
260static ssize_t show_lid(struct device *dev,
261 struct device_attribute *attr,
262 char *buf)
263{
264 struct ipath_devdata *dd = dev_get_drvdata(dev);
265
266 return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_lid);
267}
268
269static ssize_t store_lid(struct device *dev,
270 struct device_attribute *attr,
271 const char *buf,
272 size_t count)
273{
274 struct ipath_devdata *dd = dev_get_drvdata(dev);
275 u16 lid;
276 int ret;
277
278 ret = ipath_parse_ushort(buf, &lid);
279 if (ret < 0)
280 goto invalid;
281
282 if (lid == 0 || lid >= 0xc000) {
283 ret = -EINVAL;
284 goto invalid;
285 }
286
287 ipath_set_sps_lid(dd, lid, 0);
288
289 goto bail;
290invalid:
291 ipath_dev_err(dd, "attempt to set invalid LID\n");
292bail:
293 return ret;
294}
295
296static ssize_t show_mlid(struct device *dev,
297 struct device_attribute *attr,
298 char *buf)
299{
300 struct ipath_devdata *dd = dev_get_drvdata(dev);
301
302 return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_mlid);
303}
304
305static ssize_t store_mlid(struct device *dev,
306 struct device_attribute *attr,
307 const char *buf,
308 size_t count)
309{
310 struct ipath_devdata *dd = dev_get_drvdata(dev);
311 int unit;
312 u16 mlid;
313 int ret;
314
315 ret = ipath_parse_ushort(buf, &mlid);
316 if (ret < 0)
317 goto invalid;
318
319 unit = dd->ipath_unit;
320
321 dd->ipath_mlid = mlid;
322 ipath_stats.sps_mlid[unit] = mlid;
323 ipath_layer_intr(dd, IPATH_LAYER_INT_BCAST);
324
325 goto bail;
326invalid:
327 ipath_dev_err(dd, "attempt to set invalid MLID\n");
328bail:
329 return ret;
330}
331
332static ssize_t show_guid(struct device *dev,
333 struct device_attribute *attr,
334 char *buf)
335{
336 struct ipath_devdata *dd = dev_get_drvdata(dev);
337 u8 *guid;
338
339 guid = (u8 *) & (dd->ipath_guid);
340
341 return scnprintf(buf, PAGE_SIZE,
342 "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
343 guid[0], guid[1], guid[2], guid[3],
344 guid[4], guid[5], guid[6], guid[7]);
345}
346
347static ssize_t store_guid(struct device *dev,
348 struct device_attribute *attr,
349 const char *buf,
350 size_t count)
351{
352 struct ipath_devdata *dd = dev_get_drvdata(dev);
353 ssize_t ret;
354 unsigned short guid[8];
355 __be64 nguid;
356 u8 *ng;
357 int i;
358
359 if (sscanf(buf, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx",
360 &guid[0], &guid[1], &guid[2], &guid[3],
361 &guid[4], &guid[5], &guid[6], &guid[7]) != 8)
362 goto invalid;
363
364 ng = (u8 *) &nguid;
365
366 for (i = 0; i < 8; i++) {
367 if (guid[i] > 0xff)
368 goto invalid;
369 ng[i] = guid[i];
370 }
371
372 dd->ipath_guid = nguid;
373 dd->ipath_nguid = 1;
374
375 ret = strlen(buf);
376 goto bail;
377
378invalid:
379 ipath_dev_err(dd, "attempt to set invalid GUID\n");
380 ret = -EINVAL;
381
382bail:
383 return ret;
384}
385
386static ssize_t show_nguid(struct device *dev,
387 struct device_attribute *attr,
388 char *buf)
389{
390 struct ipath_devdata *dd = dev_get_drvdata(dev);
391
392 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid);
393}
394
395static ssize_t show_serial(struct device *dev,
396 struct device_attribute *attr,
397 char *buf)
398{
399 struct ipath_devdata *dd = dev_get_drvdata(dev);
400
401 buf[sizeof dd->ipath_serial] = '\0';
402 memcpy(buf, dd->ipath_serial, sizeof dd->ipath_serial);
403 strcat(buf, "\n");
404 return strlen(buf);
405}
406
407static ssize_t show_unit(struct device *dev,
408 struct device_attribute *attr,
409 char *buf)
410{
411 struct ipath_devdata *dd = dev_get_drvdata(dev);
412
413 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit);
414}
415
/*
 * DEVICE_COUNTER - generate a read-only per-device hardware counter file.
 * @name: the sysfs file name
 * @attr: the infinipath_counters member to report
 *
 * Expands to a show function that snapshots the chip counter (by its
 * u64 index within struct infinipath_counters) via ipath_snap_cntr()
 * and a matching DEVICE_ATTR declaration (dev_attr_<name>).
 */
#define DEVICE_COUNTER(name, attr) \
	static ssize_t show_counter_##name(struct device *dev, \
					   struct device_attribute *attr, \
					   char *buf) \
	{ \
		struct ipath_devdata *dd = dev_get_drvdata(dev); \
		return scnprintf(\
			buf, PAGE_SIZE, "%llu\n", (unsigned long long) \
			ipath_snap_cntr( \
				dd, offsetof(struct infinipath_counters, \
					     attr) / sizeof(u64))); \
	} \
	static DEVICE_ATTR(name, S_IRUGO, show_counter_##name, NULL);

DEVICE_COUNTER(ib_link_downeds, IBLinkDownedCnt);
DEVICE_COUNTER(ib_link_err_recoveries, IBLinkErrRecoveryCnt);
DEVICE_COUNTER(ib_status_changes, IBStatusChangeCnt);
DEVICE_COUNTER(ib_symbol_errs, IBSymbolErrCnt);
DEVICE_COUNTER(lb_flow_stalls, LBFlowStallCnt);
DEVICE_COUNTER(lb_ints, LBIntCnt);
DEVICE_COUNTER(rx_bad_formats, RxBadFormatCnt);
DEVICE_COUNTER(rx_buf_ovfls, RxBufOvflCnt);
DEVICE_COUNTER(rx_data_pkts, RxDataPktCnt);
DEVICE_COUNTER(rx_dropped_pkts, RxDroppedPktCnt);
DEVICE_COUNTER(rx_dwords, RxDwordCnt);
DEVICE_COUNTER(rx_ebps, RxEBPCnt);
DEVICE_COUNTER(rx_flow_ctrl_errs, RxFlowCtrlErrCnt);
DEVICE_COUNTER(rx_flow_pkts, RxFlowPktCnt);
DEVICE_COUNTER(rx_icrc_errs, RxICRCErrCnt);
DEVICE_COUNTER(rx_len_errs, RxLenErrCnt);
DEVICE_COUNTER(rx_link_problems, RxLinkProblemCnt);
DEVICE_COUNTER(rx_lpcrc_errs, RxLPCRCErrCnt);
DEVICE_COUNTER(rx_max_min_len_errs, RxMaxMinLenErrCnt);
DEVICE_COUNTER(rx_p0_hdr_egr_ovfls, RxP0HdrEgrOvflCnt);
DEVICE_COUNTER(rx_p1_hdr_egr_ovfls, RxP1HdrEgrOvflCnt);
DEVICE_COUNTER(rx_p2_hdr_egr_ovfls, RxP2HdrEgrOvflCnt);
DEVICE_COUNTER(rx_p3_hdr_egr_ovfls, RxP3HdrEgrOvflCnt);
DEVICE_COUNTER(rx_p4_hdr_egr_ovfls, RxP4HdrEgrOvflCnt);
DEVICE_COUNTER(rx_p5_hdr_egr_ovfls, RxP5HdrEgrOvflCnt);
DEVICE_COUNTER(rx_p6_hdr_egr_ovfls, RxP6HdrEgrOvflCnt);
DEVICE_COUNTER(rx_p7_hdr_egr_ovfls, RxP7HdrEgrOvflCnt);
DEVICE_COUNTER(rx_p8_hdr_egr_ovfls, RxP8HdrEgrOvflCnt);
DEVICE_COUNTER(rx_pkey_mismatches, RxPKeyMismatchCnt);
DEVICE_COUNTER(rx_tid_full_errs, RxTIDFullErrCnt);
DEVICE_COUNTER(rx_tid_valid_errs, RxTIDValidErrCnt);
DEVICE_COUNTER(rx_vcrc_errs, RxVCRCErrCnt);
DEVICE_COUNTER(tx_data_pkts, TxDataPktCnt);
DEVICE_COUNTER(tx_dropped_pkts, TxDroppedPktCnt);
DEVICE_COUNTER(tx_dwords, TxDwordCnt);
DEVICE_COUNTER(tx_flow_pkts, TxFlowPktCnt);
DEVICE_COUNTER(tx_flow_stalls, TxFlowStallCnt);
DEVICE_COUNTER(tx_len_errs, TxLenErrCnt);
DEVICE_COUNTER(tx_max_min_len_errs, TxMaxMinLenErrCnt);
DEVICE_COUNTER(tx_underruns, TxUnderrunCnt);
DEVICE_COUNTER(tx_unsup_vl_errs, TxUnsupVLErrCnt);
471
/* Collects every DEVICE_COUNTER attribute into the per-device
 * "counters" sysfs group created by ipath_device_create_group().
 * NULL-terminated. */
static struct attribute *dev_counter_attributes[] = {
	&dev_attr_ib_link_downeds.attr,
	&dev_attr_ib_link_err_recoveries.attr,
	&dev_attr_ib_status_changes.attr,
	&dev_attr_ib_symbol_errs.attr,
	&dev_attr_lb_flow_stalls.attr,
	&dev_attr_lb_ints.attr,
	&dev_attr_rx_bad_formats.attr,
	&dev_attr_rx_buf_ovfls.attr,
	&dev_attr_rx_data_pkts.attr,
	&dev_attr_rx_dropped_pkts.attr,
	&dev_attr_rx_dwords.attr,
	&dev_attr_rx_ebps.attr,
	&dev_attr_rx_flow_ctrl_errs.attr,
	&dev_attr_rx_flow_pkts.attr,
	&dev_attr_rx_icrc_errs.attr,
	&dev_attr_rx_len_errs.attr,
	&dev_attr_rx_link_problems.attr,
	&dev_attr_rx_lpcrc_errs.attr,
	&dev_attr_rx_max_min_len_errs.attr,
	&dev_attr_rx_p0_hdr_egr_ovfls.attr,
	&dev_attr_rx_p1_hdr_egr_ovfls.attr,
	&dev_attr_rx_p2_hdr_egr_ovfls.attr,
	&dev_attr_rx_p3_hdr_egr_ovfls.attr,
	&dev_attr_rx_p4_hdr_egr_ovfls.attr,
	&dev_attr_rx_p5_hdr_egr_ovfls.attr,
	&dev_attr_rx_p6_hdr_egr_ovfls.attr,
	&dev_attr_rx_p7_hdr_egr_ovfls.attr,
	&dev_attr_rx_p8_hdr_egr_ovfls.attr,
	&dev_attr_rx_pkey_mismatches.attr,
	&dev_attr_rx_tid_full_errs.attr,
	&dev_attr_rx_tid_valid_errs.attr,
	&dev_attr_rx_vcrc_errs.attr,
	&dev_attr_tx_data_pkts.attr,
	&dev_attr_tx_dropped_pkts.attr,
	&dev_attr_tx_dwords.attr,
	&dev_attr_tx_flow_pkts.attr,
	&dev_attr_tx_flow_stalls.attr,
	&dev_attr_tx_len_errs.attr,
	&dev_attr_tx_max_min_len_errs.attr,
	&dev_attr_tx_underruns.attr,
	&dev_attr_tx_unsup_vl_errs.attr,
	NULL
};

/* Exposed as the "counters" subdirectory under each device's sysfs dir. */
static struct attribute_group dev_counter_attr_group = {
	.name = "counters",
	.attrs = dev_counter_attributes
};
521
522static ssize_t store_reset(struct device *dev,
523 struct device_attribute *attr,
524 const char *buf,
525 size_t count)
526{
527 struct ipath_devdata *dd = dev_get_drvdata(dev);
528 int ret;
529
530 if (count < 5 || memcmp(buf, "reset", 5)) {
531 ret = -EINVAL;
532 goto bail;
533 }
534
535 if (dd->ipath_flags & IPATH_DISABLED) {
536 /*
537 * post-reset init would re-enable interrupts, etc.
538 * so don't allow reset on disabled devices. Not
539 * perfect error, but about the best choice.
540 */
541 dev_info(dev,"Unit %d is disabled, can't reset\n",
542 dd->ipath_unit);
543 ret = -EINVAL;
544 }
545 ret = ipath_reset_device(dd->ipath_unit);
546bail:
547 return ret<0 ? ret : count;
548}
549
550static ssize_t store_link_state(struct device *dev,
551 struct device_attribute *attr,
552 const char *buf,
553 size_t count)
554{
555 struct ipath_devdata *dd = dev_get_drvdata(dev);
556 int ret, r;
557 u16 state;
558
559 ret = ipath_parse_ushort(buf, &state);
560 if (ret < 0)
561 goto invalid;
562
563 r = ipath_layer_set_linkstate(dd, state);
564 if (r < 0) {
565 ret = r;
566 goto bail;
567 }
568
569 goto bail;
570invalid:
571 ipath_dev_err(dd, "attempt to set invalid link state\n");
572bail:
573 return ret;
574}
575
576static ssize_t show_mtu(struct device *dev,
577 struct device_attribute *attr,
578 char *buf)
579{
580 struct ipath_devdata *dd = dev_get_drvdata(dev);
581 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_ibmtu);
582}
583
584static ssize_t store_mtu(struct device *dev,
585 struct device_attribute *attr,
586 const char *buf,
587 size_t count)
588{
589 struct ipath_devdata *dd = dev_get_drvdata(dev);
590 ssize_t ret;
591 u16 mtu = 0;
592 int r;
593
594 ret = ipath_parse_ushort(buf, &mtu);
595 if (ret < 0)
596 goto invalid;
597
598 r = ipath_layer_set_mtu(dd, mtu);
599 if (r < 0)
600 ret = r;
601
602 goto bail;
603invalid:
604 ipath_dev_err(dd, "attempt to set invalid MTU\n");
605bail:
606 return ret;
607}
608
609static ssize_t show_enabled(struct device *dev,
610 struct device_attribute *attr,
611 char *buf)
612{
613 struct ipath_devdata *dd = dev_get_drvdata(dev);
614 return scnprintf(buf, PAGE_SIZE, "%u\n",
615 (dd->ipath_flags & IPATH_DISABLED) ? 0 : 1);
616}
617
618static ssize_t store_enabled(struct device *dev,
619 struct device_attribute *attr,
620 const char *buf,
621 size_t count)
622{
623 struct ipath_devdata *dd = dev_get_drvdata(dev);
624 ssize_t ret;
625 u16 enable = 0;
626
627 ret = ipath_parse_ushort(buf, &enable);
628 if (ret < 0) {
629 ipath_dev_err(dd, "attempt to use non-numeric on enable\n");
630 goto bail;
631 }
632
633 if (enable) {
634 if (!(dd->ipath_flags & IPATH_DISABLED))
635 goto bail;
636
637 dev_info(dev, "Enabling unit %d\n", dd->ipath_unit);
638 /* same as post-reset */
639 ret = ipath_init_chip(dd, 1);
640 if (ret)
641 ipath_dev_err(dd, "Failed to enable unit %d\n",
642 dd->ipath_unit);
643 else {
644 dd->ipath_flags &= ~IPATH_DISABLED;
645 *dd->ipath_statusp &= ~IPATH_STATUS_ADMIN_DISABLED;
646 }
647 }
648 else if (!(dd->ipath_flags & IPATH_DISABLED)) {
649 dev_info(dev, "Disabling unit %d\n", dd->ipath_unit);
650 ipath_shutdown_device(dd);
651 dd->ipath_flags |= IPATH_DISABLED;
652 *dd->ipath_statusp |= IPATH_STATUS_ADMIN_DISABLED;
653 }
654
655bail:
656 return ret;
657}
658
/* Top-level driver-wide files (alongside the "stats" group). */
static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);

static struct attribute *driver_attributes[] = {
	&driver_attr_num_units.attr,
	&driver_attr_version.attr,
	NULL
};

/* Unnamed group: files land directly in the driver's sysfs directory. */
static struct attribute_group driver_attr_group = {
	.attrs = driver_attributes
};
671
/* Per-device files.  Note "reset" and "link_state" are write-only, and
 * "reset" is deliberately NOT in dev_attributes[] -- it is only created
 * on demand by ipath_expose_reset(). */
static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid);
static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid);
static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state);
static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu);
static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled);
static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL);
static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset);
static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);

static struct attribute *dev_attributes[] = {
	&dev_attr_guid.attr,
	&dev_attr_lid.attr,
	&dev_attr_link_state.attr,
	&dev_attr_mlid.attr,
	&dev_attr_mtu.attr,
	&dev_attr_nguid.attr,
	&dev_attr_serial.attr,
	&dev_attr_status.attr,
	&dev_attr_status_str.attr,
	&dev_attr_boardversion.attr,
	&dev_attr_unit.attr,
	&dev_attr_enabled.attr,
	NULL
};

/* Unnamed group: files land directly in the device's sysfs directory. */
static struct attribute_group dev_attr_group = {
	.attrs = dev_attributes
};
705
706/**
707 * ipath_expose_reset - create a device reset file
708 * @dev: the device structure
709 *
710 * Only expose a file that lets us reset the device after someone
711 * enters diag mode. A device reset is quite likely to crash the
712 * machine entirely, so we don't want to normally make it
713 * available.
714 */
715int ipath_expose_reset(struct device *dev)
716{
717 return device_create_file(dev, &dev_attr_reset);
718}
719
720int ipath_driver_create_group(struct device_driver *drv)
721{
722 int ret;
723
724 ret = sysfs_create_group(&drv->kobj, &driver_attr_group);
725 if (ret)
726 goto bail;
727
728 ret = sysfs_create_group(&drv->kobj, &driver_stat_attr_group);
729 if (ret)
730 sysfs_remove_group(&drv->kobj, &driver_attr_group);
731
732bail:
733 return ret;
734}
735
/* Undo ipath_driver_create_group(), removing the groups in reverse
 * order of creation. */
void ipath_driver_remove_group(struct device_driver *drv)
{
	sysfs_remove_group(&drv->kobj, &driver_stat_attr_group);
	sysfs_remove_group(&drv->kobj, &driver_attr_group);
}
741
742int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd)
743{
744 int ret;
745 char unit[5];
746
747 ret = sysfs_create_group(&dev->kobj, &dev_attr_group);
748 if (ret)
749 goto bail;
750
751 ret = sysfs_create_group(&dev->kobj, &dev_counter_attr_group);
752 if (ret)
753 goto bail_attrs;
754
755 snprintf(unit, sizeof(unit), "%02d", dd->ipath_unit);
756 ret = sysfs_create_link(&dev->driver->kobj, &dev->kobj, unit);
757 if (ret == 0)
758 goto bail;
759
760 sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
761bail_attrs:
762 sysfs_remove_group(&dev->kobj, &dev_attr_group);
763bail:
764 return ret;
765}
766
/*
 * Undo ipath_device_create_group(): remove the per-unit shortcut link
 * and both groups, plus the "reset" file that ipath_expose_reset() may
 * have created (removal is harmless if it never was).
 */
void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd)
{
	char unit[5];

	snprintf(unit, sizeof(unit), "%02d", dd->ipath_unit);
	sysfs_remove_link(&dev->driver->kobj, unit);

	sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
	sysfs_remove_group(&dev->kobj, &dev_attr_group);

	device_remove_file(dev, &dev_attr_reset);
}
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
new file mode 100644
index 000000000000..0d6dbc0a541e
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -0,0 +1,645 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "ipath_verbs.h"
34#include "ips_common.h"
35
36/* cut down ridiculously long IB macro names */
37#define OP(x) IB_OPCODE_UC_##x
38
/*
 * Retire the send work request at qp->s_last: advance s_last (wrapping
 * the circular queue) and, if the request was flagged for signaling,
 * post a completion on the send CQ.
 *
 * NOTE(review): the final assignment to "wqe" has no effect in the
 * caller -- "wqe" is passed by value, so the refetched pointer is lost
 * on return.  The caller must refetch the WQE for the new qp->s_last
 * itself after calling this function.
 */
static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
			       struct ib_wc *wc)
{
	if (++qp->s_last == qp->s_size)
		qp->s_last = 0;
	/* signal only if the QP always signals, or this WR asked for it */
	if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
	    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
		wc->wr_id = wqe->wr.wr_id;
		wc->status = IB_WC_SUCCESS;
		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
		wc->vendor_err = 0;
		wc->byte_len = wqe->length;
		wc->qp_num = qp->ibqp.qp_num;
		wc->src_qp = qp->remote_qpn;
		wc->pkey_index = 0;
		wc->slid = qp->remote_ah_attr.dlid;
		wc->sl = qp->remote_ah_attr.sl;
		wc->dlid_path_bits = 0;
		wc->port_num = 0;
		ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0);
	}
	/* dead store: see NOTE above */
	wqe = get_swqe_ptr(qp, qp->s_last);
}
62
63/**
64 * ipath_do_uc_send - do a send on a UC queue
65 * @data: contains a pointer to the QP to send on
66 *
67 * Process entries in the send work queue until the queue is exhausted.
68 * Only allow one CPU to send a packet per QP (tasklet).
69 * Otherwise, after we drop the QP lock, two threads could send
70 * packets out of order.
71 * This is similar to ipath_do_rc_send() below except we don't have
72 * timeouts or resends.
73 */
74void ipath_do_uc_send(unsigned long data)
75{
76 struct ipath_qp *qp = (struct ipath_qp *)data;
77 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
78 struct ipath_swqe *wqe;
79 unsigned long flags;
80 u16 lrh0;
81 u32 hwords;
82 u32 nwords;
83 u32 extra_bytes;
84 u32 bth0;
85 u32 bth2;
86 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
87 u32 len;
88 struct ipath_other_headers *ohdr;
89 struct ib_wc wc;
90
91 if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
92 goto bail;
93
94 if (unlikely(qp->remote_ah_attr.dlid ==
95 ipath_layer_get_lid(dev->dd))) {
96 /* Pass in an uninitialized ib_wc to save stack space. */
97 ipath_ruc_loopback(qp, &wc);
98 clear_bit(IPATH_S_BUSY, &qp->s_flags);
99 goto bail;
100 }
101
102 ohdr = &qp->s_hdr.u.oth;
103 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
104 ohdr = &qp->s_hdr.u.l.oth;
105
106again:
107 /* Check for a constructed packet to be sent. */
108 if (qp->s_hdrwords != 0) {
109 /*
110 * If no PIO bufs are available, return.
111 * An interrupt will call ipath_ib_piobufavail()
112 * when one is available.
113 */
114 if (ipath_verbs_send(dev->dd, qp->s_hdrwords,
115 (u32 *) &qp->s_hdr,
116 qp->s_cur_size,
117 qp->s_cur_sge)) {
118 ipath_no_bufs_available(qp, dev);
119 goto bail;
120 }
121 dev->n_unicast_xmit++;
122 /* Record that we sent the packet and s_hdr is empty. */
123 qp->s_hdrwords = 0;
124 }
125
126 lrh0 = IPS_LRH_BTH;
127 /* header size in 32-bit words LRH+BTH = (8+12)/4. */
128 hwords = 5;
129
130 /*
131 * The lock is needed to synchronize between
132 * setting qp->s_ack_state and post_send().
133 */
134 spin_lock_irqsave(&qp->s_lock, flags);
135
136 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
137 goto done;
138
139 bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
140
141 /* Send a request. */
142 wqe = get_swqe_ptr(qp, qp->s_last);
143 switch (qp->s_state) {
144 default:
145 /*
146 * Signal the completion of the last send (if there is
147 * one).
148 */
149 if (qp->s_last != qp->s_tail)
150 complete_last_send(qp, wqe, &wc);
151
152 /* Check if send work queue is empty. */
153 if (qp->s_tail == qp->s_head)
154 goto done;
155 /*
156 * Start a new request.
157 */
158 qp->s_psn = wqe->psn = qp->s_next_psn;
159 qp->s_sge.sge = wqe->sg_list[0];
160 qp->s_sge.sg_list = wqe->sg_list + 1;
161 qp->s_sge.num_sge = wqe->wr.num_sge;
162 qp->s_len = len = wqe->length;
163 switch (wqe->wr.opcode) {
164 case IB_WR_SEND:
165 case IB_WR_SEND_WITH_IMM:
166 if (len > pmtu) {
167 qp->s_state = OP(SEND_FIRST);
168 len = pmtu;
169 break;
170 }
171 if (wqe->wr.opcode == IB_WR_SEND)
172 qp->s_state = OP(SEND_ONLY);
173 else {
174 qp->s_state =
175 OP(SEND_ONLY_WITH_IMMEDIATE);
176 /* Immediate data comes after the BTH */
177 ohdr->u.imm_data = wqe->wr.imm_data;
178 hwords += 1;
179 }
180 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
181 bth0 |= 1 << 23;
182 break;
183
184 case IB_WR_RDMA_WRITE:
185 case IB_WR_RDMA_WRITE_WITH_IMM:
186 ohdr->u.rc.reth.vaddr =
187 cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
188 ohdr->u.rc.reth.rkey =
189 cpu_to_be32(wqe->wr.wr.rdma.rkey);
190 ohdr->u.rc.reth.length = cpu_to_be32(len);
191 hwords += sizeof(struct ib_reth) / 4;
192 if (len > pmtu) {
193 qp->s_state = OP(RDMA_WRITE_FIRST);
194 len = pmtu;
195 break;
196 }
197 if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
198 qp->s_state = OP(RDMA_WRITE_ONLY);
199 else {
200 qp->s_state =
201 OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
202 /* Immediate data comes after the RETH */
203 ohdr->u.rc.imm_data = wqe->wr.imm_data;
204 hwords += 1;
205 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
206 bth0 |= 1 << 23;
207 }
208 break;
209
210 default:
211 goto done;
212 }
213 if (++qp->s_tail >= qp->s_size)
214 qp->s_tail = 0;
215 break;
216
217 case OP(SEND_FIRST):
218 qp->s_state = OP(SEND_MIDDLE);
219 /* FALLTHROUGH */
220 case OP(SEND_MIDDLE):
221 len = qp->s_len;
222 if (len > pmtu) {
223 len = pmtu;
224 break;
225 }
226 if (wqe->wr.opcode == IB_WR_SEND)
227 qp->s_state = OP(SEND_LAST);
228 else {
229 qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
230 /* Immediate data comes after the BTH */
231 ohdr->u.imm_data = wqe->wr.imm_data;
232 hwords += 1;
233 }
234 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
235 bth0 |= 1 << 23;
236 break;
237
238 case OP(RDMA_WRITE_FIRST):
239 qp->s_state = OP(RDMA_WRITE_MIDDLE);
240 /* FALLTHROUGH */
241 case OP(RDMA_WRITE_MIDDLE):
242 len = qp->s_len;
243 if (len > pmtu) {
244 len = pmtu;
245 break;
246 }
247 if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
248 qp->s_state = OP(RDMA_WRITE_LAST);
249 else {
250 qp->s_state =
251 OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
252 /* Immediate data comes after the BTH */
253 ohdr->u.imm_data = wqe->wr.imm_data;
254 hwords += 1;
255 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
256 bth0 |= 1 << 23;
257 }
258 break;
259 }
260 bth2 = qp->s_next_psn++ & IPS_PSN_MASK;
261 qp->s_len -= len;
262 bth0 |= qp->s_state << 24;
263
264 spin_unlock_irqrestore(&qp->s_lock, flags);
265
266 /* Construct the header. */
267 extra_bytes = (4 - len) & 3;
268 nwords = (len + extra_bytes) >> 2;
269 if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
270 /* Header size in 32-bit words. */
271 hwords += 10;
272 lrh0 = IPS_LRH_GRH;
273 qp->s_hdr.u.l.grh.version_tclass_flow =
274 cpu_to_be32((6 << 28) |
275 (qp->remote_ah_attr.grh.traffic_class
276 << 20) |
277 qp->remote_ah_attr.grh.flow_label);
278 qp->s_hdr.u.l.grh.paylen =
279 cpu_to_be16(((hwords - 12) + nwords +
280 SIZE_OF_CRC) << 2);
281 /* next_hdr is defined by C8-7 in ch. 8.4.1 */
282 qp->s_hdr.u.l.grh.next_hdr = 0x1B;
283 qp->s_hdr.u.l.grh.hop_limit =
284 qp->remote_ah_attr.grh.hop_limit;
285 /* The SGID is 32-bit aligned. */
286 qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
287 dev->gid_prefix;
288 qp->s_hdr.u.l.grh.sgid.global.interface_id =
289 ipath_layer_get_guid(dev->dd);
290 qp->s_hdr.u.l.grh.dgid = qp->remote_ah_attr.grh.dgid;
291 }
292 qp->s_hdrwords = hwords;
293 qp->s_cur_sge = &qp->s_sge;
294 qp->s_cur_size = len;
295 lrh0 |= qp->remote_ah_attr.sl << 4;
296 qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
297 /* DEST LID */
298 qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
299 qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC);
300 qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
301 bth0 |= extra_bytes << 20;
302 ohdr->bth[0] = cpu_to_be32(bth0);
303 ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
304 ohdr->bth[2] = cpu_to_be32(bth2);
305
306 /* Check for more work to do. */
307 goto again;
308
309done:
310 spin_unlock_irqrestore(&qp->s_lock, flags);
311 clear_bit(IPATH_S_BUSY, &qp->s_flags);
312
313bail:
314 return;
315}
316
317/**
318 * ipath_uc_rcv - handle an incoming UC packet
319 * @dev: the device the packet came in on
320 * @hdr: the header of the packet
321 * @has_grh: true if the packet has a GRH
322 * @data: the packet data
323 * @tlen: the length of the packet
324 * @qp: the QP for this packet.
325 *
326 * This is called from ipath_qp_rcv() to process an incoming UC packet
327 * for the given QP.
328 * Called at interrupt level.
329 */
330void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
331 int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
332{
333 struct ipath_other_headers *ohdr;
334 int opcode;
335 u32 hdrsize;
336 u32 psn;
337 u32 pad;
338 unsigned long flags;
339 struct ib_wc wc;
340 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
341 struct ib_reth *reth;
342 int header_in_data;
343
344 /* Check for GRH */
345 if (!has_grh) {
346 ohdr = &hdr->u.oth;
347 hdrsize = 8 + 12; /* LRH + BTH */
348 psn = be32_to_cpu(ohdr->bth[2]);
349 header_in_data = 0;
350 } else {
351 ohdr = &hdr->u.l.oth;
352 hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */
353 /*
354 * The header with GRH is 60 bytes and the
355 * core driver sets the eager header buffer
356 * size to 56 bytes so the last 4 bytes of
357 * the BTH header (PSN) is in the data buffer.
358 */
359 header_in_data =
360 ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
361 if (header_in_data) {
362 psn = be32_to_cpu(((__be32 *) data)[0]);
363 data += sizeof(__be32);
364 } else
365 psn = be32_to_cpu(ohdr->bth[2]);
366 }
367 /*
368 * The opcode is in the low byte when its in network order
369 * (top byte when in host order).
370 */
371 opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
372
373 wc.imm_data = 0;
374 wc.wc_flags = 0;
375
376 spin_lock_irqsave(&qp->r_rq.lock, flags);
377
378 /* Compare the PSN verses the expected PSN. */
379 if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
380 /*
381 * Handle a sequence error.
382 * Silently drop any current message.
383 */
384 qp->r_psn = psn;
385 inv:
386 qp->r_state = OP(SEND_LAST);
387 switch (opcode) {
388 case OP(SEND_FIRST):
389 case OP(SEND_ONLY):
390 case OP(SEND_ONLY_WITH_IMMEDIATE):
391 goto send_first;
392
393 case OP(RDMA_WRITE_FIRST):
394 case OP(RDMA_WRITE_ONLY):
395 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
396 goto rdma_first;
397
398 default:
399 dev->n_pkt_drops++;
400 goto done;
401 }
402 }
403
404 /* Check for opcode sequence errors. */
405 switch (qp->r_state) {
406 case OP(SEND_FIRST):
407 case OP(SEND_MIDDLE):
408 if (opcode == OP(SEND_MIDDLE) ||
409 opcode == OP(SEND_LAST) ||
410 opcode == OP(SEND_LAST_WITH_IMMEDIATE))
411 break;
412 goto inv;
413
414 case OP(RDMA_WRITE_FIRST):
415 case OP(RDMA_WRITE_MIDDLE):
416 if (opcode == OP(RDMA_WRITE_MIDDLE) ||
417 opcode == OP(RDMA_WRITE_LAST) ||
418 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
419 break;
420 goto inv;
421
422 default:
423 if (opcode == OP(SEND_FIRST) ||
424 opcode == OP(SEND_ONLY) ||
425 opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
426 opcode == OP(RDMA_WRITE_FIRST) ||
427 opcode == OP(RDMA_WRITE_ONLY) ||
428 opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
429 break;
430 goto inv;
431 }
432
433 /* OK, process the packet. */
434 switch (opcode) {
435 case OP(SEND_FIRST):
436 case OP(SEND_ONLY):
437 case OP(SEND_ONLY_WITH_IMMEDIATE):
438 send_first:
439 if (qp->r_reuse_sge) {
440 qp->r_reuse_sge = 0;
441 qp->r_sge = qp->s_rdma_sge;
442 } else if (!ipath_get_rwqe(qp, 0)) {
443 dev->n_pkt_drops++;
444 goto done;
445 }
446 /* Save the WQE so we can reuse it in case of an error. */
447 qp->s_rdma_sge = qp->r_sge;
448 qp->r_rcv_len = 0;
449 if (opcode == OP(SEND_ONLY))
450 goto send_last;
451 else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
452 goto send_last_imm;
453 /* FALLTHROUGH */
454 case OP(SEND_MIDDLE):
455 /* Check for invalid length PMTU or posted rwqe len. */
456 if (unlikely(tlen != (hdrsize + pmtu + 4))) {
457 qp->r_reuse_sge = 1;
458 dev->n_pkt_drops++;
459 goto done;
460 }
461 qp->r_rcv_len += pmtu;
462 if (unlikely(qp->r_rcv_len > qp->r_len)) {
463 qp->r_reuse_sge = 1;
464 dev->n_pkt_drops++;
465 goto done;
466 }
467 ipath_copy_sge(&qp->r_sge, data, pmtu);
468 break;
469
470 case OP(SEND_LAST_WITH_IMMEDIATE):
471 send_last_imm:
472 if (header_in_data) {
473 wc.imm_data = *(__be32 *) data;
474 data += sizeof(__be32);
475 } else {
476 /* Immediate data comes after BTH */
477 wc.imm_data = ohdr->u.imm_data;
478 }
479 hdrsize += 4;
480 wc.wc_flags = IB_WC_WITH_IMM;
481 /* FALLTHROUGH */
482 case OP(SEND_LAST):
483 send_last:
484 /* Get the number of bytes the message was padded by. */
485 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
486 /* Check for invalid length. */
487 /* XXX LAST len should be >= 1 */
488 if (unlikely(tlen < (hdrsize + pad + 4))) {
489 qp->r_reuse_sge = 1;
490 dev->n_pkt_drops++;
491 goto done;
492 }
493 /* Don't count the CRC. */
494 tlen -= (hdrsize + pad + 4);
495 wc.byte_len = tlen + qp->r_rcv_len;
496 if (unlikely(wc.byte_len > qp->r_len)) {
497 qp->r_reuse_sge = 1;
498 dev->n_pkt_drops++;
499 goto done;
500 }
501 /* XXX Need to free SGEs */
502 last_imm:
503 ipath_copy_sge(&qp->r_sge, data, tlen);
504 wc.wr_id = qp->r_wr_id;
505 wc.status = IB_WC_SUCCESS;
506 wc.opcode = IB_WC_RECV;
507 wc.vendor_err = 0;
508 wc.qp_num = qp->ibqp.qp_num;
509 wc.src_qp = qp->remote_qpn;
510 wc.pkey_index = 0;
511 wc.slid = qp->remote_ah_attr.dlid;
512 wc.sl = qp->remote_ah_attr.sl;
513 wc.dlid_path_bits = 0;
514 wc.port_num = 0;
515 /* Signal completion event if the solicited bit is set. */
516 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
517 (ohdr->bth[0] &
518 __constant_cpu_to_be32(1 << 23)) != 0);
519 break;
520
521 case OP(RDMA_WRITE_FIRST):
522 case OP(RDMA_WRITE_ONLY):
523 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
524 rdma_first:
525 /* RETH comes after BTH */
526 if (!header_in_data)
527 reth = &ohdr->u.rc.reth;
528 else {
529 reth = (struct ib_reth *)data;
530 data += sizeof(*reth);
531 }
532 hdrsize += sizeof(*reth);
533 qp->r_len = be32_to_cpu(reth->length);
534 qp->r_rcv_len = 0;
535 if (qp->r_len != 0) {
536 u32 rkey = be32_to_cpu(reth->rkey);
537 u64 vaddr = be64_to_cpu(reth->vaddr);
538
539 /* Check rkey */
540 if (unlikely(!ipath_rkey_ok(
541 dev, &qp->r_sge, qp->r_len,
542 vaddr, rkey,
543 IB_ACCESS_REMOTE_WRITE))) {
544 dev->n_pkt_drops++;
545 goto done;
546 }
547 } else {
548 qp->r_sge.sg_list = NULL;
549 qp->r_sge.sge.mr = NULL;
550 qp->r_sge.sge.vaddr = NULL;
551 qp->r_sge.sge.length = 0;
552 qp->r_sge.sge.sge_length = 0;
553 }
554 if (unlikely(!(qp->qp_access_flags &
555 IB_ACCESS_REMOTE_WRITE))) {
556 dev->n_pkt_drops++;
557 goto done;
558 }
559 if (opcode == OP(RDMA_WRITE_ONLY))
560 goto rdma_last;
561 else if (opcode ==
562 OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
563 goto rdma_last_imm;
564 /* FALLTHROUGH */
565 case OP(RDMA_WRITE_MIDDLE):
566 /* Check for invalid length PMTU or posted rwqe len. */
567 if (unlikely(tlen != (hdrsize + pmtu + 4))) {
568 dev->n_pkt_drops++;
569 goto done;
570 }
571 qp->r_rcv_len += pmtu;
572 if (unlikely(qp->r_rcv_len > qp->r_len)) {
573 dev->n_pkt_drops++;
574 goto done;
575 }
576 ipath_copy_sge(&qp->r_sge, data, pmtu);
577 break;
578
579 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
580 rdma_last_imm:
581 /* Get the number of bytes the message was padded by. */
582 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
583 /* Check for invalid length. */
584 /* XXX LAST len should be >= 1 */
585 if (unlikely(tlen < (hdrsize + pad + 4))) {
586 dev->n_pkt_drops++;
587 goto done;
588 }
589 /* Don't count the CRC. */
590 tlen -= (hdrsize + pad + 4);
591 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
592 dev->n_pkt_drops++;
593 goto done;
594 }
595 if (qp->r_reuse_sge) {
596 qp->r_reuse_sge = 0;
597 } else if (!ipath_get_rwqe(qp, 1)) {
598 dev->n_pkt_drops++;
599 goto done;
600 }
601 if (header_in_data) {
602 wc.imm_data = *(__be32 *) data;
603 data += sizeof(__be32);
604 } else {
605 /* Immediate data comes after BTH */
606 wc.imm_data = ohdr->u.imm_data;
607 }
608 hdrsize += 4;
609 wc.wc_flags = IB_WC_WITH_IMM;
610 wc.byte_len = 0;
611 goto last_imm;
612
613 case OP(RDMA_WRITE_LAST):
614 rdma_last:
615 /* Get the number of bytes the message was padded by. */
616 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
617 /* Check for invalid length. */
618 /* XXX LAST len should be >= 1 */
619 if (unlikely(tlen < (hdrsize + pad + 4))) {
620 dev->n_pkt_drops++;
621 goto done;
622 }
623 /* Don't count the CRC. */
624 tlen -= (hdrsize + pad + 4);
625 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
626 dev->n_pkt_drops++;
627 goto done;
628 }
629 ipath_copy_sge(&qp->r_sge, data, tlen);
630 break;
631
632 default:
633 /* Drop packet for unknown opcodes. */
634 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
635 dev->n_pkt_drops++;
636 goto bail;
637 }
638 qp->r_psn++;
639 qp->r_state = opcode;
640done:
641 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
642
643bail:
644 return;
645}
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
new file mode 100644
index 000000000000..01cfb30ee160
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -0,0 +1,621 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_smi.h>
34
35#include "ipath_verbs.h"
36#include "ips_common.h"
37
38/**
39 * ipath_ud_loopback - handle send on loopback QPs
40 * @sqp: the QP
41 * @ss: the SGE state
42 * @length: the length of the data to send
43 * @wr: the work request
44 * @wc: the work completion entry
45 *
46 * This is called from ipath_post_ud_send() to forward a WQE addressed
47 * to the same HCA.
48 */
49static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_sge_state *ss,
50 u32 length, struct ib_send_wr *wr, struct ib_wc *wc)
51{
52 struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
53 struct ipath_qp *qp;
54 struct ib_ah_attr *ah_attr;
55 unsigned long flags;
56 struct ipath_rq *rq;
57 struct ipath_srq *srq;
58 struct ipath_sge_state rsge;
59 struct ipath_sge *sge;
60 struct ipath_rwqe *wqe;
61
62 qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn);
63 if (!qp)
64 return;
65
66 /*
67 * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
68 * Qkeys with the high order bit set mean use the
69 * qkey from the QP context instead of the WR (see 10.2.5).
70 */
71 if (unlikely(qp->ibqp.qp_num &&
72 ((int) wr->wr.ud.remote_qkey < 0
73 ? qp->qkey : wr->wr.ud.remote_qkey) != qp->qkey)) {
74 /* XXX OK to lose a count once in a while. */
75 dev->qkey_violations++;
76 dev->n_pkt_drops++;
77 goto done;
78 }
79
80 /*
81 * A GRH is expected to preceed the data even if not
82 * present on the wire.
83 */
84 wc->byte_len = length + sizeof(struct ib_grh);
85
86 if (wr->opcode == IB_WR_SEND_WITH_IMM) {
87 wc->wc_flags = IB_WC_WITH_IMM;
88 wc->imm_data = wr->imm_data;
89 } else {
90 wc->wc_flags = 0;
91 wc->imm_data = 0;
92 }
93
94 /*
95 * Get the next work request entry to find where to put the data.
96 * Note that it is safe to drop the lock after changing rq->tail
97 * since ipath_post_receive() won't fill the empty slot.
98 */
99 if (qp->ibqp.srq) {
100 srq = to_isrq(qp->ibqp.srq);
101 rq = &srq->rq;
102 } else {
103 srq = NULL;
104 rq = &qp->r_rq;
105 }
106 spin_lock_irqsave(&rq->lock, flags);
107 if (rq->tail == rq->head) {
108 spin_unlock_irqrestore(&rq->lock, flags);
109 dev->n_pkt_drops++;
110 goto done;
111 }
112 /* Silently drop packets which are too big. */
113 wqe = get_rwqe_ptr(rq, rq->tail);
114 if (wc->byte_len > wqe->length) {
115 spin_unlock_irqrestore(&rq->lock, flags);
116 dev->n_pkt_drops++;
117 goto done;
118 }
119 wc->wr_id = wqe->wr_id;
120 rsge.sge = wqe->sg_list[0];
121 rsge.sg_list = wqe->sg_list + 1;
122 rsge.num_sge = wqe->num_sge;
123 if (++rq->tail >= rq->size)
124 rq->tail = 0;
125 if (srq && srq->ibsrq.event_handler) {
126 u32 n;
127
128 if (rq->head < rq->tail)
129 n = rq->size + rq->head - rq->tail;
130 else
131 n = rq->head - rq->tail;
132 if (n < srq->limit) {
133 struct ib_event ev;
134
135 srq->limit = 0;
136 spin_unlock_irqrestore(&rq->lock, flags);
137 ev.device = qp->ibqp.device;
138 ev.element.srq = qp->ibqp.srq;
139 ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
140 srq->ibsrq.event_handler(&ev,
141 srq->ibsrq.srq_context);
142 } else
143 spin_unlock_irqrestore(&rq->lock, flags);
144 } else
145 spin_unlock_irqrestore(&rq->lock, flags);
146 ah_attr = &to_iah(wr->wr.ud.ah)->attr;
147 if (ah_attr->ah_flags & IB_AH_GRH) {
148 ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
149 wc->wc_flags |= IB_WC_GRH;
150 } else
151 ipath_skip_sge(&rsge, sizeof(struct ib_grh));
152 sge = &ss->sge;
153 while (length) {
154 u32 len = sge->length;
155
156 if (len > length)
157 len = length;
158 BUG_ON(len == 0);
159 ipath_copy_sge(&rsge, sge->vaddr, len);
160 sge->vaddr += len;
161 sge->length -= len;
162 sge->sge_length -= len;
163 if (sge->sge_length == 0) {
164 if (--ss->num_sge)
165 *sge = *ss->sg_list++;
166 } else if (sge->length == 0 && sge->mr != NULL) {
167 if (++sge->n >= IPATH_SEGSZ) {
168 if (++sge->m >= sge->mr->mapsz)
169 break;
170 sge->n = 0;
171 }
172 sge->vaddr =
173 sge->mr->map[sge->m]->segs[sge->n].vaddr;
174 sge->length =
175 sge->mr->map[sge->m]->segs[sge->n].length;
176 }
177 length -= len;
178 }
179 wc->status = IB_WC_SUCCESS;
180 wc->opcode = IB_WC_RECV;
181 wc->vendor_err = 0;
182 wc->qp_num = qp->ibqp.qp_num;
183 wc->src_qp = sqp->ibqp.qp_num;
184 /* XXX do we know which pkey matched? Only needed for GSI. */
185 wc->pkey_index = 0;
186 wc->slid = ipath_layer_get_lid(dev->dd) |
187 (ah_attr->src_path_bits &
188 ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1));
189 wc->sl = ah_attr->sl;
190 wc->dlid_path_bits =
191 ah_attr->dlid & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
192 /* Signal completion event if the solicited bit is set. */
193 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
194 wr->send_flags & IB_SEND_SOLICITED);
195
196done:
197 if (atomic_dec_and_test(&qp->refcount))
198 wake_up(&qp->wait);
199}
200
201/**
202 * ipath_post_ud_send - post a UD send on QP
203 * @qp: the QP
204 * @wr: the work request
205 *
206 * Note that we actually send the data as it is posted instead of putting
207 * the request into a ring buffer. If we wanted to use a ring buffer,
208 * we would need to save a reference to the destination address in the SWQE.
209 */
210int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
211{
212 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
213 struct ipath_other_headers *ohdr;
214 struct ib_ah_attr *ah_attr;
215 struct ipath_sge_state ss;
216 struct ipath_sge *sg_list;
217 struct ib_wc wc;
218 u32 hwords;
219 u32 nwords;
220 u32 len;
221 u32 extra_bytes;
222 u32 bth0;
223 u16 lrh0;
224 u16 lid;
225 int i;
226 int ret;
227
228 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
229 ret = 0;
230 goto bail;
231 }
232
233 /* IB spec says that num_sge == 0 is OK. */
234 if (wr->num_sge > qp->s_max_sge) {
235 ret = -EINVAL;
236 goto bail;
237 }
238
239 if (wr->num_sge > 1) {
240 sg_list = kmalloc((qp->s_max_sge - 1) * sizeof(*sg_list),
241 GFP_ATOMIC);
242 if (!sg_list) {
243 ret = -ENOMEM;
244 goto bail;
245 }
246 } else
247 sg_list = NULL;
248
249 /* Check the buffer to send. */
250 ss.sg_list = sg_list;
251 ss.sge.mr = NULL;
252 ss.sge.vaddr = NULL;
253 ss.sge.length = 0;
254 ss.sge.sge_length = 0;
255 ss.num_sge = 0;
256 len = 0;
257 for (i = 0; i < wr->num_sge; i++) {
258 /* Check LKEY */
259 if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) {
260 ret = -EINVAL;
261 goto bail;
262 }
263
264 if (wr->sg_list[i].length == 0)
265 continue;
266 if (!ipath_lkey_ok(&dev->lk_table, ss.num_sge ?
267 sg_list + ss.num_sge - 1 : &ss.sge,
268 &wr->sg_list[i], 0)) {
269 ret = -EINVAL;
270 goto bail;
271 }
272 len += wr->sg_list[i].length;
273 ss.num_sge++;
274 }
275 extra_bytes = (4 - len) & 3;
276 nwords = (len + extra_bytes) >> 2;
277
278 /* Construct the header. */
279 ah_attr = &to_iah(wr->wr.ud.ah)->attr;
280 if (ah_attr->dlid == 0) {
281 ret = -EINVAL;
282 goto bail;
283 }
284 if (ah_attr->dlid >= IPS_MULTICAST_LID_BASE) {
285 if (ah_attr->dlid != IPS_PERMISSIVE_LID)
286 dev->n_multicast_xmit++;
287 else
288 dev->n_unicast_xmit++;
289 } else {
290 dev->n_unicast_xmit++;
291 lid = ah_attr->dlid &
292 ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
293 if (unlikely(lid == ipath_layer_get_lid(dev->dd))) {
294 /*
295 * Pass in an uninitialized ib_wc to save stack
296 * space.
297 */
298 ipath_ud_loopback(qp, &ss, len, wr, &wc);
299 goto done;
300 }
301 }
302 if (ah_attr->ah_flags & IB_AH_GRH) {
303 /* Header size in 32-bit words. */
304 hwords = 17;
305 lrh0 = IPS_LRH_GRH;
306 ohdr = &qp->s_hdr.u.l.oth;
307 qp->s_hdr.u.l.grh.version_tclass_flow =
308 cpu_to_be32((6 << 28) |
309 (ah_attr->grh.traffic_class << 20) |
310 ah_attr->grh.flow_label);
311 qp->s_hdr.u.l.grh.paylen =
312 cpu_to_be16(((wr->opcode ==
313 IB_WR_SEND_WITH_IMM ? 6 : 5) +
314 nwords + SIZE_OF_CRC) << 2);
315 /* next_hdr is defined by C8-7 in ch. 8.4.1 */
316 qp->s_hdr.u.l.grh.next_hdr = 0x1B;
317 qp->s_hdr.u.l.grh.hop_limit = ah_attr->grh.hop_limit;
318 /* The SGID is 32-bit aligned. */
319 qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
320 dev->gid_prefix;
321 qp->s_hdr.u.l.grh.sgid.global.interface_id =
322 ipath_layer_get_guid(dev->dd);
323 qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid;
324 /*
325 * Don't worry about sending to locally attached multicast
326 * QPs. It is unspecified by the spec. what happens.
327 */
328 } else {
329 /* Header size in 32-bit words. */
330 hwords = 7;
331 lrh0 = IPS_LRH_BTH;
332 ohdr = &qp->s_hdr.u.oth;
333 }
334 if (wr->opcode == IB_WR_SEND_WITH_IMM) {
335 ohdr->u.ud.imm_data = wr->imm_data;
336 wc.imm_data = wr->imm_data;
337 hwords += 1;
338 bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
339 } else if (wr->opcode == IB_WR_SEND) {
340 wc.imm_data = 0;
341 bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
342 } else {
343 ret = -EINVAL;
344 goto bail;
345 }
346 lrh0 |= ah_attr->sl << 4;
347 if (qp->ibqp.qp_type == IB_QPT_SMI)
348 lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
349 qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
350 qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
351 qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC);
352 lid = ipath_layer_get_lid(dev->dd);
353 if (lid) {
354 lid |= ah_attr->src_path_bits &
355 ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
356 qp->s_hdr.lrh[3] = cpu_to_be16(lid);
357 } else
358 qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
359 if (wr->send_flags & IB_SEND_SOLICITED)
360 bth0 |= 1 << 23;
361 bth0 |= extra_bytes << 20;
362 bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPS_DEFAULT_P_KEY :
363 ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
364 ohdr->bth[0] = cpu_to_be32(bth0);
365 /*
366 * Use the multicast QP if the destination LID is a multicast LID.
367 */
368 ohdr->bth[1] = ah_attr->dlid >= IPS_MULTICAST_LID_BASE &&
369 ah_attr->dlid != IPS_PERMISSIVE_LID ?
370 __constant_cpu_to_be32(IPS_MULTICAST_QPN) :
371 cpu_to_be32(wr->wr.ud.remote_qpn);
372 /* XXX Could lose a PSN count but not worth locking */
373 ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPS_PSN_MASK);
374 /*
375 * Qkeys with the high order bit set mean use the
376 * qkey from the QP context instead of the WR (see 10.2.5).
377 */
378 ohdr->u.ud.deth[0] = cpu_to_be32((int)wr->wr.ud.remote_qkey < 0 ?
379 qp->qkey : wr->wr.ud.remote_qkey);
380 ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
381 if (ipath_verbs_send(dev->dd, hwords, (u32 *) &qp->s_hdr,
382 len, &ss))
383 dev->n_no_piobuf++;
384
385done:
386 /* Queue the completion status entry. */
387 if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
388 (wr->send_flags & IB_SEND_SIGNALED)) {
389 wc.wr_id = wr->wr_id;
390 wc.status = IB_WC_SUCCESS;
391 wc.vendor_err = 0;
392 wc.opcode = IB_WC_SEND;
393 wc.byte_len = len;
394 wc.qp_num = qp->ibqp.qp_num;
395 wc.src_qp = 0;
396 wc.wc_flags = 0;
397 /* XXX initialize other fields? */
398 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
399 }
400 kfree(sg_list);
401
402 ret = 0;
403
404bail:
405 return ret;
406}
407
408/**
409 * ipath_ud_rcv - receive an incoming UD packet
410 * @dev: the device the packet came in on
411 * @hdr: the packet header
412 * @has_grh: true if the packet has a GRH
413 * @data: the packet data
414 * @tlen: the packet length
415 * @qp: the QP the packet came on
416 *
417 * This is called from ipath_qp_rcv() to process an incoming UD packet
418 * for the given QP.
419 * Called at interrupt level.
420 */
421void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
422 int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
423{
424 struct ipath_other_headers *ohdr;
425 int opcode;
426 u32 hdrsize;
427 u32 pad;
428 unsigned long flags;
429 struct ib_wc wc;
430 u32 qkey;
431 u32 src_qp;
432 struct ipath_rq *rq;
433 struct ipath_srq *srq;
434 struct ipath_rwqe *wqe;
435 u16 dlid;
436 int header_in_data;
437
438 /* Check for GRH */
439 if (!has_grh) {
440 ohdr = &hdr->u.oth;
441 hdrsize = 8 + 12 + 8; /* LRH + BTH + DETH */
442 qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
443 src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
444 header_in_data = 0;
445 } else {
446 ohdr = &hdr->u.l.oth;
447 hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
448 /*
449 * The header with GRH is 68 bytes and the core driver sets
450 * the eager header buffer size to 56 bytes so the last 12
451 * bytes of the IB header is in the data buffer.
452 */
453 header_in_data =
454 ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
455 if (header_in_data) {
456 qkey = be32_to_cpu(((__be32 *) data)[1]);
457 src_qp = be32_to_cpu(((__be32 *) data)[2]);
458 data += 12;
459 } else {
460 qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
461 src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
462 }
463 }
464 src_qp &= IPS_QPN_MASK;
465
466 /*
467 * Check that the permissive LID is only used on QP0
468 * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
469 */
470 if (qp->ibqp.qp_num) {
471 if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
472 hdr->lrh[3] == IB_LID_PERMISSIVE)) {
473 dev->n_pkt_drops++;
474 goto bail;
475 }
476 if (unlikely(qkey != qp->qkey)) {
477 /* XXX OK to lose a count once in a while. */
478 dev->qkey_violations++;
479 dev->n_pkt_drops++;
480 goto bail;
481 }
482 } else if (hdr->lrh[1] == IB_LID_PERMISSIVE ||
483 hdr->lrh[3] == IB_LID_PERMISSIVE) {
484 struct ib_smp *smp = (struct ib_smp *) data;
485
486 if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
487 dev->n_pkt_drops++;
488 goto bail;
489 }
490 }
491
492 /* Get the number of bytes the message was padded by. */
493 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
494 if (unlikely(tlen < (hdrsize + pad + 4))) {
495 /* Drop incomplete packets. */
496 dev->n_pkt_drops++;
497 goto bail;
498 }
499 tlen -= hdrsize + pad + 4;
500
501 /* Drop invalid MAD packets (see 13.5.3.1). */
502 if (unlikely((qp->ibqp.qp_num == 0 &&
503 (tlen != 256 ||
504 (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)) ||
505 (qp->ibqp.qp_num == 1 &&
506 (tlen != 256 ||
507 (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))) {
508 dev->n_pkt_drops++;
509 goto bail;
510 }
511
512 /*
513 * A GRH is expected to preceed the data even if not
514 * present on the wire.
515 */
516 wc.byte_len = tlen + sizeof(struct ib_grh);
517
518 /*
519 * The opcode is in the low byte when its in network order
520 * (top byte when in host order).
521 */
522 opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
523 if (qp->ibqp.qp_num > 1 &&
524 opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
525 if (header_in_data) {
526 wc.imm_data = *(__be32 *) data;
527 data += sizeof(__be32);
528 } else
529 wc.imm_data = ohdr->u.ud.imm_data;
530 wc.wc_flags = IB_WC_WITH_IMM;
531 hdrsize += sizeof(u32);
532 } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
533 wc.imm_data = 0;
534 wc.wc_flags = 0;
535 } else {
536 dev->n_pkt_drops++;
537 goto bail;
538 }
539
540 /*
541 * Get the next work request entry to find where to put the data.
542 * Note that it is safe to drop the lock after changing rq->tail
543 * since ipath_post_receive() won't fill the empty slot.
544 */
545 if (qp->ibqp.srq) {
546 srq = to_isrq(qp->ibqp.srq);
547 rq = &srq->rq;
548 } else {
549 srq = NULL;
550 rq = &qp->r_rq;
551 }
552 spin_lock_irqsave(&rq->lock, flags);
553 if (rq->tail == rq->head) {
554 spin_unlock_irqrestore(&rq->lock, flags);
555 dev->n_pkt_drops++;
556 goto bail;
557 }
558 /* Silently drop packets which are too big. */
559 wqe = get_rwqe_ptr(rq, rq->tail);
560 if (wc.byte_len > wqe->length) {
561 spin_unlock_irqrestore(&rq->lock, flags);
562 dev->n_pkt_drops++;
563 goto bail;
564 }
565 wc.wr_id = wqe->wr_id;
566 qp->r_sge.sge = wqe->sg_list[0];
567 qp->r_sge.sg_list = wqe->sg_list + 1;
568 qp->r_sge.num_sge = wqe->num_sge;
569 if (++rq->tail >= rq->size)
570 rq->tail = 0;
571 if (srq && srq->ibsrq.event_handler) {
572 u32 n;
573
574 if (rq->head < rq->tail)
575 n = rq->size + rq->head - rq->tail;
576 else
577 n = rq->head - rq->tail;
578 if (n < srq->limit) {
579 struct ib_event ev;
580
581 srq->limit = 0;
582 spin_unlock_irqrestore(&rq->lock, flags);
583 ev.device = qp->ibqp.device;
584 ev.element.srq = qp->ibqp.srq;
585 ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
586 srq->ibsrq.event_handler(&ev,
587 srq->ibsrq.srq_context);
588 } else
589 spin_unlock_irqrestore(&rq->lock, flags);
590 } else
591 spin_unlock_irqrestore(&rq->lock, flags);
592 if (has_grh) {
593 ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
594 sizeof(struct ib_grh));
595 wc.wc_flags |= IB_WC_GRH;
596 } else
597 ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
598 ipath_copy_sge(&qp->r_sge, data,
599 wc.byte_len - sizeof(struct ib_grh));
600 wc.status = IB_WC_SUCCESS;
601 wc.opcode = IB_WC_RECV;
602 wc.vendor_err = 0;
603 wc.qp_num = qp->ibqp.qp_num;
604 wc.src_qp = src_qp;
605 /* XXX do we know which pkey matched? Only needed for GSI. */
606 wc.pkey_index = 0;
607 wc.slid = be16_to_cpu(hdr->lrh[3]);
608 wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF;
609 dlid = be16_to_cpu(hdr->lrh[1]);
610 /*
611 * Save the LMC lower bits if the destination LID is a unicast LID.
612 */
613 wc.dlid_path_bits = dlid >= IPS_MULTICAST_LID_BASE ? 0 :
614 dlid & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
615 /* Signal completion event if the solicited bit is set. */
616 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
617 (ohdr->bth[0] &
618 __constant_cpu_to_be32(1 << 23)) != 0);
619
620bail:;
621}
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
new file mode 100644
index 000000000000..2bb08afc86d0
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -0,0 +1,207 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/mm.h>
34#include <linux/device.h>
35
36#include "ipath_kernel.h"
37
38static void __ipath_release_user_pages(struct page **p, size_t num_pages,
39 int dirty)
40{
41 size_t i;
42
43 for (i = 0; i < num_pages; i++) {
44 ipath_cdbg(MM, "%lu/%lu put_page %p\n", (unsigned long) i,
45 (unsigned long) num_pages, p[i]);
46 if (dirty)
47 set_page_dirty_lock(p[i]);
48 put_page(p[i]);
49 }
50}
51
/* call with current->mm->mmap_sem held */
/*
 * Pin num_pages user pages starting at start_page into *p, enforcing
 * the RLIMIT_MEMLOCK resource limit and accounting the pages in
 * current->mm->locked_vm.  Returns 0 on success or a negative errno;
 * on failure any pages already pinned are released.
 */
static int __get_user_pages(unsigned long start_page, size_t num_pages,
			struct page **p, struct vm_area_struct **vma)
{
	unsigned long lock_limit;
	size_t got;
	int ret;

#if 0
	/*
	 * XXX - causes MPI programs to fail, haven't had time to check
	 * yet
	 */
	if (!capable(CAP_IPC_LOCK)) {
		ret = -EPERM;
		goto bail;
	}
#endif

	/* RLIMIT_MEMLOCK is in bytes; convert to a page count. */
	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >>
		PAGE_SHIFT;

	/*
	 * NOTE(review): this compares only this request against the
	 * limit; pages already locked by earlier calls are not counted,
	 * so the cumulative locked_vm can exceed the rlimit - confirm
	 * whether that is intended.
	 */
	if (num_pages > lock_limit) {
		ret = -ENOMEM;
		goto bail;
	}

	ipath_cdbg(VERBOSE, "pin %lx pages from vaddr %lx\n",
		   (unsigned long) num_pages, start_page);

	/* get_user_pages() may pin fewer pages than asked; loop until done. */
	for (got = 0; got < num_pages; got += ret) {
		ret = get_user_pages(current, current->mm,
				     start_page + got * PAGE_SIZE,
				     num_pages - got, 1, 1,
				     p + got, vma);
		if (ret < 0)
			goto bail_release;
	}

	current->mm->locked_vm += num_pages;

	ret = 0;
	goto bail;

bail_release:
	/* Undo the partial pinning; pages are not dirty yet. */
	__ipath_release_user_pages(p, got, 0);
bail:
	return ret;
}
101
102/**
103 * ipath_get_user_pages - lock user pages into memory
104 * @start_page: the start page
105 * @num_pages: the number of pages
106 * @p: the output page structures
107 *
108 * This function takes a given start page (page aligned user virtual
109 * address) and pins it and the following specified number of pages. For
110 * now, num_pages is always 1, but that will probably change at some point
111 * (because caller is doing expected sends on a single virtually contiguous
112 * buffer, so we can do all pages at once).
113 */
114int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
115 struct page **p)
116{
117 int ret;
118
119 down_write(&current->mm->mmap_sem);
120
121 ret = __get_user_pages(start_page, num_pages, p, NULL);
122
123 up_write(&current->mm->mmap_sem);
124
125 return ret;
126}
127
128/**
129 * ipath_get_user_pages_nocopy - lock a single page for I/O and mark shared
130 * @start_page: the page to lock
131 * @p: the output page structure
132 *
133 * This is similar to ipath_get_user_pages, but it's always one page, and we
134 * mark the page as locked for I/O, and shared. This is used for the user
135 * process page that contains the destination address for the rcvhdrq tail
136 * update, so we need to have the vma. If we don't do this, the page can be
137 * taken away from us on fork, even if the child never touches it, and then
138 * the user process never sees the tail register updates.
139 */
140int ipath_get_user_pages_nocopy(unsigned long page, struct page **p)
141{
142 struct vm_area_struct *vma;
143 int ret;
144
145 down_write(&current->mm->mmap_sem);
146
147 ret = __get_user_pages(page, 1, p, &vma);
148
149 up_write(&current->mm->mmap_sem);
150
151 return ret;
152}
153
154void ipath_release_user_pages(struct page **p, size_t num_pages)
155{
156 down_write(&current->mm->mmap_sem);
157
158 __ipath_release_user_pages(p, num_pages, 1);
159
160 current->mm->locked_vm -= num_pages;
161
162 up_write(&current->mm->mmap_sem);
163}
164
/*
 * Deferred locked_vm accounting for pages released at file-close time,
 * when mmap_sem cannot be taken directly.
 */
struct ipath_user_pages_work {
	struct work_struct work;	/* bound to user_pages_account() */
	struct mm_struct *mm;		/* mm to adjust; a reference is held until the work runs */
	unsigned long num_pages;	/* pages to subtract from mm->locked_vm */
};
170
171static void user_pages_account(void *ptr)
172{
173 struct ipath_user_pages_work *work = ptr;
174
175 down_write(&work->mm->mmap_sem);
176 work->mm->locked_vm -= work->num_pages;
177 up_write(&work->mm->mmap_sem);
178 mmput(work->mm);
179 kfree(work);
180}
181
182void ipath_release_user_pages_on_close(struct page **p, size_t num_pages)
183{
184 struct ipath_user_pages_work *work;
185 struct mm_struct *mm;
186
187 __ipath_release_user_pages(p, num_pages, 1);
188
189 mm = get_task_mm(current);
190 if (!mm)
191 goto bail;
192
193 work = kmalloc(sizeof(*work), GFP_KERNEL);
194 if (!work)
195 goto bail_mm;
196
197 goto bail;
198
199 INIT_WORK(&work->work, user_pages_account, work);
200 work->mm = mm;
201 work->num_pages = num_pages;
202
203bail_mm:
204 mmput(mm);
205bail:
206 return;
207}
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
new file mode 100644
index 000000000000..8d2558a01f35
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -0,0 +1,1222 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_mad.h>
34#include <rdma/ib_user_verbs.h>
35#include <linux/utsname.h>
36
37#include "ipath_kernel.h"
38#include "ipath_verbs.h"
39#include "ips_common.h"
40
/* Not static, because we don't want the compiler removing it */
const char ipath_verbs_version[] = "ipath_verbs " IPATH_IDSTR;

/* Number of hash buckets for the QP table (read-only module parameter). */
static unsigned int ib_ipath_qp_table_size = 251;
module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

/* log2 of the LKEY table size; the table holds 2^n entries. */
unsigned int ib_ipath_lkey_table_size = 12;
module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
		   S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
		 "LKEY table size in bits (2^n, 1 <= n <= 23)");

/* Runtime-writable verbs debugging bitmask. */
unsigned int ib_ipath_debug; /* debug mask */
module_param_named(debug, ib_ipath_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(debug, "Verbs debug mask");

MODULE_LICENSE("GPL");
MODULE_AUTHOR("PathScale <support@pathscale.com>");
MODULE_DESCRIPTION("Pathscale InfiniPath driver");

/*
 * Permitted operations per QP state, indexed by ib_qp_state.  Each
 * entry is a bitmask of the IPATH_POST_xxx / IPATH_PROCESS_xxx flags
 * consulted by the post-send/post-recv and receive paths.
 */
const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
	[IB_QPS_RESET] = 0,
	[IB_QPS_INIT] = IPATH_POST_RECV_OK,
	[IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
	[IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
	[IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
	    IPATH_POST_SEND_OK,
	[IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
	[IB_QPS_ERR] = 0,
};

/*
 * Translate ib_wr_opcode into ib_wc_opcode.
 */
const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
	[IB_WR_SEND] = IB_WC_SEND,
	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
};

/*
 * System image GUID.  Set once from the first registered device's GUID
 * and shared by all units (see ipath_register_ib_device()).
 */
static __be64 sys_image_guid;
91
/**
 * ipath_copy_sge - copy data to SGE memory
 * @ss: the SGE state
 * @data: the data to copy
 * @length: the length of the data
 *
 * Walks the scatter/gather state in @ss, copying @length bytes from
 * @data and advancing the SGE/segment cursors as they are consumed.
 */
void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		/* A zero-length segment here would loop forever. */
		BUG_ON(len == 0);
		if (len > length)
			len = length;
		memcpy(sge->vaddr, data, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			/* This SGE is exhausted; advance to the next one. */
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			/*
			 * End of the current MR segment: step to the next
			 * segment, moving to the next map page on wrap.
			 */
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		data += len;
		length -= len;
	}
}
130
/**
 * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
 * @ss: the SGE state
 * @length: the number of bytes to skip
 */
void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	/*
	 * Fast-forward over whole SGEs first.
	 * NOTE(review): unlike the loop below, this does not decrement
	 * ss->num_sge -- confirm callers never rely on num_sge after a
	 * skip that crosses SGE boundaries.
	 */
	while (length > sge->sge_length) {
		length -= sge->sge_length;
		ss->sge = *ss->sg_list++;
	}
	while (length) {
		u32 len = sge->length;

		/* A zero-length segment here would loop forever. */
		BUG_ON(len == 0);
		if (len > length)
			len = length;
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			/* Advance to the next MR segment (see copy_sge). */
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		length -= len;
	}
}
170
171/**
172 * ipath_post_send - post a send on a QP
173 * @ibqp: the QP to post the send on
174 * @wr: the list of work requests to post
175 * @bad_wr: the first bad WR is put here
176 *
177 * This may be called from interrupt context.
178 */
179static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
180 struct ib_send_wr **bad_wr)
181{
182 struct ipath_qp *qp = to_iqp(ibqp);
183 int err = 0;
184
185 /* Check that state is OK to post send. */
186 if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)) {
187 *bad_wr = wr;
188 err = -EINVAL;
189 goto bail;
190 }
191
192 for (; wr; wr = wr->next) {
193 switch (qp->ibqp.qp_type) {
194 case IB_QPT_UC:
195 case IB_QPT_RC:
196 err = ipath_post_rc_send(qp, wr);
197 break;
198
199 case IB_QPT_SMI:
200 case IB_QPT_GSI:
201 case IB_QPT_UD:
202 err = ipath_post_ud_send(qp, wr);
203 break;
204
205 default:
206 err = -EINVAL;
207 }
208 if (err) {
209 *bad_wr = wr;
210 break;
211 }
212 }
213
214bail:
215 return err;
216}
217
/**
 * ipath_post_receive - post a receive on a QP
 * @ibqp: the QP to post the receive on
 * @wr: the WR to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 */
static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
			      struct ib_recv_wr **bad_wr)
{
	struct ipath_qp *qp = to_iqp(ibqp);
	unsigned long flags;
	int ret;

	/* Check that state is OK to post receive. */
	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK)) {
		*bad_wr = wr;
		ret = -EINVAL;
		goto bail;
	}

	for (; wr; wr = wr->next) {
		struct ipath_rwqe *wqe;
		u32 next;
		int i, j;

		/* The WR must fit in the RQ's fixed-size SG list. */
		if (wr->num_sge > qp->r_rq.max_sge) {
			*bad_wr = wr;
			ret = -ENOMEM;
			goto bail;
		}

		/* r_rq.lock serializes head/tail against the receive path. */
		spin_lock_irqsave(&qp->r_rq.lock, flags);
		next = qp->r_rq.head + 1;
		if (next >= qp->r_rq.size)
			next = 0;
		/* Ring is full when advancing head would meet tail. */
		if (next == qp->r_rq.tail) {
			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
			*bad_wr = wr;
			ret = -ENOMEM;
			goto bail;
		}

		wqe = get_rwqe_ptr(&qp->r_rq, qp->r_rq.head);
		wqe->wr_id = wr->wr_id;
		/* Clear entry 0 in case the WR contributes no usable SGEs. */
		wqe->sg_list[0].mr = NULL;
		wqe->sg_list[0].vaddr = NULL;
		wqe->sg_list[0].length = 0;
		wqe->sg_list[0].sge_length = 0;
		wqe->length = 0;
		/* Validate/translate each SGE; j counts entries kept. */
		for (i = 0, j = 0; i < wr->num_sge; i++) {
			/* Check LKEY */
			if (to_ipd(qp->ibqp.pd)->user &&
			    wr->sg_list[i].lkey == 0) {
				spin_unlock_irqrestore(&qp->r_rq.lock,
						       flags);
				*bad_wr = wr;
				ret = -EINVAL;
				goto bail;
			}
			if (wr->sg_list[i].length == 0)
				continue;
			if (!ipath_lkey_ok(
				    &to_idev(qp->ibqp.device)->lk_table,
				    &wqe->sg_list[j], &wr->sg_list[i],
				    IB_ACCESS_LOCAL_WRITE)) {
				spin_unlock_irqrestore(&qp->r_rq.lock,
						       flags);
				*bad_wr = wr;
				ret = -EINVAL;
				goto bail;
			}
			wqe->length += wr->sg_list[i].length;
			j++;
		}
		wqe->num_sge = j;
		/* Publish the WQE by advancing the head index. */
		qp->r_rq.head = next;
		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
	}
	ret = 0;

bail:
	return ret;
}
303
304/**
305 * ipath_qp_rcv - processing an incoming packet on a QP
306 * @dev: the device the packet came on
307 * @hdr: the packet header
308 * @has_grh: true if the packet has a GRH
309 * @data: the packet data
310 * @tlen: the packet length
311 * @qp: the QP the packet came on
312 *
313 * This is called from ipath_ib_rcv() to process an incoming packet
314 * for the given QP.
315 * Called at interrupt level.
316 */
317static void ipath_qp_rcv(struct ipath_ibdev *dev,
318 struct ipath_ib_header *hdr, int has_grh,
319 void *data, u32 tlen, struct ipath_qp *qp)
320{
321 /* Check for valid receive state. */
322 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
323 dev->n_pkt_drops++;
324 return;
325 }
326
327 switch (qp->ibqp.qp_type) {
328 case IB_QPT_SMI:
329 case IB_QPT_GSI:
330 case IB_QPT_UD:
331 ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
332 break;
333
334 case IB_QPT_RC:
335 ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
336 break;
337
338 case IB_QPT_UC:
339 ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
340 break;
341
342 default:
343 break;
344 }
345}
346
/**
 * ipath_ib_rcv - process and incoming packet
 * @arg: the device pointer
 * @rhdr: the header of the packet
 * @data: the packet data
 * @tlen: the packet length
 *
 * This is called from ipath_kreceive() to process an incoming packet at
 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
 */
static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
{
	struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
	struct ipath_ib_header *hdr = rhdr;
	struct ipath_other_headers *ohdr;
	struct ipath_qp *qp;
	u32 qp_num;
	int lnh;
	u8 opcode;
	u16 lid;

	if (unlikely(dev == NULL))
		goto bail;

	if (unlikely(tlen < 24)) {	/* LRH+BTH+CRC */
		dev->rcv_errors++;
		goto bail;
	}

	/* Check for a valid destination LID (see ch. 7.11.1). */
	lid = be16_to_cpu(hdr->lrh[1]);
	if (lid < IPS_MULTICAST_LID_BASE) {
		/* Mask off the LMC bits before comparing with our LID. */
		lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
		if (unlikely(lid != ipath_layer_get_lid(dev->dd))) {
			dev->rcv_errors++;
			goto bail;
		}
	}

	/* Check for GRH */
	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
	if (lnh == IPS_LRH_BTH)
		ohdr = &hdr->u.oth;
	else if (lnh == IPS_LRH_GRH)
		ohdr = &hdr->u.l.oth;
	else {
		dev->rcv_errors++;
		goto bail;
	}

	/*
	 * Per-opcode statistics.  NOTE(review): opcode is the full 8-bit
	 * BTH opcode field; assumes dev->opstats[] is sized for every
	 * possible value -- confirm against the ipath_ibdev definition.
	 */
	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
	dev->opstats[opcode].n_bytes += tlen;
	dev->opstats[opcode].n_packets++;

	/* Get the destination QP number. */
	qp_num = be32_to_cpu(ohdr->bth[1]) & IPS_QPN_MASK;
	if (qp_num == IPS_MULTICAST_QPN) {
		struct ipath_mcast *mcast;
		struct ipath_mcast_qp *p;

		mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
		if (mcast == NULL) {
			dev->n_pkt_drops++;
			goto bail;
		}
		dev->n_multicast_rcv++;
		/* Deliver the packet to every QP attached to the group. */
		list_for_each_entry_rcu(p, &mcast->qp_list, list)
			ipath_qp_rcv(dev, hdr, lnh == IPS_LRH_GRH, data,
				     tlen, p->qp);
		/*
		 * Notify ipath_multicast_detach() if it is waiting for us
		 * to finish.
		 */
		if (atomic_dec_return(&mcast->refcount) <= 1)
			wake_up(&mcast->wait);
	} else {
		qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
		if (qp) {
			dev->n_unicast_rcv++;
			ipath_qp_rcv(dev, hdr, lnh == IPS_LRH_GRH, data,
				     tlen, qp);
			/*
			 * Notify ipath_destroy_qp() if it is waiting
			 * for us to finish.
			 */
			if (atomic_dec_and_test(&qp->refcount))
				wake_up(&qp->wait);
		} else
			dev->n_pkt_drops++;
	}

bail:;
}
440
/**
 * ipath_ib_timer - verbs timer
 * @arg: the device pointer
 *
 * This is called from ipath_do_rcv_timer() at interrupt level to check for
 * QPs which need retransmits and to collect performance numbers.
 */
static void ipath_ib_timer(void *arg)
{
	struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
	struct ipath_qp *resend = NULL;
	struct ipath_qp *rnr = NULL;
	struct list_head *last;
	struct ipath_qp *qp;
	unsigned long flags;

	if (dev == NULL)
		return;

	spin_lock_irqsave(&dev->pending_lock, flags);
	/* Start filling the next pending queue. */
	if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
		dev->pending_index = 0;
	/* Save any requests still in the new queue, they have timed out. */
	last = &dev->pending[dev->pending_index];
	while (!list_empty(last)) {
		qp = list_entry(last->next, struct ipath_qp, timerwait);
		/*
		 * Defend against a racing list_del(): if the entry looks
		 * poisoned or inconsistent, reset the head rather than
		 * walk a corrupt list.
		 */
		if (last->next == LIST_POISON1 ||
		    last->next != &qp->timerwait ||
		    qp->timerwait.prev != last) {
			INIT_LIST_HEAD(last);
		} else {
			list_del(&qp->timerwait);
			/*
			 * Chain the QP onto the local resend list,
			 * reusing timerwait.prev as the link pointer.
			 */
			qp->timerwait.prev = (struct list_head *) resend;
			resend = qp;
			atomic_inc(&qp->refcount);
		}
	}
	last = &dev->rnrwait;
	if (!list_empty(last)) {
		qp = list_entry(last->next, struct ipath_qp, timerwait);
		if (--qp->s_rnr_timeout == 0) {
			/* Collect every QP whose RNR timer just expired. */
			do {
				if (last->next == LIST_POISON1 ||
				    last->next != &qp->timerwait ||
				    qp->timerwait.prev != last) {
					INIT_LIST_HEAD(last);
					break;
				}
				list_del(&qp->timerwait);
				qp->timerwait.prev =
					(struct list_head *) rnr;
				rnr = qp;
				if (list_empty(last))
					break;
				qp = list_entry(last->next, struct ipath_qp,
						timerwait);
			} while (qp->s_rnr_timeout == 0);
		}
	}
	/*
	 * We should only be in the started state if pma_sample_start != 0
	 */
	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
	    --dev->pma_sample_start == 0) {
		/* Sampling window opens: snapshot the baseline counters. */
		dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
		ipath_layer_snapshot_counters(dev->dd, &dev->ipath_sword,
					      &dev->ipath_rword,
					      &dev->ipath_spkts,
					      &dev->ipath_rpkts,
					      &dev->ipath_xmit_wait);
	}
	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
		if (dev->pma_sample_interval == 0) {
			u64 ta, tb, tc, td, te;

			dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
			ipath_layer_snapshot_counters(dev->dd, &ta, &tb,
						      &tc, &td, &te);

			/* Store deltas accumulated over the window. */
			dev->ipath_sword = ta - dev->ipath_sword;
			dev->ipath_rword = tb - dev->ipath_rword;
			dev->ipath_spkts = tc - dev->ipath_spkts;
			dev->ipath_rpkts = td - dev->ipath_rpkts;
			dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
		}
		else
			dev->pma_sample_interval--;
	}
	spin_unlock_irqrestore(&dev->pending_lock, flags);

	/* XXX What if timer fires again while this is running? */
	for (qp = resend; qp != NULL;
	     qp = (struct ipath_qp *) qp->timerwait.prev) {
		struct ib_wc wc;

		spin_lock_irqsave(&qp->s_lock, flags);
		if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) {
			dev->n_timeouts++;
			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
		}
		spin_unlock_irqrestore(&qp->s_lock, flags);

		/* Notify ipath_destroy_qp() if it is waiting. */
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}
	for (qp = rnr; qp != NULL;
	     qp = (struct ipath_qp *) qp->timerwait.prev)
		tasklet_hi_schedule(&qp->s_task);
}
552
553/**
554 * ipath_ib_piobufavail - callback when a PIO buffer is available
555 * @arg: the device pointer
556 *
557 * This is called from ipath_intr() at interrupt level when a PIO buffer is
558 * available after ipath_verbs_send() returned an error that no buffers were
559 * available. Return 0 if we consumed all the PIO buffers and we still have
560 * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
561 * return one).
562 */
563static int ipath_ib_piobufavail(void *arg)
564{
565 struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
566 struct ipath_qp *qp;
567 unsigned long flags;
568
569 if (dev == NULL)
570 goto bail;
571
572 spin_lock_irqsave(&dev->pending_lock, flags);
573 while (!list_empty(&dev->piowait)) {
574 qp = list_entry(dev->piowait.next, struct ipath_qp,
575 piowait);
576 list_del(&qp->piowait);
577 tasklet_hi_schedule(&qp->s_task);
578 }
579 spin_unlock_irqrestore(&dev->pending_lock, flags);
580
581bail:
582 return 1;
583}
584
/*
 * Report device attributes; most limits are fixed constants, the
 * revision/vendor fields come from the layer driver.
 */
static int ipath_query_device(struct ib_device *ibdev,
			      struct ib_device_attr *props)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	u32 vendor, boardrev, majrev, minrev;

	memset(props, 0, sizeof(*props));

	props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
		IB_DEVICE_SYS_IMAGE_GUID;
	ipath_layer_query_device(dev->dd, &vendor, &boardrev,
				 &majrev, &minrev);
	props->vendor_id = vendor;
	props->vendor_part_id = boardrev;
	/* Pack board and chip major/minor revisions into one word. */
	props->hw_ver = boardrev << 16 | majrev << 8 | minrev;

	props->sys_image_guid = dev->sys_image_guid;

	props->max_mr_size = ~0ull;
	props->max_qp = 0xffff;
	props->max_qp_wr = 0xffff;
	props->max_sge = 255;
	props->max_cq = 0xffff;
	props->max_cqe = 0xffff;
	props->max_mr = 0xffff;
	props->max_pd = 0xffff;
	props->max_qp_rd_atom = 1;
	props->max_qp_init_rd_atom = 1;
	/* props->max_res_rd_atom */
	props->max_srq = 0xffff;
	props->max_srq_wr = 0xffff;
	props->max_srq_sge = 255;
	/* props->local_ca_ack_delay */
	props->atomic_cap = IB_ATOMIC_HCA;
	props->max_pkeys = ipath_layer_get_npkeys(dev->dd);
	props->max_mcast_grp = 0xffff;
	props->max_mcast_qp_attach = 0xffff;
	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
		props->max_mcast_grp;

	return 0;
}
628
/*
 * Map the chip's IBCS link training state (low 4 bits of ibcstatus)
 * to the IB PortInfo PortPhysicalState codes reported by
 * ipath_query_port() -- presumably 1=Sleep, 2=Polling, 3=Disabled,
 * 4=PortConfigurationTraining, 5=LinkUp, 6=LinkErrorRecovery; confirm
 * against the IB spec.
 */
const u8 ipath_cvt_physportstate[16] = {
	[INFINIPATH_IBCS_LT_STATE_DISABLED] = 3,
	[INFINIPATH_IBCS_LT_STATE_LINKUP] = 5,
	[INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = 2,
	[INFINIPATH_IBCS_LT_STATE_POLLQUIET] = 2,
	[INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = 1,
	[INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = 1,
	[INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] = 4,
	[INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] = 4,
	[INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] = 4,
	[INFINIPATH_IBCS_LT_STATE_CFGIDLE] = 4,
	[INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] = 6,
	[INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] = 6,
	[INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6,
};
644
/* Fill in ib_port_attr from device state and layer-driver queries. */
static int ipath_query_port(struct ib_device *ibdev,
			    u8 port, struct ib_port_attr *props)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	enum ib_mtu mtu;
	u16 lid = ipath_layer_get_lid(dev->dd);
	u64 ibcstat;

	memset(props, 0, sizeof(*props));
	/* An unassigned LID is reported as the permissive LID. */
	props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE);
	props->lmc = dev->mkeyprot_resv_lmc & 7;
	props->sm_lid = dev->sm_lid;
	props->sm_sl = dev->sm_sl;
	ibcstat = ipath_layer_get_lastibcstat(dev->dd);
	/*
	 * Bits 5:4 of ibcstatus hold the link state; +1 converts to the
	 * IB port state encoding -- presumably (Down/Init/Armed/Active);
	 * confirm against the chip documentation.
	 */
	props->state = ((ibcstat >> 4) & 0x3) + 1;
	/* See phys_state_show() */
	props->phys_state = ipath_cvt_physportstate[
		ipath_layer_get_lastibcstat(dev->dd) & 0xf];
	props->port_cap_flags = dev->port_cap_flags;
	props->gid_tbl_len = 1;
	props->max_msg_sz = 4096;
	props->pkey_tbl_len = ipath_layer_get_npkeys(dev->dd);
	/* Report only violations since the counter was last sampled. */
	props->bad_pkey_cntr = ipath_layer_get_cr_errpkey(dev->dd) -
		dev->n_pkey_violations;
	props->qkey_viol_cntr = dev->qkey_violations;
	props->active_width = IB_WIDTH_4X;
	/* See rate_show() */
	props->active_speed = 1;	/* Regular 10Mbs speed. */
	props->max_vl_num = 1;		/* VLCap = VL0 */
	props->init_type_reply = 0;

	props->max_mtu = IB_MTU_4096;
	/* Translate the byte MTU from the layer driver to an ib_mtu. */
	switch (ipath_layer_get_ibmtu(dev->dd)) {
	case 4096:
		mtu = IB_MTU_4096;
		break;
	case 2048:
		mtu = IB_MTU_2048;
		break;
	case 1024:
		mtu = IB_MTU_1024;
		break;
	case 512:
		mtu = IB_MTU_512;
		break;
	case 256:
		mtu = IB_MTU_256;
		break;
	default:
		mtu = IB_MTU_2048;
	}
	props->active_mtu = mtu;
	props->subnet_timeout = dev->subnet_timeout;

	return 0;
}
701
702static int ipath_modify_device(struct ib_device *device,
703 int device_modify_mask,
704 struct ib_device_modify *device_modify)
705{
706 int ret;
707
708 if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
709 IB_DEVICE_MODIFY_NODE_DESC)) {
710 ret = -EOPNOTSUPP;
711 goto bail;
712 }
713
714 if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
715 memcpy(device->node_desc, device_modify->node_desc, 64);
716
717 if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
718 to_idev(device)->sys_image_guid =
719 cpu_to_be64(device_modify->sys_image_guid);
720
721 ret = 0;
722
723bail:
724 return ret;
725}
726
727static int ipath_modify_port(struct ib_device *ibdev,
728 u8 port, int port_modify_mask,
729 struct ib_port_modify *props)
730{
731 struct ipath_ibdev *dev = to_idev(ibdev);
732
733 dev->port_cap_flags |= props->set_port_cap_mask;
734 dev->port_cap_flags &= ~props->clr_port_cap_mask;
735 if (port_modify_mask & IB_PORT_SHUTDOWN)
736 ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
737 if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
738 dev->qkey_violations = 0;
739 return 0;
740}
741
742static int ipath_query_gid(struct ib_device *ibdev, u8 port,
743 int index, union ib_gid *gid)
744{
745 struct ipath_ibdev *dev = to_idev(ibdev);
746 int ret;
747
748 if (index >= 1) {
749 ret = -EINVAL;
750 goto bail;
751 }
752 gid->global.subnet_prefix = dev->gid_prefix;
753 gid->global.interface_id = ipath_layer_get_guid(dev->dd);
754
755 ret = 0;
756
757bail:
758 return ret;
759}
760
761static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
762 struct ib_ucontext *context,
763 struct ib_udata *udata)
764{
765 struct ipath_pd *pd;
766 struct ib_pd *ret;
767
768 pd = kmalloc(sizeof *pd, GFP_KERNEL);
769 if (!pd) {
770 ret = ERR_PTR(-ENOMEM);
771 goto bail;
772 }
773
774 /* ib_alloc_pd() will initialize pd->ibpd. */
775 pd->user = udata != NULL;
776
777 ret = &pd->ibpd;
778
779bail:
780 return ret;
781}
782
/* Free a protection domain; nothing beyond the container to tear down. */
static int ipath_dealloc_pd(struct ib_pd *ibpd)
{
	kfree(to_ipd(ibpd));

	return 0;
}
791
792/**
793 * ipath_create_ah - create an address handle
794 * @pd: the protection domain
795 * @ah_attr: the attributes of the AH
796 *
797 * This may be called from interrupt context.
798 */
799static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
800 struct ib_ah_attr *ah_attr)
801{
802 struct ipath_ah *ah;
803 struct ib_ah *ret;
804
805 /* A multicast address requires a GRH (see ch. 8.4.1). */
806 if (ah_attr->dlid >= IPS_MULTICAST_LID_BASE &&
807 ah_attr->dlid != IPS_PERMISSIVE_LID &&
808 !(ah_attr->ah_flags & IB_AH_GRH)) {
809 ret = ERR_PTR(-EINVAL);
810 goto bail;
811 }
812
813 ah = kmalloc(sizeof *ah, GFP_ATOMIC);
814 if (!ah) {
815 ret = ERR_PTR(-ENOMEM);
816 goto bail;
817 }
818
819 /* ib_create_ah() will initialize ah->ibah. */
820 ah->attr = *ah_attr;
821
822 ret = &ah->ibah;
823
824bail:
825 return ret;
826}
827
/**
 * ipath_destroy_ah - destroy an address handle
 * @ibah: the AH to destroy
 *
 * This may be called from interrupt context.
 */
static int ipath_destroy_ah(struct ib_ah *ibah)
{
	kfree(to_iah(ibah));

	return 0;
}
842
843static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
844{
845 struct ipath_ah *ah = to_iah(ibah);
846
847 *ah_attr = ah->attr;
848
849 return 0;
850}
851
852static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
853 u16 *pkey)
854{
855 struct ipath_ibdev *dev = to_idev(ibdev);
856 int ret;
857
858 if (index >= ipath_layer_get_npkeys(dev->dd)) {
859 ret = -EINVAL;
860 goto bail;
861 }
862
863 *pkey = ipath_layer_get_pkey(dev->dd, index);
864 ret = 0;
865
866bail:
867 return ret;
868}
869
870
871/**
872 * ipath_alloc_ucontext - allocate a ucontest
873 * @ibdev: the infiniband device
874 * @udata: not used by the InfiniPath driver
875 */
876
877static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
878 struct ib_udata *udata)
879{
880 struct ipath_ucontext *context;
881 struct ib_ucontext *ret;
882
883 context = kmalloc(sizeof *context, GFP_KERNEL);
884 if (!context) {
885 ret = ERR_PTR(-ENOMEM);
886 goto bail;
887 }
888
889 ret = &context->ibucontext;
890
891bail:
892 return ret;
893}
894
/* Free a user context; the container is the only allocation. */
static int ipath_dealloc_ucontext(struct ib_ucontext *context)
{
	kfree(to_iucontext(context));
	return 0;
}
900
901static int ipath_verbs_register_sysfs(struct ib_device *dev);
902
/**
 * ipath_register_ib_device - register our device with the infiniband core
 * @unit: the device number to register
 * @dd: the device data structure
 * Return the allocated ipath_ibdev pointer or NULL on error.
 */
static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
{
	struct ipath_ibdev *idev;
	struct ib_device *dev;
	int ret;

	idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
	if (idev == NULL)
		goto bail;

	dev = &idev->ibdev;

	/* Only need to initialize non-zero fields. */
	spin_lock_init(&idev->qp_table.lock);
	spin_lock_init(&idev->lk_table.lock);
	idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
	/* Set the prefix to the default value (see ch. 4.1.1) */
	idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL);

	ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
	if (ret)
		goto err_qp;

	/*
	 * The top ib_ipath_lkey_table_size bits are used to index the
	 * table. The lower 8 bits can be owned by the user (copied from
	 * the LKEY). The remaining bits act as a generation number or tag.
	 */
	idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
	idev->lk_table.table = kzalloc(idev->lk_table.max *
				       sizeof(*idev->lk_table.table),
				       GFP_KERNEL);
	if (idev->lk_table.table == NULL) {
		ret = -ENOMEM;
		goto err_lk;
	}
	spin_lock_init(&idev->pending_lock);
	INIT_LIST_HEAD(&idev->pending[0]);
	INIT_LIST_HEAD(&idev->pending[1]);
	INIT_LIST_HEAD(&idev->pending[2]);
	INIT_LIST_HEAD(&idev->piowait);
	INIT_LIST_HEAD(&idev->rnrwait);
	idev->pending_index = 0;
	idev->port_cap_flags =
		IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
	/*
	 * Default PMA counter selections.  NOTE(review): index 4 is left
	 * zero -- confirm that is intentional.
	 */
	idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
	idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
	idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
	idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
	idev->pma_counter_select[5] = IB_PMA_PORT_XMIT_WAIT;
	idev->link_width_enabled = 3;	/* 1x or 4x */

	/*
	 * The system image GUID is supposed to be the same for all
	 * IB HCAs in a single system but since there can be other
	 * device types in the system, we can't be sure this is unique.
	 */
	if (!sys_image_guid)
		sys_image_guid = ipath_layer_get_guid(dd);
	idev->sys_image_guid = sys_image_guid;
	idev->ib_unit = unit;
	idev->dd = dd;

	strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
	dev->node_guid = ipath_layer_get_guid(dd);
	dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
	/* Advertise every userspace verbs command we implement. */
	dev->uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_CREATE_AH) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
		(1ull << IB_USER_VERBS_CMD_QUERY_AH) |
		(1ull << IB_USER_VERBS_CMD_REG_MR) |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
		(1ull << IB_USER_VERBS_CMD_POLL_CQ) |
		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
		(1ull << IB_USER_VERBS_CMD_POST_SEND) |
		(1ull << IB_USER_VERBS_CMD_POST_RECV) |
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
	dev->node_type = IB_NODE_CA;
	dev->phys_port_cnt = 1;
	dev->dma_device = ipath_layer_get_device(dd);
	dev->class_dev.dev = dev->dma_device;
	/* Verbs entry points, implemented here and in sibling files. */
	dev->query_device = ipath_query_device;
	dev->modify_device = ipath_modify_device;
	dev->query_port = ipath_query_port;
	dev->modify_port = ipath_modify_port;
	dev->query_pkey = ipath_query_pkey;
	dev->query_gid = ipath_query_gid;
	dev->alloc_ucontext = ipath_alloc_ucontext;
	dev->dealloc_ucontext = ipath_dealloc_ucontext;
	dev->alloc_pd = ipath_alloc_pd;
	dev->dealloc_pd = ipath_dealloc_pd;
	dev->create_ah = ipath_create_ah;
	dev->destroy_ah = ipath_destroy_ah;
	dev->query_ah = ipath_query_ah;
	dev->create_srq = ipath_create_srq;
	dev->modify_srq = ipath_modify_srq;
	dev->query_srq = ipath_query_srq;
	dev->destroy_srq = ipath_destroy_srq;
	dev->create_qp = ipath_create_qp;
	dev->modify_qp = ipath_modify_qp;
	dev->query_qp = ipath_query_qp;
	dev->destroy_qp = ipath_destroy_qp;
	dev->post_send = ipath_post_send;
	dev->post_recv = ipath_post_receive;
	dev->post_srq_recv = ipath_post_srq_receive;
	dev->create_cq = ipath_create_cq;
	dev->destroy_cq = ipath_destroy_cq;
	dev->resize_cq = ipath_resize_cq;
	dev->poll_cq = ipath_poll_cq;
	dev->req_notify_cq = ipath_req_notify_cq;
	dev->get_dma_mr = ipath_get_dma_mr;
	dev->reg_phys_mr = ipath_reg_phys_mr;
	dev->reg_user_mr = ipath_reg_user_mr;
	dev->dereg_mr = ipath_dereg_mr;
	dev->alloc_fmr = ipath_alloc_fmr;
	dev->map_phys_fmr = ipath_map_phys_fmr;
	dev->unmap_fmr = ipath_unmap_fmr;
	dev->dealloc_fmr = ipath_dealloc_fmr;
	dev->attach_mcast = ipath_multicast_attach;
	dev->detach_mcast = ipath_multicast_detach;
	dev->process_mad = ipath_process_mad;

	snprintf(dev->node_desc, sizeof(dev->node_desc),
		 IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename);

	ret = ib_register_device(dev);
	if (ret)
		goto err_reg;

	if (ipath_verbs_register_sysfs(dev))
		goto err_class;

	ipath_layer_enable_timer(dd);

	goto bail;

	/* Unwind in reverse order of the setup above. */
err_class:
	ib_unregister_device(dev);
err_reg:
	kfree(idev->lk_table.table);
err_lk:
	kfree(idev->qp_table.table);
err_qp:
	ib_dealloc_device(dev);
	_VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n",
		     unit, -ret);
	idev = NULL;

bail:
	return idev;
}
1079
/*
 * Tear down a device registered by ipath_register_ib_device(): stop the
 * verbs timer, unregister from the IB core, sanity-check that no work
 * is still queued, then free the tables and the device itself.
 */
static void ipath_unregister_ib_device(void *arg)
{
	struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
	struct ib_device *ibdev = &dev->ibdev;

	ipath_layer_disable_timer(dev->dd);

	ib_unregister_device(ibdev);

	/* These lists should be empty once the ULPs are gone; warn if not. */
	if (!list_empty(&dev->pending[0]) ||
	    !list_empty(&dev->pending[1]) ||
	    !list_empty(&dev->pending[2]))
		_VERBS_ERROR("ipath%d pending list not empty!\n",
			     dev->ib_unit);
	if (!list_empty(&dev->piowait))
		_VERBS_ERROR("ipath%d piowait list not empty!\n",
			     dev->ib_unit);
	if (!list_empty(&dev->rnrwait))
		_VERBS_ERROR("ipath%d rnrwait list not empty!\n",
			     dev->ib_unit);
	if (!ipath_mcast_tree_empty())
		_VERBS_ERROR("ipath%d multicast table memory leak!\n",
			     dev->ib_unit);
	/*
	 * Note that ipath_unregister_ib_device() can be called before all
	 * the QPs are destroyed!
	 */
	ipath_free_all_qps(&dev->qp_table);
	kfree(dev->qp_table.table);
	kfree(dev->lk_table.table);
	ib_dealloc_device(ibdev);
}
1112
/* Module init: hook our verbs callbacks into the ipath core driver. */
static int __init ipath_verbs_init(void)
{
	return ipath_verbs_register(ipath_register_ib_device,
				    ipath_unregister_ib_device,
				    ipath_ib_piobufavail, ipath_ib_rcv,
				    ipath_ib_timer);
}
1120
/* Module exit: detach the verbs callbacks from the ipath core driver. */
static void __exit ipath_verbs_cleanup(void)
{
	ipath_verbs_unregister();
}
1125
1126static ssize_t show_rev(struct class_device *cdev, char *buf)
1127{
1128 struct ipath_ibdev *dev =
1129 container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
1130 int vendor, boardrev, majrev, minrev;
1131
1132 ipath_layer_query_device(dev->dd, &vendor, &boardrev,
1133 &majrev, &minrev);
1134 return sprintf(buf, "%d.%d\n", majrev, minrev);
1135}
1136
1137static ssize_t show_hca(struct class_device *cdev, char *buf)
1138{
1139 struct ipath_ibdev *dev =
1140 container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
1141 int ret;
1142
1143 ret = ipath_layer_get_boardname(dev->dd, buf, 128);
1144 if (ret < 0)
1145 goto bail;
1146 strcat(buf, "\n");
1147 ret = strlen(buf);
1148
1149bail:
1150 return ret;
1151}
1152
/*
 * sysfs stats attribute: dump driver counters followed by per-opcode
 * packet/byte totals (opcodes never seen are omitted).
 */
static ssize_t show_stats(struct class_device *cdev, char *buf)
{
	struct ipath_ibdev *dev =
		container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
	int i;
	int len;

	len = sprintf(buf,
		      "RC resends  %d\n"
		      "RC QACKs    %d\n"
		      "RC ACKs     %d\n"
		      "RC SEQ NAKs %d\n"
		      "RC RDMA seq %d\n"
		      "RC RNR NAKs %d\n"
		      "RC OTH NAKs %d\n"
		      "RC timeouts %d\n"
		      "RC RDMA dup %d\n"
		      "piobuf wait %d\n"
		      "no piobuf   %d\n"
		      "PKT drops   %d\n"
		      "WQE errs    %d\n",
		      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
		      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
		      dev->n_other_naks, dev->n_timeouts,
		      dev->n_rdma_dup_busy, dev->n_piowait,
		      dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs);
	for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
		const struct ipath_opcode_stats *si = &dev->opstats[i];

		/* Skip opcodes with no traffic to keep the output short. */
		if (!si->n_packets && !si->n_bytes)
			continue;
		len += sprintf(buf + len, "%02x %llu/%llu\n", i,
			       (unsigned long long) si->n_packets,
			       (unsigned long long) si->n_bytes);
	}
	return len;
}
1190
/* Read-only sysfs attributes exposed under the IB class device. */
static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
/* board_id intentionally shows the same string as hca_type. */
static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
static CLASS_DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);

/* Table consumed by ipath_verbs_register_sysfs(). */
static struct class_device_attribute *ipath_class_attributes[] = {
	&class_device_attr_hw_rev,
	&class_device_attr_hca_type,
	&class_device_attr_board_id,
	&class_device_attr_stats
};
1202
1203static int ipath_verbs_register_sysfs(struct ib_device *dev)
1204{
1205 int i;
1206 int ret;
1207
1208 for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
1209 if (class_device_create_file(&dev->class_dev,
1210 ipath_class_attributes[i])) {
1211 ret = 1;
1212 goto bail;
1213 }
1214
1215 ret = 0;
1216
1217bail:
1218 return ret;
1219}
1220
/* Wire up the module entry and exit points declared above. */
module_init(ipath_verbs_init);
module_exit(ipath_verbs_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
new file mode 100644
index 000000000000..fcafbc7c9e71
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -0,0 +1,692 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef IPATH_VERBS_H
34#define IPATH_VERBS_H
35
36#include <linux/types.h>
37#include <linux/spinlock.h>
38#include <linux/kernel.h>
39#include <linux/interrupt.h>
40#include <rdma/ib_pack.h>
41
42#include "ipath_layer.h"
43#include "verbs_debug.h"
44
45#define QPN_MAX (1 << 24)
46#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
47
48/*
49 * Increment this value if any changes that break userspace ABI
50 * compatibility are made.
51 */
52#define IPATH_UVERBS_ABI_VERSION 1
53
54/*
55 * Define an ib_cq_notify value that is not valid so we know when CQ
56 * notifications are armed.
57 */
58#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1)
59
60#define IB_RNR_NAK 0x20
61#define IB_NAK_PSN_ERROR 0x60
62#define IB_NAK_INVALID_REQUEST 0x61
63#define IB_NAK_REMOTE_ACCESS_ERROR 0x62
64#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
65#define IB_NAK_INVALID_RD_REQUEST 0x64
66
67#define IPATH_POST_SEND_OK 0x01
68#define IPATH_POST_RECV_OK 0x02
69#define IPATH_PROCESS_RECV_OK 0x04
70#define IPATH_PROCESS_SEND_OK 0x08
71
72/* IB Performance Manager status values */
73#define IB_PMA_SAMPLE_STATUS_DONE 0x00
74#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
75#define IB_PMA_SAMPLE_STATUS_RUNNING 0x02
76
77/* Mandatory IB performance counter select values. */
78#define IB_PMA_PORT_XMIT_DATA __constant_htons(0x0001)
79#define IB_PMA_PORT_RCV_DATA __constant_htons(0x0002)
80#define IB_PMA_PORT_XMIT_PKTS __constant_htons(0x0003)
81#define IB_PMA_PORT_RCV_PKTS __constant_htons(0x0004)
82#define IB_PMA_PORT_XMIT_WAIT __constant_htons(0x0005)
83
84struct ib_reth {
85 __be64 vaddr;
86 __be32 rkey;
87 __be32 length;
88} __attribute__ ((packed));
89
90struct ib_atomic_eth {
91 __be64 vaddr;
92 __be32 rkey;
93 __be64 swap_data;
94 __be64 compare_data;
95} __attribute__ ((packed));
96
97struct ipath_other_headers {
98 __be32 bth[3];
99 union {
100 struct {
101 __be32 deth[2];
102 __be32 imm_data;
103 } ud;
104 struct {
105 struct ib_reth reth;
106 __be32 imm_data;
107 } rc;
108 struct {
109 __be32 aeth;
110 __be64 atomic_ack_eth;
111 } at;
112 __be32 imm_data;
113 __be32 aeth;
114 struct ib_atomic_eth atomic_eth;
115 } u;
116} __attribute__ ((packed));
117
118/*
119 * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
120 * long (72 w/ imm_data). Only the first 56 bytes of the IB header
121 * will be in the eager header buffer. The remaining 12 or 16 bytes
122 * are in the data buffer.
123 */
124struct ipath_ib_header {
125 __be16 lrh[4];
126 union {
127 struct {
128 struct ib_grh grh;
129 struct ipath_other_headers oth;
130 } l;
131 struct ipath_other_headers oth;
132 } u;
133} __attribute__ ((packed));
134
135/*
136 * There is one struct ipath_mcast for each multicast GID.
137 * All attached QPs are then stored as a list of
138 * struct ipath_mcast_qp.
139 */
140struct ipath_mcast_qp {
141 struct list_head list;
142 struct ipath_qp *qp;
143};
144
145struct ipath_mcast {
146 struct rb_node rb_node;
147 union ib_gid mgid;
148 struct list_head qp_list;
149 wait_queue_head_t wait;
150 atomic_t refcount;
151};
152
153/* Memory region */
154struct ipath_mr {
155 struct ib_mr ibmr;
156 struct ipath_mregion mr; /* must be last */
157};
158
159/* Fast memory region */
160struct ipath_fmr {
161 struct ib_fmr ibfmr;
162 u8 page_shift;
163 struct ipath_mregion mr; /* must be last */
164};
165
166/* Protection domain */
167struct ipath_pd {
168 struct ib_pd ibpd;
169 int user; /* non-zero if created from user space */
170};
171
172/* Address Handle */
173struct ipath_ah {
174 struct ib_ah ibah;
175 struct ib_ah_attr attr;
176};
177
178/*
179 * Quick description of our CQ/QP locking scheme:
180 *
181 * We have one global lock that protects dev->cq/qp_table. Each
182 * struct ipath_cq/qp also has its own lock. An individual qp lock
183 * may be taken inside of an individual cq lock. Both cqs attached to
184 * a qp may be locked, with the send cq locked first. No other
185 * nesting should be done.
186 *
187 * Each struct ipath_cq/qp also has an atomic_t ref count. The
188 * pointer from the cq/qp_table to the struct counts as one reference.
189 * This reference also is good for access through the consumer API, so
190 * modifying the CQ/QP etc doesn't need to take another reference.
191 * Access because of a completion being polled does need a reference.
192 *
193 * Finally, each struct ipath_cq/qp has a wait_queue_head_t for the
194 * destroy function to sleep on.
195 *
196 * This means that access from the consumer API requires nothing but
197 * taking the struct's lock.
198 *
199 * Access because of a completion event should go as follows:
200 * - lock cq/qp_table and look up struct
201 * - increment ref count in struct
202 * - drop cq/qp_table lock
203 * - lock struct, do your thing, and unlock struct
204 * - decrement ref count; if zero, wake up waiters
205 *
206 * To destroy a CQ/QP, we can do the following:
207 * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
208 * - decrement ref count
209 * - wait_event until ref count is zero
210 *
 * It is the consumer's responsibility to make sure that no QP
212 * operations (WQE posting or state modification) are pending when the
213 * QP is destroyed. Also, the consumer must make sure that calls to
214 * qp_modify are serialized.
215 *
216 * Possible optimizations (wait for profile data to see if/where we
217 * have locks bouncing between CPUs):
218 * - split cq/qp table lock into n separate (cache-aligned) locks,
219 * indexed (say) by the page in the table
220 */
221
/*
 * Completion queue: a software ring of ib_wc entries protected by @lock.
 * Producers append completions at @head; ipath_poll_cq() consumes from
 * @tail.  The ring holds ibcq.cqe + 1 entries.
 */
struct ipath_cq {
	struct ib_cq ibcq;
	struct tasklet_struct comptask;
	spinlock_t lock;
	u8 notify;
	u8 triggered;
	u32 head;		/* new records added to the head */
	u32 tail;		/* poll_cq() reads from here. */
	struct ib_wc *queue;	/* this is actually ibcq.cqe + 1 */
};
232
233/*
234 * Send work request queue entry.
235 * The size of the sg_list is determined when the QP is created and stored
236 * in qp->s_max_sge.
237 */
238struct ipath_swqe {
239 struct ib_send_wr wr; /* don't use wr.sg_list */
240 u32 psn; /* first packet sequence number */
241 u32 lpsn; /* last packet sequence number */
242 u32 ssn; /* send sequence number */
243 u32 length; /* total length of data in sg_list */
244 struct ipath_sge sg_list[0];
245};
246
247/*
248 * Receive work request queue entry.
249 * The size of the sg_list is determined when the QP is created and stored
250 * in qp->r_max_sge.
251 */
252struct ipath_rwqe {
253 u64 wr_id;
254 u32 length; /* total length of data in sg_list */
255 u8 num_sge;
256 struct ipath_sge sg_list[0];
257};
258
259struct ipath_rq {
260 spinlock_t lock;
261 u32 head; /* new work requests posted to the head */
262 u32 tail; /* receives pull requests from here. */
263 u32 size; /* size of RWQE array */
264 u8 max_sge;
265 struct ipath_rwqe *wq; /* RWQE array */
266};
267
268struct ipath_srq {
269 struct ib_srq ibsrq;
270 struct ipath_rq rq;
271 /* send signal when number of RWQEs < limit */
272 u32 limit;
273};
274
275/*
276 * Variables prefixed with s_ are for the requester (sender).
277 * Variables prefixed with r_ are for the responder (receiver).
278 * Variables prefixed with ack_ are for responder replies.
279 *
280 * Common variables are protected by both r_rq.lock and s_lock in that order
281 * which only happens in modify_qp() or changing the QP 'state'.
282 */
283struct ipath_qp {
284 struct ib_qp ibqp;
285 struct ipath_qp *next; /* link list for QPN hash table */
286 struct list_head piowait; /* link for wait PIO buf */
287 struct list_head timerwait; /* link for waiting for timeouts */
288 struct ib_ah_attr remote_ah_attr;
289 struct ipath_ib_header s_hdr; /* next packet header to send */
290 atomic_t refcount;
291 wait_queue_head_t wait;
292 struct tasklet_struct s_task;
293 struct ipath_sge_state *s_cur_sge;
294 struct ipath_sge_state s_sge; /* current send request data */
295 /* current RDMA read send data */
296 struct ipath_sge_state s_rdma_sge;
297 struct ipath_sge_state r_sge; /* current receive data */
298 spinlock_t s_lock;
299 unsigned long s_flags;
300 u32 s_hdrwords; /* size of s_hdr in 32 bit words */
301 u32 s_cur_size; /* size of send packet in bytes */
302 u32 s_len; /* total length of s_sge */
303 u32 s_rdma_len; /* total length of s_rdma_sge */
304 u32 s_next_psn; /* PSN for next request */
305 u32 s_last_psn; /* last response PSN processed */
306 u32 s_psn; /* current packet sequence number */
307 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
308 u32 s_ack_psn; /* PSN for next ACK or RDMA_READ */
309 u64 s_ack_atomic; /* data for atomic ACK */
310 u64 r_wr_id; /* ID for current receive WQE */
311 u64 r_atomic_data; /* data for last atomic op */
312 u32 r_atomic_psn; /* PSN of last atomic op */
313 u32 r_len; /* total length of r_sge */
314 u32 r_rcv_len; /* receive data len processed */
315 u32 r_psn; /* expected rcv packet sequence number */
316 u8 state; /* QP state */
317 u8 s_state; /* opcode of last packet sent */
318 u8 s_ack_state; /* opcode of packet to ACK */
319 u8 s_nak_state; /* non-zero if NAK is pending */
320 u8 r_state; /* opcode of last packet received */
321 u8 r_reuse_sge; /* for UC receive errors */
322 u8 r_sge_inx; /* current index into sg_list */
323 u8 s_max_sge; /* size of s_wq->sg_list */
324 u8 qp_access_flags;
325 u8 s_retry_cnt; /* number of times to retry */
326 u8 s_rnr_retry_cnt;
327 u8 s_min_rnr_timer;
328 u8 s_retry; /* requester retry counter */
329 u8 s_rnr_retry; /* requester RNR retry counter */
330 u8 s_pkey_index; /* PKEY index to use */
331 enum ib_mtu path_mtu;
332 atomic_t msn; /* message sequence number */
333 u32 remote_qpn;
334 u32 qkey; /* QKEY for this QP (for UD or RD) */
335 u32 s_size; /* send work queue size */
336 u32 s_head; /* new entries added here */
337 u32 s_tail; /* next entry to process */
338 u32 s_cur; /* current work queue entry */
339 u32 s_last; /* last un-ACK'ed entry */
340 u32 s_ssn; /* SSN of tail entry */
341 u32 s_lsn; /* limit sequence number (credit) */
342 struct ipath_swqe *s_wq; /* send work queue */
343 struct ipath_rq r_rq; /* receive work queue */
344};
345
346/*
347 * Bit definitions for s_flags.
348 */
349#define IPATH_S_BUSY 0
350#define IPATH_S_SIGNAL_REQ_WR 1
351
352/*
353 * Since struct ipath_swqe is not a fixed size, we can't simply index into
354 * struct ipath_qp.s_wq. This function does the array index computation.
355 */
356static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
357 unsigned n)
358{
359 return (struct ipath_swqe *)((char *)qp->s_wq +
360 (sizeof(struct ipath_swqe) +
361 qp->s_max_sge *
362 sizeof(struct ipath_sge)) * n);
363}
364
365/*
366 * Since struct ipath_rwqe is not a fixed size, we can't simply index into
367 * struct ipath_rq.wq. This function does the array index computation.
368 */
369static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
370 unsigned n)
371{
372 return (struct ipath_rwqe *)
373 ((char *) rq->wq +
374 (sizeof(struct ipath_rwqe) +
375 rq->max_sge * sizeof(struct ipath_sge)) * n);
376}
377
378/*
379 * QPN-map pages start out as NULL, they get allocated upon
380 * first use and are never deallocated. This way,
381 * large bitmaps are not allocated unless large numbers of QPs are used.
382 */
383struct qpn_map {
384 atomic_t n_free;
385 void *page;
386};
387
388struct ipath_qp_table {
389 spinlock_t lock;
390 u32 last; /* last QP number allocated */
391 u32 max; /* size of the hash table */
392 u32 nmaps; /* size of the map table */
393 struct ipath_qp **table;
394 /* bit map of free numbers */
395 struct qpn_map map[QPNMAP_ENTRIES];
396};
397
398struct ipath_lkey_table {
399 spinlock_t lock;
400 u32 next; /* next unused index (speeds search) */
401 u32 gen; /* generation count */
402 u32 max; /* size of the table */
403 struct ipath_mregion **table;
404};
405
406struct ipath_opcode_stats {
407 u64 n_packets; /* number of packets */
408 u64 n_bytes; /* total number of bytes */
409};
410
411struct ipath_ibdev {
412 struct ib_device ibdev;
413 struct list_head dev_list;
414 struct ipath_devdata *dd;
415 int ib_unit; /* This is the device number */
416 u16 sm_lid; /* in host order */
417 u8 sm_sl;
418 u8 mkeyprot_resv_lmc;
419 /* non-zero when timer is set */
420 unsigned long mkey_lease_timeout;
421
422 /* The following fields are really per port. */
423 struct ipath_qp_table qp_table;
424 struct ipath_lkey_table lk_table;
425 struct list_head pending[3]; /* FIFO of QPs waiting for ACKs */
426 struct list_head piowait; /* list for wait PIO buf */
427 /* list of QPs waiting for RNR timer */
428 struct list_head rnrwait;
429 spinlock_t pending_lock;
430 __be64 sys_image_guid; /* in network order */
431 __be64 gid_prefix; /* in network order */
432 __be64 mkey;
433 u64 ipath_sword; /* total dwords sent (sample result) */
434 u64 ipath_rword; /* total dwords received (sample result) */
435 u64 ipath_spkts; /* total packets sent (sample result) */
436 u64 ipath_rpkts; /* total packets received (sample result) */
437 /* # of ticks no data sent (sample result) */
438 u64 ipath_xmit_wait;
439 u64 rcv_errors; /* # of packets with SW detected rcv errs */
440 u64 n_unicast_xmit; /* total unicast packets sent */
441 u64 n_unicast_rcv; /* total unicast packets received */
442 u64 n_multicast_xmit; /* total multicast packets sent */
443 u64 n_multicast_rcv; /* total multicast packets received */
444 u64 n_symbol_error_counter; /* starting count for PMA */
445 u64 n_link_error_recovery_counter; /* starting count for PMA */
446 u64 n_link_downed_counter; /* starting count for PMA */
447 u64 n_port_rcv_errors; /* starting count for PMA */
448 u64 n_port_rcv_remphys_errors; /* starting count for PMA */
449 u64 n_port_xmit_discards; /* starting count for PMA */
450 u64 n_port_xmit_data; /* starting count for PMA */
451 u64 n_port_rcv_data; /* starting count for PMA */
452 u64 n_port_xmit_packets; /* starting count for PMA */
453 u64 n_port_rcv_packets; /* starting count for PMA */
454 u32 n_pkey_violations; /* starting count for PMA */
455 u32 n_rc_resends;
456 u32 n_rc_acks;
457 u32 n_rc_qacks;
458 u32 n_seq_naks;
459 u32 n_rdma_seq;
460 u32 n_rnr_naks;
461 u32 n_other_naks;
462 u32 n_timeouts;
463 u32 n_pkt_drops;
464 u32 n_wqe_errs;
465 u32 n_rdma_dup_busy;
466 u32 n_piowait;
467 u32 n_no_piobuf;
468 u32 port_cap_flags;
469 u32 pma_sample_start;
470 u32 pma_sample_interval;
471 __be16 pma_counter_select[5];
472 u16 pma_tag;
473 u16 qkey_violations;
474 u16 mkey_violations;
475 u16 mkey_lease_period;
476 u16 pending_index; /* which pending queue is active */
477 u8 pma_sample_status;
478 u8 subnet_timeout;
479 u8 link_width_enabled;
480 u8 vl_high_limit;
481 struct ipath_opcode_stats opstats[128];
482};
483
484struct ipath_ucontext {
485 struct ib_ucontext ibucontext;
486};
487
/*
 * container_of() helpers: map a core verbs structure that is embedded in
 * one of our driver-private structures back to the enclosing ipath_* type.
 */
static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
{
	return container_of(ibmr, struct ipath_mr, ibmr);
}

static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
{
	return container_of(ibfmr, struct ipath_fmr, ibfmr);
}

static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
{
	return container_of(ibpd, struct ipath_pd, ibpd);
}

static inline struct ipath_ah *to_iah(struct ib_ah *ibah)
{
	return container_of(ibah, struct ipath_ah, ibah);
}

static inline struct ipath_cq *to_icq(struct ib_cq *ibcq)
{
	return container_of(ibcq, struct ipath_cq, ibcq);
}

static inline struct ipath_srq *to_isrq(struct ib_srq *ibsrq)
{
	return container_of(ibsrq, struct ipath_srq, ibsrq);
}

static inline struct ipath_qp *to_iqp(struct ib_qp *ibqp)
{
	return container_of(ibqp, struct ipath_qp, ibqp);
}

static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
{
	return container_of(ibdev, struct ipath_ibdev, ibdev);
}

int ipath_process_mad(struct ib_device *ibdev,
		      int mad_flags,
		      u8 port_num,
		      struct ib_wc *in_wc,
		      struct ib_grh *in_grh,
		      struct ib_mad *in_mad, struct ib_mad *out_mad);

static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
						  *ibucontext)
{
	return container_of(ibucontext, struct ipath_ucontext, ibucontext);
}
540
541/*
542 * Compare the lower 24 bits of the two values.
543 * Returns an integer <, ==, or > than zero.
544 */
545static inline int ipath_cmp24(u32 a, u32 b)
546{
547 return (((int) a) - ((int) b)) << 8;
548}
549
550struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
551
552int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
553
554int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
555
556int ipath_mcast_tree_empty(void);
557
558__be32 ipath_compute_aeth(struct ipath_qp *qp);
559
560struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn);
561
562struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
563 struct ib_qp_init_attr *init_attr,
564 struct ib_udata *udata);
565
566int ipath_destroy_qp(struct ib_qp *ibqp);
567
568int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
569 int attr_mask);
570
571int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
572 int attr_mask, struct ib_qp_init_attr *init_attr);
573
574void ipath_free_all_qps(struct ipath_qp_table *qpt);
575
576int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
577
578void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
579
580void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
581
582void ipath_do_rc_send(unsigned long data);
583
584void ipath_do_uc_send(unsigned long data);
585
586void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
587
588int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
589 u32 len, u64 vaddr, u32 rkey, int acc);
590
591int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
592 struct ib_sge *sge, int acc);
593
594void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
595
596void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
597
598int ipath_post_rc_send(struct ipath_qp *qp, struct ib_send_wr *wr);
599
600void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
601 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
602
603void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
604 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
605
606void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc);
607
608int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr);
609
610void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
611 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
612
613int ipath_alloc_lkey(struct ipath_lkey_table *rkt,
614 struct ipath_mregion *mr);
615
616void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
617
618int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
619 struct ib_sge *sge, int acc);
620
621int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
622 u32 len, u64 vaddr, u32 rkey, int acc);
623
624int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
625 struct ib_recv_wr **bad_wr);
626
627struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
628 struct ib_srq_init_attr *srq_init_attr,
629 struct ib_udata *udata);
630
631int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
632 enum ib_srq_attr_mask attr_mask);
633
634int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
635
636int ipath_destroy_srq(struct ib_srq *ibsrq);
637
638void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
639
640int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
641
642struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
643 struct ib_ucontext *context,
644 struct ib_udata *udata);
645
646int ipath_destroy_cq(struct ib_cq *ibcq);
647
648int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify);
649
650int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
651
652struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc);
653
654struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
655 struct ib_phys_buf *buffer_list,
656 int num_phys_buf, int acc, u64 *iova_start);
657
658struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
659 int mr_access_flags,
660 struct ib_udata *udata);
661
662int ipath_dereg_mr(struct ib_mr *ibmr);
663
664struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
665 struct ib_fmr_attr *fmr_attr);
666
667int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
668 int list_len, u64 iova);
669
670int ipath_unmap_fmr(struct list_head *fmr_list);
671
672int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
673
674void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev);
675
676void ipath_insert_rnr_queue(struct ipath_qp *qp);
677
678int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
679
680void ipath_ruc_loopback(struct ipath_qp *sqp, struct ib_wc *wc);
681
682extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
683
684extern const u8 ipath_cvt_physportstate[];
685
686extern const int ib_ipath_state_ops[];
687
688extern unsigned int ib_ipath_lkey_table_size;
689
690extern const u32 ib_ipath_rnr_table[];
691
692#endif /* IPATH_VERBS_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
new file mode 100644
index 000000000000..10b31d2c4f20
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -0,0 +1,333 @@
1/*
2 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/list.h>
34#include <linux/rcupdate.h>
35
36#include "ipath_verbs.h"
37
38/*
39 * Global table of GID to attached QPs.
40 * The table is global to all ipath devices since a send from one QP/device
41 * needs to be locally routed to any locally attached QPs on the same
42 * or different device.
43 */
44static struct rb_root mcast_tree;
45static DEFINE_SPINLOCK(mcast_lock);
46
47/**
48 * ipath_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
49 * @qp: the QP to link
50 */
51static struct ipath_mcast_qp *ipath_mcast_qp_alloc(struct ipath_qp *qp)
52{
53 struct ipath_mcast_qp *mqp;
54
55 mqp = kmalloc(sizeof *mqp, GFP_KERNEL);
56 if (!mqp)
57 goto bail;
58
59 mqp->qp = qp;
60 atomic_inc(&qp->refcount);
61
62bail:
63 return mqp;
64}
65
static void ipath_mcast_qp_free(struct ipath_mcast_qp *mqp)
{
	struct ipath_qp *qp = mqp->qp;

	/*
	 * Drop the reference taken by ipath_mcast_qp_alloc().
	 * Notify ipath_destroy_qp() if it is waiting.
	 */
	if (atomic_dec_and_test(&qp->refcount))
		wake_up(&qp->wait);

	kfree(mqp);
}
76
77/**
78 * ipath_mcast_alloc - allocate the multicast GID structure
79 * @mgid: the multicast GID
80 *
81 * A list of QPs will be attached to this structure.
82 */
83static struct ipath_mcast *ipath_mcast_alloc(union ib_gid *mgid)
84{
85 struct ipath_mcast *mcast;
86
87 mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
88 if (!mcast)
89 goto bail;
90
91 mcast->mgid = *mgid;
92 INIT_LIST_HEAD(&mcast->qp_list);
93 init_waitqueue_head(&mcast->wait);
94 atomic_set(&mcast->refcount, 0);
95
96bail:
97 return mcast;
98}
99
static void ipath_mcast_free(struct ipath_mcast *mcast)
{
	struct ipath_mcast_qp *p, *tmp;

	/* Drop each attached QP's reference before freeing the GID node. */
	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
		ipath_mcast_qp_free(p);

	kfree(mcast);
}
109
110/**
111 * ipath_mcast_find - search the global table for the given multicast GID
112 * @mgid: the multicast GID to search for
113 *
114 * Returns NULL if not found.
115 *
116 * The caller is responsible for decrementing the reference count if found.
117 */
118struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid)
119{
120 struct rb_node *n;
121 unsigned long flags;
122 struct ipath_mcast *mcast;
123
124 spin_lock_irqsave(&mcast_lock, flags);
125 n = mcast_tree.rb_node;
126 while (n) {
127 int ret;
128
129 mcast = rb_entry(n, struct ipath_mcast, rb_node);
130
131 ret = memcmp(mgid->raw, mcast->mgid.raw,
132 sizeof(union ib_gid));
133 if (ret < 0)
134 n = n->rb_left;
135 else if (ret > 0)
136 n = n->rb_right;
137 else {
138 atomic_inc(&mcast->refcount);
139 spin_unlock_irqrestore(&mcast_lock, flags);
140 goto bail;
141 }
142 }
143 spin_unlock_irqrestore(&mcast_lock, flags);
144
145 mcast = NULL;
146
147bail:
148 return mcast;
149}
150
151/**
152 * ipath_mcast_add - insert mcast GID into table and attach QP struct
153 * @mcast: the mcast GID table
154 * @mqp: the QP to attach
155 *
156 * Return zero if both were added. Return EEXIST if the GID was already in
157 * the table but the QP was added. Return ESRCH if the QP was already
158 * attached and neither structure was added.
159 */
static int ipath_mcast_add(struct ipath_mcast *mcast,
			   struct ipath_mcast_qp *mqp)
{
	/* Standard rbtree insertion: walk to the leaf link for mgid. */
	struct rb_node **n = &mcast_tree.rb_node;
	struct rb_node *pn = NULL;
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&mcast_lock, flags);

	while (*n) {
		struct ipath_mcast *tmcast;
		struct ipath_mcast_qp *p;

		pn = *n;
		tmcast = rb_entry(pn, struct ipath_mcast, rb_node);

		ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
			     sizeof(union ib_gid));
		if (ret < 0) {
			n = &pn->rb_left;
			continue;
		}
		if (ret > 0) {
			n = &pn->rb_right;
			continue;
		}

		/* GID already in the tree: tmcast is the existing node. */
		/* Search the QP list to see if this is already there. */
		list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
			if (p->qp == mqp->qp) {
				spin_unlock_irqrestore(&mcast_lock, flags);
				/* NB: positive errno by convention here;
				 * caller maps it to -EINVAL. */
				ret = ESRCH;
				goto bail;
			}
		}
		/* QP not attached yet: link it onto the existing GID. */
		list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
		spin_unlock_irqrestore(&mcast_lock, flags);
		ret = EEXIST;
		goto bail;
	}

	/* GID not found: insert the new node with mqp as its first QP. */
	list_add_tail_rcu(&mqp->list, &mcast->qp_list);

	/* This reference represents the node's membership in the tree. */
	atomic_inc(&mcast->refcount);
	rb_link_node(&mcast->rb_node, pn, n);
	rb_insert_color(&mcast->rb_node, &mcast_tree);

	spin_unlock_irqrestore(&mcast_lock, flags);

	ret = 0;

bail:
	return ret;
}
215
216int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
217{
218 struct ipath_qp *qp = to_iqp(ibqp);
219 struct ipath_mcast *mcast;
220 struct ipath_mcast_qp *mqp;
221 int ret;
222
223 /*
224 * Allocate data structures since its better to do this outside of
225 * spin locks and it will most likely be needed.
226 */
227 mcast = ipath_mcast_alloc(gid);
228 if (mcast == NULL) {
229 ret = -ENOMEM;
230 goto bail;
231 }
232 mqp = ipath_mcast_qp_alloc(qp);
233 if (mqp == NULL) {
234 ipath_mcast_free(mcast);
235 ret = -ENOMEM;
236 goto bail;
237 }
238 switch (ipath_mcast_add(mcast, mqp)) {
239 case ESRCH:
240 /* Neither was used: can't attach the same QP twice. */
241 ipath_mcast_qp_free(mqp);
242 ipath_mcast_free(mcast);
243 ret = -EINVAL;
244 goto bail;
245 case EEXIST: /* The mcast wasn't used */
246 ipath_mcast_free(mcast);
247 break;
248 default:
249 break;
250 }
251
252 ret = 0;
253
254bail:
255 return ret;
256}
257
258int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
259{
260 struct ipath_qp *qp = to_iqp(ibqp);
261 struct ipath_mcast *mcast = NULL;
262 struct ipath_mcast_qp *p, *tmp;
263 struct rb_node *n;
264 unsigned long flags;
265 int last = 0;
266 int ret;
267
268 spin_lock_irqsave(&mcast_lock, flags);
269
270 /* Find the GID in the mcast table. */
271 n = mcast_tree.rb_node;
272 while (1) {
273 if (n == NULL) {
274 spin_unlock_irqrestore(&mcast_lock, flags);
275 ret = 0;
276 goto bail;
277 }
278
279 mcast = rb_entry(n, struct ipath_mcast, rb_node);
280 ret = memcmp(gid->raw, mcast->mgid.raw,
281 sizeof(union ib_gid));
282 if (ret < 0)
283 n = n->rb_left;
284 else if (ret > 0)
285 n = n->rb_right;
286 else
287 break;
288 }
289
290 /* Search the QP list. */
291 list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
292 if (p->qp != qp)
293 continue;
294 /*
295 * We found it, so remove it, but don't poison the forward
296 * link until we are sure there are no list walkers.
297 */
298 list_del_rcu(&p->list);
299
300 /* If this was the last attached QP, remove the GID too. */
301 if (list_empty(&mcast->qp_list)) {
302 rb_erase(&mcast->rb_node, &mcast_tree);
303 last = 1;
304 }
305 break;
306 }
307
308 spin_unlock_irqrestore(&mcast_lock, flags);
309
310 if (p) {
311 /*
312 * Wait for any list walkers to finish before freeing the
313 * list element.
314 */
315 wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
316 ipath_mcast_qp_free(p);
317 }
318 if (last) {
319 atomic_dec(&mcast->refcount);
320 wait_event(mcast->wait, !atomic_read(&mcast->refcount));
321 ipath_mcast_free(mcast);
322 }
323
324 ret = 0;
325
326bail:
327 return ret;
328}
329
330int ipath_mcast_tree_empty(void)
331{
332 return mcast_tree.rb_node == NULL;
333}
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
new file mode 100644
index 000000000000..adc5322f15c1
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
@@ -0,0 +1,157 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33/*
34 * This file is conditionally built on x86_64 only. Otherwise weak symbol
35 * versions of the functions exported from here are used.
36 */
37
38#include <linux/pci.h>
39#include <asm/mtrr.h>
40#include <asm/processor.h>
41
42#include "ipath_kernel.h"
43
44/**
45 * ipath_enable_wc - enable write combining for MMIO writes to the device
46 * @dd: infinipath device
47 *
48 * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
49 * write combining.
50 */
51int ipath_enable_wc(struct ipath_devdata *dd)
52{
53 int ret = 0;
54 u64 pioaddr, piolen;
55 unsigned bits;
56 const unsigned long addr = pci_resource_start(dd->pcidev, 0);
57 const size_t len = pci_resource_len(dd->pcidev, 0);
58
59 /*
60 * Set the PIO buffers to be WCCOMB, so we get HT bursts to the
61 * chip. Linux (possibly the hardware) requires it to be on a power
62 * of 2 address matching the length (which has to be a power of 2).
63 * For rev1, that means the base address, for rev2, it will be just
64 * the PIO buffers themselves.
65 */
66 pioaddr = addr + dd->ipath_piobufbase;
67 piolen = (dd->ipath_piobcnt2k +
68 dd->ipath_piobcnt4k) *
69 ALIGN(dd->ipath_piobcnt2k +
70 dd->ipath_piobcnt4k, dd->ipath_palign);
71
72 for (bits = 0; !(piolen & (1ULL << bits)); bits++)
73 /* do nothing */ ;
74
75 if (piolen != (1ULL << bits)) {
76 piolen >>= bits;
77 while (piolen >>= 1)
78 bits++;
79 piolen = 1ULL << (bits + 1);
80 }
81 if (pioaddr & (piolen - 1)) {
82 u64 atmp;
83 ipath_dbg("pioaddr %llx not on right boundary for size "
84 "%llx, fixing\n",
85 (unsigned long long) pioaddr,
86 (unsigned long long) piolen);
87 atmp = pioaddr & ~(piolen - 1);
88 if (atmp < addr || (atmp + piolen) > (addr + len)) {
89 ipath_dev_err(dd, "No way to align address/size "
90 "(%llx/%llx), no WC mtrr\n",
91 (unsigned long long) atmp,
92 (unsigned long long) piolen << 1);
93 ret = -ENODEV;
94 } else {
95 ipath_dbg("changing WC base from %llx to %llx, "
96 "len from %llx to %llx\n",
97 (unsigned long long) pioaddr,
98 (unsigned long long) atmp,
99 (unsigned long long) piolen,
100 (unsigned long long) piolen << 1);
101 pioaddr = atmp;
102 piolen <<= 1;
103 }
104 }
105
106 if (!ret) {
107 int cookie;
108 ipath_cdbg(VERBOSE, "Setting mtrr for chip to WC "
109 "(addr %llx, len=0x%llx)\n",
110 (unsigned long long) pioaddr,
111 (unsigned long long) piolen);
112 cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0);
113 if (cookie < 0) {
114 {
115 dev_info(&dd->pcidev->dev,
116 "mtrr_add() WC for PIO bufs "
117 "failed (%d)\n",
118 cookie);
119 ret = -EINVAL;
120 }
121 } else {
122 ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, "
123 "cookie is %d\n", cookie);
124 dd->ipath_wc_cookie = cookie;
125 }
126 }
127
128 return ret;
129}
130
131/**
132 * ipath_disable_wc - disable write combining for MMIO writes to the device
133 * @dd: infinipath device
134 */
135void ipath_disable_wc(struct ipath_devdata *dd)
136{
137 if (dd->ipath_wc_cookie) {
138 ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n");
139 mtrr_del(dd->ipath_wc_cookie, 0, 0);
140 dd->ipath_wc_cookie = 0;
141 }
142}
143
144/**
145 * ipath_unordered_wc - indicate whether write combining is ordered
146 *
147 * Because our performance depends on our ability to do write combining mmio
148 * writes in the most efficient way, we need to know if we are on an Intel
149 * or AMD x86_64 processor. AMD x86_64 processors flush WC buffers out in
150 * the order completed, and so no special flushing is required to get
151 * correct ordering. Intel processors, however, will flush write buffers
152 * out in "random" orders, and so explicit ordering is needed at times.
153 */
154int ipath_unordered_wc(void)
155{
156 return boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
157}
diff --git a/drivers/infiniband/hw/ipath/ips_common.h b/drivers/infiniband/hw/ipath/ips_common.h
new file mode 100644
index 000000000000..410a764dfcef
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ips_common.h
@@ -0,0 +1,263 @@
1#ifndef IPS_COMMON_H
2#define IPS_COMMON_H
3/*
4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35#include "ipath_common.h"
36
/*
 * InfiniPath-specific transport header; fields are little-endian on
 * the wire.
 */
struct ipath_header {
	/*
	 * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
	 * 14 bits before ECO change ~28 Dec 03.  After that, Vers 4,
	 * Port 3, TID 11, offset 14.
	 */
	__le32 ver_port_tid_offset;
	__le16 chksum;
	__le16 pkt_flags;
};

/*
 * On-the-wire IPS protocol message: IB LRH and BTH, the InfiniPath
 * header, then IPS-specific fields which are in host byte order.
 * NOTE(review): the anonymous bitfield union below relies on the
 * compiler's bitfield layout; shared only between kernel and userspace
 * built with the same ABI.
 */
struct ips_message_header {
	__be16 lrh[4];		/* IB local route header */
	__be32 bth[3];		/* IB base transport header */
	/* fields below this point are in host byte order */
	struct ipath_header iph;
	__u8 sub_opcode;	/* OPCODE_SEQ_* / OPCODE_ACK / ... */
	__u8 flags;
	__u16 src_rank;
	/* 24 bits. The upper 8 bit is available for other use */
	union {
		struct {
			unsigned ack_seq_num:24;
			unsigned port:4;
			unsigned unused:4;
		};
		__u32 ack_seq_num_org;
	};
	__u8 expected_tid_session_id;
	__u8 tinylen;		/* to aid MPI */
	union {
		__u16 tag;	/* to aid MPI */
		__u16 mqhdr;	/* for PSM MQ */
	};
	union {
		__u32 mpi[4];	/* to aid MPI */
		__u32 data[4];
		__u64 mq[2];	/* for PSM MQ */
		struct {	/* startup/handshake information */
			__u16 mtu;
			__u8 major_ver;
			__u8 minor_ver;
			__u32 not_used;	/* free */
			__u32 run_id;
			__u32 client_ver;
		};
	};
};

/*
 * Header for the ith4x ethernet encapsulation (see OPCODE_ENCAP /
 * OPCODE_LID_ARP below).
 */
struct ether_header {
	__be16 lrh[4];
	__be32 bth[3];
	struct ipath_header iph;
	__u8 sub_opcode;
	__u8 cmd;
	__be16 lid;
	__u16 mac[3];
	__u8 frag_num;
	__u8 seq_num;
	__le32 len;
	/* MUST be of word size due to PIO write requirements */
	__u32 csum;
	__le16 csum_offset;
	__le16 flags;
	__u16 first_2_bytes;
	__u8 unused[2];		/* currently unused */
};
104
105/*
106 * The PIO buffer used for sending infinipath messages must only be written
107 * in 32-bit words, all the data must be written, and no writes can occur
108 * after the last word is written (which transfers "ownership" of the buffer
109 * to the chip and triggers the message to be sent).
110 * Since the Linux sk_buff structure can be recursive, non-aligned, and
111 * any number of bytes in each segment, we use the following structure
112 * to keep information about the overall state of the copy operation.
113 * This is used to save the information needed to store the checksum
114 * in the right place before sending the last word to the hardware and
115 * to buffer the last 0-3 bytes of non-word sized segments.
116 */
/* State for copying an sk_buff into a PIO buffer in word-sized writes
 * while accumulating the IP checksum; see the block comment above. */
struct copy_data_s {
	struct ether_header *hdr;
	/* addr of PIO buf to write csum to */
	__u32 __iomem *csum_pio;
	__u32 __iomem *to;	/* addr of PIO buf to write data to */
	__u32 device;		/* which device to allocate PIO bufs from */
	__s32 error;		/* set if there is an error. */
	__s32 extra;		/* amount of data saved in u.buf below */
	__u32 len;		/* total length to send in bytes */
	__u32 flen;		/* fragment length in words */
	__u32 csum;		/* partial IP checksum */
	__u32 pos;		/* position for partial checksum */
	__u32 offset;		/* offset to where data currently starts */
	__s32 checksum_calc;	/* set to 1 when csum has been calculated */
	struct sk_buff *skb;
	union {			/* buffers the last 0-3 non-word bytes */
		__u32 w;
		__u8 buf[4];
	} u;
};
137
/* IB - LRH header consts */
#define IPS_LRH_GRH 0x0003	/* 1. word of IB LRH - next header: GRH */
#define IPS_LRH_BTH 0x0002	/* 1. word of IB LRH - next header: BTH */

#define IPS_OFFSET 0

/*
 * defines the cut-off point between the header queue and eager/expected
 * TID queue
 */
#define NUM_OF_EXTRA_WORDS_IN_HEADER_QUEUE \
	((sizeof(struct ips_message_header) - \
	  offsetof(struct ips_message_header, iph)) >> 2)

/* OpCodes */
#define OPCODE_IPS 0xC0
#define OPCODE_ITH4X 0xC1

/* OpCode 30 is use by stand-alone test programs  */
#define OPCODE_RAW_DATA 0xDE
/* last OpCode (31) is reserved for test  */
#define OPCODE_TEST 0xDF

/* sub OpCodes - ips  */
#define OPCODE_SEQ_DATA 0x01
#define OPCODE_SEQ_CTRL 0x02

#define OPCODE_SEQ_MQ_DATA 0x03
#define OPCODE_SEQ_MQ_CTRL 0x04

#define OPCODE_ACK 0x10
#define OPCODE_NAK 0x11

#define OPCODE_ERR_CHK 0x20
#define OPCODE_ERR_CHK_PLS 0x21

#define OPCODE_STARTUP 0x30
#define OPCODE_STARTUP_ACK 0x31
#define OPCODE_STARTUP_NAK 0x32

#define OPCODE_STARTUP_EXT 0x34
#define OPCODE_STARTUP_ACK_EXT 0x35
#define OPCODE_STARTUP_NAK_EXT 0x36

#define OPCODE_TIDS_RELEASE 0x40
#define OPCODE_TIDS_RELEASE_CONFIRM 0x41

#define OPCODE_CLOSE 0x50
#define OPCODE_CLOSE_ACK 0x51
/*
 * like OPCODE_CLOSE, but no complaint if other side has already closed.
 * Used when doing abort(), MPI_Abort(), etc.
 */
#define OPCODE_ABORT 0x52

/* sub OpCodes - ith4x  */
#define OPCODE_ENCAP 0x81
#define OPCODE_LID_ARP 0x82

/* Receive Header Queue: receive type (from infinipath) */
#define RCVHQ_RCV_TYPE_EXPECTED  0
#define RCVHQ_RCV_TYPE_EAGER     1
#define RCVHQ_RCV_TYPE_NON_KD    2
#define RCVHQ_RCV_TYPE_ERROR     3

/* misc. */
/* NOTE(review): presumably a length in 32-bit words -- confirm at uses */
#define SIZE_OF_CRC 1

#define EAGER_TID_ID INFINIPATH_I_TID_MASK

/* "all members" partition key */
#define IPS_DEFAULT_P_KEY 0xFFFF

#define IPS_PERMISSIVE_LID 0xFFFF
#define IPS_MULTICAST_LID_BASE 0xC000

/* credit field position/width within the 32-bit AETH word */
#define IPS_AETH_CREDIT_SHIFT 24
#define IPS_AETH_CREDIT_MASK 0x1F
#define IPS_AETH_CREDIT_INVAL 0x1F

/* PSNs, MSNs and QPNs are 24-bit quantities */
#define IPS_PSN_MASK 0xFFFFFF
#define IPS_MSN_MASK 0xFFFFFF
#define IPS_QPN_MASK 0xFFFFFF
#define IPS_MULTICAST_QPN 0xFFFFFF
222/* functions for extracting fields from rcvhdrq entries */
/* Return the header error flags (second rcvhdrq flag word) in host order. */
static inline __u32 ips_get_hdr_err_flags(const __le32 * rbuf)
{
	return __le32_to_cpu(rbuf[1]);
}
227
/* Extract the eager buffer index from the first rcvhdrq flag word. */
static inline __u32 ips_get_index(const __le32 * rbuf)
{
	return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_EGRINDEX_SHIFT)
		& INFINIPATH_RHF_EGRINDEX_MASK;
}
233
/* Extract the receive type (one of RCVHQ_RCV_TYPE_*) from the first
 * rcvhdrq flag word. */
static inline __u32 ips_get_rcv_type(const __le32 * rbuf)
{
	return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_RCVTYPE_SHIFT)
		& INFINIPATH_RHF_RCVTYPE_MASK;
}
239
/* Extract the packet length; the hardware field counts 32-bit words, so
 * shift left by 2 to convert to bytes. */
static inline __u32 ips_get_length_in_bytes(const __le32 * rbuf)
{
	return ((__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_LENGTH_SHIFT)
		& INFINIPATH_RHF_LENGTH_MASK) << 2;
}
245
246static inline void *ips_get_first_protocol_header(const __u32 * rbuf)
247{
248 return (void *)&rbuf[2];
249}
250
251static inline struct ips_message_header *ips_get_ips_header(const __u32 *
252 rbuf)
253{
254 return (struct ips_message_header *)&rbuf[2];
255}
256
/* Extract the InfiniPath header version field from a header word. */
static inline __u32 ips_get_ipath_ver(__le32 hdrword)
{
	return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT)
		& INFINIPATH_I_VERS_MASK;
}
262
263#endif /* IPS_COMMON_H */
diff --git a/drivers/infiniband/hw/ipath/verbs_debug.h b/drivers/infiniband/hw/ipath/verbs_debug.h
new file mode 100644
index 000000000000..40d693cf3f94
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/verbs_debug.h
@@ -0,0 +1,107 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
#ifndef _VERBS_DEBUG_H
#define _VERBS_DEBUG_H

/*
 * This file contains tracing code for the ib_ipath kernel module.
 */
#ifndef _VERBS_DEBUGGING	/* tracing enabled or not */
#define _VERBS_DEBUGGING 1
#endif

/* Module-wide debug bitmask (see __VERBS_* below). */
extern unsigned ib_ipath_debug;

/* Unconditional error message, tagged with the module name. */
#define _VERBS_ERROR(fmt,...) \
	do { \
		printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
	} while(0)

/*
 * NOTE(review): the unit argument is currently unused; the message is
 * tagged with the module name only, identical to _VERBS_ERROR.
 */
#define _VERBS_UNIT_ERROR(unit,fmt,...) \
	do { \
		printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
	} while(0)

#if _VERBS_DEBUGGING

/*
 * Mask values for debugging.  The scheme allows us to compile out any
 * of the debug tracing stuff, and if compiled in, to enable or
 * disable dynamically.
 * This can be set at modprobe time also:
 *	modprobe ib_ipath ib_ipath_debug=3
 */

#define __VERBS_INFO        0x1	/* generic low verbosity stuff */
#define __VERBS_DBG         0x2	/* generic debug */
#define __VERBS_VDBG        0x4	/* verbose debug */
#define __VERBS_SMADBG      0x8000	/* sma packet debug */

/* Low-verbosity info, tagged with the module name. */
#define _VERBS_INFO(fmt,...) \
	do { \
		if (unlikely(ib_ipath_debug&__VERBS_INFO)) \
			printk(KERN_INFO "%s: " fmt,"ib_ipath", \
			       ##__VA_ARGS__); \
	} while(0)

/* Generic debug, tagged with the calling function's name. */
#define _VERBS_DBG(fmt,...) \
	do { \
		if (unlikely(ib_ipath_debug&__VERBS_DBG)) \
			printk(KERN_DEBUG "%s: " fmt, __func__, \
			       ##__VA_ARGS__); \
	} while(0)

/* Verbose debug, tagged with the calling function's name. */
#define _VERBS_VDBG(fmt,...) \
	do { \
		if (unlikely(ib_ipath_debug&__VERBS_VDBG)) \
			printk(KERN_DEBUG "%s: " fmt, __func__, \
			       ##__VA_ARGS__); \
	} while(0)

/* SMA packet debug, tagged with the calling function's name. */
#define _VERBS_SMADBG(fmt,...) \
	do { \
		if (unlikely(ib_ipath_debug&__VERBS_SMADBG)) \
			printk(KERN_DEBUG "%s: " fmt, __func__, \
			       ##__VA_ARGS__); \
	} while(0)

#else /* ! _VERBS_DEBUGGING */

/* Tracing compiled out: all debug macros expand to nothing. */
#define _VERBS_INFO(fmt,...)
#define _VERBS_DBG(fmt,...)
#define _VERBS_VDBG(fmt,...)
#define _VERBS_SMADBG(fmt,...)

#endif /* _VERBS_DEBUGGING */

#endif /* _VERBS_DEBUG_H */
diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig
index e88be85b3d5c..9aa5a4468a75 100644
--- a/drivers/infiniband/hw/mthca/Kconfig
+++ b/drivers/infiniband/hw/mthca/Kconfig
@@ -7,10 +7,11 @@ config INFINIBAND_MTHCA
7 ("Tavor") and the MT25208 PCI Express HCA ("Arbel"). 7 ("Tavor") and the MT25208 PCI Express HCA ("Arbel").
8 8
9config INFINIBAND_MTHCA_DEBUG 9config INFINIBAND_MTHCA_DEBUG
10 bool "Verbose debugging output" 10 bool "Verbose debugging output" if EMBEDDED
11 depends on INFINIBAND_MTHCA 11 depends on INFINIBAND_MTHCA
12 default n 12 default y
13 ---help--- 13 ---help---
14 This option causes the mthca driver produce a bunch of debug 14 This option causes debugging code to be compiled into the
15 messages. Select this is you are developing the driver or 15 mthca driver. The output can be turned on via the
16 trying to diagnose a problem. 16 debug_level module parameter (which can also be set after
17 the driver is loaded through sysfs).
diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile
index 47ec5a7cba0b..e388d95d0cf1 100644
--- a/drivers/infiniband/hw/mthca/Makefile
+++ b/drivers/infiniband/hw/mthca/Makefile
@@ -1,7 +1,3 @@
1ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
2EXTRA_CFLAGS += -DDEBUG
3endif
4
5obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o 1obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o
6 2
7ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \ 3ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index bc5bdcbe51b5..b12aa03be251 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -42,6 +42,20 @@
42 42
43#include "mthca_dev.h" 43#include "mthca_dev.h"
44 44
45enum {
46 MTHCA_RATE_TAVOR_FULL = 0,
47 MTHCA_RATE_TAVOR_1X = 1,
48 MTHCA_RATE_TAVOR_4X = 2,
49 MTHCA_RATE_TAVOR_1X_DDR = 3
50};
51
52enum {
53 MTHCA_RATE_MEMFREE_FULL = 0,
54 MTHCA_RATE_MEMFREE_QUARTER = 1,
55 MTHCA_RATE_MEMFREE_EIGHTH = 2,
56 MTHCA_RATE_MEMFREE_HALF = 3
57};
58
45struct mthca_av { 59struct mthca_av {
46 __be32 port_pd; 60 __be32 port_pd;
47 u8 reserved1; 61 u8 reserved1;
@@ -55,6 +69,90 @@ struct mthca_av {
55 __be32 dgid[4]; 69 __be32 dgid[4];
56}; 70};
57 71
/*
 * Convert a mem-free (Arbel) hardware rate encoding to an IB rate enum
 * by scaling the port's current rate multiplier down by the encoded
 * fraction.
 */
static enum ib_rate memfree_rate_to_ib(u8 mthca_rate, u8 port_rate)
{
	switch (mthca_rate) {
	case MTHCA_RATE_MEMFREE_EIGHTH:
		return mult_to_ib_rate(port_rate >> 3);
	case MTHCA_RATE_MEMFREE_QUARTER:
		return mult_to_ib_rate(port_rate >> 2);
	case MTHCA_RATE_MEMFREE_HALF:
		return mult_to_ib_rate(port_rate >> 1);
	case MTHCA_RATE_MEMFREE_FULL:
	default:
		return mult_to_ib_rate(port_rate);
	}
}
86
87static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate)
88{
89 switch (mthca_rate) {
90 case MTHCA_RATE_TAVOR_1X: return IB_RATE_2_5_GBPS;
91 case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS;
92 case MTHCA_RATE_TAVOR_4X: return IB_RATE_10_GBPS;
93 default: return port_rate;
94 }
95}
96
/*
 * Translate the hardware rate field of an address vector into an IB
 * rate enum for the given port, dispatching on HCA family.
 */
enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port)
{
	if (mthca_is_memfree(dev)) {
		/* Handle old Arbel FW */
		if (dev->limits.stat_rate_support == 0x3 && mthca_rate)
			return IB_RATE_2_5_GBPS;

		return memfree_rate_to_ib(mthca_rate, dev->rate[port - 1]);
	} else
		return tavor_rate_to_ib(mthca_rate, dev->rate[port - 1]);
}
108
/*
 * Pick the mem-free rate encoding that throttles cur_rate down to at
 * most req_rate; both arguments are rate multipliers (see
 * ib_rate_to_mult() at the caller).
 */
static u8 ib_rate_to_memfree(u8 req_rate, u8 cur_rate)
{
	if (cur_rate <= req_rate)
		return 0;	/* already at or below the request */

	/*
	 * Inter-packet delay (IPD) to get from rate X down to a rate
	 * no more than Y is (X - 1) / Y.
	 */
	switch ((cur_rate - 1) / req_rate) {
	case 0:	 return MTHCA_RATE_MEMFREE_FULL;
	case 1:	 return MTHCA_RATE_MEMFREE_HALF;
	case 2:	 /* fall through */
	case 3:	 return MTHCA_RATE_MEMFREE_QUARTER;
	default: return MTHCA_RATE_MEMFREE_EIGHTH;
	}
}
126
/*
 * Map an IB static rate enum to the Tavor hardware encoding; anything
 * unrecognized falls back to full port rate.
 */
static u8 ib_rate_to_tavor(u8 static_rate)
{
	switch (static_rate) {
	case IB_RATE_2_5_GBPS: return MTHCA_RATE_TAVOR_1X;
	case IB_RATE_5_GBPS:   return MTHCA_RATE_TAVOR_1X_DDR;
	case IB_RATE_10_GBPS:  return MTHCA_RATE_TAVOR_4X;
	default:	       return MTHCA_RATE_TAVOR_FULL;
	}
}
136
/*
 * Compute the hardware static rate encoding to place in an address
 * vector for the requested IB static rate on the given port.  Returns
 * 0 (full rate) when no throttling is needed.
 */
u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port)
{
	u8 rate;

	/* No limit requested, or port is no faster than the request. */
	if (!static_rate || ib_rate_to_mult(static_rate) >= dev->rate[port - 1])
		return 0;

	if (mthca_is_memfree(dev))
		rate = ib_rate_to_memfree(ib_rate_to_mult(static_rate),
					  dev->rate[port - 1]);
	else
		rate = ib_rate_to_tavor(static_rate);

	/*
	 * Firmware doesn't support the computed encoding; fall back to
	 * encoding 1.  NOTE(review): assumes encoding 1 is always in
	 * stat_rate_support -- confirm against QUERY_DEV_LIM.
	 */
	if (!(dev->limits.stat_rate_support & (1 << rate)))
		rate = 1;

	return rate;
}
155
58int mthca_create_ah(struct mthca_dev *dev, 156int mthca_create_ah(struct mthca_dev *dev,
59 struct mthca_pd *pd, 157 struct mthca_pd *pd,
60 struct ib_ah_attr *ah_attr, 158 struct ib_ah_attr *ah_attr,
@@ -107,7 +205,7 @@ on_hca_fail:
107 av->g_slid = ah_attr->src_path_bits; 205 av->g_slid = ah_attr->src_path_bits;
108 av->dlid = cpu_to_be16(ah_attr->dlid); 206 av->dlid = cpu_to_be16(ah_attr->dlid);
109 av->msg_sr = (3 << 4) | /* 2K message */ 207 av->msg_sr = (3 << 4) | /* 2K message */
110 ah_attr->static_rate; 208 mthca_get_rate(dev, ah_attr->static_rate, ah_attr->port_num);
111 av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); 209 av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
112 if (ah_attr->ah_flags & IB_AH_GRH) { 210 if (ah_attr->ah_flags & IB_AH_GRH) {
113 av->g_slid |= 0x80; 211 av->g_slid |= 0x80;
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 343eca507870..1985b5dfa481 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -965,6 +965,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
965 u32 *outbox; 965 u32 *outbox;
966 u8 field; 966 u8 field;
967 u16 size; 967 u16 size;
968 u16 stat_rate;
968 int err; 969 int err;
969 970
970#define QUERY_DEV_LIM_OUT_SIZE 0x100 971#define QUERY_DEV_LIM_OUT_SIZE 0x100
@@ -995,6 +996,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
995#define QUERY_DEV_LIM_MTU_WIDTH_OFFSET 0x36 996#define QUERY_DEV_LIM_MTU_WIDTH_OFFSET 0x36
996#define QUERY_DEV_LIM_VL_PORT_OFFSET 0x37 997#define QUERY_DEV_LIM_VL_PORT_OFFSET 0x37
997#define QUERY_DEV_LIM_MAX_GID_OFFSET 0x3b 998#define QUERY_DEV_LIM_MAX_GID_OFFSET 0x3b
999#define QUERY_DEV_LIM_RATE_SUPPORT_OFFSET 0x3c
998#define QUERY_DEV_LIM_MAX_PKEY_OFFSET 0x3f 1000#define QUERY_DEV_LIM_MAX_PKEY_OFFSET 0x3f
999#define QUERY_DEV_LIM_FLAGS_OFFSET 0x44 1001#define QUERY_DEV_LIM_FLAGS_OFFSET 0x44
1000#define QUERY_DEV_LIM_RSVD_UAR_OFFSET 0x48 1002#define QUERY_DEV_LIM_RSVD_UAR_OFFSET 0x48
@@ -1086,6 +1088,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
1086 dev_lim->num_ports = field & 0xf; 1088 dev_lim->num_ports = field & 0xf;
1087 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_GID_OFFSET); 1089 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_GID_OFFSET);
1088 dev_lim->max_gids = 1 << (field & 0xf); 1090 dev_lim->max_gids = 1 << (field & 0xf);
1091 MTHCA_GET(stat_rate, outbox, QUERY_DEV_LIM_RATE_SUPPORT_OFFSET);
1092 dev_lim->stat_rate_support = stat_rate;
1089 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PKEY_OFFSET); 1093 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PKEY_OFFSET);
1090 dev_lim->max_pkeys = 1 << (field & 0xf); 1094 dev_lim->max_pkeys = 1 << (field & 0xf);
1091 MTHCA_GET(dev_lim->flags, outbox, QUERY_DEV_LIM_FLAGS_OFFSET); 1095 MTHCA_GET(dev_lim->flags, outbox, QUERY_DEV_LIM_FLAGS_OFFSET);
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
index e4ec35c40dd3..2f976f2051d6 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.h
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -146,6 +146,7 @@ struct mthca_dev_lim {
146 int max_vl; 146 int max_vl;
147 int num_ports; 147 int num_ports;
148 int max_gids; 148 int max_gids;
149 u16 stat_rate_support;
149 int max_pkeys; 150 int max_pkeys;
150 u32 flags; 151 u32 flags;
151 int reserved_uars; 152 int reserved_uars;
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index ad52edbefe98..4c1dcb4c1822 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -151,6 +151,7 @@ struct mthca_limits {
151 int reserved_qps; 151 int reserved_qps;
152 int num_srqs; 152 int num_srqs;
153 int max_srq_wqes; 153 int max_srq_wqes;
154 int max_srq_sge;
154 int reserved_srqs; 155 int reserved_srqs;
155 int num_eecs; 156 int num_eecs;
156 int reserved_eecs; 157 int reserved_eecs;
@@ -172,6 +173,7 @@ struct mthca_limits {
172 int reserved_pds; 173 int reserved_pds;
173 u32 page_size_cap; 174 u32 page_size_cap;
174 u32 flags; 175 u32 flags;
176 u16 stat_rate_support;
175 u8 port_width_cap; 177 u8 port_width_cap;
176}; 178};
177 179
@@ -353,10 +355,24 @@ struct mthca_dev {
353 struct ib_mad_agent *send_agent[MTHCA_MAX_PORTS][2]; 355 struct ib_mad_agent *send_agent[MTHCA_MAX_PORTS][2];
354 struct ib_ah *sm_ah[MTHCA_MAX_PORTS]; 356 struct ib_ah *sm_ah[MTHCA_MAX_PORTS];
355 spinlock_t sm_lock; 357 spinlock_t sm_lock;
358 u8 rate[MTHCA_MAX_PORTS];
356}; 359};
357 360
358#define mthca_dbg(mdev, format, arg...) \ 361#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
359 dev_dbg(&mdev->pdev->dev, format, ## arg) 362extern int mthca_debug_level;
363
364#define mthca_dbg(mdev, format, arg...) \
365 do { \
366 if (mthca_debug_level) \
367 dev_printk(KERN_DEBUG, &mdev->pdev->dev, format, ## arg); \
368 } while (0)
369
370#else /* CONFIG_INFINIBAND_MTHCA_DEBUG */
371
372#define mthca_dbg(mdev, format, arg...) do { (void) mdev; } while (0)
373
374#endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */
375
360#define mthca_err(mdev, format, arg...) \ 376#define mthca_err(mdev, format, arg...) \
361 dev_err(&mdev->pdev->dev, format, ## arg) 377 dev_err(&mdev->pdev->dev, format, ## arg)
362#define mthca_info(mdev, format, arg...) \ 378#define mthca_info(mdev, format, arg...) \
@@ -492,6 +508,7 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
492int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, 508int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
493 enum ib_srq_attr_mask attr_mask); 509 enum ib_srq_attr_mask attr_mask);
494int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); 510int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
511int mthca_max_srq_sge(struct mthca_dev *dev);
495void mthca_srq_event(struct mthca_dev *dev, u32 srqn, 512void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
496 enum ib_event_type event_type); 513 enum ib_event_type event_type);
497void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); 514void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
@@ -542,6 +559,8 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
542 struct ib_ud_header *header); 559 struct ib_ud_header *header);
543int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr); 560int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr);
544int mthca_ah_grh_present(struct mthca_ah *ah); 561int mthca_ah_grh_present(struct mthca_ah *ah);
562u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port);
563enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port);
545 564
546int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); 565int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
547int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); 566int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index dfb482eac9a2..4730863ece9a 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -49,6 +49,30 @@ enum {
49 MTHCA_VENDOR_CLASS2 = 0xa 49 MTHCA_VENDOR_CLASS2 = 0xa
50}; 50};
51 51
/*
 * Query the port and cache its current rate multiplier
 * (active_speed * link width) in dev->rate[port_num - 1].
 * Returns 0 on success or a negative errno.
 */
static int mthca_update_rate(struct mthca_dev *dev, u8 port_num)
{
	struct ib_port_attr *tprops = NULL;
	int ret;

	/* Heap-allocate: struct ib_port_attr is too big for the stack. */
	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
	if (!tprops)
		return -ENOMEM;

	ret = ib_query_port(&dev->ib_dev, port_num, tprops);
	if (ret) {
		printk(KERN_WARNING "ib_query_port failed (%d) for %s port %d\n",
		       ret, dev->ib_dev.name, port_num);
		goto out;
	}

	dev->rate[port_num - 1] = tprops->active_speed *
				  ib_width_enum_to_int(tprops->active_width);

out:
	kfree(tprops);
	return ret;
}
75
52static void update_sm_ah(struct mthca_dev *dev, 76static void update_sm_ah(struct mthca_dev *dev,
53 u8 port_num, u16 lid, u8 sl) 77 u8 port_num, u16 lid, u8 sl)
54{ 78{
@@ -90,6 +114,7 @@ static void smp_snoop(struct ib_device *ibdev,
90 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && 114 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
91 mad->mad_hdr.method == IB_MGMT_METHOD_SET) { 115 mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
92 if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { 116 if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
117 mthca_update_rate(to_mdev(ibdev), port_num);
93 update_sm_ah(to_mdev(ibdev), port_num, 118 update_sm_ah(to_mdev(ibdev), port_num,
94 be16_to_cpup((__be16 *) (mad->data + 58)), 119 be16_to_cpup((__be16 *) (mad->data + 58)),
95 (*(u8 *) (mad->data + 76)) & 0xf); 120 (*(u8 *) (mad->data + 76)) & 0xf);
@@ -246,6 +271,7 @@ int mthca_create_agents(struct mthca_dev *dev)
246{ 271{
247 struct ib_mad_agent *agent; 272 struct ib_mad_agent *agent;
248 int p, q; 273 int p, q;
274 int ret;
249 275
250 spin_lock_init(&dev->sm_lock); 276 spin_lock_init(&dev->sm_lock);
251 277
@@ -255,11 +281,23 @@ int mthca_create_agents(struct mthca_dev *dev)
255 q ? IB_QPT_GSI : IB_QPT_SMI, 281 q ? IB_QPT_GSI : IB_QPT_SMI,
256 NULL, 0, send_handler, 282 NULL, 0, send_handler,
257 NULL, NULL); 283 NULL, NULL);
258 if (IS_ERR(agent)) 284 if (IS_ERR(agent)) {
285 ret = PTR_ERR(agent);
259 goto err; 286 goto err;
287 }
260 dev->send_agent[p][q] = agent; 288 dev->send_agent[p][q] = agent;
261 } 289 }
262 290
291
292 for (p = 1; p <= dev->limits.num_ports; ++p) {
293 ret = mthca_update_rate(dev, p);
294 if (ret) {
295 mthca_err(dev, "Failed to obtain port %d rate."
296 " aborting.\n", p);
297 goto err;
298 }
299 }
300
263 return 0; 301 return 0;
264 302
265err: 303err:
@@ -268,7 +306,7 @@ err:
268 if (dev->send_agent[p][q]) 306 if (dev->send_agent[p][q])
269 ib_unregister_mad_agent(dev->send_agent[p][q]); 307 ib_unregister_mad_agent(dev->send_agent[p][q]);
270 308
271 return PTR_ERR(agent); 309 return ret;
272} 310}
273 311
274void __devexit mthca_free_agents(struct mthca_dev *dev) 312void __devexit mthca_free_agents(struct mthca_dev *dev)
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 266f347c6707..9b9ff7bff357 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -52,6 +52,14 @@ MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver");
52MODULE_LICENSE("Dual BSD/GPL"); 52MODULE_LICENSE("Dual BSD/GPL");
53MODULE_VERSION(DRV_VERSION); 53MODULE_VERSION(DRV_VERSION);
54 54
55#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
56
57int mthca_debug_level = 0;
58module_param_named(debug_level, mthca_debug_level, int, 0644);
59MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
60
61#endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */
62
55#ifdef CONFIG_PCI_MSI 63#ifdef CONFIG_PCI_MSI
56 64
57static int msi_x = 0; 65static int msi_x = 0;
@@ -69,6 +77,10 @@ MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero");
69 77
70#endif /* CONFIG_PCI_MSI */ 78#endif /* CONFIG_PCI_MSI */
71 79
80static int tune_pci = 0;
81module_param(tune_pci, int, 0444);
82MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero");
83
72static const char mthca_version[] __devinitdata = 84static const char mthca_version[] __devinitdata =
73 DRV_NAME ": Mellanox InfiniBand HCA driver v" 85 DRV_NAME ": Mellanox InfiniBand HCA driver v"
74 DRV_VERSION " (" DRV_RELDATE ")\n"; 86 DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -90,6 +102,9 @@ static int __devinit mthca_tune_pci(struct mthca_dev *mdev)
90 int cap; 102 int cap;
91 u16 val; 103 u16 val;
92 104
105 if (!tune_pci)
106 return 0;
107
93 /* First try to max out Read Byte Count */ 108 /* First try to max out Read Byte Count */
94 cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX); 109 cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
95 if (cap) { 110 if (cap) {
@@ -176,6 +191,7 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim
176 mdev->limits.reserved_srqs = dev_lim->reserved_srqs; 191 mdev->limits.reserved_srqs = dev_lim->reserved_srqs;
177 mdev->limits.reserved_eecs = dev_lim->reserved_eecs; 192 mdev->limits.reserved_eecs = dev_lim->reserved_eecs;
178 mdev->limits.max_desc_sz = dev_lim->max_desc_sz; 193 mdev->limits.max_desc_sz = dev_lim->max_desc_sz;
194 mdev->limits.max_srq_sge = mthca_max_srq_sge(mdev);
179 /* 195 /*
180 * Subtract 1 from the limit because we need to allocate a 196 * Subtract 1 from the limit because we need to allocate a
181 * spare CQE so the HCA HW can tell the difference between an 197 * spare CQE so the HCA HW can tell the difference between an
@@ -191,6 +207,18 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim
191 mdev->limits.port_width_cap = dev_lim->max_port_width; 207 mdev->limits.port_width_cap = dev_lim->max_port_width;
192 mdev->limits.page_size_cap = ~(u32) (dev_lim->min_page_sz - 1); 208 mdev->limits.page_size_cap = ~(u32) (dev_lim->min_page_sz - 1);
193 mdev->limits.flags = dev_lim->flags; 209 mdev->limits.flags = dev_lim->flags;
210 /*
211 * For old FW that doesn't return static rate support, use a
212 * value of 0x3 (only static rate values of 0 or 1 are handled),
213 * except on Sinai, where even old FW can handle static rate
214 * values of 2 and 3.
215 */
216 if (dev_lim->stat_rate_support)
217 mdev->limits.stat_rate_support = dev_lim->stat_rate_support;
218 else if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
219 mdev->limits.stat_rate_support = 0xf;
220 else
221 mdev->limits.stat_rate_support = 0x3;
194 222
195 /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. 223 /* IB_DEVICE_RESIZE_MAX_WR not supported by driver.
196 May be doable since hardware supports it for SRQ. 224 May be doable since hardware supports it for SRQ.
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 2c250bc11c33..565a24b1756f 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -106,7 +106,7 @@ static int mthca_query_device(struct ib_device *ibdev,
106 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; 106 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
107 props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs; 107 props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
108 props->max_srq_wr = mdev->limits.max_srq_wqes; 108 props->max_srq_wr = mdev->limits.max_srq_wqes;
109 props->max_srq_sge = mdev->limits.max_sg; 109 props->max_srq_sge = mdev->limits.max_srq_sge;
110 props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay; 110 props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay;
111 props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? 111 props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
112 IB_ATOMIC_HCA : IB_ATOMIC_NONE; 112 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 2e7f52136965..6676a786d690 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -257,6 +257,8 @@ struct mthca_qp {
257 atomic_t refcount; 257 atomic_t refcount;
258 u32 qpn; 258 u32 qpn;
259 int is_direct; 259 int is_direct;
260 u8 port; /* for SQP and memfree use only */
261 u8 alt_port; /* for memfree use only */
260 u8 transport; 262 u8 transport;
261 u8 state; 263 u8 state;
262 u8 atomic_rd_en; 264 u8 atomic_rd_en;
@@ -278,7 +280,6 @@ struct mthca_qp {
278 280
279struct mthca_sqp { 281struct mthca_sqp {
280 struct mthca_qp qp; 282 struct mthca_qp qp;
281 int port;
282 int pkey_index; 283 int pkey_index;
283 u32 qkey; 284 u32 qkey;
284 u32 send_psn; 285 u32 send_psn;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 057c8e6af87b..f37b0e367323 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -248,6 +248,9 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
248 return; 248 return;
249 } 249 }
250 250
251 if (event_type == IB_EVENT_PATH_MIG)
252 qp->port = qp->alt_port;
253
251 event.device = &dev->ib_dev; 254 event.device = &dev->ib_dev;
252 event.event = event_type; 255 event.event = event_type;
253 event.element.qp = &qp->ibqp; 256 event.element.qp = &qp->ibqp;
@@ -392,10 +395,16 @@ static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
392{ 395{
393 memset(ib_ah_attr, 0, sizeof *path); 396 memset(ib_ah_attr, 0, sizeof *path);
394 ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3; 397 ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
398
399 if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->limits.num_ports)
400 return;
401
395 ib_ah_attr->dlid = be16_to_cpu(path->rlid); 402 ib_ah_attr->dlid = be16_to_cpu(path->rlid);
396 ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28; 403 ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
397 ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f; 404 ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
398 ib_ah_attr->static_rate = path->static_rate & 0x7; 405 ib_ah_attr->static_rate = mthca_rate_to_ib(dev,
406 path->static_rate & 0x7,
407 ib_ah_attr->port_num);
399 ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0; 408 ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
400 if (ib_ah_attr->ah_flags) { 409 if (ib_ah_attr->ah_flags) {
401 ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1); 410 ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1);
@@ -455,8 +464,10 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
455 qp_attr->cap.max_recv_sge = qp->rq.max_gs; 464 qp_attr->cap.max_recv_sge = qp->rq.max_gs;
456 qp_attr->cap.max_inline_data = qp->max_inline_data; 465 qp_attr->cap.max_inline_data = qp->max_inline_data;
457 466
458 to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); 467 if (qp->transport == RC || qp->transport == UC) {
459 to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); 468 to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
469 to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
470 }
460 471
461 qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f; 472 qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
462 qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f; 473 qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
@@ -484,11 +495,11 @@ out:
484} 495}
485 496
486static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah, 497static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
487 struct mthca_qp_path *path) 498 struct mthca_qp_path *path, u8 port)
488{ 499{
489 path->g_mylmc = ah->src_path_bits & 0x7f; 500 path->g_mylmc = ah->src_path_bits & 0x7f;
490 path->rlid = cpu_to_be16(ah->dlid); 501 path->rlid = cpu_to_be16(ah->dlid);
491 path->static_rate = !!ah->static_rate; 502 path->static_rate = mthca_get_rate(dev, ah->static_rate, port);
492 503
493 if (ah->ah_flags & IB_AH_GRH) { 504 if (ah->ah_flags & IB_AH_GRH) {
494 if (ah->grh.sgid_index >= dev->limits.gid_table_len) { 505 if (ah->grh.sgid_index >= dev->limits.gid_table_len) {
@@ -634,7 +645,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
634 645
635 if (qp->transport == MLX) 646 if (qp->transport == MLX)
636 qp_context->pri_path.port_pkey |= 647 qp_context->pri_path.port_pkey |=
637 cpu_to_be32(to_msqp(qp)->port << 24); 648 cpu_to_be32(qp->port << 24);
638 else { 649 else {
639 if (attr_mask & IB_QP_PORT) { 650 if (attr_mask & IB_QP_PORT) {
640 qp_context->pri_path.port_pkey |= 651 qp_context->pri_path.port_pkey |=
@@ -657,7 +668,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
657 } 668 }
658 669
659 if (attr_mask & IB_QP_AV) { 670 if (attr_mask & IB_QP_AV) {
660 if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path)) 671 if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path,
672 attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
661 return -EINVAL; 673 return -EINVAL;
662 674
663 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); 675 qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
@@ -681,7 +693,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
681 return -EINVAL; 693 return -EINVAL;
682 } 694 }
683 695
684 if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path)) 696 if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path,
697 attr->alt_ah_attr.port_num))
685 return -EINVAL; 698 return -EINVAL;
686 699
687 qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index | 700 qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
@@ -791,6 +804,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
791 qp->atomic_rd_en = attr->qp_access_flags; 804 qp->atomic_rd_en = attr->qp_access_flags;
792 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) 805 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
793 qp->resp_depth = attr->max_dest_rd_atomic; 806 qp->resp_depth = attr->max_dest_rd_atomic;
807 if (attr_mask & IB_QP_PORT)
808 qp->port = attr->port_num;
809 if (attr_mask & IB_QP_ALT_PATH)
810 qp->alt_port = attr->alt_port_num;
794 811
795 if (is_sqp(dev, qp)) 812 if (is_sqp(dev, qp))
796 store_attrs(to_msqp(qp), attr, attr_mask); 813 store_attrs(to_msqp(qp), attr, attr_mask);
@@ -802,13 +819,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
802 if (is_qp0(dev, qp)) { 819 if (is_qp0(dev, qp)) {
803 if (cur_state != IB_QPS_RTR && 820 if (cur_state != IB_QPS_RTR &&
804 new_state == IB_QPS_RTR) 821 new_state == IB_QPS_RTR)
805 init_port(dev, to_msqp(qp)->port); 822 init_port(dev, qp->port);
806 823
807 if (cur_state != IB_QPS_RESET && 824 if (cur_state != IB_QPS_RESET &&
808 cur_state != IB_QPS_ERR && 825 cur_state != IB_QPS_ERR &&
809 (new_state == IB_QPS_RESET || 826 (new_state == IB_QPS_RESET ||
810 new_state == IB_QPS_ERR)) 827 new_state == IB_QPS_ERR))
811 mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status); 828 mthca_CLOSE_IB(dev, qp->port, &status);
812 } 829 }
813 830
814 /* 831 /*
@@ -1212,6 +1229,9 @@ int mthca_alloc_qp(struct mthca_dev *dev,
1212 if (qp->qpn == -1) 1229 if (qp->qpn == -1)
1213 return -ENOMEM; 1230 return -ENOMEM;
1214 1231
1232 /* initialize port to zero for error-catching. */
1233 qp->port = 0;
1234
1215 err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, 1235 err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
1216 send_policy, qp); 1236 send_policy, qp);
1217 if (err) { 1237 if (err) {
@@ -1261,7 +1281,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
1261 if (err) 1281 if (err)
1262 goto err_out; 1282 goto err_out;
1263 1283
1264 sqp->port = port; 1284 sqp->qp.port = port;
1265 sqp->qp.qpn = mqpn; 1285 sqp->qp.qpn = mqpn;
1266 sqp->qp.transport = MLX; 1286 sqp->qp.transport = MLX;
1267 1287
@@ -1404,10 +1424,10 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
1404 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; 1424 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
1405 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); 1425 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
1406 if (!sqp->qp.ibqp.qp_num) 1426 if (!sqp->qp.ibqp.qp_num)
1407 ib_get_cached_pkey(&dev->ib_dev, sqp->port, 1427 ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
1408 sqp->pkey_index, &pkey); 1428 sqp->pkey_index, &pkey);
1409 else 1429 else
1410 ib_get_cached_pkey(&dev->ib_dev, sqp->port, 1430 ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
1411 wr->wr.ud.pkey_index, &pkey); 1431 wr->wr.ud.pkey_index, &pkey);
1412 sqp->ud_header.bth.pkey = cpu_to_be16(pkey); 1432 sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
1413 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); 1433 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 2dd3aea05341..adcaf85355ae 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -192,7 +192,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
192 192
193 /* Sanity check SRQ size before proceeding */ 193 /* Sanity check SRQ size before proceeding */
194 if (attr->max_wr > dev->limits.max_srq_wqes || 194 if (attr->max_wr > dev->limits.max_srq_wqes ||
195 attr->max_sge > dev->limits.max_sg) 195 attr->max_sge > dev->limits.max_srq_sge)
196 return -EINVAL; 196 return -EINVAL;
197 197
198 srq->max = attr->max_wr; 198 srq->max = attr->max_wr;
@@ -660,6 +660,31 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
660 return err; 660 return err;
661} 661}
662 662
663int mthca_max_srq_sge(struct mthca_dev *dev)
664{
665 if (mthca_is_memfree(dev))
666 return dev->limits.max_sg;
667
668 /*
669 * SRQ allocations are based on powers of 2 for Tavor,
670 * (although they only need to be multiples of 16 bytes).
671 *
672 * Therefore, we need to base the max number of sg entries on
673 * the largest power of 2 descriptor size that is <= to the
674 * actual max WQE descriptor size, rather than return the
675 * max_sg value given by the firmware (which is based on WQE
676 * sizes as multiples of 16, not powers of 2).
677 *
678 * If SRQ implementation is changed for Tavor to be based on
679 * multiples of 16, the calculation below can be deleted and
680 * the FW max_sg value returned.
681 */
682 return min_t(int, dev->limits.max_sg,
683 ((1 << (fls(dev->limits.max_desc_sz) - 1)) -
684 sizeof (struct mthca_next_seg)) /
685 sizeof (struct mthca_data_seg));
686}
687
663int __devinit mthca_init_srq_table(struct mthca_dev *dev) 688int __devinit mthca_init_srq_table(struct mthca_dev *dev)
664{ 689{
665 int err; 690 int err;
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index 8d2e04cac68e..13d6d01c72c0 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -10,8 +10,9 @@ config INFINIBAND_IPOIB
10 group: <http://www.ietf.org/html.charters/ipoib-charter.html>. 10 group: <http://www.ietf.org/html.charters/ipoib-charter.html>.
11 11
12config INFINIBAND_IPOIB_DEBUG 12config INFINIBAND_IPOIB_DEBUG
13 bool "IP-over-InfiniBand debugging" 13 bool "IP-over-InfiniBand debugging" if EMBEDDED
14 depends on INFINIBAND_IPOIB 14 depends on INFINIBAND_IPOIB
15 default y
15 ---help--- 16 ---help---
16 This option causes debugging code to be compiled into the 17 This option causes debugging code to be compiled into the
17 IPoIB driver. The output can be turned on via the 18 IPoIB driver. The output can be turned on via the
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index b640107fb732..12a1e0572ef2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -65,6 +65,8 @@ enum {
65 65
66 IPOIB_RX_RING_SIZE = 128, 66 IPOIB_RX_RING_SIZE = 128,
67 IPOIB_TX_RING_SIZE = 64, 67 IPOIB_TX_RING_SIZE = 64,
68 IPOIB_MAX_QUEUE_SIZE = 8192,
69 IPOIB_MIN_QUEUE_SIZE = 2,
68 70
69 IPOIB_NUM_WC = 4, 71 IPOIB_NUM_WC = 4,
70 72
@@ -230,6 +232,9 @@ static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh)
230 INFINIBAND_ALEN, sizeof(void *)); 232 INFINIBAND_ALEN, sizeof(void *));
231} 233}
232 234
235struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh);
236void ipoib_neigh_free(struct ipoib_neigh *neigh);
237
233extern struct workqueue_struct *ipoib_workqueue; 238extern struct workqueue_struct *ipoib_workqueue;
234 239
235/* functions */ 240/* functions */
@@ -329,6 +334,8 @@ static inline void ipoib_unregister_debugfs(void) { }
329#define ipoib_warn(priv, format, arg...) \ 334#define ipoib_warn(priv, format, arg...) \
330 ipoib_printk(KERN_WARNING, priv, format , ## arg) 335 ipoib_printk(KERN_WARNING, priv, format , ## arg)
331 336
337extern int ipoib_sendq_size;
338extern int ipoib_recvq_size;
332 339
333#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 340#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
334extern int ipoib_debug_level; 341extern int ipoib_debug_level;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 685258e34034..5dde380e8dbe 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -213,7 +213,7 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr)
213 gid_buf, path.pathrec.dlid ? "yes" : "no"); 213 gid_buf, path.pathrec.dlid ? "yes" : "no");
214 214
215 if (path.pathrec.dlid) { 215 if (path.pathrec.dlid) {
216 rate = ib_sa_rate_enum_to_int(path.pathrec.rate) * 25; 216 rate = ib_rate_to_mult(path.pathrec.rate) * 25;
217 217
218 seq_printf(file, 218 seq_printf(file,
219 " DLID: 0x%04x\n" 219 " DLID: 0x%04x\n"
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index ed65202878d8..a54da42849ae 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -161,7 +161,7 @@ static int ipoib_ib_post_receives(struct net_device *dev)
161 struct ipoib_dev_priv *priv = netdev_priv(dev); 161 struct ipoib_dev_priv *priv = netdev_priv(dev);
162 int i; 162 int i;
163 163
164 for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) { 164 for (i = 0; i < ipoib_recvq_size; ++i) {
165 if (ipoib_alloc_rx_skb(dev, i)) { 165 if (ipoib_alloc_rx_skb(dev, i)) {
166 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); 166 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
167 return -ENOMEM; 167 return -ENOMEM;
@@ -187,7 +187,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
187 if (wr_id & IPOIB_OP_RECV) { 187 if (wr_id & IPOIB_OP_RECV) {
188 wr_id &= ~IPOIB_OP_RECV; 188 wr_id &= ~IPOIB_OP_RECV;
189 189
190 if (wr_id < IPOIB_RX_RING_SIZE) { 190 if (wr_id < ipoib_recvq_size) {
191 struct sk_buff *skb = priv->rx_ring[wr_id].skb; 191 struct sk_buff *skb = priv->rx_ring[wr_id].skb;
192 dma_addr_t addr = priv->rx_ring[wr_id].mapping; 192 dma_addr_t addr = priv->rx_ring[wr_id].mapping;
193 193
@@ -252,9 +252,9 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
252 struct ipoib_tx_buf *tx_req; 252 struct ipoib_tx_buf *tx_req;
253 unsigned long flags; 253 unsigned long flags;
254 254
255 if (wr_id >= IPOIB_TX_RING_SIZE) { 255 if (wr_id >= ipoib_sendq_size) {
256 ipoib_warn(priv, "completion event with wrid %d (> %d)\n", 256 ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
257 wr_id, IPOIB_TX_RING_SIZE); 257 wr_id, ipoib_sendq_size);
258 return; 258 return;
259 } 259 }
260 260
@@ -275,7 +275,7 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
275 spin_lock_irqsave(&priv->tx_lock, flags); 275 spin_lock_irqsave(&priv->tx_lock, flags);
276 ++priv->tx_tail; 276 ++priv->tx_tail;
277 if (netif_queue_stopped(dev) && 277 if (netif_queue_stopped(dev) &&
278 priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2) 278 priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
279 netif_wake_queue(dev); 279 netif_wake_queue(dev);
280 spin_unlock_irqrestore(&priv->tx_lock, flags); 280 spin_unlock_irqrestore(&priv->tx_lock, flags);
281 281
@@ -344,13 +344,13 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
344 * means we have to make sure everything is properly recorded and 344 * means we have to make sure everything is properly recorded and
345 * our state is consistent before we call post_send(). 345 * our state is consistent before we call post_send().
346 */ 346 */
347 tx_req = &priv->tx_ring[priv->tx_head & (IPOIB_TX_RING_SIZE - 1)]; 347 tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
348 tx_req->skb = skb; 348 tx_req->skb = skb;
349 addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len, 349 addr = dma_map_single(priv->ca->dma_device, skb->data, skb->len,
350 DMA_TO_DEVICE); 350 DMA_TO_DEVICE);
351 pci_unmap_addr_set(tx_req, mapping, addr); 351 pci_unmap_addr_set(tx_req, mapping, addr);
352 352
353 if (unlikely(post_send(priv, priv->tx_head & (IPOIB_TX_RING_SIZE - 1), 353 if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
354 address->ah, qpn, addr, skb->len))) { 354 address->ah, qpn, addr, skb->len))) {
355 ipoib_warn(priv, "post_send failed\n"); 355 ipoib_warn(priv, "post_send failed\n");
356 ++priv->stats.tx_errors; 356 ++priv->stats.tx_errors;
@@ -363,7 +363,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
363 address->last_send = priv->tx_head; 363 address->last_send = priv->tx_head;
364 ++priv->tx_head; 364 ++priv->tx_head;
365 365
366 if (priv->tx_head - priv->tx_tail == IPOIB_TX_RING_SIZE) { 366 if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
367 ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); 367 ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
368 netif_stop_queue(dev); 368 netif_stop_queue(dev);
369 } 369 }
@@ -488,7 +488,7 @@ static int recvs_pending(struct net_device *dev)
488 int pending = 0; 488 int pending = 0;
489 int i; 489 int i;
490 490
491 for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) 491 for (i = 0; i < ipoib_recvq_size; ++i)
492 if (priv->rx_ring[i].skb) 492 if (priv->rx_ring[i].skb)
493 ++pending; 493 ++pending;
494 494
@@ -527,7 +527,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
527 */ 527 */
528 while ((int) priv->tx_tail - (int) priv->tx_head < 0) { 528 while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
529 tx_req = &priv->tx_ring[priv->tx_tail & 529 tx_req = &priv->tx_ring[priv->tx_tail &
530 (IPOIB_TX_RING_SIZE - 1)]; 530 (ipoib_sendq_size - 1)];
531 dma_unmap_single(priv->ca->dma_device, 531 dma_unmap_single(priv->ca->dma_device,
532 pci_unmap_addr(tx_req, mapping), 532 pci_unmap_addr(tx_req, mapping),
533 tx_req->skb->len, 533 tx_req->skb->len,
@@ -536,7 +536,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
536 ++priv->tx_tail; 536 ++priv->tx_tail;
537 } 537 }
538 538
539 for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) 539 for (i = 0; i < ipoib_recvq_size; ++i)
540 if (priv->rx_ring[i].skb) { 540 if (priv->rx_ring[i].skb) {
541 dma_unmap_single(priv->ca->dma_device, 541 dma_unmap_single(priv->ca->dma_device,
542 pci_unmap_addr(&priv->rx_ring[i], 542 pci_unmap_addr(&priv->rx_ring[i],
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 9b0bd7c746ca..cb078a7d0bf5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -41,6 +41,7 @@
41#include <linux/init.h> 41#include <linux/init.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/vmalloc.h> 43#include <linux/vmalloc.h>
44#include <linux/kernel.h>
44 45
45#include <linux/if_arp.h> /* For ARPHRD_xxx */ 46#include <linux/if_arp.h> /* For ARPHRD_xxx */
46 47
@@ -53,6 +54,14 @@ MODULE_AUTHOR("Roland Dreier");
53MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); 54MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
54MODULE_LICENSE("Dual BSD/GPL"); 55MODULE_LICENSE("Dual BSD/GPL");
55 56
57int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE;
58int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE;
59
60module_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
61MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
62module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
63MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
64
56#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 65#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
57int ipoib_debug_level; 66int ipoib_debug_level;
58 67
@@ -252,8 +261,8 @@ static void path_free(struct net_device *dev, struct ipoib_path *path)
252 */ 261 */
253 if (neigh->ah) 262 if (neigh->ah)
254 ipoib_put_ah(neigh->ah); 263 ipoib_put_ah(neigh->ah);
255 *to_ipoib_neigh(neigh->neighbour) = NULL; 264
256 kfree(neigh); 265 ipoib_neigh_free(neigh);
257 } 266 }
258 267
259 spin_unlock_irqrestore(&priv->lock, flags); 268 spin_unlock_irqrestore(&priv->lock, flags);
@@ -327,9 +336,8 @@ void ipoib_flush_paths(struct net_device *dev)
327 struct ipoib_dev_priv *priv = netdev_priv(dev); 336 struct ipoib_dev_priv *priv = netdev_priv(dev);
328 struct ipoib_path *path, *tp; 337 struct ipoib_path *path, *tp;
329 LIST_HEAD(remove_list); 338 LIST_HEAD(remove_list);
330 unsigned long flags;
331 339
332 spin_lock_irqsave(&priv->lock, flags); 340 spin_lock_irq(&priv->lock);
333 341
334 list_splice(&priv->path_list, &remove_list); 342 list_splice(&priv->path_list, &remove_list);
335 INIT_LIST_HEAD(&priv->path_list); 343 INIT_LIST_HEAD(&priv->path_list);
@@ -337,14 +345,15 @@ void ipoib_flush_paths(struct net_device *dev)
337 list_for_each_entry(path, &remove_list, list) 345 list_for_each_entry(path, &remove_list, list)
338 rb_erase(&path->rb_node, &priv->path_tree); 346 rb_erase(&path->rb_node, &priv->path_tree);
339 347
340 spin_unlock_irqrestore(&priv->lock, flags);
341
342 list_for_each_entry_safe(path, tp, &remove_list, list) { 348 list_for_each_entry_safe(path, tp, &remove_list, list) {
343 if (path->query) 349 if (path->query)
344 ib_sa_cancel_query(path->query_id, path->query); 350 ib_sa_cancel_query(path->query_id, path->query);
351 spin_unlock_irq(&priv->lock);
345 wait_for_completion(&path->done); 352 wait_for_completion(&path->done);
346 path_free(dev, path); 353 path_free(dev, path);
354 spin_lock_irq(&priv->lock);
347 } 355 }
356 spin_unlock_irq(&priv->lock);
348} 357}
349 358
350static void path_rec_completion(int status, 359static void path_rec_completion(int status,
@@ -373,16 +382,9 @@ static void path_rec_completion(int status,
373 struct ib_ah_attr av = { 382 struct ib_ah_attr av = {
374 .dlid = be16_to_cpu(pathrec->dlid), 383 .dlid = be16_to_cpu(pathrec->dlid),
375 .sl = pathrec->sl, 384 .sl = pathrec->sl,
376 .port_num = priv->port 385 .port_num = priv->port,
386 .static_rate = pathrec->rate
377 }; 387 };
378 int path_rate = ib_sa_rate_enum_to_int(pathrec->rate);
379
380 if (path_rate > 0 && priv->local_rate > path_rate)
381 av.static_rate = (priv->local_rate - 1) / path_rate;
382
383 ipoib_dbg(priv, "static_rate %d for local port %dX, path %dX\n",
384 av.static_rate, priv->local_rate,
385 ib_sa_rate_enum_to_int(pathrec->rate));
386 388
387 ah = ipoib_create_ah(dev, priv->pd, &av); 389 ah = ipoib_create_ah(dev, priv->pd, &av);
388 } 390 }
@@ -481,7 +483,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
481 struct ipoib_path *path; 483 struct ipoib_path *path;
482 struct ipoib_neigh *neigh; 484 struct ipoib_neigh *neigh;
483 485
484 neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); 486 neigh = ipoib_neigh_alloc(skb->dst->neighbour);
485 if (!neigh) { 487 if (!neigh) {
486 ++priv->stats.tx_dropped; 488 ++priv->stats.tx_dropped;
487 dev_kfree_skb_any(skb); 489 dev_kfree_skb_any(skb);
@@ -489,8 +491,6 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
489 } 491 }
490 492
491 skb_queue_head_init(&neigh->queue); 493 skb_queue_head_init(&neigh->queue);
492 neigh->neighbour = skb->dst->neighbour;
493 *to_ipoib_neigh(skb->dst->neighbour) = neigh;
494 494
495 /* 495 /*
496 * We can only be called from ipoib_start_xmit, so we're 496 * We can only be called from ipoib_start_xmit, so we're
@@ -503,7 +503,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
503 path = path_rec_create(dev, 503 path = path_rec_create(dev,
504 (union ib_gid *) (skb->dst->neighbour->ha + 4)); 504 (union ib_gid *) (skb->dst->neighbour->ha + 4));
505 if (!path) 505 if (!path)
506 goto err; 506 goto err_path;
507 507
508 __path_add(dev, path); 508 __path_add(dev, path);
509 } 509 }
@@ -521,17 +521,17 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
521 __skb_queue_tail(&neigh->queue, skb); 521 __skb_queue_tail(&neigh->queue, skb);
522 522
523 if (!path->query && path_rec_start(dev, path)) 523 if (!path->query && path_rec_start(dev, path))
524 goto err; 524 goto err_list;
525 } 525 }
526 526
527 spin_unlock(&priv->lock); 527 spin_unlock(&priv->lock);
528 return; 528 return;
529 529
530err: 530err_list:
531 *to_ipoib_neigh(skb->dst->neighbour) = NULL;
532 list_del(&neigh->list); 531 list_del(&neigh->list);
533 kfree(neigh);
534 532
533err_path:
534 ipoib_neigh_free(neigh);
535 ++priv->stats.tx_dropped; 535 ++priv->stats.tx_dropped;
536 dev_kfree_skb_any(skb); 536 dev_kfree_skb_any(skb);
537 537
@@ -763,8 +763,7 @@ static void ipoib_neigh_destructor(struct neighbour *n)
763 if (neigh->ah) 763 if (neigh->ah)
764 ah = neigh->ah; 764 ah = neigh->ah;
765 list_del(&neigh->list); 765 list_del(&neigh->list);
766 *to_ipoib_neigh(n) = NULL; 766 ipoib_neigh_free(neigh);
767 kfree(neigh);
768 } 767 }
769 768
770 spin_unlock_irqrestore(&priv->lock, flags); 769 spin_unlock_irqrestore(&priv->lock, flags);
@@ -773,6 +772,26 @@ static void ipoib_neigh_destructor(struct neighbour *n)
773 ipoib_put_ah(ah); 772 ipoib_put_ah(ah);
774} 773}
775 774
775struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour)
776{
777 struct ipoib_neigh *neigh;
778
779 neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
780 if (!neigh)
781 return NULL;
782
783 neigh->neighbour = neighbour;
784 *to_ipoib_neigh(neighbour) = neigh;
785
786 return neigh;
787}
788
789void ipoib_neigh_free(struct ipoib_neigh *neigh)
790{
791 *to_ipoib_neigh(neigh->neighbour) = NULL;
792 kfree(neigh);
793}
794
776static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms) 795static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
777{ 796{
778 parms->neigh_destructor = ipoib_neigh_destructor; 797 parms->neigh_destructor = ipoib_neigh_destructor;
@@ -785,20 +804,19 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
785 struct ipoib_dev_priv *priv = netdev_priv(dev); 804 struct ipoib_dev_priv *priv = netdev_priv(dev);
786 805
787 /* Allocate RX/TX "rings" to hold queued skbs */ 806 /* Allocate RX/TX "rings" to hold queued skbs */
788 807 priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
789 priv->rx_ring = kzalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
790 GFP_KERNEL); 808 GFP_KERNEL);
791 if (!priv->rx_ring) { 809 if (!priv->rx_ring) {
792 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", 810 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
793 ca->name, IPOIB_RX_RING_SIZE); 811 ca->name, ipoib_recvq_size);
794 goto out; 812 goto out;
795 } 813 }
796 814
797 priv->tx_ring = kzalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf), 815 priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring,
798 GFP_KERNEL); 816 GFP_KERNEL);
799 if (!priv->tx_ring) { 817 if (!priv->tx_ring) {
800 printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", 818 printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
801 ca->name, IPOIB_TX_RING_SIZE); 819 ca->name, ipoib_sendq_size);
802 goto out_rx_ring_cleanup; 820 goto out_rx_ring_cleanup;
803 } 821 }
804 822
@@ -866,7 +884,7 @@ static void ipoib_setup(struct net_device *dev)
866 dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN; 884 dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN;
867 dev->addr_len = INFINIBAND_ALEN; 885 dev->addr_len = INFINIBAND_ALEN;
868 dev->type = ARPHRD_INFINIBAND; 886 dev->type = ARPHRD_INFINIBAND;
869 dev->tx_queue_len = IPOIB_TX_RING_SIZE * 2; 887 dev->tx_queue_len = ipoib_sendq_size * 2;
870 dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; 888 dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
871 889
872 /* MTU will be reset when mcast join happens */ 890 /* MTU will be reset when mcast join happens */
@@ -1118,6 +1136,14 @@ static int __init ipoib_init_module(void)
1118{ 1136{
1119 int ret; 1137 int ret;
1120 1138
1139 ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size);
1140 ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE);
1141 ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE);
1142
1143 ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
1144 ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
1145 ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE);
1146
1121 ret = ipoib_register_debugfs(); 1147 ret = ipoib_register_debugfs();
1122 if (ret) 1148 if (ret)
1123 return ret; 1149 return ret;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 93c462eaf4fd..1dae4b238252 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -114,8 +114,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
114 */ 114 */
115 if (neigh->ah) 115 if (neigh->ah)
116 ipoib_put_ah(neigh->ah); 116 ipoib_put_ah(neigh->ah);
117 *to_ipoib_neigh(neigh->neighbour) = NULL; 117 ipoib_neigh_free(neigh);
118 kfree(neigh);
119 } 118 }
120 119
121 spin_unlock_irqrestore(&priv->lock, flags); 120 spin_unlock_irqrestore(&priv->lock, flags);
@@ -251,6 +250,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
251 .port_num = priv->port, 250 .port_num = priv->port,
252 .sl = mcast->mcmember.sl, 251 .sl = mcast->mcmember.sl,
253 .ah_flags = IB_AH_GRH, 252 .ah_flags = IB_AH_GRH,
253 .static_rate = mcast->mcmember.rate,
254 .grh = { 254 .grh = {
255 .flow_label = be32_to_cpu(mcast->mcmember.flow_label), 255 .flow_label = be32_to_cpu(mcast->mcmember.flow_label),
256 .hop_limit = mcast->mcmember.hop_limit, 256 .hop_limit = mcast->mcmember.hop_limit,
@@ -258,17 +258,8 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
258 .traffic_class = mcast->mcmember.traffic_class 258 .traffic_class = mcast->mcmember.traffic_class
259 } 259 }
260 }; 260 };
261 int path_rate = ib_sa_rate_enum_to_int(mcast->mcmember.rate);
262
263 av.grh.dgid = mcast->mcmember.mgid; 261 av.grh.dgid = mcast->mcmember.mgid;
264 262
265 if (path_rate > 0 && priv->local_rate > path_rate)
266 av.static_rate = (priv->local_rate - 1) / path_rate;
267
268 ipoib_dbg_mcast(priv, "static_rate %d for local port %dX, mcmember %dX\n",
269 av.static_rate, priv->local_rate,
270 ib_sa_rate_enum_to_int(mcast->mcmember.rate));
271
272 ah = ipoib_create_ah(dev, priv->pd, &av); 263 ah = ipoib_create_ah(dev, priv->pd, &av);
273 if (!ah) { 264 if (!ah) {
274 ipoib_warn(priv, "ib_address_create failed\n"); 265 ipoib_warn(priv, "ib_address_create failed\n");
@@ -618,6 +609,22 @@ int ipoib_mcast_start_thread(struct net_device *dev)
618 return 0; 609 return 0;
619} 610}
620 611
612static void wait_for_mcast_join(struct ipoib_dev_priv *priv,
613 struct ipoib_mcast *mcast)
614{
615 spin_lock_irq(&priv->lock);
616 if (mcast && mcast->query) {
617 ib_sa_cancel_query(mcast->query_id, mcast->query);
618 mcast->query = NULL;
619 spin_unlock_irq(&priv->lock);
620 ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
621 IPOIB_GID_ARG(mcast->mcmember.mgid));
622 wait_for_completion(&mcast->done);
623 }
624 else
625 spin_unlock_irq(&priv->lock);
626}
627
621int ipoib_mcast_stop_thread(struct net_device *dev, int flush) 628int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
622{ 629{
623 struct ipoib_dev_priv *priv = netdev_priv(dev); 630 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -637,28 +644,10 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
637 if (flush) 644 if (flush)
638 flush_workqueue(ipoib_workqueue); 645 flush_workqueue(ipoib_workqueue);
639 646
640 spin_lock_irq(&priv->lock); 647 wait_for_mcast_join(priv, priv->broadcast);
641 if (priv->broadcast && priv->broadcast->query) {
642 ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query);
643 priv->broadcast->query = NULL;
644 spin_unlock_irq(&priv->lock);
645 ipoib_dbg_mcast(priv, "waiting for bcast\n");
646 wait_for_completion(&priv->broadcast->done);
647 } else
648 spin_unlock_irq(&priv->lock);
649 648
650 list_for_each_entry(mcast, &priv->multicast_list, list) { 649 list_for_each_entry(mcast, &priv->multicast_list, list)
651 spin_lock_irq(&priv->lock); 650 wait_for_mcast_join(priv, mcast);
652 if (mcast->query) {
653 ib_sa_cancel_query(mcast->query_id, mcast->query);
654 mcast->query = NULL;
655 spin_unlock_irq(&priv->lock);
656 ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
657 IPOIB_GID_ARG(mcast->mcmember.mgid));
658 wait_for_completion(&mcast->done);
659 } else
660 spin_unlock_irq(&priv->lock);
661 }
662 651
663 return 0; 652 return 0;
664} 653}
@@ -772,13 +761,11 @@ out:
772 if (skb->dst && 761 if (skb->dst &&
773 skb->dst->neighbour && 762 skb->dst->neighbour &&
774 !*to_ipoib_neigh(skb->dst->neighbour)) { 763 !*to_ipoib_neigh(skb->dst->neighbour)) {
775 struct ipoib_neigh *neigh = kmalloc(sizeof *neigh, GFP_ATOMIC); 764 struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour);
776 765
777 if (neigh) { 766 if (neigh) {
778 kref_get(&mcast->ah->ref); 767 kref_get(&mcast->ah->ref);
779 neigh->ah = mcast->ah; 768 neigh->ah = mcast->ah;
780 neigh->neighbour = skb->dst->neighbour;
781 *to_ipoib_neigh(skb->dst->neighbour) = neigh;
782 list_add_tail(&neigh->list, &mcast->neigh_list); 769 list_add_tail(&neigh->list, &mcast->neigh_list);
783 } 770 }
784 } 771 }
@@ -913,6 +900,7 @@ void ipoib_mcast_restart_task(void *dev_ptr)
913 900
914 /* We have to cancel outside of the spinlock */ 901 /* We have to cancel outside of the spinlock */
915 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { 902 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
903 wait_for_mcast_join(priv, mcast);
916 ipoib_mcast_leave(mcast->dev, mcast); 904 ipoib_mcast_leave(mcast->dev, mcast);
917 ipoib_mcast_free(mcast); 905 ipoib_mcast_free(mcast);
918 } 906 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 5f0388027b25..1d49d1643c59 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -159,8 +159,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
159 struct ipoib_dev_priv *priv = netdev_priv(dev); 159 struct ipoib_dev_priv *priv = netdev_priv(dev);
160 struct ib_qp_init_attr init_attr = { 160 struct ib_qp_init_attr init_attr = {
161 .cap = { 161 .cap = {
162 .max_send_wr = IPOIB_TX_RING_SIZE, 162 .max_send_wr = ipoib_sendq_size,
163 .max_recv_wr = IPOIB_RX_RING_SIZE, 163 .max_recv_wr = ipoib_recvq_size,
164 .max_send_sge = 1, 164 .max_send_sge = 1,
165 .max_recv_sge = 1 165 .max_recv_sge = 1
166 }, 166 },
@@ -175,7 +175,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
175 } 175 }
176 176
177 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, 177 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev,
178 IPOIB_TX_RING_SIZE + IPOIB_RX_RING_SIZE + 1); 178 ipoib_sendq_size + ipoib_recvq_size + 1);
179 if (IS_ERR(priv->cq)) { 179 if (IS_ERR(priv->cq)) {
180 printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); 180 printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
181 goto out_free_pd; 181 goto out_free_pd;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index fd8a95a9c5d3..5bb55742ada6 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -617,6 +617,14 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
617 scmnd->sc_data_direction); 617 scmnd->sc_data_direction);
618} 618}
619 619
620static void srp_remove_req(struct srp_target_port *target, struct srp_request *req,
621 int index)
622{
623 list_del(&req->list);
624 req->next = target->req_head;
625 target->req_head = index;
626}
627
620static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) 628static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
621{ 629{
622 struct srp_request *req; 630 struct srp_request *req;
@@ -664,9 +672,7 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
664 scmnd->host_scribble = (void *) -1L; 672 scmnd->host_scribble = (void *) -1L;
665 scmnd->scsi_done(scmnd); 673 scmnd->scsi_done(scmnd);
666 674
667 list_del(&req->list); 675 srp_remove_req(target, req, rsp->tag & ~SRP_TAG_TSK_MGMT);
668 req->next = target->req_head;
669 target->req_head = rsp->tag & ~SRP_TAG_TSK_MGMT;
670 } else 676 } else
671 req->cmd_done = 1; 677 req->cmd_done = 1;
672 } 678 }
@@ -1188,12 +1194,10 @@ static int srp_send_tsk_mgmt(struct scsi_cmnd *scmnd, u8 func)
1188 spin_lock_irq(target->scsi_host->host_lock); 1194 spin_lock_irq(target->scsi_host->host_lock);
1189 1195
1190 if (req->cmd_done) { 1196 if (req->cmd_done) {
1191 list_del(&req->list); 1197 srp_remove_req(target, req, req_index);
1192 req->next = target->req_head;
1193 target->req_head = req_index;
1194
1195 scmnd->scsi_done(scmnd); 1198 scmnd->scsi_done(scmnd);
1196 } else if (!req->tsk_status) { 1199 } else if (!req->tsk_status) {
1200 srp_remove_req(target, req, req_index);
1197 scmnd->result = DID_ABORT << 16; 1201 scmnd->result = DID_ABORT << 16;
1198 ret = SUCCESS; 1202 ret = SUCCESS;
1199 } 1203 }
@@ -1434,6 +1438,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
1434 p = match_strdup(args); 1438 p = match_strdup(args);
1435 if (strlen(p) != 32) { 1439 if (strlen(p) != 32) {
1436 printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p); 1440 printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p);
1441 kfree(p);
1437 goto out; 1442 goto out;
1438 } 1443 }
1439 1444