aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
authorBryan O'Sullivan <bos@pathscale.com>2006-03-29 18:23:29 -0500
committerRoland Dreier <rolandd@cisco.com>2006-03-31 16:14:19 -0500
commit108ecf0d90655055d5a7db8d3a7239133b4d52b7 (patch)
tree95e3a20828478e7e66f1aa7e88e3b5cf17b93dfe /drivers/infiniband/hw
parent097709fea03140b567bde8369f3ffafe33dfc1c6 (diff)
IB/ipath: misc driver support code
EEPROM support, interrupt handling, statistics gathering, and write combining management for x86_64. A note regarding i2c: The Atmel EEPROM hardware we use looks like an i2c device electrically, but is not i2c compliant at all from a functional perspective. We tried using the kernel's i2c support to talk to it, but failed. Normal i2c devices have a single 7-bit or 10-bit i2c address that they respond to. Valid 7-bit addresses range from 0x03 to 0x77. Addresses 0x00 to 0x02 and 0x78 to 0x7F are special reserved addresses (e.g. 0x00 is the "general call" address.) The Atmel device, on the other hand, responds to ALL addresses. It's designed to be the only device on a given i2c bus. A given i2c device address corresponds to the memory address within the i2c device itself. At least one reason why the linux core i2c stuff won't work for this is that it prohibits access to reserved addresses like 0x00, which are really valid addresses on the Atmel devices. Signed-off-by: Bryan O'Sullivan <bos@pathscale.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/ipath/ipath_eeprom.c613
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c841
-rw-r--r--drivers/infiniband/hw/ipath/ipath_stats.c303
-rw-r--r--drivers/infiniband/hw/ipath/ipath_wc_x86_64.c157
4 files changed, 1914 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
new file mode 100644
index 000000000000..f11a900e8cd7
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -0,0 +1,613 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/delay.h>
34#include <linux/pci.h>
35#include <linux/vmalloc.h>
36
37#include "ipath_kernel.h"
38
39/*
40 * InfiniPath I2C driver for a serial eeprom. This is not a generic
41 * I2C interface. For a start, the device we're using (Atmel AT24C11)
42 * doesn't work like a regular I2C device. It looks like one
43 * electrically, but not logically. Normal I2C devices have a single
44 * 7-bit or 10-bit I2C address that they respond to. Valid 7-bit
45 * addresses range from 0x03 to 0x77. Addresses 0x00 to 0x02 and 0x78
46 * to 0x7F are special reserved addresses (e.g. 0x00 is the "general
47 * call" address.) The Atmel device, on the other hand, responds to ALL
48 * 7-bit addresses. It's designed to be the only device on a given I2C
49 * bus. A 7-bit address corresponds to the memory address within the
50 * Atmel device itself.
51 *
52 * Also, the timing requirements mean more than simple software
53 * bitbanging, with readbacks from chip to ensure timing (simple udelay
54 * is not enough).
55 *
56 * This all means that accessing the device is specialized enough
57 * that using the standard kernel I2C bitbanging interface would be
58 * impossible. For example, the core I2C eeprom driver expects to find
59 * a device at one or more of a limited set of addresses only. It doesn't
60 * allow writing to an eeprom. It also doesn't provide any means of
61 * accessing eeprom contents from within the kernel, only via sysfs.
62 */
63
64enum i2c_type {
65 i2c_line_scl = 0,
66 i2c_line_sda
67};
68
69enum i2c_state {
70 i2c_line_low = 0,
71 i2c_line_high
72};
73
74#define READ_CMD 1
75#define WRITE_CMD 0
76
77static int eeprom_init;
78
79/*
80 * The gpioval manipulation really should be protected by spinlocks
81 * or be converted to use atomic operations.
82 */
83
84/**
85 * i2c_gpio_set - set a GPIO line
86 * @dd: the infinipath device
87 * @line: the line to set
88 * @new_line_state: the state to set
89 *
90 * Returns 0 if the line was set to the new state successfully, non-zero
91 * on error.
92 */
93static int i2c_gpio_set(struct ipath_devdata *dd,
94 enum i2c_type line,
95 enum i2c_state new_line_state)
96{
97 u64 read_val, write_val, mask, *gpioval;
98
99 gpioval = &dd->ipath_gpio_out;
100 read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
101 if (line == i2c_line_scl)
102 mask = ipath_gpio_scl;
103 else
104 mask = ipath_gpio_sda;
105
106 if (new_line_state == i2c_line_high)
107 /* tri-state the output rather than force high */
108 write_val = read_val & ~mask;
109 else
110 /* config line to be an output */
111 write_val = read_val | mask;
112 ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
113
114 /* set high and verify */
115 if (new_line_state == i2c_line_high)
116 write_val = 0x1UL;
117 else
118 write_val = 0x0UL;
119
120 if (line == i2c_line_scl) {
121 write_val <<= ipath_gpio_scl_num;
122 *gpioval = *gpioval & ~(1UL << ipath_gpio_scl_num);
123 *gpioval |= write_val;
124 } else {
125 write_val <<= ipath_gpio_sda_num;
126 *gpioval = *gpioval & ~(1UL << ipath_gpio_sda_num);
127 *gpioval |= write_val;
128 }
129 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
130
131 return 0;
132}
133
134/**
135 * i2c_gpio_get - get a GPIO line state
136 * @dd: the infinipath device
137 * @line: the line to get
138 * @curr_statep: where to put the line state
139 *
140 * Returns 0 if the line was set to the new state successfully, non-zero
141 * on error. curr_state is not set on error.
142 */
143static int i2c_gpio_get(struct ipath_devdata *dd,
144 enum i2c_type line,
145 enum i2c_state *curr_statep)
146{
147 u64 read_val, write_val, mask;
148 int ret;
149
150 /* check args */
151 if (curr_statep == NULL) {
152 ret = 1;
153 goto bail;
154 }
155
156 read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
157 /* config line to be an input */
158 if (line == i2c_line_scl)
159 mask = ipath_gpio_scl;
160 else
161 mask = ipath_gpio_sda;
162 write_val = read_val & ~mask;
163 ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
164 read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
165
166 if (read_val & mask)
167 *curr_statep = i2c_line_high;
168 else
169 *curr_statep = i2c_line_low;
170
171 ret = 0;
172
173bail:
174 return ret;
175}
176
177/**
178 * i2c_wait_for_writes - wait for a write
179 * @dd: the infinipath device
180 *
181 * We use this instead of udelay directly, so we can make sure
182 * that previous register writes have been flushed all the way
183 * to the chip. Since we are delaying anyway, the cost doesn't
184 * hurt, and makes the bit twiddling more regular
185 */
186static void i2c_wait_for_writes(struct ipath_devdata *dd)
187{
188 (void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
189}
190
191static void scl_out(struct ipath_devdata *dd, u8 bit)
192{
193 i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low);
194
195 i2c_wait_for_writes(dd);
196}
197
198static void sda_out(struct ipath_devdata *dd, u8 bit)
199{
200 i2c_gpio_set(dd, i2c_line_sda, bit ? i2c_line_high : i2c_line_low);
201
202 i2c_wait_for_writes(dd);
203}
204
205static u8 sda_in(struct ipath_devdata *dd, int wait)
206{
207 enum i2c_state bit;
208
209 if (i2c_gpio_get(dd, i2c_line_sda, &bit))
210 ipath_dbg("get bit failed!\n");
211
212 if (wait)
213 i2c_wait_for_writes(dd);
214
215 return bit == i2c_line_high ? 1U : 0;
216}
217
218/**
219 * i2c_ackrcv - see if ack following write is true
220 * @dd: the infinipath device
221 */
222static int i2c_ackrcv(struct ipath_devdata *dd)
223{
224 u8 ack_received;
225
226 /* AT ENTRY SCL = LOW */
227 /* change direction, ignore data */
228 ack_received = sda_in(dd, 1);
229 scl_out(dd, i2c_line_high);
230 ack_received = sda_in(dd, 1) == 0;
231 scl_out(dd, i2c_line_low);
232 return ack_received;
233}
234
235/**
236 * wr_byte - write a byte, one bit at a time
237 * @dd: the infinipath device
238 * @data: the byte to write
239 *
240 * Returns 0 if we got the following ack, otherwise 1
241 */
242static int wr_byte(struct ipath_devdata *dd, u8 data)
243{
244 int bit_cntr;
245 u8 bit;
246
247 for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) {
248 bit = (data >> bit_cntr) & 1;
249 sda_out(dd, bit);
250 scl_out(dd, i2c_line_high);
251 scl_out(dd, i2c_line_low);
252 }
253 return (!i2c_ackrcv(dd)) ? 1 : 0;
254}
255
256static void send_ack(struct ipath_devdata *dd)
257{
258 sda_out(dd, i2c_line_low);
259 scl_out(dd, i2c_line_high);
260 scl_out(dd, i2c_line_low);
261 sda_out(dd, i2c_line_high);
262}
263
264/**
265 * i2c_startcmd - transmit the start condition, followed by address/cmd
266 * @dd: the infinipath device
267 * @offset_dir: direction byte
268 *
269 * (both clock/data high, clock high, data low while clock is high)
270 */
271static int i2c_startcmd(struct ipath_devdata *dd, u8 offset_dir)
272{
273 int res;
274
275 /* issue start sequence */
276 sda_out(dd, i2c_line_high);
277 scl_out(dd, i2c_line_high);
278 sda_out(dd, i2c_line_low);
279 scl_out(dd, i2c_line_low);
280
281 /* issue length and direction byte */
282 res = wr_byte(dd, offset_dir);
283
284 if (res)
285 ipath_cdbg(VERBOSE, "No ack to complete start\n");
286
287 return res;
288}
289
290/**
291 * stop_cmd - transmit the stop condition
292 * @dd: the infinipath device
293 *
294 * (both clock/data low, clock high, data high while clock is high)
295 */
296static void stop_cmd(struct ipath_devdata *dd)
297{
298 scl_out(dd, i2c_line_low);
299 sda_out(dd, i2c_line_low);
300 scl_out(dd, i2c_line_high);
301 sda_out(dd, i2c_line_high);
302 udelay(2);
303}
304
305/**
306 * eeprom_reset - reset I2C communication
307 * @dd: the infinipath device
308 */
309
310static int eeprom_reset(struct ipath_devdata *dd)
311{
312 int clock_cycles_left = 9;
313 u64 *gpioval = &dd->ipath_gpio_out;
314 int ret;
315
316 eeprom_init = 1;
317 *gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out);
318 ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg "
319 "is %llx\n", (unsigned long long) *gpioval);
320
321 /*
322 * This is to get the i2c into a known state, by first going low,
323 * then tristate sda (and then tristate scl as first thing
324 * in loop)
325 */
326 scl_out(dd, i2c_line_low);
327 sda_out(dd, i2c_line_high);
328
329 while (clock_cycles_left--) {
330 scl_out(dd, i2c_line_high);
331
332 if (sda_in(dd, 0)) {
333 sda_out(dd, i2c_line_low);
334 scl_out(dd, i2c_line_low);
335 ret = 0;
336 goto bail;
337 }
338
339 scl_out(dd, i2c_line_low);
340 }
341
342 ret = 1;
343
344bail:
345 return ret;
346}
347
348/**
349 * ipath_eeprom_read - receives bytes from the eeprom via I2C
350 * @dd: the infinipath device
351 * @eeprom_offset: address to read from
352 * @buffer: where to store result
353 * @len: number of bytes to receive
354 */
355
356int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
357 void *buffer, int len)
358{
359 /* compiler complains unless initialized */
360 u8 single_byte = 0;
361 int bit_cntr;
362 int ret;
363
364 if (!eeprom_init)
365 eeprom_reset(dd);
366
367 eeprom_offset = (eeprom_offset << 1) | READ_CMD;
368
369 if (i2c_startcmd(dd, eeprom_offset)) {
370 ipath_dbg("Failed startcmd\n");
371 stop_cmd(dd);
372 ret = 1;
373 goto bail;
374 }
375
376 /*
377 * eeprom keeps clocking data out as long as we ack, automatically
378 * incrementing the address.
379 */
380 while (len-- > 0) {
381 /* get data */
382 single_byte = 0;
383 for (bit_cntr = 8; bit_cntr; bit_cntr--) {
384 u8 bit;
385 scl_out(dd, i2c_line_high);
386 bit = sda_in(dd, 0);
387 single_byte |= bit << (bit_cntr - 1);
388 scl_out(dd, i2c_line_low);
389 }
390
391 /* send ack if not the last byte */
392 if (len)
393 send_ack(dd);
394
395 *((u8 *) buffer) = single_byte;
396 buffer++;
397 }
398
399 stop_cmd(dd);
400
401 ret = 0;
402
403bail:
404 return ret;
405}
406
407/**
408 * ipath_eeprom_write - writes data to the eeprom via I2C
409 * @dd: the infinipath device
410 * @eeprom_offset: where to place data
411 * @buffer: data to write
412 * @len: number of bytes to write
413 */
414int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
415 const void *buffer, int len)
416{
417 u8 single_byte;
418 int sub_len;
419 const u8 *bp = buffer;
420 int max_wait_time, i;
421 int ret;
422
423 if (!eeprom_init)
424 eeprom_reset(dd);
425
426 while (len > 0) {
427 if (i2c_startcmd(dd, (eeprom_offset << 1) | WRITE_CMD)) {
428 ipath_dbg("Failed to start cmd offset %u\n",
429 eeprom_offset);
430 goto failed_write;
431 }
432
433 sub_len = min(len, 4);
434 eeprom_offset += sub_len;
435 len -= sub_len;
436
437 for (i = 0; i < sub_len; i++) {
438 if (wr_byte(dd, *bp++)) {
439 ipath_dbg("no ack after byte %u/%u (%u "
440 "total remain)\n", i, sub_len,
441 len + sub_len - i);
442 goto failed_write;
443 }
444 }
445
446 stop_cmd(dd);
447
448 /*
449 * wait for write complete by waiting for a successful
450 * read (the chip replies with a zero after the write
451 * cmd completes, and before it writes to the eeprom.
452 * The startcmd for the read will fail the ack until
453 * the writes have completed. We do this inline to avoid
454 * the debug prints that are in the real read routine
455 * if the startcmd fails.
456 */
457 max_wait_time = 100;
458 while (i2c_startcmd(dd, READ_CMD)) {
459 stop_cmd(dd);
460 if (!--max_wait_time) {
461 ipath_dbg("Did not get successful read to "
462 "complete write\n");
463 goto failed_write;
464 }
465 }
466 /* now read the zero byte */
467 for (i = single_byte = 0; i < 8; i++) {
468 u8 bit;
469 scl_out(dd, i2c_line_high);
470 bit = sda_in(dd, 0);
471 scl_out(dd, i2c_line_low);
472 single_byte <<= 1;
473 single_byte |= bit;
474 }
475 stop_cmd(dd);
476 }
477
478 ret = 0;
479 goto bail;
480
481failed_write:
482 stop_cmd(dd);
483 ret = 1;
484
485bail:
486 return ret;
487}
488
489static u8 flash_csum(struct ipath_flash *ifp, int adjust)
490{
491 u8 *ip = (u8 *) ifp;
492 u8 csum = 0, len;
493
494 for (len = 0; len < ifp->if_length; len++)
495 csum += *ip++;
496 csum -= ifp->if_csum;
497 csum = ~csum;
498 if (adjust)
499 ifp->if_csum = csum;
500
501 return csum;
502}
503
504/**
505 * ipath_get_guid - get the GUID from the i2c device
506 * @dd: the infinipath device
507 *
508 * When we add the multi-chip support, we will probably have to add
509 * the ability to use the number of guids field, and get the guid from
510 * the first chip's flash, to use for all of them.
511 */
512void ipath_get_guid(struct ipath_devdata *dd)
513{
514 void *buf;
515 struct ipath_flash *ifp;
516 __be64 guid;
517 int len;
518 u8 csum, *bguid;
519 int t = dd->ipath_unit;
520 struct ipath_devdata *dd0 = ipath_lookup(0);
521
522 if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) {
523 u8 *bguid, oguid;
524 dd->ipath_guid = dd0->ipath_guid;
525 bguid = (u8 *) & dd->ipath_guid;
526
527 oguid = bguid[7];
528 bguid[7] += t;
529 if (oguid > bguid[7]) {
530 if (bguid[6] == 0xff) {
531 if (bguid[5] == 0xff) {
532 ipath_dev_err(
533 dd,
534 "Can't set %s GUID from "
535 "base, wraps to OUI!\n",
536 ipath_get_unit_name(t));
537 dd->ipath_guid = 0;
538 goto bail;
539 }
540 bguid[5]++;
541 }
542 bguid[6]++;
543 }
544 dd->ipath_nguid = 1;
545
546 ipath_dbg("nguid %u, so adding %u to device 0 guid, "
547 "for %llx\n",
548 dd0->ipath_nguid, t,
549 (unsigned long long) be64_to_cpu(dd->ipath_guid));
550 goto bail;
551 }
552
553 len = offsetof(struct ipath_flash, if_future);
554 buf = vmalloc(len);
555 if (!buf) {
556 ipath_dev_err(dd, "Couldn't allocate memory to read %u "
557 "bytes from eeprom for GUID\n", len);
558 goto bail;
559 }
560
561 if (ipath_eeprom_read(dd, 0, buf, len)) {
562 ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
563 goto done;
564 }
565 ifp = (struct ipath_flash *)buf;
566
567 csum = flash_csum(ifp, 0);
568 if (csum != ifp->if_csum) {
569 dev_info(&dd->pcidev->dev, "Bad I2C flash checksum: "
570 "0x%x, not 0x%x\n", csum, ifp->if_csum);
571 goto done;
572 }
573 if (*(__be64 *) ifp->if_guid == 0ULL ||
574 *(__be64 *) ifp->if_guid == __constant_cpu_to_be64(-1LL)) {
575 ipath_dev_err(dd, "Invalid GUID %llx from flash; "
576 "ignoring\n",
577 *(unsigned long long *) ifp->if_guid);
578 /* don't allow GUID if all 0 or all 1's */
579 goto done;
580 }
581
582 /* complain, but allow it */
583 if (*(u64 *) ifp->if_guid == 0x100007511000000ULL)
584 dev_info(&dd->pcidev->dev, "Warning, GUID %llx is "
585 "default, probably not correct!\n",
586 *(unsigned long long *) ifp->if_guid);
587
588 bguid = ifp->if_guid;
589 if (!bguid[0] && !bguid[1] && !bguid[2]) {
590 /* original incorrect GUID format in flash; fix in
591 * core copy, by shifting up 2 octets; don't need to
592 * change top octet, since both it and shifted are
593 * 0.. */
594 bguid[1] = bguid[3];
595 bguid[2] = bguid[4];
596 bguid[3] = bguid[4] = 0;
597 guid = *(__be64 *) ifp->if_guid;
598 ipath_cdbg(VERBOSE, "Old GUID format in flash, top 3 zero, "
599 "shifting 2 octets\n");
600 } else
601 guid = *(__be64 *) ifp->if_guid;
602 dd->ipath_guid = guid;
603 dd->ipath_nguid = ifp->if_numguid;
604 memcpy(dd->ipath_serial, ifp->if_serial,
605 sizeof(ifp->if_serial));
606 ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
607 (unsigned long long) be64_to_cpu(dd->ipath_guid));
608
609done:
610 vfree(buf);
611
612bail:;
613}
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
new file mode 100644
index 000000000000..60f5f4108069
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -0,0 +1,841 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/pci.h>
34
35#include "ipath_kernel.h"
36#include "ips_common.h"
37#include "ipath_layer.h"
38
39#define E_SUM_PKTERRS \
40 (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
41 INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
42 INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \
43 INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \
44 INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
45 INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)
46
47#define E_SUM_ERRS \
48 (INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
49 INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
50 INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \
51 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
52 INFINIPATH_E_INVALIDADDR)
53
54static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
55{
56 unsigned long sbuf[4];
57 u64 ignore_this_time = 0;
58 u32 piobcnt;
59
60 /* if possible that sendbuffererror could be valid */
61 piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
62 /* read these before writing errorclear */
63 sbuf[0] = ipath_read_kreg64(
64 dd, dd->ipath_kregs->kr_sendbuffererror);
65 sbuf[1] = ipath_read_kreg64(
66 dd, dd->ipath_kregs->kr_sendbuffererror + 1);
67 if (piobcnt > 128) {
68 sbuf[2] = ipath_read_kreg64(
69 dd, dd->ipath_kregs->kr_sendbuffererror + 2);
70 sbuf[3] = ipath_read_kreg64(
71 dd, dd->ipath_kregs->kr_sendbuffererror + 3);
72 }
73
74 if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
75 int i;
76
77 ipath_cdbg(PKT, "SendbufErrs %lx %lx ", sbuf[0], sbuf[1]);
78 if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
79 printk("%lx %lx ", sbuf[2], sbuf[3]);
80 for (i = 0; i < piobcnt; i++) {
81 if (test_bit(i, sbuf)) {
82 u32 __iomem *piobuf;
83 if (i < dd->ipath_piobcnt2k)
84 piobuf = (u32 __iomem *)
85 (dd->ipath_pio2kbase +
86 i * dd->ipath_palign);
87 else
88 piobuf = (u32 __iomem *)
89 (dd->ipath_pio4kbase +
90 (i - dd->ipath_piobcnt2k) *
91 dd->ipath_4kalign);
92
93 ipath_cdbg(PKT,
94 "PIObuf[%u] @%p pbc is %x; ",
95 i, piobuf, readl(piobuf));
96
97 ipath_disarm_piobufs(dd, i, 1);
98 }
99 }
100 if (ipath_debug & __IPATH_PKTDBG)
101 printk("\n");
102 }
103 if ((errs & (INFINIPATH_E_SDROPPEDDATAPKT |
104 INFINIPATH_E_SDROPPEDSMPPKT |
105 INFINIPATH_E_SMINPKTLEN)) &&
106 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
107 /*
108 * This can happen when SMA is trying to bring the link
109 * up, but the IB link changes state at the "wrong" time.
110 * The IB logic then complains that the packet isn't
111 * valid. We don't want to confuse people, so we just
112 * don't print them, except at debug
113 */
114 ipath_dbg("Ignoring pktsend errors %llx, because not "
115 "yet active\n", (unsigned long long) errs);
116 ignore_this_time = INFINIPATH_E_SDROPPEDDATAPKT |
117 INFINIPATH_E_SDROPPEDSMPPKT |
118 INFINIPATH_E_SMINPKTLEN;
119 }
120
121 return ignore_this_time;
122}
123
124/* return the strings for the most common link states */
125static char *ib_linkstate(u32 linkstate)
126{
127 char *ret;
128
129 switch (linkstate) {
130 case IPATH_IBSTATE_INIT:
131 ret = "Init";
132 break;
133 case IPATH_IBSTATE_ARM:
134 ret = "Arm";
135 break;
136 case IPATH_IBSTATE_ACTIVE:
137 ret = "Active";
138 break;
139 default:
140 ret = "Down";
141 }
142
143 return ret;
144}
145
146static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
147 ipath_err_t errs, int noprint)
148{
149 u64 val;
150 u32 ltstate, lstate;
151
152 /*
153 * even if diags are enabled, we want to notice LINKINIT, etc.
154 * We just don't want to change the LED state, or
155 * dd->ipath_kregs->kr_ibcctrl
156 */
157 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
158 lstate = val & IPATH_IBSTATE_MASK;
159 if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
160 lstate == IPATH_IBSTATE_ACTIVE) {
161 /*
162 * only print at SMA if there is a change, debug if not
163 * (sometimes we want to know that, usually not).
164 */
165 if (lstate == ((unsigned) dd->ipath_lastibcstat
166 & IPATH_IBSTATE_MASK)) {
167 ipath_dbg("Status change intr but no change (%s)\n",
168 ib_linkstate(lstate));
169 }
170 else
171 ipath_cdbg(SMA, "Unit %u link state %s, last "
172 "was %s\n", dd->ipath_unit,
173 ib_linkstate(lstate),
174 ib_linkstate((unsigned)
175 dd->ipath_lastibcstat
176 & IPATH_IBSTATE_MASK));
177 }
178 else {
179 lstate = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
180 if (lstate == IPATH_IBSTATE_INIT ||
181 lstate == IPATH_IBSTATE_ARM ||
182 lstate == IPATH_IBSTATE_ACTIVE)
183 ipath_cdbg(SMA, "Unit %u link state down"
184 " (state 0x%x), from %s\n",
185 dd->ipath_unit,
186 (u32)val & IPATH_IBSTATE_MASK,
187 ib_linkstate(lstate));
188 else
189 ipath_cdbg(VERBOSE, "Unit %u link state changed "
190 "to 0x%x from down (%x)\n",
191 dd->ipath_unit, (u32) val, lstate);
192 }
193 ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
194 INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
195 lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
196 INFINIPATH_IBCS_LINKSTATE_MASK;
197
198 if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
199 ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
200 u32 last_ltstate;
201
202 /*
203 * Ignore cycling back and forth from Polling.Active
204 * to Polling.Quiet while waiting for the other end of
205 * the link to come up. We will cycle back and forth
206 * between them if no cable is plugged in,
207 * the other device is powered off or disabled, etc.
208 */
209 last_ltstate = (dd->ipath_lastibcstat >>
210 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT)
211 & INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
212 if (last_ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE
213 || last_ltstate ==
214 INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
215 if (dd->ipath_ibpollcnt > 40) {
216 dd->ipath_flags |= IPATH_NOCABLE;
217 *dd->ipath_statusp |=
218 IPATH_STATUS_IB_NOCABLE;
219 } else
220 dd->ipath_ibpollcnt++;
221 goto skip_ibchange;
222 }
223 }
224 dd->ipath_ibpollcnt = 0; /* some state other than 2 or 3 */
225 ipath_stats.sps_iblink++;
226 if (ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
227 dd->ipath_flags |= IPATH_LINKDOWN;
228 dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
229 | IPATH_LINKACTIVE |
230 IPATH_LINKARMED);
231 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
232 if (!noprint) {
233 if (((dd->ipath_lastibcstat >>
234 INFINIPATH_IBCS_LINKSTATE_SHIFT) &
235 INFINIPATH_IBCS_LINKSTATE_MASK)
236 == INFINIPATH_IBCS_L_STATE_ACTIVE)
237 /* if from up to down be more vocal */
238 ipath_cdbg(SMA,
239 "Unit %u link now down (%s)\n",
240 dd->ipath_unit,
241 ipath_ibcstatus_str[ltstate]);
242 else
243 ipath_cdbg(VERBOSE, "Unit %u link is "
244 "down (%s)\n", dd->ipath_unit,
245 ipath_ibcstatus_str[ltstate]);
246 }
247
248 dd->ipath_f_setextled(dd, lstate, ltstate);
249 } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ACTIVE) {
250 dd->ipath_flags |= IPATH_LINKACTIVE;
251 dd->ipath_flags &=
252 ~(IPATH_LINKUNK | IPATH_LINKINIT | IPATH_LINKDOWN |
253 IPATH_LINKARMED | IPATH_NOCABLE);
254 *dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
255 *dd->ipath_statusp |=
256 IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
257 dd->ipath_f_setextled(dd, lstate, ltstate);
258
259 __ipath_layer_intr(dd, IPATH_LAYER_INT_IF_UP);
260 } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
261 /*
262 * set INIT and DOWN. Down is checked by most of the other
263 * code, but INIT is useful to know in a few places.
264 */
265 dd->ipath_flags |= IPATH_LINKINIT | IPATH_LINKDOWN;
266 dd->ipath_flags &=
267 ~(IPATH_LINKUNK | IPATH_LINKACTIVE | IPATH_LINKARMED
268 | IPATH_NOCABLE);
269 *dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE
270 | IPATH_STATUS_IB_READY);
271 dd->ipath_f_setextled(dd, lstate, ltstate);
272 } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ARM) {
273 dd->ipath_flags |= IPATH_LINKARMED;
274 dd->ipath_flags &=
275 ~(IPATH_LINKUNK | IPATH_LINKDOWN | IPATH_LINKINIT |
276 IPATH_LINKACTIVE | IPATH_NOCABLE);
277 *dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE
278 | IPATH_STATUS_IB_READY);
279 dd->ipath_f_setextled(dd, lstate, ltstate);
280 } else {
281 if (!noprint)
282 ipath_dbg("IBstatuschange unit %u: %s (%x)\n",
283 dd->ipath_unit,
284 ipath_ibcstatus_str[ltstate], ltstate);
285 }
286skip_ibchange:
287 dd->ipath_lastibcstat = val;
288}
289
290static void handle_supp_msgs(struct ipath_devdata *dd,
291 unsigned supp_msgs, char msg[512])
292{
293 /*
294 * Print the message unless it's ibc status change only, which
295 * happens so often we never want to count it.
296 */
297 if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
298 ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror &
299 ~INFINIPATH_E_IBSTATUSCHANGED);
300 if (dd->ipath_lasterror &
301 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
302 ipath_dev_err(dd, "Suppressed %u messages for "
303 "fast-repeating errors (%s) (%llx)\n",
304 supp_msgs, msg,
305 (unsigned long long)
306 dd->ipath_lasterror);
307 else {
308 /*
309 * rcvegrfull and rcvhdrqfull are "normal", for some
310 * types of processes (mostly benchmarks) that send
311 * huge numbers of messages, while not processing
312 * them. So only complain about these at debug
313 * level.
314 */
315 ipath_dbg("Suppressed %u messages for %s\n",
316 supp_msgs, msg);
317 }
318 }
319}
320
321static unsigned handle_frequent_errors(struct ipath_devdata *dd,
322 ipath_err_t errs, char msg[512],
323 int *noprint)
324{
325 unsigned long nc;
326 static unsigned long nextmsg_time;
327 static unsigned nmsgs, supp_msgs;
328
329 /*
330 * Throttle back "fast" messages to no more than 10 per 5 seconds.
331 * This isn't perfect, but it's a reasonable heuristic. If we get
332 * more than 10, give a 6x longer delay.
333 */
334 nc = jiffies;
335 if (nmsgs > 10) {
336 if (time_before(nc, nextmsg_time)) {
337 *noprint = 1;
338 if (!supp_msgs++)
339 nextmsg_time = nc + HZ * 3;
340 }
341 else if (supp_msgs) {
342 handle_supp_msgs(dd, supp_msgs, msg);
343 supp_msgs = 0;
344 nmsgs = 0;
345 }
346 }
347 else if (!nmsgs++ || time_after(nc, nextmsg_time))
348 nextmsg_time = nc + HZ / 2;
349
350 return supp_msgs;
351}
352
353static void handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
354{
355 char msg[512];
356 u64 ignore_this_time = 0;
357 int i;
358 int chkerrpkts = 0, noprint = 0;
359 unsigned supp_msgs;
360
361 supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);
362
363 /*
364 * don't report errors that are masked (includes those always
365 * ignored)
366 */
367 errs &= ~dd->ipath_maskederrs;
368
369 /* do these first, they are most important */
370 if (errs & INFINIPATH_E_HARDWARE) {
371 /* reuse same msg buf */
372 dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
373 }
374
375 if (!noprint && (errs & ~infinipath_e_bitsextant))
376 ipath_dev_err(dd, "error interrupt with unknown errors "
377 "%llx set\n", (unsigned long long)
378 (errs & ~infinipath_e_bitsextant));
379
380 if (errs & E_SUM_ERRS)
381 ignore_this_time = handle_e_sum_errs(dd, errs);
382
383 if (supp_msgs == 250000) {
384 /*
385 * It's not entirely reasonable assuming that the errors set
386 * in the last clear period are all responsible for the
387 * problem, but the alternative is to assume it's the only
388 * ones on this particular interrupt, which also isn't great
389 */
390 dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
391 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
392 ~dd->ipath_maskederrs);
393 ipath_decode_err(msg, sizeof msg,
394 (dd->ipath_maskederrs & ~dd->
395 ipath_ignorederrs));
396
397 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
398 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
399 ipath_dev_err(dd, "Disabling error(s) %llx because "
400 "occuring too frequently (%s)\n",
401 (unsigned long long)
402 (dd->ipath_maskederrs &
403 ~dd->ipath_ignorederrs), msg);
404 else {
405 /*
406 * rcvegrfull and rcvhdrqfull are "normal",
407 * for some types of processes (mostly benchmarks)
408 * that send huge numbers of messages, while not
409 * processing them. So only complain about
410 * these at debug level.
411 */
412 ipath_dbg("Disabling frequent queue full errors "
413 "(%s)\n", msg);
414 }
415
416 /*
417 * Re-enable the masked errors after around 3 minutes. in
418 * ipath_get_faststats(). If we have a series of fast
419 * repeating but different errors, the interval will keep
420 * stretching out, but that's OK, as that's pretty
421 * catastrophic.
422 */
423 dd->ipath_unmasktime = jiffies + HZ * 180;
424 }
425
426 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs);
427 if (ignore_this_time)
428 errs &= ~ignore_this_time;
429 if (errs & ~dd->ipath_lasterror) {
430 errs &= ~dd->ipath_lasterror;
431 /* never suppress duplicate hwerrors or ibstatuschange */
432 dd->ipath_lasterror |= errs &
433 ~(INFINIPATH_E_HARDWARE |
434 INFINIPATH_E_IBSTATUSCHANGED);
435 }
436 if (!errs)
437 return;
438
439 if (!noprint)
440 /*
441 * the ones we mask off are handled specially below or above
442 */
443 ipath_decode_err(msg, sizeof msg,
444 errs & ~(INFINIPATH_E_IBSTATUSCHANGED |
445 INFINIPATH_E_RRCVEGRFULL |
446 INFINIPATH_E_RRCVHDRFULL |
447 INFINIPATH_E_HARDWARE));
448 else
449 /* so we don't need if (!noprint) at strlcat's below */
450 *msg = 0;
451
452 if (errs & E_SUM_PKTERRS) {
453 ipath_stats.sps_pkterrs++;
454 chkerrpkts = 1;
455 }
456 if (errs & E_SUM_ERRS)
457 ipath_stats.sps_errs++;
458
459 if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) {
460 ipath_stats.sps_crcerrs++;
461 chkerrpkts = 1;
462 }
463
464 /*
465 * We don't want to print these two as they happen, or we can make
466 * the situation even worse, because it takes so long to print
467 * messages to serial consoles. Kernel ports get printed from
468 * fast_stats, no more than every 5 seconds, user ports get printed
469 * on close
470 */
471 if (errs & INFINIPATH_E_RRCVHDRFULL) {
472 int any;
473 u32 hd, tl;
474 ipath_stats.sps_hdrqfull++;
475 for (any = i = 0; i < dd->ipath_cfgports; i++) {
476 struct ipath_portdata *pd = dd->ipath_pd[i];
477 if (i == 0) {
478 hd = dd->ipath_port0head;
479 tl = (u32) le64_to_cpu(
480 *dd->ipath_hdrqtailptr);
481 } else if (pd && pd->port_cnt &&
482 pd->port_rcvhdrtail_kvaddr) {
483 /*
484 * don't report same point multiple times,
485 * except kernel
486 */
487 tl = (u32) * pd->port_rcvhdrtail_kvaddr;
488 if (tl == dd->ipath_lastrcvhdrqtails[i])
489 continue;
490 hd = ipath_read_ureg32(dd, ur_rcvhdrhead,
491 i);
492 } else
493 continue;
494 if (hd == (tl + 1) ||
495 (!hd && tl == dd->ipath_hdrqlast)) {
496 dd->ipath_lastrcvhdrqtails[i] = tl;
497 pd->port_hdrqfull++;
498 if (i == 0)
499 chkerrpkts = 1;
500 }
501 }
502 }
503 if (errs & INFINIPATH_E_RRCVEGRFULL) {
504 /*
505 * since this is of less importance and not likely to
506 * happen without also getting hdrfull, only count
507 * occurrences; don't check each port (or even the kernel
508 * vs user)
509 */
510 ipath_stats.sps_etidfull++;
511 if (dd->ipath_port0head !=
512 (u32) le64_to_cpu(*dd->ipath_hdrqtailptr))
513 chkerrpkts = 1;
514 }
515
516 /*
517 * do this before IBSTATUSCHANGED, in case both bits set in a single
518 * interrupt; we want the STATUSCHANGE to "win", so we do our
519 * internal copy of state machine correctly
520 */
521 if (errs & INFINIPATH_E_RIBLOSTLINK) {
522 /*
523 * force through block below
524 */
525 errs |= INFINIPATH_E_IBSTATUSCHANGED;
526 ipath_stats.sps_iblink++;
527 dd->ipath_flags |= IPATH_LINKDOWN;
528 dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
529 | IPATH_LINKARMED | IPATH_LINKACTIVE);
530 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
531 if (!noprint) {
532 u64 st = ipath_read_kreg64(
533 dd, dd->ipath_kregs->kr_ibcstatus);
534
535 ipath_dbg("Lost link, link now down (%s)\n",
536 ipath_ibcstatus_str[st & 0xf]);
537 }
538 }
539 if (errs & INFINIPATH_E_IBSTATUSCHANGED)
540 handle_e_ibstatuschanged(dd, errs, noprint);
541
542 if (errs & INFINIPATH_E_RESET) {
543 if (!noprint)
544 ipath_dev_err(dd, "Got reset, requires re-init "
545 "(unload and reload driver)\n");
546 dd->ipath_flags &= ~IPATH_INITTED; /* needs re-init */
547 /* mark as having had error */
548 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
549 *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
550 }
551
552 if (!noprint && *msg)
553 ipath_dev_err(dd, "%s error\n", msg);
554 if (dd->ipath_sma_state_wanted & dd->ipath_flags) {
555 ipath_cdbg(VERBOSE, "sma wanted state %x, iflags now %x, "
556 "waking\n", dd->ipath_sma_state_wanted,
557 dd->ipath_flags);
558 wake_up_interruptible(&ipath_sma_state_wait);
559 }
560
561 if (chkerrpkts)
562 /* process possible error packets in hdrq */
563 ipath_kreceive(dd);
564}
565
566/* this is separate to allow for better optimization of ipath_intr() */
567
568static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp)
569{
570 /*
571 * sometimes happen during driver init and unload, don't want
572 * to process any interrupts at that point
573 */
574
575 /* this is just a bandaid, not a fix, if something goes badly
576 * wrong */
577 if (++*unexpectp > 100) {
578 if (++*unexpectp > 105) {
579 /*
580 * ok, we must be taking somebody else's interrupts,
581 * due to a messed up mptable and/or PIRQ table, so
582 * unregister the interrupt. We've seen this during
583 * linuxbios development work, and it may happen in
584 * the future again.
585 */
586 if (dd->pcidev && dd->pcidev->irq) {
587 ipath_dev_err(dd, "Now %u unexpected "
588 "interrupts, unregistering "
589 "interrupt handler\n",
590 *unexpectp);
591 ipath_dbg("free_irq of irq %x\n",
592 dd->pcidev->irq);
593 free_irq(dd->pcidev->irq, dd);
594 }
595 }
596 if (ipath_read_kreg32(dd, dd->ipath_kregs->kr_intmask)) {
597 ipath_dev_err(dd, "%u unexpected interrupts, "
598 "disabling interrupts completely\n",
599 *unexpectp);
600 /*
601 * disable all interrupts, something is very wrong
602 */
603 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
604 0ULL);
605 }
606 } else if (*unexpectp > 1)
607 ipath_dbg("Interrupt when not ready, should not happen, "
608 "ignoring\n");
609}
610
611static void ipath_bad_regread(struct ipath_devdata *dd)
612{
613 static int allbits;
614
615 /* separate routine, for better optimization of ipath_intr() */
616
617 /*
618 * We print the message and disable interrupts, in hope of
619 * having a better chance of debugging the problem.
620 */
621 ipath_dev_err(dd,
622 "Read of interrupt status failed (all bits set)\n");
623 if (allbits++) {
624 /* disable all interrupts, something is very wrong */
625 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
626 if (allbits == 2) {
627 ipath_dev_err(dd, "Still bad interrupt status, "
628 "unregistering interrupt\n");
629 free_irq(dd->pcidev->irq, dd);
630 } else if (allbits > 2) {
631 if ((allbits % 10000) == 0)
632 printk(".");
633 } else
634 ipath_dev_err(dd, "Disabling interrupts, "
635 "multiple errors\n");
636 }
637}
638
639static void handle_port_pioavail(struct ipath_devdata *dd)
640{
641 u32 i;
642 /*
643 * start from port 1, since for now port 0 is never using
644 * wait_event for PIO
645 */
646 for (i = 1; dd->ipath_portpiowait && i < dd->ipath_cfgports; i++) {
647 struct ipath_portdata *pd = dd->ipath_pd[i];
648
649 if (pd && pd->port_cnt &&
650 dd->ipath_portpiowait & (1U << i)) {
651 clear_bit(i, &dd->ipath_portpiowait);
652 if (test_bit(IPATH_PORT_WAITING_PIO,
653 &pd->port_flag)) {
654 clear_bit(IPATH_PORT_WAITING_PIO,
655 &pd->port_flag);
656 wake_up_interruptible(&pd->port_wait);
657 }
658 }
659 }
660}
661
662static void handle_layer_pioavail(struct ipath_devdata *dd)
663{
664 int ret;
665
666 ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
667 if (ret > 0)
668 goto clear;
669
670 ret = __ipath_verbs_piobufavail(dd);
671 if (ret > 0)
672 goto clear;
673
674 return;
675clear:
676 set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
677 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
678 dd->ipath_sendctrl);
679}
680
681static void handle_rcv(struct ipath_devdata *dd, u32 istat)
682{
683 u64 portr;
684 int i;
685 int rcvdint = 0;
686
687 portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) &
688 infinipath_i_rcvavail_mask)
689 | ((istat >> INFINIPATH_I_RCVURG_SHIFT) &
690 infinipath_i_rcvurg_mask);
691 for (i = 0; i < dd->ipath_cfgports; i++) {
692 struct ipath_portdata *pd = dd->ipath_pd[i];
693 if (portr & (1 << i) && pd &&
694 pd->port_cnt) {
695 if (i == 0)
696 ipath_kreceive(dd);
697 else if (test_bit(IPATH_PORT_WAITING_RCV,
698 &pd->port_flag)) {
699 int rcbit;
700 clear_bit(IPATH_PORT_WAITING_RCV,
701 &pd->port_flag);
702 rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT;
703 clear_bit(1UL << rcbit, &dd->ipath_rcvctrl);
704 wake_up_interruptible(&pd->port_wait);
705 rcvdint = 1;
706 }
707 }
708 }
709 if (rcvdint) {
710 /* only want to take one interrupt, so turn off the rcv
711 * interrupt for all the ports that we did the wakeup on
712 * (but never for kernel port)
713 */
714 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
715 dd->ipath_rcvctrl);
716 }
717}
718
719irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
720{
721 struct ipath_devdata *dd = data;
722 u32 istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus);
723 ipath_err_t estat = 0;
724 static unsigned unexpected = 0;
725 irqreturn_t ret;
726
727 if (unlikely(!istat)) {
728 ipath_stats.sps_nullintr++;
729 ret = IRQ_NONE; /* not our interrupt, or already handled */
730 goto bail;
731 }
732 if (unlikely(istat == -1)) {
733 ipath_bad_regread(dd);
734 /* don't know if it was our interrupt or not */
735 ret = IRQ_NONE;
736 goto bail;
737 }
738
739 ipath_stats.sps_ints++;
740
741 /*
742 * this needs to be flags&initted, not statusp, so we keep
743 * taking interrupts even after link goes down, etc.
744 * Also, we *must* clear the interrupt at some point, or we won't
745 * take it again, which can be real bad for errors, etc...
746 */
747
748 if (!(dd->ipath_flags & IPATH_INITTED)) {
749 ipath_bad_intr(dd, &unexpected);
750 ret = IRQ_NONE;
751 goto bail;
752 }
753 if (unexpected)
754 unexpected = 0;
755
756 ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat);
757
758 if (istat & ~infinipath_i_bitsextant)
759 ipath_dev_err(dd,
760 "interrupt with unknown interrupts %x set\n",
761 istat & (u32) ~ infinipath_i_bitsextant);
762
763 if (istat & INFINIPATH_I_ERROR) {
764 ipath_stats.sps_errints++;
765 estat = ipath_read_kreg64(dd,
766 dd->ipath_kregs->kr_errorstatus);
767 if (!estat)
768 dev_info(&dd->pcidev->dev, "error interrupt (%x), "
769 "but no error bits set!\n", istat);
770 else if (estat == -1LL)
771 /*
772 * should we try clearing all, or hope next read
773 * works?
774 */
775 ipath_dev_err(dd, "Read of error status failed "
776 "(all bits set); ignoring\n");
777 else
778 handle_errors(dd, estat);
779 }
780
781 if (istat & INFINIPATH_I_GPIO) {
782 if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) {
783 u32 gpiostatus;
784 gpiostatus = ipath_read_kreg32(
785 dd, dd->ipath_kregs->kr_gpio_status);
786 ipath_dbg("Unexpected GPIO interrupt bits %x\n",
787 gpiostatus);
788 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
789 gpiostatus);
790 }
791 else {
792 /* Clear GPIO status bit 2 */
793 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
794 (u64) (1 << 2));
795
796 /*
797 * Packets are available in the port 0 rcv queue.
798 * Eventually this needs to be generalized to check
799 * IPATH_GPIO_INTR, and the specific GPIO bit, if
800 * GPIO interrupts are used for anything else.
801 */
802 ipath_kreceive(dd);
803 }
804 }
805
806 /*
807 * clear the ones we will deal with on this round
808 * We clear it early, mostly for receive interrupts, so we
809 * know the chip will have seen this by the time we process
810 * the queue, and will re-interrupt if necessary. The processor
811 * itself won't take the interrupt again until we return.
812 */
813 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);
814
815 if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
816 clear_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
817 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
818 dd->ipath_sendctrl);
819
820 if (dd->ipath_portpiowait)
821 handle_port_pioavail(dd);
822
823 handle_layer_pioavail(dd);
824 }
825
826 /*
827 * we check for both transition from empty to non-empty, and urgent
828 * packets (those with the interrupt bit set in the header)
829 */
830
831 if (istat & ((infinipath_i_rcvavail_mask <<
832 INFINIPATH_I_RCVAVAIL_SHIFT)
833 | (infinipath_i_rcvurg_mask <<
834 INFINIPATH_I_RCVURG_SHIFT)))
835 handle_rcv(dd, istat);
836
837 ret = IRQ_HANDLED;
838
839bail:
840 return ret;
841}
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
new file mode 100644
index 000000000000..fe209137ee74
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -0,0 +1,303 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/pci.h>
34
35#include "ipath_kernel.h"
36
37struct infinipath_stats ipath_stats;
38
39/**
40 * ipath_snap_cntr - snapshot a chip counter
41 * @dd: the infinipath device
42 * @creg: the counter to snapshot
43 *
44 * called from add_timer and user counter read calls, to deal with
45 * counters that wrap in "human time". The words sent and received, and
46 * the packets sent and received are all that we worry about. For now,
47 * at least, we don't worry about error counters, because if they wrap
48 * that quickly, we probably don't care. We may eventually just make this
49 * handle all the counters. word counters can wrap in about 20 seconds
50 * of full bandwidth traffic, packet counters in a few hours.
51 */
52
53u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
54{
55 u32 val, reg64 = 0;
56 u64 val64;
57 unsigned long t0, t1;
58 u64 ret;
59
60 t0 = jiffies;
61 /* If fast increment counters are only 32 bits, snapshot them,
62 * and maintain them as 64bit values in the driver */
63 if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) &&
64 (creg == dd->ipath_cregs->cr_wordsendcnt ||
65 creg == dd->ipath_cregs->cr_wordrcvcnt ||
66 creg == dd->ipath_cregs->cr_pktsendcnt ||
67 creg == dd->ipath_cregs->cr_pktrcvcnt)) {
68 val64 = ipath_read_creg(dd, creg);
69 val = val64 == ~0ULL ? ~0U : 0;
70 reg64 = 1;
71 } else /* val64 just to keep gcc quiet... */
72 val64 = val = ipath_read_creg32(dd, creg);
73 /*
74 * See if a second has passed. This is just a way to detect things
75 * that are quite broken. Normally this should take just a few
76 * cycles (the check is for long enough that we don't care if we get
77 * pre-empted.) An Opteron HT O read timeout is 4 seconds with
78 * normal NB values
79 */
80 t1 = jiffies;
81 if (time_before(t0 + HZ, t1) && val == -1) {
82 ipath_dev_err(dd, "Error! Read counter 0x%x timed out\n",
83 creg);
84 ret = 0ULL;
85 goto bail;
86 }
87 if (reg64) {
88 ret = val64;
89 goto bail;
90 }
91
92 if (creg == dd->ipath_cregs->cr_wordsendcnt) {
93 if (val != dd->ipath_lastsword) {
94 dd->ipath_sword += val - dd->ipath_lastsword;
95 dd->ipath_lastsword = val;
96 }
97 val64 = dd->ipath_sword;
98 } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
99 if (val != dd->ipath_lastrword) {
100 dd->ipath_rword += val - dd->ipath_lastrword;
101 dd->ipath_lastrword = val;
102 }
103 val64 = dd->ipath_rword;
104 } else if (creg == dd->ipath_cregs->cr_pktsendcnt) {
105 if (val != dd->ipath_lastspkts) {
106 dd->ipath_spkts += val - dd->ipath_lastspkts;
107 dd->ipath_lastspkts = val;
108 }
109 val64 = dd->ipath_spkts;
110 } else if (creg == dd->ipath_cregs->cr_pktrcvcnt) {
111 if (val != dd->ipath_lastrpkts) {
112 dd->ipath_rpkts += val - dd->ipath_lastrpkts;
113 dd->ipath_lastrpkts = val;
114 }
115 val64 = dd->ipath_rpkts;
116 } else
117 val64 = (u64) val;
118
119 ret = val64;
120
121bail:
122 return ret;
123}
124
125/**
126 * ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports
127 * @dd: the infinipath device
128 *
129 * print the delta of egrfull/hdrqfull errors for kernel ports no more than
130 * every 5 seconds. User processes are printed at close, but kernel doesn't
131 * close, so... Separate routine so may call from other places someday, and
132 * so function name when printed by _IPATH_INFO is meaningfull
133 */
134static void ipath_qcheck(struct ipath_devdata *dd)
135{
136 static u64 last_tot_hdrqfull;
137 size_t blen = 0;
138 char buf[128];
139
140 *buf = 0;
141 if (dd->ipath_pd[0]->port_hdrqfull != dd->ipath_p0_hdrqfull) {
142 blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u",
143 dd->ipath_pd[0]->port_hdrqfull -
144 dd->ipath_p0_hdrqfull);
145 dd->ipath_p0_hdrqfull = dd->ipath_pd[0]->port_hdrqfull;
146 }
147 if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) {
148 blen += snprintf(buf + blen, sizeof buf - blen,
149 "%srcvegrfull %llu",
150 blen ? ", " : "",
151 (unsigned long long)
152 (ipath_stats.sps_etidfull -
153 dd->ipath_last_tidfull));
154 dd->ipath_last_tidfull = ipath_stats.sps_etidfull;
155 }
156
157 /*
158 * this is actually the number of hdrq full interrupts, not actual
159 * events, but at the moment that's mostly what I'm interested in.
160 * Actual count, etc. is in the counters, if needed. For production
161 * users this won't ordinarily be printed.
162 */
163
164 if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) &&
165 ipath_stats.sps_hdrqfull != last_tot_hdrqfull) {
166 blen += snprintf(buf + blen, sizeof buf - blen,
167 "%shdrqfull %llu (all ports)",
168 blen ? ", " : "",
169 (unsigned long long)
170 (ipath_stats.sps_hdrqfull -
171 last_tot_hdrqfull));
172 last_tot_hdrqfull = ipath_stats.sps_hdrqfull;
173 }
174 if (blen)
175 ipath_dbg("%s\n", buf);
176
177 if (dd->ipath_port0head != (u32)
178 le64_to_cpu(*dd->ipath_hdrqtailptr)) {
179 if (dd->ipath_lastport0rcv_cnt ==
180 ipath_stats.sps_port0pkts) {
181 ipath_cdbg(PKT, "missing rcv interrupts? "
182 "port0 hd=%llx tl=%x; port0pkts %llx\n",
183 (unsigned long long)
184 le64_to_cpu(*dd->ipath_hdrqtailptr),
185 dd->ipath_port0head,
186 (unsigned long long)
187 ipath_stats.sps_port0pkts);
188 ipath_kreceive(dd);
189 }
190 dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts;
191 }
192}
193
194/**
195 * ipath_get_faststats - get word counters from chip before they overflow
196 * @opaque - contains a pointer to the infinipath device ipath_devdata
197 *
198 * called from add_timer
199 */
200void ipath_get_faststats(unsigned long opaque)
201{
202 struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
203 u32 val;
204 static unsigned cnt;
205
206 /*
207 * don't access the chip while running diags, or memory diags can
208 * fail
209 */
210 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT) ||
211 ipath_diag_inuse)
212 /* but re-arm the timer, for diags case; won't hurt other */
213 goto done;
214
215 if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
216 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
217 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
218 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
219 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
220 }
221
222 ipath_qcheck(dd);
223
224 /*
225 * deal with repeat error suppression. Doesn't really matter if
226 * last error was almost a full interval ago, or just a few usecs
227 * ago; still won't get more than 2 per interval. We may want
228 * longer intervals for this eventually, could do with mod, counter
229 * or separate timer. Also see code in ipath_handle_errors() and
230 * ipath_handle_hwerrors().
231 */
232
233 if (dd->ipath_lasterror)
234 dd->ipath_lasterror = 0;
235 if (dd->ipath_lasthwerror)
236 dd->ipath_lasthwerror = 0;
237 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
238 && time_after(jiffies, dd->ipath_unmasktime)) {
239 char ebuf[256];
240 ipath_decode_err(ebuf, sizeof ebuf,
241 (dd->ipath_maskederrs & ~dd->
242 ipath_ignorederrs));
243 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
244 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
245 ipath_dev_err(dd, "Re-enabling masked errors "
246 "(%s)\n", ebuf);
247 else {
248 /*
249 * rcvegrfull and rcvhdrqfull are "normal", for some
250 * types of processes (mostly benchmarks) that send
251 * huge numbers of messages, while not processing
252 * them. So only complain about these at debug
253 * level.
254 */
255 ipath_dbg("Disabling frequent queue full errors "
256 "(%s)\n", ebuf);
257 }
258 dd->ipath_maskederrs = dd->ipath_ignorederrs;
259 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
260 ~dd->ipath_maskederrs);
261 }
262
263 /* limit qfull messages to ~one per minute per port */
264 if ((++cnt & 0x10)) {
265 for (val = dd->ipath_cfgports - 1; ((int)val) >= 0;
266 val--) {
267 if (dd->ipath_lastegrheads[val] != -1)
268 dd->ipath_lastegrheads[val] = -1;
269 if (dd->ipath_lastrcvhdrqtails[val] != -1)
270 dd->ipath_lastrcvhdrqtails[val] = -1;
271 }
272 }
273
274 if (dd->ipath_nosma_bufs) {
275 dd->ipath_nosma_secs += 5;
276 if (dd->ipath_nosma_secs >= 30) {
277 ipath_cdbg(SMA, "No SMA bufs avail %u seconds; "
278 "cancelling pending sends\n",
279 dd->ipath_nosma_secs);
280 /*
281 * issue an abort as well, in case we have a packet
282 * stuck in launch fifo. This could corrupt an
283 * outgoing user packet in the worst case,
284 * but this is a pretty catastrophic, anyway.
285 */
286 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
287 INFINIPATH_S_ABORT);
288 ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
289 dd->ipath_piobcnt2k +
290 dd->ipath_piobcnt4k -
291 dd->ipath_lastport_piobuf);
292 /* start again, if necessary */
293 dd->ipath_nosma_secs = 0;
294 } else
295 ipath_cdbg(SMA, "No SMA bufs avail %u tries, "
296 "after %u seconds\n",
297 dd->ipath_nosma_bufs,
298 dd->ipath_nosma_secs);
299 }
300
301done:
302 mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
303}
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
new file mode 100644
index 000000000000..adc5322f15c1
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
@@ -0,0 +1,157 @@
1/*
2 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33/*
34 * This file is conditionally built on x86_64 only. Otherwise weak symbol
35 * versions of the functions exported from here are used.
36 */
37
38#include <linux/pci.h>
39#include <asm/mtrr.h>
40#include <asm/processor.h>
41
42#include "ipath_kernel.h"
43
44/**
45 * ipath_enable_wc - enable write combining for MMIO writes to the device
46 * @dd: infinipath device
47 *
48 * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
49 * write combining.
50 */
51int ipath_enable_wc(struct ipath_devdata *dd)
52{
53 int ret = 0;
54 u64 pioaddr, piolen;
55 unsigned bits;
56 const unsigned long addr = pci_resource_start(dd->pcidev, 0);
57 const size_t len = pci_resource_len(dd->pcidev, 0);
58
59 /*
60 * Set the PIO buffers to be WCCOMB, so we get HT bursts to the
61 * chip. Linux (possibly the hardware) requires it to be on a power
62 * of 2 address matching the length (which has to be a power of 2).
63 * For rev1, that means the base address, for rev2, it will be just
64 * the PIO buffers themselves.
65 */
66 pioaddr = addr + dd->ipath_piobufbase;
67 piolen = (dd->ipath_piobcnt2k +
68 dd->ipath_piobcnt4k) *
69 ALIGN(dd->ipath_piobcnt2k +
70 dd->ipath_piobcnt4k, dd->ipath_palign);
71
72 for (bits = 0; !(piolen & (1ULL << bits)); bits++)
73 /* do nothing */ ;
74
75 if (piolen != (1ULL << bits)) {
76 piolen >>= bits;
77 while (piolen >>= 1)
78 bits++;
79 piolen = 1ULL << (bits + 1);
80 }
81 if (pioaddr & (piolen - 1)) {
82 u64 atmp;
83 ipath_dbg("pioaddr %llx not on right boundary for size "
84 "%llx, fixing\n",
85 (unsigned long long) pioaddr,
86 (unsigned long long) piolen);
87 atmp = pioaddr & ~(piolen - 1);
88 if (atmp < addr || (atmp + piolen) > (addr + len)) {
89 ipath_dev_err(dd, "No way to align address/size "
90 "(%llx/%llx), no WC mtrr\n",
91 (unsigned long long) atmp,
92 (unsigned long long) piolen << 1);
93 ret = -ENODEV;
94 } else {
95 ipath_dbg("changing WC base from %llx to %llx, "
96 "len from %llx to %llx\n",
97 (unsigned long long) pioaddr,
98 (unsigned long long) atmp,
99 (unsigned long long) piolen,
100 (unsigned long long) piolen << 1);
101 pioaddr = atmp;
102 piolen <<= 1;
103 }
104 }
105
106 if (!ret) {
107 int cookie;
108 ipath_cdbg(VERBOSE, "Setting mtrr for chip to WC "
109 "(addr %llx, len=0x%llx)\n",
110 (unsigned long long) pioaddr,
111 (unsigned long long) piolen);
112 cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0);
113 if (cookie < 0) {
114 {
115 dev_info(&dd->pcidev->dev,
116 "mtrr_add() WC for PIO bufs "
117 "failed (%d)\n",
118 cookie);
119 ret = -EINVAL;
120 }
121 } else {
122 ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, "
123 "cookie is %d\n", cookie);
124 dd->ipath_wc_cookie = cookie;
125 }
126 }
127
128 return ret;
129}
130
131/**
132 * ipath_disable_wc - disable write combining for MMIO writes to the device
133 * @dd: infinipath device
134 */
135void ipath_disable_wc(struct ipath_devdata *dd)
136{
137 if (dd->ipath_wc_cookie) {
138 ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n");
139 mtrr_del(dd->ipath_wc_cookie, 0, 0);
140 dd->ipath_wc_cookie = 0;
141 }
142}
143
144/**
145 * ipath_unordered_wc - indicate whether write combining is ordered
146 *
147 * Because our performance depends on our ability to do write combining mmio
148 * writes in the most efficient way, we need to know if we are on an Intel
149 * or AMD x86_64 processor. AMD x86_64 processors flush WC buffers out in
150 * the order completed, and so no special flushing is required to get
151 * correct ordering. Intel processors, however, will flush write buffers
152 * out in "random" orders, and so explicit ordering is needed at times.
153 */
154int ipath_unordered_wc(void)
155{
156 return boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
157}