Diffstat (limited to 'drivers/infiniband/hw/qib/qib_pcie.c')
-rw-r--r--	drivers/infiniband/hw/qib/qib_pcie.c	738
1 files changed, 738 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
new file mode 100644
index 000000000000..c926bf4541df
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -0,0 +1,738 @@
/*
 * Copyright (c) 2008, 2009 QLogic Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
32 | |||
33 | #include <linux/pci.h> | ||
34 | #include <linux/io.h> | ||
35 | #include <linux/delay.h> | ||
36 | #include <linux/vmalloc.h> | ||
37 | #include <linux/aer.h> | ||
38 | |||
39 | #include "qib.h" | ||
40 | |||
41 | /* | ||
42 | * This file contains PCIe utility routines that are common to the | ||
43 | * various QLogic InfiniPath adapters | ||
44 | */ | ||
45 | |||
46 | /* | ||
47 | * Code to adjust PCIe capabilities. | ||
48 | * To minimize the change footprint, we call it | ||
49 | * from qib_pcie_params, which every chip-specific | ||
50 | * file calls, even though this violates some | ||
51 | * expectations of harmlessness. | ||
52 | */ | ||
static int qib_tune_pcie_caps(struct qib_devdata *);
static int qib_tune_pcie_coalesce(struct qib_devdata *);

/*
 * Do all the common PCIe setup and initialization.
 * devdata is not yet allocated, and is not allocated until after this
 * routine returns success.  Therefore qib_dev_err() can't be used for error
 * printing.
 */
int qib_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int ret;

	ret = pci_enable_device(pdev);
	if (ret) {
		/*
		 * This can happen (in theory) iff:
		 * We did a chip reset, and then failed to reprogram the
		 * BAR, or the chip reset due to an internal error.  We then
		 * unloaded the driver and reloaded it.
		 *
		 * Both reset cases set the BAR back to initial state.  For
		 * the latter case, the AER sticky error bit at offset 0x718
		 * should be set, but the Linux kernel doesn't yet know
		 * about that, it appears.  If the original BAR was retained
		 * in the kernel data structures, this may be OK.
		 */
		qib_early_err(&pdev->dev, "pci enable failed: error %d\n",
			      -ret);
		goto done;
	}

	ret = pci_request_regions(pdev, QIB_DRV_NAME);
	if (ret) {
		qib_devinfo(pdev, "pci_request_regions fails: err %d\n", -ret);
		goto bail;
	}

	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (ret) {
		/*
		 * If the 64 bit setup fails, try 32 bit.  Some systems
		 * do not setup 64 bit maps on systems with 2GB or less
		 * memory installed.
		 */
		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (ret) {
			qib_devinfo(pdev, "Unable to set DMA mask: %d\n", ret);
			goto bail;
		}
		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
	} else
		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (ret)
		qib_early_err(&pdev->dev,
			      "Unable to set DMA consistent mask: %d\n", ret);

	pci_set_master(pdev);
	ret = pci_enable_pcie_error_reporting(pdev);
	if (ret)
		qib_early_err(&pdev->dev,
			      "Unable to enable pcie error reporting: %d\n",
			      ret);
	goto done;

bail:
	pci_disable_device(pdev);
	pci_release_regions(pdev);
done:
	return ret;
}
124 | |||
125 | /* | ||
126 | * Do remaining PCIe setup, once dd is allocated, and save away | ||
127 | * fields required to re-initialize after a chip reset, or for | ||
128 | * various other purposes | ||
129 | */ | ||
130 | int qib_pcie_ddinit(struct qib_devdata *dd, struct pci_dev *pdev, | ||
131 | const struct pci_device_id *ent) | ||
132 | { | ||
133 | unsigned long len; | ||
134 | resource_size_t addr; | ||
135 | |||
136 | dd->pcidev = pdev; | ||
137 | pci_set_drvdata(pdev, dd); | ||
138 | |||
139 | addr = pci_resource_start(pdev, 0); | ||
140 | len = pci_resource_len(pdev, 0); | ||
141 | |||
142 | #if defined(__powerpc__) | ||
143 | /* There isn't a generic way to specify writethrough mappings */ | ||
144 | dd->kregbase = __ioremap(addr, len, _PAGE_NO_CACHE | _PAGE_WRITETHRU); | ||
145 | #else | ||
146 | dd->kregbase = ioremap_nocache(addr, len); | ||
147 | #endif | ||
148 | |||
149 | if (!dd->kregbase) | ||
150 | return -ENOMEM; | ||
151 | |||
152 | dd->kregend = (u64 __iomem *)((void __iomem *) dd->kregbase + len); | ||
153 | dd->physaddr = addr; /* used for io_remap, etc. */ | ||
154 | |||
155 | /* | ||
156 | * Save BARs to rewrite after device reset. Save all 64 bits of | ||
157 | * BAR, just in case. | ||
158 | */ | ||
159 | dd->pcibar0 = addr; | ||
160 | dd->pcibar1 = addr >> 32; | ||
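
	/*
	 * Illustrative example (not driver logic): with a 64-bit BAR at
	 * addr == 0x00000001fe000000, pcibar0 ends up 0xfe000000 (low
	 * dword) and pcibar1 ends up 0x1 (high dword); these are the
	 * values qib_pcie_reenable() later writes back to
	 * PCI_BASE_ADDRESS_0/1.
	 */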
	dd->deviceid = ent->device; /* save for later use */
	dd->vendorid = ent->vendor;

	return 0;
}

/*
 * Do PCIe cleanup, after chip-specific cleanup, etc.  Just prior
 * to releasing the dd memory.
 * void because all of the core PCIe cleanup functions just return void.
 */
void qib_pcie_ddcleanup(struct qib_devdata *dd)
{
	u64 __iomem *base = (void __iomem *) dd->kregbase;

	dd->kregbase = NULL;
	iounmap(base);
	if (dd->piobase)
		iounmap(dd->piobase);
	if (dd->userbase)
		iounmap(dd->userbase);

	pci_disable_device(dd->pcidev);
	pci_release_regions(dd->pcidev);

	pci_set_drvdata(dd->pcidev, NULL);
}
188 | |||
189 | static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt, | ||
190 | struct msix_entry *msix_entry) | ||
191 | { | ||
192 | int ret; | ||
193 | u32 tabsize = 0; | ||
194 | u16 msix_flags; | ||
195 | |||
196 | pci_read_config_word(dd->pcidev, pos + PCI_MSIX_FLAGS, &msix_flags); | ||
197 | tabsize = 1 + (msix_flags & PCI_MSIX_FLAGS_QSIZE); | ||
198 | if (tabsize > *msixcnt) | ||
199 | tabsize = *msixcnt; | ||
200 | ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize); | ||
201 | if (ret > 0) { | ||
202 | tabsize = ret; | ||
203 | ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize); | ||
204 | } | ||
205 | if (ret) { | ||
206 | qib_dev_err(dd, "pci_enable_msix %d vectors failed: %d, " | ||
207 | "falling back to INTx\n", tabsize, ret); | ||
208 | tabsize = 0; | ||
209 | } | ||
210 | *msixcnt = tabsize; | ||
211 | |||
212 | if (ret) | ||
213 | qib_enable_intx(dd->pcidev); | ||
214 | |||
215 | } | ||
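
/*
 * Note on the retry above (illustrative, per the pci_enable_msix()
 * contract of this era): 0 means success, a negative value is an
 * error, and a positive value is the number of vectors actually
 * available.  So a first call asking for 16 vectors may return 8,
 * and the retry with tabsize == 8 then succeeds.
 */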
216 | |||
217 | /** | ||
218 | * We save the msi lo and hi values, so we can restore them after | ||
219 | * chip reset (the kernel PCI infrastructure doesn't yet handle that | ||
220 | * correctly. | ||
221 | */ | ||
static int qib_msi_setup(struct qib_devdata *dd, int pos)
{
	struct pci_dev *pdev = dd->pcidev;
	u16 control;
	int ret;

	ret = pci_enable_msi(pdev);
	if (ret)
		qib_dev_err(dd, "pci_enable_msi failed: %d, "
			    "interrupts may not work\n", ret);
	/* continue even if it fails, we may still be OK... */

	pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO,
			      &dd->msi_lo);
	pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI,
			      &dd->msi_hi);
	pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &control);
	/* now save the data (vector) info */
	pci_read_config_word(pdev, pos + ((control & PCI_MSI_FLAGS_64BIT)
					  ? 12 : 8),
			     &dd->msi_data);
	return ret;
}
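
/*
 * For reference (PCI spec layout, not driver-specific): the MSI
 * capability has Message Control at pos + 2, Message Address low at
 * pos + 4, then either Message Data at pos + 8 (32-bit variant), or
 * Message Address high at pos + 8 and Message Data at pos + 12
 * (64-bit variant).  Hence the "(control & PCI_MSI_FLAGS_64BIT)
 * ? 12 : 8" offset above and in qib_reinit_intr() below.
 */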
245 | |||
246 | int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, | ||
247 | struct msix_entry *entry) | ||
248 | { | ||
249 | u16 linkstat, speed; | ||
250 | int pos = 0, pose, ret = 1; | ||
251 | |||
252 | pose = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP); | ||
253 | if (!pose) { | ||
254 | qib_dev_err(dd, "Can't find PCI Express capability!\n"); | ||
255 | /* set up something... */ | ||
256 | dd->lbus_width = 1; | ||
257 | dd->lbus_speed = 2500; /* Gen1, 2.5GHz */ | ||
258 | goto bail; | ||
259 | } | ||
260 | |||
261 | pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSIX); | ||
262 | if (nent && *nent && pos) { | ||
263 | qib_msix_setup(dd, pos, nent, entry); | ||
264 | ret = 0; /* did it, either MSIx or INTx */ | ||
265 | } else { | ||
266 | pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI); | ||
267 | if (pos) | ||
268 | ret = qib_msi_setup(dd, pos); | ||
269 | else | ||
270 | qib_dev_err(dd, "No PCI MSI or MSIx capability!\n"); | ||
271 | } | ||
272 | if (!pos) | ||
273 | qib_enable_intx(dd->pcidev); | ||
274 | |||
275 | pci_read_config_word(dd->pcidev, pose + PCI_EXP_LNKSTA, &linkstat); | ||
276 | /* | ||
277 | * speed is bits 0-3, linkwidth is bits 4-8 | ||
278 | * no defines for them in headers | ||
279 | */ | ||
280 | speed = linkstat & 0xf; | ||
281 | linkstat >>= 4; | ||
282 | linkstat &= 0x1f; | ||
283 | dd->lbus_width = linkstat; | ||
284 | |||
	switch (speed) {
	case 1:
		dd->lbus_speed = 2500; /* Gen1, 2.5GHz */
		break;
	case 2:
		dd->lbus_speed = 5000; /* Gen2, 5GHz */
		break;
	default: /* not defined, assume Gen1 */
		dd->lbus_speed = 2500;
		break;
	}
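
	/*
	 * Illustrative decode (not driver logic): a LNKSTA value of
	 * 0x0081 yields speed == 1 (Gen1, 2.5GHz) and width == 8 (x8),
	 * since the low 4 bits hold the speed code and bits 4-8 the
	 * negotiated link width.
	 */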
296 | |||
297 | /* | ||
298 | * Check against expected pcie width and complain if "wrong" | ||
299 | * on first initialization, not afterwards (i.e., reset). | ||
300 | */ | ||
301 | if (minw && linkstat < minw) | ||
302 | qib_dev_err(dd, | ||
303 | "PCIe width %u (x%u HCA), performance reduced\n", | ||
304 | linkstat, minw); | ||
305 | |||
306 | qib_tune_pcie_caps(dd); | ||
307 | |||
308 | qib_tune_pcie_coalesce(dd); | ||
309 | |||
310 | bail: | ||
311 | /* fill in string, even on errors */ | ||
312 | snprintf(dd->lbus_info, sizeof(dd->lbus_info), | ||
313 | "PCIe,%uMHz,x%u\n", dd->lbus_speed, dd->lbus_width); | ||
314 | return ret; | ||
315 | } | ||
316 | |||
317 | /* | ||
318 | * Setup pcie interrupt stuff again after a reset. I'd like to just call | ||
319 | * pci_enable_msi() again for msi, but when I do that, | ||
320 | * the MSI enable bit doesn't get set in the command word, and | ||
321 | * we switch to to a different interrupt vector, which is confusing, | ||
322 | * so I instead just do it all inline. Perhaps somehow can tie this | ||
323 | * into the PCIe hotplug support at some point | ||
324 | */ | ||
int qib_reinit_intr(struct qib_devdata *dd)
{
	int pos;
	u16 control;
	int ret = 0;

	/* If we aren't using MSI, don't restore it */
	if (!dd->msi_lo)
		goto bail;

	pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI);
	if (!pos) {
		qib_dev_err(dd, "Can't find MSI capability, "
			    "can't restore MSI settings\n");
		ret = 0;
		/* nothing special for MSIx, just MSI */
		goto bail;
	}
	pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
			       dd->msi_lo);
	pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
			       dd->msi_hi);
	pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control);
	if (!(control & PCI_MSI_FLAGS_ENABLE)) {
		control |= PCI_MSI_FLAGS_ENABLE;
		pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
				      control);
	}
	/* now rewrite the data (vector) info */
	pci_write_config_word(dd->pcidev, pos +
			      ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
			      dd->msi_data);
	ret = 1;
bail:
	if (!ret && (dd->flags & QIB_HAS_INTX)) {
		qib_enable_intx(dd->pcidev);
		ret = 1;
	}

	/* and now set the pci master bit again */
	pci_set_master(dd->pcidev);

	return ret;
}
369 | |||
370 | /* | ||
371 | * Disable msi interrupt if enabled, and clear msi_lo. | ||
372 | * This is used primarily for the fallback to INTx, but | ||
373 | * is also used in reinit after reset, and during cleanup. | ||
374 | */ | ||
375 | void qib_nomsi(struct qib_devdata *dd) | ||
376 | { | ||
377 | dd->msi_lo = 0; | ||
378 | pci_disable_msi(dd->pcidev); | ||
379 | } | ||
380 | |||
381 | /* | ||
382 | * Same as qib_nosmi, but for MSIx. | ||
383 | */ | ||
384 | void qib_nomsix(struct qib_devdata *dd) | ||
385 | { | ||
386 | pci_disable_msix(dd->pcidev); | ||
387 | } | ||
388 | |||
389 | /* | ||
390 | * Similar to pci_intx(pdev, 1), except that we make sure | ||
391 | * msi(x) is off. | ||
392 | */ | ||
393 | void qib_enable_intx(struct pci_dev *pdev) | ||
394 | { | ||
395 | u16 cw, new; | ||
396 | int pos; | ||
397 | |||
398 | /* first, turn on INTx */ | ||
399 | pci_read_config_word(pdev, PCI_COMMAND, &cw); | ||
400 | new = cw & ~PCI_COMMAND_INTX_DISABLE; | ||
401 | if (new != cw) | ||
402 | pci_write_config_word(pdev, PCI_COMMAND, new); | ||
403 | |||
404 | pos = pci_find_capability(pdev, PCI_CAP_ID_MSI); | ||
405 | if (pos) { | ||
406 | /* then turn off MSI */ | ||
407 | pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw); | ||
408 | new = cw & ~PCI_MSI_FLAGS_ENABLE; | ||
409 | if (new != cw) | ||
410 | pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new); | ||
411 | } | ||
412 | pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX); | ||
413 | if (pos) { | ||
414 | /* then turn off MSIx */ | ||
415 | pci_read_config_word(pdev, pos + PCI_MSIX_FLAGS, &cw); | ||
416 | new = cw & ~PCI_MSIX_FLAGS_ENABLE; | ||
417 | if (new != cw) | ||
418 | pci_write_config_word(pdev, pos + PCI_MSIX_FLAGS, new); | ||
419 | } | ||
420 | } | ||
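
/*
 * Note: INTx is turned on before MSI/MSI-X are turned off above,
 * presumably so there is no window in which the device has no enabled
 * interrupt mechanism at all.
 */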
421 | |||
422 | /* | ||
423 | * These two routines are helper routines for the device reset code | ||
424 | * to move all the pcie code out of the chip-specific driver code. | ||
425 | */ | ||
426 | void qib_pcie_getcmd(struct qib_devdata *dd, u16 *cmd, u8 *iline, u8 *cline) | ||
427 | { | ||
428 | pci_read_config_word(dd->pcidev, PCI_COMMAND, cmd); | ||
429 | pci_read_config_byte(dd->pcidev, PCI_INTERRUPT_LINE, iline); | ||
430 | pci_read_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline); | ||
431 | } | ||
432 | |||
433 | void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline) | ||
434 | { | ||
435 | int r; | ||
436 | r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, | ||
437 | dd->pcibar0); | ||
438 | if (r) | ||
439 | qib_dev_err(dd, "rewrite of BAR0 failed: %d\n", r); | ||
440 | r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, | ||
441 | dd->pcibar1); | ||
442 | if (r) | ||
443 | qib_dev_err(dd, "rewrite of BAR1 failed: %d\n", r); | ||
444 | /* now re-enable memory access, and restore cosmetic settings */ | ||
445 | pci_write_config_word(dd->pcidev, PCI_COMMAND, cmd); | ||
446 | pci_write_config_byte(dd->pcidev, PCI_INTERRUPT_LINE, iline); | ||
447 | pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline); | ||
448 | r = pci_enable_device(dd->pcidev); | ||
449 | if (r) | ||
450 | qib_dev_err(dd, "pci_enable_device failed after " | ||
451 | "reset: %d\n", r); | ||
452 | } | ||
453 | |||
454 | /* code to adjust PCIe capabilities. */ | ||
455 | |||
456 | static int fld2val(int wd, int mask) | ||
457 | { | ||
458 | int lsbmask; | ||
459 | |||
460 | if (!mask) | ||
461 | return 0; | ||
462 | wd &= mask; | ||
463 | lsbmask = mask ^ (mask & (mask - 1)); | ||
464 | wd /= lsbmask; | ||
465 | return wd; | ||
466 | } | ||
467 | |||
468 | static int val2fld(int wd, int mask) | ||
469 | { | ||
470 | int lsbmask; | ||
471 | |||
472 | if (!mask) | ||
473 | return 0; | ||
474 | lsbmask = mask ^ (mask & (mask - 1)); | ||
475 | wd *= lsbmask; | ||
476 | return wd; | ||
477 | } | ||
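
/*
 * Worked example (illustrative): fld2val() right-justifies a field by
 * dividing by the mask's lowest set bit, and val2fld() shifts it back
 * by multiplying.  With PCI_EXP_DEVCTL_READRQ (0x7000, lowest set bit
 * 0x1000):
 *
 *	fld2val(0x2000, 0x7000) == 2	(the 512-byte read request code)
 *	val2fld(2, 0x7000) == 0x2000
 *
 * "mask ^ (mask & (mask - 1))" isolates that lowest set bit.
 */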
478 | |||
479 | static int qib_pcie_coalesce; | ||
480 | module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO); | ||
481 | MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets"); | ||
482 | |||
483 | /* | ||
484 | * Enable PCIe completion and data coalescing, on Intel 5x00 and 7300 | ||
485 | * chipsets. This is known to be unsafe for some revisions of some | ||
486 | * of these chipsets, with some BIOS settings, and enabling it on those | ||
487 | * systems may result in the system crashing, and/or data corruption. | ||
488 | */ | ||
static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
{
	int r;
	struct pci_dev *parent;
	int ppos;
	u16 devid;
	u32 mask, bits, val;

	if (!qib_pcie_coalesce)
		return 0;

	/* Find out supported and configured values for parent (root) */
	parent = dd->pcidev->bus->self;
	if (parent->bus->parent) {
		qib_devinfo(dd->pcidev, "Parent not root\n");
		return 1;
	}
	ppos = pci_find_capability(parent, PCI_CAP_ID_EXP);
	if (!ppos)
		return 1;
	if (parent->vendor != 0x8086)
		return 1;

	/*
	 * - bit 12: Max_rdcmp_Imt_EN: need to set to 1
	 * - bit 11: COALESCE_FORCE: need to set to 0
	 * - bit 10: COALESCE_EN: need to set to 1
	 *   (but with limitations on some chipsets)
	 *
	 * On the Intel 5000, 5100, and 7300 chipsets, there are also
	 * bits 25:24: COALESCE_MODE, need to set to 0.
	 */
	devid = parent->device;
	if (devid >= 0x25e2 && devid <= 0x25fa) {
		u8 rev;

		/* 5000 P/V/X/Z */
		pci_read_config_byte(parent, PCI_REVISION_ID, &rev);
		if (rev <= 0xb2)
			bits = 1U << 10;
		else
			bits = 7U << 10;
		mask = (3U << 24) | (7U << 10);
	} else if (devid >= 0x65e2 && devid <= 0x65fa) {
		/* 5100 */
		bits = 1U << 10;
		mask = (3U << 24) | (7U << 10);
	} else if (devid >= 0x4021 && devid <= 0x402e) {
		/* 5400 */
		bits = 7U << 10;
		mask = 7U << 10;
	} else if (devid >= 0x3604 && devid <= 0x360a) {
		/* 7300 */
		bits = 7U << 10;
		mask = (3U << 24) | (7U << 10);
	} else {
		/* not one of the chipsets that we know about */
		return 1;
	}
	pci_read_config_dword(parent, 0x48, &val);
	val &= ~mask;
	val |= bits;
	r = pci_write_config_dword(parent, 0x48, val);
	return 0;
}
554 | |||
555 | /* | ||
556 | * BIOS may not set PCIe bus-utilization parameters for best performance. | ||
557 | * Check and optionally adjust them to maximize our throughput. | ||
558 | */ | ||
559 | static int qib_pcie_caps; | ||
560 | module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO); | ||
561 | MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (4lsb), ReadReq (D4..7)"); | ||
562 | |||
static int qib_tune_pcie_caps(struct qib_devdata *dd)
{
	int ret = 1; /* Assume the worst */
	struct pci_dev *parent;
	int ppos, epos;
	u16 pcaps, pctl, ecaps, ectl;
	int rc_sup, ep_sup;
	int rc_cur, ep_cur;

	/* Find out supported and configured values for parent (root) */
	parent = dd->pcidev->bus->self;
	if (parent->bus->parent) {
		qib_devinfo(dd->pcidev, "Parent not root\n");
		goto bail;
	}
	ppos = pci_find_capability(parent, PCI_CAP_ID_EXP);
	if (ppos) {
		pci_read_config_word(parent, ppos + PCI_EXP_DEVCAP, &pcaps);
		pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl);
	} else
		goto bail;
	/* Find out supported and configured values for endpoint (us) */
	epos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP);
	if (epos) {
		pci_read_config_word(dd->pcidev, epos + PCI_EXP_DEVCAP, &ecaps);
		pci_read_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, &ectl);
	} else
		goto bail;
	ret = 0;
	/* Find max payload supported by root, endpoint */
	rc_sup = fld2val(pcaps, PCI_EXP_DEVCAP_PAYLOAD);
	ep_sup = fld2val(ecaps, PCI_EXP_DEVCAP_PAYLOAD);
	if (rc_sup > ep_sup)
		rc_sup = ep_sup;

	rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_PAYLOAD);
	ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_PAYLOAD);

	/* If Supported greater than limit in module param, limit it */
	if (rc_sup > (qib_pcie_caps & 7))
		rc_sup = qib_pcie_caps & 7;
	/* If less than (allowed, supported), bump root payload */
	if (rc_sup > rc_cur) {
		rc_cur = rc_sup;
		pctl = (pctl & ~PCI_EXP_DEVCTL_PAYLOAD) |
			val2fld(rc_cur, PCI_EXP_DEVCTL_PAYLOAD);
		pci_write_config_word(parent, ppos + PCI_EXP_DEVCTL, pctl);
	}
	/* If less than (allowed, supported), bump endpoint payload */
	if (rc_sup > ep_cur) {
		ep_cur = rc_sup;
		ectl = (ectl & ~PCI_EXP_DEVCTL_PAYLOAD) |
			val2fld(ep_cur, PCI_EXP_DEVCTL_PAYLOAD);
		pci_write_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, ectl);
	}

	/*
	 * Now the Read Request size.
	 * No field for max supported, but PCIe spec limits it to 4096,
	 * which is code '5' (log2(4096) - 7)
	 */
	rc_sup = 5;
	if (rc_sup > ((qib_pcie_caps >> 4) & 7))
		rc_sup = (qib_pcie_caps >> 4) & 7;
	rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_READRQ);
	ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_READRQ);

	if (rc_sup > rc_cur) {
		rc_cur = rc_sup;
		pctl = (pctl & ~PCI_EXP_DEVCTL_READRQ) |
			val2fld(rc_cur, PCI_EXP_DEVCTL_READRQ);
		pci_write_config_word(parent, ppos + PCI_EXP_DEVCTL, pctl);
	}
	if (rc_sup > ep_cur) {
		ep_cur = rc_sup;
		ectl = (ectl & ~PCI_EXP_DEVCTL_READRQ) |
			val2fld(ep_cur, PCI_EXP_DEVCTL_READRQ);
		pci_write_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, ectl);
	}
bail:
	return ret;
}
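
/*
 * Encoding note (illustrative): both DEVCTL fields encode a size of
 * 128 << code bytes, so code 0 == 128B ... code 5 == 4096B.  For
 * example, loading with pcie_caps=0x51 requests a 256-byte max payload
 * (low nibble 1) and a 4096-byte max read request
 * ((0x51 >> 4) & 7 == 5); the default of 0 never raises either
 * setting.
 */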
/* End of PCIe capability tuning */

/*
 * From here through the qib_pci_err_handler definition, everything is
 * invoked via the PCI error recovery infrastructure, registered with
 * the PCI core through the struct pci_error_handlers at the bottom of
 * this file.
 */
static pci_ers_result_t
qib_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
{
	struct qib_devdata *dd = pci_get_drvdata(pdev);
	pci_ers_result_t ret = PCI_ERS_RESULT_RECOVERED;

	switch (state) {
	case pci_channel_io_normal:
		qib_devinfo(pdev, "State Normal, ignoring\n");
		break;

	case pci_channel_io_frozen:
		qib_devinfo(pdev, "State Frozen, requesting reset\n");
		pci_disable_device(pdev);
		ret = PCI_ERS_RESULT_NEED_RESET;
		break;

	case pci_channel_io_perm_failure:
		qib_devinfo(pdev, "State Permanent Failure, disabling\n");
		if (dd) {
			/* no more register accesses! */
			dd->flags &= ~QIB_PRESENT;
			qib_disable_after_error(dd);
		}
		/* else early, or other problem */
		ret = PCI_ERS_RESULT_DISCONNECT;
		break;

	default: /* shouldn't happen */
		qib_devinfo(pdev, "QIB PCI errors detected (state %d)\n",
			    state);
		break;
	}
	return ret;
}
686 | |||
687 | static pci_ers_result_t | ||
688 | qib_pci_mmio_enabled(struct pci_dev *pdev) | ||
689 | { | ||
690 | u64 words = 0U; | ||
691 | struct qib_devdata *dd = pci_get_drvdata(pdev); | ||
692 | pci_ers_result_t ret = PCI_ERS_RESULT_RECOVERED; | ||
693 | |||
694 | if (dd && dd->pport) { | ||
695 | words = dd->f_portcntr(dd->pport, QIBPORTCNTR_WORDRCV); | ||
696 | if (words == ~0ULL) | ||
697 | ret = PCI_ERS_RESULT_NEED_RESET; | ||
698 | } | ||
699 | qib_devinfo(pdev, "QIB mmio_enabled function called, " | ||
700 | "read wordscntr %Lx, returning %d\n", words, ret); | ||
701 | return ret; | ||
702 | } | ||
703 | |||
704 | static pci_ers_result_t | ||
705 | qib_pci_slot_reset(struct pci_dev *pdev) | ||
706 | { | ||
707 | qib_devinfo(pdev, "QIB link_reset function called, ignored\n"); | ||
708 | return PCI_ERS_RESULT_CAN_RECOVER; | ||
709 | } | ||
710 | |||
711 | static pci_ers_result_t | ||
712 | qib_pci_link_reset(struct pci_dev *pdev) | ||
713 | { | ||
714 | qib_devinfo(pdev, "QIB link_reset function called, ignored\n"); | ||
715 | return PCI_ERS_RESULT_CAN_RECOVER; | ||
716 | } | ||
717 | |||
718 | static void | ||
719 | qib_pci_resume(struct pci_dev *pdev) | ||
720 | { | ||
721 | struct qib_devdata *dd = pci_get_drvdata(pdev); | ||
722 | qib_devinfo(pdev, "QIB resume function called\n"); | ||
723 | pci_cleanup_aer_uncorrect_error_status(pdev); | ||
724 | /* | ||
725 | * Running jobs will fail, since it's asynchronous | ||
726 | * unlike sysfs-requested reset. Better than | ||
727 | * doing nothing. | ||
728 | */ | ||
729 | qib_init(dd, 1); /* same as re-init after reset */ | ||
730 | } | ||
731 | |||
732 | struct pci_error_handlers qib_pci_err_handler = { | ||
733 | .error_detected = qib_pci_error_detected, | ||
734 | .mmio_enabled = qib_pci_mmio_enabled, | ||
735 | .link_reset = qib_pci_link_reset, | ||
736 | .slot_reset = qib_pci_slot_reset, | ||
737 | .resume = qib_pci_resume, | ||
738 | }; | ||